Skip to content

Commit

Permalink
Merge pull request #40 from henrivain/master
Browse files Browse the repository at this point in the history
Fix #38
  • Loading branch information
henrivain authored Feb 20, 2024
2 parents b9532a4 + f15fc48 commit 37ae179
Show file tree
Hide file tree
Showing 13 changed files with 166 additions and 107 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ dotnet add package TesseractOcrMaui
3. By package reference

```xml
<PackageReference Include="TesseractOcrMaui" Version="1.1.0" />
<PackageReference Include="TesseractOcrMaui" Version="1.1.6" />
```

### 2. Add package to dependency injection (see TesseractOcrMauiTestApp)
Expand Down
4 changes: 2 additions & 2 deletions TesseractOcrMaui.IOS/Properties/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,13 +13,13 @@ This package `TesseractOcrMaui.IOS` only includes bindings for iOS and should no
2. Package reference

```xml
<PackageReference Include="TesseractOcrMaui" Version="1.1.0" />
<PackageReference Include="TesseractOcrMaui" Version="1.1.6" />
```

3. Dotnet CLI

```ps
dotnet add package TesseractOcrMaui --version 1.1.0
dotnet add package TesseractOcrMaui --version 1.1.6
```

Note that you should check what the current package version is and use it in your command.
Expand Down
21 changes: 19 additions & 2 deletions TesseractOcrMaui.IOS/TesseractApi.cs
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,18 @@ public sealed partial class TesseractApi

const CharSet StrEncoding = CharSet.Ansi;

/// <summary>
/// Binding for the native <c>TessDeleteTextArray</c> entry point, called with the Cdecl convention.
/// </summary>
/// <param name="ptr">
/// Pointer handed to the native function as-is.
/// NOTE(review): presumably a string array that was allocated by the native library - confirm against the Tesseract C API.
/// NOTE(review): C# <c>char</c> is 16 bits wide while native <c>char</c> is 8 bits, so the pointee should not be read through this type - confirm.
/// </param>
[LibraryImport(DllName, EntryPoint = "TessDeleteTextArray")]
[UnmanagedCallConv(CallConvs = new Type[] { typeof(System.Runtime.CompilerServices.CallConvCdecl) })]
public static unsafe partial void DeleteStringArray(char** ptr);

/// <summary>
/// Binding for the native <c>TessDeleteIntArray</c> entry point, called with the Cdecl convention.
/// </summary>
/// <param name="ptr">
/// Pointer handed to the native function as-is.
/// NOTE(review): presumably an int array that was allocated by the native library - confirm against the Tesseract C API.
/// </param>
[LibraryImport(DllName, EntryPoint = "TessDeleteIntArray")]
[UnmanagedCallConv(CallConvs = new Type[] { typeof(System.Runtime.CompilerServices.CallConvCdecl) })]
public static partial void DeleteIntArray(IntPtr ptr);

/// <summary>
/// Binding for the native <c>TessDeleteText</c> entry point, called with the Cdecl convention.
/// </summary>
/// <param name="ptr">
/// Pointer handed to the native function as-is.
/// NOTE(review): presumably a string returned by the native library (for example from GetUTF8Text_Ptr) - confirm against the Tesseract C API.
/// </param>
[LibraryImport(DllName, EntryPoint = "TessDeleteText")]
[UnmanagedCallConv(CallConvs = new Type[] { typeof(System.Runtime.CompilerServices.CallConvCdecl) })]
public static partial void DeleteString(IntPtr ptr);

[LibraryImport(DllName, EntryPoint = "TessBaseAPICreate")]
[UnmanagedCallConv(CallConvs = new Type[] { typeof(System.Runtime.CompilerServices.CallConvCdecl) })]
public static partial IntPtr CreateApi();
Expand All @@ -36,9 +48,14 @@ public extern static int BaseApi5Init(HandleRef handle, string datapath, int dat

[DllImport(DllName, CallingConvention = CallingConvention.Cdecl, EntryPoint = "TessBaseAPISetImage2")]
public static extern void SetImage(HandleRef handle, HandleRef pixHandle);


// This does not work with non-ASCII characters; use GetUTF8Text_Ptr instead.
[DllImport(DllName, CallingConvention = CallingConvention.Cdecl, EntryPoint = "TessBaseAPIGetUTF8Text", CharSet = StrEncoding)]
public static extern string GetUTF8Text(HandleRef handle);
public static extern string GetUTF8Text(HandleRef handle);

/// <summary>
/// Binding for the native <c>TessBaseAPIGetUTF8Text</c> entry point that returns the raw pointer
/// instead of letting the runtime marshal the result into a string.
/// </summary>
/// <remarks>
/// Remember to delete the native string after copying it: pass the returned pointer to DeleteString(IntPtr ptr).
/// </remarks>
/// <param name="handle">Handle passed to the native function. NOTE(review): presumably the TessBaseAPI handle - confirm.</param>
/// <returns>Pointer returned by the native call; the caller is expected to copy the text and then free it.</returns>
[DllImport(DllName, CallingConvention = CallingConvention.Cdecl, EntryPoint = "TessBaseAPIGetUTF8Text")]
public static extern IntPtr GetUTF8Text_Ptr(HandleRef handle);

[DllImport(DllName, CallingConvention = CallingConvention.Cdecl, EntryPoint = "TessBaseAPIAllWordConfidences")]
public static extern int[] GetConfidences(HandleRef handle);
Expand Down
2 changes: 1 addition & 1 deletion TesseractOcrMaui.IOS/TesseractOcrMaui.IOS.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@

<!-- Package properties -->
<PropertyGroup>
<Version>1.0.5</Version>
<Version>1.0.6</Version>
<Title>Maui Tesseract ocr iOS bindings</Title>
<PackageId>TesseractOcrMaui.IOS</PackageId>
<Authors>henrivain</Authors>
Expand Down
3 changes: 2 additions & 1 deletion TesseractOcrMaui/Enums/RecognizionStatus.cs
Original file line number Diff line number Diff line change
Expand Up @@ -19,5 +19,6 @@ public enum RecognizionStatus
InvalidImage,
ImageAlredyProcessed,
CannotRecognizeText,
TessDataFolderNotProvided
TessDataFolderNotProvided,
InvalidResultString
}
36 changes: 36 additions & 0 deletions TesseractOcrMaui/Exceptions/PageNotDisposedException.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;

namespace TesseractOcrMaui.Exceptions;

/// <summary>
/// Exception thrown when multiple images are given to TessEngine at the same time.
/// The old TessPage must be disposed before trying to process a new image.
/// </summary>
public class PageNotDisposedException : TesseractException
{
    /// <summary>
    /// New exception without a message, thrown when multiple images are given to TessEngine at the same time.
    /// The old TessPage must be disposed before trying to process a new image.
    /// </summary>
    public PageNotDisposedException() { }

    /// <summary>
    /// New exception with a message, thrown when multiple images are given to TessEngine at the same time.
    /// The old TessPage must be disposed before trying to process a new image.
    /// </summary>
    /// <param name="message">Error reason. May be null, matching the other exceptions in this namespace.</param>
    public PageNotDisposedException(string? message) : base(message) { }

    /// <summary>
    /// New exception with a message and an inner exception, thrown when multiple images are given
    /// to TessEngine at the same time.
    /// The old TessPage must be disposed before trying to process a new image.
    /// </summary>
    /// <param name="message">Error reason. May be null.</param>
    /// <param name="innerException">Exception that caused this exception. May be null.</param>
    public PageNotDisposedException(string? message, Exception? innerException)
        : base(message, innerException) { }
}
29 changes: 29 additions & 0 deletions TesseractOcrMaui/Exceptions/StringMarshallingException.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
namespace TesseractOcrMaui.Exceptions;

/// <summary>
/// Raised when a string returned from the native library cannot be marshalled correctly.
/// </summary>
public class StringMarshallingException : TesseractException
{
    /// <summary>
    /// Creates the exception without a message.
    /// Raised when a string returned from the native library cannot be marshalled correctly.
    /// </summary>
    public StringMarshallingException()
    {
    }

    /// <summary>
    /// Creates the exception with a message.
    /// Raised when a string returned from the native library cannot be marshalled correctly.
    /// </summary>
    /// <param name="message">Reason for the error.</param>
    public StringMarshallingException(string? message)
        : base(message)
    {
    }

    /// <summary>
    /// Creates the exception with a message and the exception that caused it.
    /// Raised when a string returned from the native library cannot be marshalled correctly.
    /// </summary>
    /// <param name="message">Reason for the error.</param>
    /// <param name="innerException">Exception that led to this error.</param>
    public StringMarshallingException(string? message, Exception? innerException)
        : base(message, innerException)
    {
    }
}
14 changes: 12 additions & 2 deletions TesseractOcrMaui/ImportApis/TesseractApi.cs
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,14 @@ internal sealed partial class TesseractApi
const string DllName = "Use Windows, Android or iOS Platform";
#endif


const CharSet StrEncoding = CharSet.Ansi;



/// <summary>
/// Binding for the native <c>TessDeleteText</c> entry point, called with the Cdecl convention.
/// </summary>
/// <param name="ptr">
/// Pointer handed to the native function as-is.
/// NOTE(review): presumably a string returned by the native library (for example from GetUTF8Text_Ptr) - confirm against the Tesseract C API.
/// </param>
[LibraryImport(DllName, EntryPoint = "TessDeleteText")]
[UnmanagedCallConv(CallConvs = new Type[] { typeof(System.Runtime.CompilerServices.CallConvCdecl) })]
public static partial void DeleteString(IntPtr ptr);

[LibraryImport(DllName, EntryPoint = "TessBaseAPICreate")]
[UnmanagedCallConv(CallConvs = new Type[] { typeof(System.Runtime.CompilerServices.CallConvCdecl) })]
public static partial IntPtr CreateApi();
Expand All @@ -47,10 +52,15 @@ public extern static int BaseApi5Init(HandleRef handle, string datapath, int dat

[DllImport(DllName, CallingConvention = CallingConvention.Cdecl, EntryPoint = "TessBaseAPISetImage2")]
public static extern void SetImage(HandleRef handle, HandleRef pixHandle);


// This does not work with non-ASCII characters; use GetUTF8Text_Ptr instead.
[DllImport(DllName, CallingConvention = CallingConvention.Cdecl, EntryPoint = "TessBaseAPIGetUTF8Text", CharSet = StrEncoding)]
public static extern string GetUTF8Text(HandleRef handle);

/// <summary>
/// Binding for the native <c>TessBaseAPIGetUTF8Text</c> entry point that returns the raw pointer
/// instead of letting the runtime marshal the result into a string.
/// </summary>
/// <remarks>
/// Remember to delete the native string after copying it: pass the returned pointer to DeleteString(IntPtr ptr).
/// </remarks>
/// <param name="handle">Handle passed to the native function. NOTE(review): presumably the TessBaseAPI handle - confirm.</param>
/// <returns>Pointer returned by the native call; the caller is expected to copy the text and then free it.</returns>
[DllImport(DllName, CallingConvention = CallingConvention.Cdecl, EntryPoint = "TessBaseAPIGetUTF8Text")]
public static extern IntPtr GetUTF8Text_Ptr(HandleRef handle);

[DllImport(DllName, CallingConvention = CallingConvention.Cdecl, EntryPoint = "TessBaseAPIAllWordConfidences")]
public static extern int[] GetConfidences(HandleRef handle);

Expand Down
5 changes: 5 additions & 0 deletions TesseractOcrMaui/Results/RecognizionResult.cs
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,11 @@ public RecognizionResult() { }
/// </summary>
public string? Message { get; init; }

/// <summary>
/// Optional exception that was thrown if failed.
/// </summary>
public Exception? Exception { get; init; }

/// <summary>
/// Recognized text from the image.
/// </summary>
Expand Down
17 changes: 7 additions & 10 deletions TesseractOcrMaui/TessEngine.cs
Original file line number Diff line number Diff line change
Expand Up @@ -98,11 +98,9 @@ public TessEngine(string languages, string traineddataPath, EngineMode mode,
/// <returns>New Tess page containing information for recognizion.</returns>
/// <exception cref="ArgumentNullException">image is null.</exception>
/// <exception cref="ArgumentException">Image width or height has invalid values.</exception>
/// <exception cref="InvalidOperationException">Image already processed. You must dispose page after using.</exception>
public TessPage ProcessImage(Pix image, PageSegmentationMode? mode = null)
{
return ProcessImage(image, null, new Rect(0, 0, image.Width, image.Height), mode);
}
/// <exception cref="PageNotDisposedException">Image already processed. You must dispose page after using.</exception>
public TessPage ProcessImage(Pix image, PageSegmentationMode? mode = null)
=> ProcessImage(image, null, new Rect(0, 0, image.Width, image.Height), mode);

/// <summary>
/// Process image to TessPage.
Expand All @@ -114,7 +112,7 @@ public TessPage ProcessImage(Pix image, PageSegmentationMode? mode = null)
/// <returns>New Tess page containing information for recognizion.</returns>
/// <exception cref="ArgumentNullException">image is null.</exception>
/// <exception cref="ArgumentException">Region is out of bounds.</exception>
/// <exception cref="InvalidOperationException">Image already processed. You must dispose page after using.</exception>
/// <exception cref="PageNotDisposedException">Image already processed. You must dispose page after using.</exception>
public TessPage ProcessImage(Pix image, string? inputName, Rect region, PageSegmentationMode? mode)
{
if (image is null)
Expand All @@ -130,10 +128,9 @@ public TessPage ProcessImage(Pix image, string? inputName, Rect region, PageSegm
}
if (_processCount > 0)
{
_logger.LogError("{cls}: Tried to process image with engine that already has one. " +
"You must dispose image after using.", nameof(TessEngine));
throw new InvalidOperationException("One image already set. " +
"You must dispose page after using.");
_logger.LogError("{cls}: Already has one image process. You must dispose {page} after using it.",
nameof(TessEngine), nameof(TessPage));
throw new PageNotDisposedException("You must dispose old TessPage after using it.");
}

_processCount++;
Expand Down
43 changes: 19 additions & 24 deletions TesseractOcrMaui/TessPage.cs
Original file line number Diff line number Diff line change
Expand Up @@ -75,44 +75,39 @@ public TessPage(TessEngine engine, Pix image, string? inputName, Rect region, Pa
/// <summary>
/// Get text from image. Runs recognizion if it is not already done. Uses UTF-8.
/// </summary>
/// <returns>Text that apprears in image.</returns>
/// <returns>Recognized text as UTF-8 string</returns>
/// <exception cref="InvalidOperationException">PageSegmentationMode is OsdOnly when recognizing.</exception>
/// <exception cref="ImageRecognizionException">Native Library call returns failed status when recognizing.</exception>
/// <exception cref="TesseractException">Can't get thresholded image when recognizing.</exception>
/// <exception cref="InvalidBytesException">[WINDOWS] Invalid byte sequence in string.</exception>
/// <exception cref="StringMarshallingException">
/// When the recognizion result string pointer is a null pointer or the pointer cannot
/// be marshalled into a UTF-8 string.
/// </exception>
public string GetText()
{
Logger.LogInformation("Try to get text from image.");

Recognize();

string result = TesseractApi.GetUTF8Text(Engine.Handle);

Logger.LogInformation("Found '{count}' characters in image.", result.Length);

// My Windows seems to use different encoding than UTF-8 by default, so this should help.
// Android uses UTF-8 as default so all good.
#if WINDOWS
var bytes = new byte[result.Length];
for (int i = 0; i < result.Length; i++)
{
bytes[i] = (byte)result[i];
}
if (bytes is null)
{
return string.Empty;
}
try
IntPtr ptr = TesseractApi.GetUTF8Text_Ptr(Engine.Handle);
if (ptr == IntPtr.Zero)
{
return Encoding.UTF8.GetString(bytes);
Logger.LogError("Recognizion result string cannot be marshalled from null pointer.");
throw new StringMarshallingException("String cannot be marshalled from null pointer.");
}
catch (Exception ex)

string? result = Marshal.PtrToStringUTF8(ptr);
TesseractApi.DeleteString(ptr);

if (result is null)
{
throw new InvalidBytesException("Cannot encode current byte array, because it contains invalid bytes.", ex);
Logger.LogError("Cannot encode char* to UTF-8 string.");
throw new StringMarshallingException("Could not encode recognizion result string to UTF-8.");
}
#else

Logger.LogInformation("Found '{count}' characters in image.", result.Length);

return result;
#endif
}

/// <summary>
Expand Down
Loading

0 comments on commit 37ae179

Please sign in to comment.