Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix #38 #40

Merged
merged 4 commits into from
Feb 20, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ dotnet add package TesseractOcrMaui
3. By package reference

```xml
<PackageReference Include="TesseractOcrMaui" Version="1.1.0" />
<PackageReference Include="TesseractOcrMaui" Version="1.1.6" />
```

### 2. Add package to dependency injection (see TesseractOcrMauiTestApp)
Expand Down
4 changes: 2 additions & 2 deletions TesseractOcrMaui.IOS/Properties/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,13 +13,13 @@ This package `TesseractOcrMaui.IOS` only includes bindings for iOS and should no
2. Package reference

```xml
<PackageReference Include="TesseractOcrMaui" Version="1.1.0" />
<PackageReference Include="TesseractOcrMaui" Version="1.1.6" />
```

3. Dotnet CLI

```ps
dotnet add package TesseractOcrMaui --version 1.1.0
dotnet add package TesseractOcrMaui --version 1.1.6
```

Note that you should check what the current package version is and use it in your command.
Expand Down
21 changes: 19 additions & 2 deletions TesseractOcrMaui.IOS/TesseractApi.cs
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,18 @@ public sealed partial class TesseractApi

const CharSet StrEncoding = CharSet.Ansi;

// Binds the native "TessDeleteTextArray" entry point using the cdecl calling convention.
// The pointer argument is handed to native code as-is.
// NOTE(review): presumably releases a text array previously returned by the native library - confirm.
// NOTE(review): C# char is 16 bits wide; if the native array holds 8-bit C chars, treat ptr
// as an opaque pointer and do not dereference it as managed chars.
[LibraryImport(DllName, EntryPoint = "TessDeleteTextArray")]
[UnmanagedCallConv(CallConvs = new Type[] { typeof(System.Runtime.CompilerServices.CallConvCdecl) })]
public static unsafe partial void DeleteStringArray(char** ptr);

// Binds the native "TessDeleteIntArray" entry point using the cdecl calling convention.
// NOTE(review): presumably releases an int array previously returned by the native library
// (e.g. word confidences) - confirm against the native API before relying on it.
[LibraryImport(DllName, EntryPoint = "TessDeleteIntArray")]
[UnmanagedCallConv(CallConvs = new Type[] { typeof(System.Runtime.CompilerServices.CallConvCdecl) })]
public static partial void DeleteIntArray(IntPtr ptr);

// Binds the native "TessDeleteText" entry point using the cdecl calling convention.
// Use this on the pointer returned by GetUTF8Text_Ptr once the text has been copied
// into a managed string.
[LibraryImport(DllName, EntryPoint = "TessDeleteText")]
[UnmanagedCallConv(CallConvs = new Type[] { typeof(System.Runtime.CompilerServices.CallConvCdecl) })]
public static partial void DeleteString(IntPtr ptr);

// Binds the native "TessBaseAPICreate" entry point using the cdecl calling convention
// and returns the raw native pointer it produces.
// NOTE(review): presumably the returned handle must later be released through the
// matching native delete call - confirm against the caller.
[LibraryImport(DllName, EntryPoint = "TessBaseAPICreate")]
[UnmanagedCallConv(CallConvs = new Type[] { typeof(System.Runtime.CompilerServices.CallConvCdecl) })]
public static partial IntPtr CreateApi();
Expand All @@ -36,9 +48,14 @@ public extern static int BaseApi5Init(HandleRef handle, string datapath, int dat

// Binds the native "TessBaseAPISetImage2" entry point (cdecl).
// Takes the engine handle and the handle of a Pix image.
// NOTE(review): presumably this makes pixHandle the image used by later recognition calls - confirm.
[DllImport(DllName, CallingConvention = CallingConvention.Cdecl, EntryPoint = "TessBaseAPISetImage2")]
public static extern void SetImage(HandleRef handle, HandleRef pixHandle);


// This does not work with non-ASCII characters, use GetUTF8Text_Ptr instead
[DllImport(DllName, CallingConvention = CallingConvention.Cdecl, EntryPoint = "TessBaseAPIGetUTF8Text", CharSet = StrEncoding)]
public static extern string GetUTF8Text(HandleRef handle);
public static extern string GetUTF8Text(HandleRef handle);

// Binds the native "TessBaseAPIGetUTF8Text" entry point (cdecl) and returns the raw
// pointer to the text instead of a marshalled string, so the caller can decode it itself.
// Remember to delete the string after copying it: pass the pointer to DeleteString(IntPtr ptr).
[DllImport(DllName, CallingConvention = CallingConvention.Cdecl, EntryPoint = "TessBaseAPIGetUTF8Text")]
public static extern IntPtr GetUTF8Text_Ptr(HandleRef handle);

// Binds the native "TessBaseAPIAllWordConfidences" entry point (cdecl).
// NOTE(review): the return type is a managed int[]; the P/Invoke marshaller receives no
// length information for a native array returned this way, so this declaration likely
// cannot be marshalled as written. Consider returning IntPtr, copying the values manually
// and releasing the native array with DeleteIntArray - confirm before changing.
[DllImport(DllName, CallingConvention = CallingConvention.Cdecl, EntryPoint = "TessBaseAPIAllWordConfidences")]
public static extern int[] GetConfidences(HandleRef handle);
Expand Down
2 changes: 1 addition & 1 deletion TesseractOcrMaui.IOS/TesseractOcrMaui.IOS.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@

<!-- Package properties -->
<PropertyGroup>
<Version>1.0.5</Version>
<Version>1.0.6</Version>
<Title>Maui Tesseract ocr iOS bindings</Title>
<PackageId>TesseractOcrMaui.IOS</PackageId>
<Authors>henrivain</Authors>
Expand Down
3 changes: 2 additions & 1 deletion TesseractOcrMaui/Enums/RecognizionStatus.cs
Original file line number Diff line number Diff line change
Expand Up @@ -19,5 +19,6 @@ public enum RecognizionStatus
InvalidImage,
ImageAlredyProcessed,
CannotRecognizeText,
TessDataFolderNotProvided
TessDataFolderNotProvided,
InvalidResultString
}
36 changes: 36 additions & 0 deletions TesseractOcrMaui/Exceptions/PageNotDisposedException.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;

namespace TesseractOcrMaui.Exceptions;

/// <summary>
/// Exception thrown when multiple images are given to TessEngine at the same time.
/// Old TessPage must be disposed before trying to process new image.
/// </summary>
public class PageNotDisposedException : TesseractException
{
    /// <summary>
    /// Creates the exception without a message.
    /// It signals that TessEngine received a new image while the previous
    /// TessPage had not been disposed yet.
    /// </summary>
    public PageNotDisposedException()
        : base()
    {
    }

    /// <summary>
    /// Creates the exception with a message describing the failure.
    /// It signals that TessEngine received a new image while the previous
    /// TessPage had not been disposed yet.
    /// </summary>
    /// <param name="message">Error reason</param>
    public PageNotDisposedException(string message)
        : base(message)
    {
    }

    /// <summary>
    /// Creates the exception with a message and the exception that led to it.
    /// It signals that TessEngine received a new image while the previous
    /// TessPage had not been disposed yet.
    /// </summary>
    /// <param name="message">Error reason</param>
    /// <param name="innerException">Exception that caused this exception</param>
    public PageNotDisposedException(string message, Exception innerException)
        : base(message, innerException)
    {
    }
}
29 changes: 29 additions & 0 deletions TesseractOcrMaui/Exceptions/StringMarshallingException.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
namespace TesseractOcrMaui.Exceptions;

/// <summary>
/// Exception thrown when library cannot marshal string returned from native library correctly.
/// </summary>
public class StringMarshallingException : TesseractException
{
    /// <summary>
    /// Creates the exception without a message.
    /// Used when a string returned from the native library cannot be marshalled correctly.
    /// </summary>
    public StringMarshallingException()
        : base()
    { }

    /// <summary>
    /// Creates the exception with a message.
    /// Used when a string returned from the native library cannot be marshalled correctly.
    /// </summary>
    /// <param name="message">Error reason.</param>
    public StringMarshallingException(string? message)
        : base(message)
    { }

    /// <summary>
    /// Creates the exception with a message and the exception that led to it.
    /// Used when a string returned from the native library cannot be marshalled correctly.
    /// </summary>
    /// <param name="message">Error reason.</param>
    /// <param name="innerException">Exception that caused this error.</param>
    public StringMarshallingException(string? message, Exception? innerException)
        : base(message, innerException)
    { }
}
14 changes: 12 additions & 2 deletions TesseractOcrMaui/ImportApis/TesseractApi.cs
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,14 @@ internal sealed partial class TesseractApi
const string DllName = "Use Windows, Android or iOS Platform";
#endif


const CharSet StrEncoding = CharSet.Ansi;



// Binds the native "TessDeleteText" entry point using the cdecl calling convention.
// Use this on the pointer returned by GetUTF8Text_Ptr once the text has been copied
// into a managed string.
[LibraryImport(DllName, EntryPoint = "TessDeleteText")]
[UnmanagedCallConv(CallConvs = new Type[] { typeof(System.Runtime.CompilerServices.CallConvCdecl) })]
public static partial void DeleteString(IntPtr ptr);

// Binds the native "TessBaseAPICreate" entry point using the cdecl calling convention
// and returns the raw native pointer it produces.
// NOTE(review): presumably the returned handle must later be released through the
// matching native delete call - confirm against the caller.
[LibraryImport(DllName, EntryPoint = "TessBaseAPICreate")]
[UnmanagedCallConv(CallConvs = new Type[] { typeof(System.Runtime.CompilerServices.CallConvCdecl) })]
public static partial IntPtr CreateApi();
Expand All @@ -47,10 +52,15 @@ public extern static int BaseApi5Init(HandleRef handle, string datapath, int dat

// Binds the native "TessBaseAPISetImage2" entry point (cdecl).
// Takes the engine handle and the handle of a Pix image.
// NOTE(review): presumably this makes pixHandle the image used by later recognition calls - confirm.
[DllImport(DllName, CallingConvention = CallingConvention.Cdecl, EntryPoint = "TessBaseAPISetImage2")]
public static extern void SetImage(HandleRef handle, HandleRef pixHandle);


// This does not work with non-ASCII characters, use GetUTF8Text_Ptr instead
[DllImport(DllName, CallingConvention = CallingConvention.Cdecl, EntryPoint = "TessBaseAPIGetUTF8Text", CharSet = StrEncoding)]
public static extern string GetUTF8Text(HandleRef handle);

// Binds the native "TessBaseAPIGetUTF8Text" entry point (cdecl) and returns the raw
// pointer to the text instead of a marshalled string, so the caller can decode it itself.
// Remember to delete the string after copying it: pass the pointer to DeleteString(IntPtr ptr).
[DllImport(DllName, CallingConvention = CallingConvention.Cdecl, EntryPoint = "TessBaseAPIGetUTF8Text")]
public static extern IntPtr GetUTF8Text_Ptr(HandleRef handle);

// Binds the native "TessBaseAPIAllWordConfidences" entry point (cdecl).
// NOTE(review): the return type is a managed int[]; the P/Invoke marshaller receives no
// length information for a native array returned this way, so this declaration likely
// cannot be marshalled as written. Consider returning IntPtr, copying the values manually
// and releasing the native array via the native "TessDeleteIntArray" - confirm before changing.
[DllImport(DllName, CallingConvention = CallingConvention.Cdecl, EntryPoint = "TessBaseAPIAllWordConfidences")]
public static extern int[] GetConfidences(HandleRef handle);

Expand Down
5 changes: 5 additions & 0 deletions TesseractOcrMaui/Results/RecognizionResult.cs
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,11 @@ public RecognizionResult() { }
/// </summary>
public string? Message { get; init; }

/// <summary>
/// Optional exception that was thrown if the operation failed.
/// </summary>
public Exception? Exception { get; init; }

/// <summary>
/// Recognized text from the image.
/// </summary>
Expand Down
17 changes: 7 additions & 10 deletions TesseractOcrMaui/TessEngine.cs
Original file line number Diff line number Diff line change
Expand Up @@ -98,11 +98,9 @@ public TessEngine(string languages, string traineddataPath, EngineMode mode,
/// <returns>New Tess page containing information for recognizion.</returns>
/// <exception cref="ArgumentNullException">image is null.</exception>
/// <exception cref="ArgumentException">Image width or height has invalid values.</exception>
/// <exception cref="InvalidOperationException">Image already processed. You must dispose page after using.</exception>
public TessPage ProcessImage(Pix image, PageSegmentationMode? mode = null)
{
return ProcessImage(image, null, new Rect(0, 0, image.Width, image.Height), mode);
}
/// <exception cref="PageNotDisposedException">Image already processed. You must dispose page after using.</exception>
public TessPage ProcessImage(Pix image, PageSegmentationMode? mode = null)
=> ProcessImage(image, null, new Rect(0, 0, image.Width, image.Height), mode);

/// <summary>
/// Process image to TessPage.
Expand All @@ -114,7 +112,7 @@ public TessPage ProcessImage(Pix image, PageSegmentationMode? mode = null)
/// <returns>New Tess page containing information for recognizion.</returns>
/// <exception cref="ArgumentNullException">image is null.</exception>
/// <exception cref="ArgumentException">Region is out of bounds.</exception>
/// <exception cref="InvalidOperationException">Image already processed. You must dispose page after using.</exception>
/// <exception cref="PageNotDisposedException">Image already processed. You must dispose page after using.</exception>
public TessPage ProcessImage(Pix image, string? inputName, Rect region, PageSegmentationMode? mode)
{
if (image is null)
Expand All @@ -130,10 +128,9 @@ public TessPage ProcessImage(Pix image, string? inputName, Rect region, PageSegm
}
if (_processCount > 0)
{
_logger.LogError("{cls}: Tried to process image with engine that already has one. " +
"You must dispose image after using.", nameof(TessEngine));
throw new InvalidOperationException("One image already set. " +
"You must dispose page after using.");
_logger.LogError("{cls}: Already has one image process. You must dispose {page} after using it.",
nameof(TessEngine), nameof(TessPage));
throw new PageNotDisposedException("You must dispose old TessPage after using it.");
}

_processCount++;
Expand Down
43 changes: 19 additions & 24 deletions TesseractOcrMaui/TessPage.cs
Original file line number Diff line number Diff line change
Expand Up @@ -75,44 +75,39 @@ public TessPage(TessEngine engine, Pix image, string? inputName, Rect region, Pa
/// <summary>
/// Get text from image. Runs recognizion if it is not already done. Uses UTF-8.
/// </summary>
/// <returns>Text that apprears in image.</returns>
/// <returns>Recognized text as UTF-8 string</returns>
/// <exception cref="InvalidOperationException">PageSegmentationMode is OsdOnly when recognizing.</exception>
/// <exception cref="ImageRecognizionException">Native Library call returns failed status when recognizing.</exception>
/// <exception cref="TesseractException">Can't get thresholded image when recognizing.</exception>
/// <exception cref="InvalidBytesException">[WINDOWS] Invalid byte sequence in string.</exception>
/// <exception cref="StringMarshallingException">
/// When the recognizion result string pointer is a null pointer or the pointer cannot
/// be marshalled into a UTF-8 string.
/// </exception>
public string GetText()
{
Logger.LogInformation("Try to get text from image.");

Recognize();

string result = TesseractApi.GetUTF8Text(Engine.Handle);

Logger.LogInformation("Found '{count}' characters in image.", result.Length);

// My Windows seems to use different encoding than UTF-8 by default, so this should help.
// Android uses UTF-8 as default so all good.
#if WINDOWS
var bytes = new byte[result.Length];
for (int i = 0; i < result.Length; i++)
{
bytes[i] = (byte)result[i];
}
if (bytes is null)
{
return string.Empty;
}
try
IntPtr ptr = TesseractApi.GetUTF8Text_Ptr(Engine.Handle);
if (ptr == IntPtr.Zero)
{
return Encoding.UTF8.GetString(bytes);
Logger.LogError("Recognizion result string cannot be marshalled from null pointer.");
throw new StringMarshallingException("String cannot be marshalled from null pointer.");
}
catch (Exception ex)

string? result = Marshal.PtrToStringUTF8(ptr);
TesseractApi.DeleteString(ptr);

if (result is null)
{
throw new InvalidBytesException("Cannot encode current byte array, because it contains invalid bytes.", ex);
Logger.LogError("Cannot encode char* to UTF-8 string.");
throw new StringMarshallingException("Could not encode recognizion result string to UTF-8.");
}
#else

Logger.LogInformation("Found '{count}' characters in image.", result.Length);

return result;
#endif
}

/// <summary>
Expand Down
Loading