Skip to content

Commit

Permalink
Support for loading config files (workaround for issues #125 & #70)
Browse files Browse the repository at this point in the history
  • Loading branch information
charlesw committed Oct 19, 2014
1 parent af4355f commit 2fc50df
Show file tree
Hide file tree
Showing 11 changed files with 298 additions and 13 deletions.
7 changes: 6 additions & 1 deletion ChangeLog.md
Original file line number Diff line number Diff line change
Expand Up @@ -37,4 +37,9 @@ due to Mono support which requires a version increment (we use semantic versioni

#### Bug fixes

* Fixed UTF8 handling for SetVariable (support for non-english languages) [Issue 120](https://github.com/charlesw/tesseract/issues/120) & [Issue 68](https://github.com/charlesw/tesseract/issues/68)
* Fixed UTF8 handling for SetVariable (support for non-english languages) [Issue 120](https://github.com/charlesw/tesseract/issues/120) & [Issue 68](https://github.com/charlesw/tesseract/issues/68)

### Version 2.1

* Support for loading config files
* Support for loading Pix from memory
2 changes: 1 addition & 1 deletion build.proj
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
<?xml version="1.0" encoding="utf-8"?>
<Project DefaultTargets="Package" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<PropertyGroup>
<Version>2.0.0.1</Version>
<Version>2.1.0.1</Version>
<SourceDir>$(MSBuildProjectDirectory)\src</SourceDir>
<BuildDir>$(MSBuildProjectDirectory)\bin</BuildDir>
<ReleaseDir>$(MSBuildProjectDirectory)\release</ReleaseDir>
Expand Down
2 changes: 1 addition & 1 deletion src/Tesseract.Tests.Console/Program.cs
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ static void Main(string[] args)

try {
var testFixture = new Tesseract.Tests.EngineTests();
testFixture.CanProcessPixUsingResultIterator();
testFixture.Initialise_CanLoadConfigFile();
} catch (Exception e) {
System.Console.WriteLine("Unhandled exception occured: \r\n{0}", e);
}
Expand Down
162 changes: 162 additions & 0 deletions src/Tesseract.Tests/Data/PixToBitmapConverter.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,162 @@
using System;
using System.Collections.Generic;
using System.Drawing;
using System.Drawing.Imaging;
using System.Text;
using SD = System.Drawing;
namespace Tesseract
{
    /// <summary>
    /// Copies the pixel data (and, for indexed formats, the palette) of a <see cref="Pix"/>
    /// into a newly created <see cref="Bitmap"/>.
    /// </summary>
    public class PixToBitmapConverter
    {
        /// <summary>
        /// Creates a <see cref="Bitmap"/> with the same dimensions as <paramref name="pix"/> and
        /// copies the pix's pixel data into it.
        /// </summary>
        /// <param name="pix">The source image. Depths of 1, 8, 16 and 32 bits are handled.</param>
        /// <param name="includeAlpha">
        /// Only used for 32 bpp images: when <c>true</c> the source alpha value is copied,
        /// otherwise every pixel's alpha is forced to 255.
        /// </param>
        /// <returns>The new bitmap; the caller owns it and is responsible for disposing it.</returns>
        /// <exception cref="InvalidOperationException">The pix depth is not supported.</exception>
        public Bitmap Convert(Pix pix, bool includeAlpha = false)
        {
            var pixelFormat = GetPixelFormat(pix);
            var depth = pix.Depth;
            var img = new Bitmap(pix.Width, pix.Height, pixelFormat);

            try {
                // TODO: Set X and Y resolution

                // Indexed formats need their palette populated before the indices mean anything.
                if ((pixelFormat & PixelFormat.Indexed) == PixelFormat.Indexed) {
                    TransferPalette(pix, img);
                }

                // transfer data
                PixData pixData = pix.GetData();
                BitmapData imgData = img.LockBits(new Rectangle(0, 0, img.Width, img.Height), ImageLockMode.WriteOnly, pixelFormat);
                try {
                    if (depth == 32) {
                        TransferData32(pixData, imgData, includeAlpha ? 0 : 255);
                    } else if (depth == 16) {
                        TransferData16(pixData, imgData);
                    } else if (depth == 8) {
                        TransferData8(pixData, imgData);
                    } else if (depth == 1) {
                        TransferData1(pixData, imgData);
                    }
                } finally {
                    // Unlock in an inner finally so that it always happens BEFORE the outer
                    // catch disposes the bitmap; unlocking an already disposed bitmap could
                    // throw and hide the original exception.
                    img.UnlockBits(imgData);
                }

                return img;
            } catch (Exception) {
                // Don't leak the half-built bitmap when anything above fails.
                img.Dispose();
                throw;
            }
        }

        /// <summary>
        /// Copies 32 bpp pixels, writing each one as the byte sequence blue, green, red, alpha.
        /// </summary>
        /// <param name="pixData">Source pixel data.</param>
        /// <param name="imgData">Locked destination bitmap data.</param>
        /// <param name="alphaMask">Value OR-ed into each pixel's alpha (255 makes every pixel opaque, 0 keeps the source alpha).</param>
        private unsafe void TransferData32(PixData pixData, BitmapData imgData, int alphaMask)
        {
            var height = imgData.Height;
            var width = imgData.Width;

            for (int y = 0; y < height; y++) {
                byte* imgLine = (byte*)imgData.Scan0 + (y * imgData.Stride);
                uint* pixLine = (uint*)pixData.Data + (y * pixData.WordsPerLine);

                for (int x = 0; x < width; x++) {
                    var pixVal = PixColor.FromRgba(pixLine[x]);

                    byte* pixelPtr = imgLine + (x << 2); // 4 bytes per pixel
                    pixelPtr[0] = pixVal.Blue;
                    pixelPtr[1] = pixVal.Green;
                    pixelPtr[2] = pixVal.Red;
                    pixelPtr[3] = (byte)(alphaMask | pixVal.Alpha); // Allow user to include alpha or not
                }
            }
        }

        /// <summary>
        /// Copies 16 bpp pixels one value at a time.
        /// </summary>
        /// <param name="pixData">Source pixel data.</param>
        /// <param name="imgData">Locked destination bitmap data.</param>
        private unsafe void TransferData16(PixData pixData, BitmapData imgData)
        {
            var height = imgData.Height;
            var width = imgData.Width;

            for (int y = 0; y < height; y++) {
                uint* pixLine = (uint*)pixData.Data + (y * pixData.WordsPerLine);
                // Stride is measured in bytes, so the row offset must be applied to a byte
                // pointer *before* casting to ushort*; otherwise the offset is doubled.
                ushort* imgLine = (ushort*)((byte*)imgData.Scan0 + (y * imgData.Stride));

                for (int x = 0; x < width; x++) {
                    imgLine[x] = (ushort)PixData.GetDataTwoByte(pixLine, x);
                }
            }
        }

        /// <summary>
        /// Copies 8 bpp pixels (one byte per pixel).
        /// </summary>
        /// <param name="pixData">Source pixel data.</param>
        /// <param name="imgData">Locked destination bitmap data.</param>
        private unsafe void TransferData8(PixData pixData, BitmapData imgData)
        {
            TransferBytes(pixData, imgData, imgData.Width);
        }

        /// <summary>
        /// Copies 1 bpp pixels, eight pixels per byte.
        /// </summary>
        /// <param name="pixData">Source pixel data.</param>
        /// <param name="imgData">Locked destination bitmap data.</param>
        private unsafe void TransferData1(PixData pixData, BitmapData imgData)
        {
            // Round up so the trailing partial byte is copied when the width
            // is not a multiple of 8.
            TransferBytes(pixData, imgData, (imgData.Width + 7) / 8);
        }

        /// <summary>
        /// Copies the first <paramref name="bytesPerLine"/> bytes of every source line into the
        /// matching destination line.
        /// </summary>
        /// <param name="pixData">Source pixel data.</param>
        /// <param name="imgData">Locked destination bitmap data.</param>
        /// <param name="bytesPerLine">Number of bytes to copy for each line.</param>
        private unsafe void TransferBytes(PixData pixData, BitmapData imgData, int bytesPerLine)
        {
            var height = imgData.Height;

            for (int y = 0; y < height; y++) {
                uint* pixLine = (uint*)pixData.Data + (y * pixData.WordsPerLine);
                byte* imgLine = (byte*)imgData.Scan0 + (y * imgData.Stride);

                for (int x = 0; x < bytesPerLine; x++) {
                    imgLine[x] = (byte)PixData.GetDataByte(pixLine, x);
                }
            }
        }

        /// <summary>
        /// Fills the bitmap's palette from the pix's colormap or, when the pix has no colormap
        /// (or one with more entries than the palette can hold), with an evenly spaced gray ramp.
        /// </summary>
        /// <param name="pix">Source image whose colormap is read.</param>
        /// <param name="img">Destination bitmap whose palette is replaced.</param>
        private void TransferPalette(Pix pix, Bitmap img)
        {
            var palette = img.Palette;
            var maxColors = palette.Entries.Length;
            var lastColor = maxColors - 1;
            var colormap = pix.Colormap;
            if (colormap != null && colormap.Count <= maxColors) {
                var colormapCount = colormap.Count;
                for (int i = 0; i < colormapCount; i++) {
                    palette.Entries[i] = (SD.Color)colormap[i];
                }
            } else {
                for (int i = 0; i < maxColors; i++) {
                    var value = (byte)(i * 255 / lastColor);
                    palette.Entries[i] = SD.Color.FromArgb(value, value, value);
                }
            }
            // This is required to force the palette to update!
            img.Palette = palette;
        }

        /// <summary>
        /// Maps the pix depth to the bitmap pixel format used for the conversion.
        /// </summary>
        /// <param name="pix">The source image.</param>
        /// <returns>The pixel format for depths 1, 8, 16 and 32.</returns>
        /// <exception cref="InvalidOperationException">Any other depth (including 2 and 4).</exception>
        private PixelFormat GetPixelFormat(Pix pix)
        {
            switch (pix.Depth) {
                case 1: return PixelFormat.Format1bppIndexed;
                // Depths 2 and 4 are not handled and fall through to the default case.
                case 8: return PixelFormat.Format8bppIndexed;
                case 16: return PixelFormat.Format16bppGrayScale;
                case 32: return PixelFormat.Format32bppArgb;
                default: throw new InvalidOperationException(String.Format("Pix depth {0} is not supported.", pix.Depth));
            }
        }
    }
}
28 changes: 27 additions & 1 deletion src/Tesseract.Tests/EngineTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,32 @@ public void Initialise_ShouldThrowErrorIfDatapathNotCorrect()
}, Throws.InstanceOf(typeof(TesseractException)));
}

/// <summary>
/// Initialises an engine with the "bazzar" config file, checks that the
/// <c>user_words_suffix</c> variable reads back as "user-words", and then checks the text
/// recognised from <c>phototest.tif</c>.
/// </summary>
[Test]
public void Initialise_CanLoadConfigFile()
{
    const string expectedText =
        "This is a Iot of 12 point text to test the\nocr code and see if it works on all types\nof file format.\n\nThe quick brown dog jumped over the\nIazy fox. The quick brown dog jumped\nover the Iazy fox. The quick brown dog\njumped over the Iazy fox. The quick\nbrown dog jumped over the Iazy fox.\n\n";

    var tessDataPath = Path.Combine(Environment.CurrentDirectory, @"tessdata\");

    using (var engine = new TesseractEngine(tessDataPath, "eng", EngineMode.Default, "bazzar")) {
        // verify that the config file was loaded
        string userWordsSuffix;
        if (!engine.TryGetStringVariable("user_words_suffix", out userWordsSuffix)) {
            Assert.Fail("Failed to retrieve value for 'user_words_suffix'.");
        }
        Assert.That(userWordsSuffix, Is.EqualTo("user-words"));

        // run recognition with the config applied
        using (var img = Pix.LoadFromFile("./phototest.tif"))
        using (var page = engine.Process(img)) {
            var recognisedText = page.GetText();

            Assert.That(recognisedText, Is.EqualTo(expectedText));
        }
    }
}

[Test]
public void CanProcessMultipageTif()
{
Expand Down Expand Up @@ -101,7 +127,7 @@ public void CanProcessPix()
}
}
}

[Test]
public void CanProcessMultiplePixs()
{
Expand Down
11 changes: 11 additions & 0 deletions src/Tesseract.Tests/Tesseract.Tests.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@
</Compile>
<Compile Include="AnalyseResultTests.cs" />
<Compile Include="BaseApiTests.cs" />
<Compile Include="Data\PixToBitmapConverter.cs" />
<Compile Include="EngineTests.cs" />
<Compile Include="Leptonica\BitmapHelperTests.cs" />
<Compile Include="Leptonica\ColorTests.cs" />
Expand Down Expand Up @@ -130,12 +131,16 @@
</ProjectReference>
</ItemGroup>
<ItemGroup>
<Folder Include="tessdata\configs" />
<Folder Include="Data\Ocr" />
<Folder Include="Data\processing" />
<Folder Include="Results" />
<Folder Include="tessdata" />
</ItemGroup>
<ItemGroup>
<None Include="tessdata\configs\bazzar">
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
</None>
<None Include="Data\Ocr\uzn-test.png">
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
</None>
Expand Down Expand Up @@ -176,6 +181,12 @@
<None Include="tessdata\eng.traineddata">
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
</None>
<None Include="tessdata\eng.user-patterns">
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
</None>
<None Include="tessdata\eng.user-words">
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
</None>
</ItemGroup>
<Import Project="$(MSBuildToolsPath)\Microsoft.CSharp.targets" />
</Project>
4 changes: 4 additions & 0 deletions src/Tesseract.Tests/tessdata/Configs/bazzar
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
load_system_dawg F
load_freq_dawg F
user_words_suffix user-words
user_patterns_suffix user-patterns
2 changes: 2 additions & 0 deletions src/Tesseract.Tests/tessdata/eng.user-patterns
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
1-\d\d\d-GOOG-411
www.\n\\\*.com
5 changes: 5 additions & 0 deletions src/Tesseract.Tests/tessdata/eng.user-words
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
the
quick
brown
fox
jumped
27 changes: 23 additions & 4 deletions src/Tesseract/Interop/BaseApi.cs
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
using System;
using System.Collections.Generic;
using System.Runtime.InteropServices;
using System.Text;
using InteropDotNet;
using Tesseract.Internal;

namespace Tesseract.Interop
{
Expand Down Expand Up @@ -53,9 +55,7 @@ int BaseApiInit(HandleRef handle,
string datapath,
string language,
int mode,
IntPtr configs, int configs_size,
IntPtr vars_vec, int vars_vec_size,
IntPtr vars_values, int vars_values_size);
string[] configs, int configs_size);


[RuntimeDllImport(Constants.TesseractDllName, CallingConvention = CallingConvention.Cdecl, EntryPoint = "TessBaseAPISetVariable")]
Expand Down Expand Up @@ -205,7 +205,26 @@ public static void Initialize()
native = InteropRuntimeImplementer.CreateInstance<ITessApiSignatures>();
}
}


/// <summary>
/// Validates the arguments, converts <paramref name="configFiles"/> to an array and forwards
/// the call to the native <c>BaseApiInit</c> binding.
/// </summary>
/// <param name="handle">Handle of the base API instance; its pointer must not be zero.</param>
/// <param name="datapath">Data path passed through unchanged to the native call.</param>
/// <param name="language">Language passed to the native call; must not be null or empty.</param>
/// <param name="mode">Engine mode value passed through unchanged.</param>
/// <param name="configFiles">Config files to load; <c>null</c> is treated as an empty list.</param>
/// <returns>The value returned by the native call.</returns>
public static int BaseApiInit(HandleRef handle, string datapath, string language, int mode, IEnumerable<string> configFiles)
{
    Guard.Require("handle", handle.Handle != IntPtr.Zero, "Handle for BaseApi, created through BaseApiCreate is required.");
    Guard.RequireNotNullOrEmpty("language", language);

    // Reuse the caller's array when one was supplied; otherwise copy into a new one.
    string[] configFilesArray = configFiles as string[];
    if (configFilesArray == null) {
        var configFilesList = configFiles as List<string>;
        if (configFilesList != null) {
            configFilesArray = configFilesList.ToArray();
        } else if (configFiles != null) {
            configFilesArray = new List<string>(configFiles).ToArray();
        } else {
            configFilesArray = new string[0];
        }
    }

    return Native.BaseApiInit(handle, datapath, language, mode, configFilesArray, configFilesArray.Length);
}

public static int BaseApiSetVariable(HandleRef handle, string name, string value)
{
IntPtr valuePtr = IntPtr.Zero;
Expand Down
Loading

0 comments on commit 2fc50df

Please sign in to comment.