From 727acaf94b4fe7ef143d1500f629488f48fe00be Mon Sep 17 00:00:00 2001 From: Samuel Audet Date: Tue, 10 Feb 2015 21:59:16 +0900 Subject: [PATCH] * Include missing `ltrresultiterator.h` header file in the presets for Tesseract (issue #36) --- CHANGELOG.md | 1 + .../bytedeco/javacpp/presets/tesseract.java | 2 +- .../java/org/bytedeco/javacpp/tesseract.java | 251 +++++++++++++++++- 3 files changed, 241 insertions(+), 13 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 76b3c62e370..739197aa5d7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,4 +1,5 @@ + * Include missing `ltrresultiterator.h` header file in the presets for Tesseract ([issue #36](https://github.com/bytedeco/javacpp-presets/issues/36)) * Append `@Documented` to annotation types to have them picked up by Javadoc ### December 23, 2014 version 0.10 diff --git a/tesseract/src/main/java/org/bytedeco/javacpp/presets/tesseract.java b/tesseract/src/main/java/org/bytedeco/javacpp/presets/tesseract.java index 28185e6c6a6..c0336d40437 100644 --- a/tesseract/src/main/java/org/bytedeco/javacpp/presets/tesseract.java +++ b/tesseract/src/main/java/org/bytedeco/javacpp/presets/tesseract.java @@ -32,7 +32,7 @@ */ @Properties(target="org.bytedeco.javacpp.tesseract", inherit=lept.class, value={ @Platform(define="TESS_CAPI_INCLUDE_BASEAPI", include={"tesseract/platform.h", "tesseract/apitypes.h", "tesseract/thresholder.h", - "tesseract/unichar.h", "tesseract/host.h", "tesseract/tesscallback.h", "tesseract/publictypes.h", "tesseract/pageiterator.h", + "tesseract/unichar.h", "tesseract/host.h", "tesseract/tesscallback.h", "tesseract/publictypes.h", "tesseract/pageiterator.h", "tesseract/ltrresultiterator.h", "tesseract/resultiterator.h", "tesseract/strngs.h", "tesseract/genericvector.h", "tesseract/baseapi.h", "tesseract/capi.h"}, link="tesseract@.3"), @Platform(value="android", link="tesseract"), @Platform(value="windows", link="libtesseract", preload="libtesseract-3") }) diff --git 
a/tesseract/src/main/java/org/bytedeco/javacpp/tesseract.java b/tesseract/src/main/java/org/bytedeco/javacpp/tesseract.java index 7d05d867ef1..dbf4eeed499 100644 --- a/tesseract/src/main/java/org/bytedeco/javacpp/tesseract.java +++ b/tesseract/src/main/java/org/bytedeco/javacpp/tesseract.java @@ -1871,6 +1871,245 @@ public native void ParagraphInfo(@Cast("tesseract::ParagraphJustification*") int // #endif // TESSERACT_CCMAIN_PAGEITERATOR_H__ +// Parsed from tesseract/ltrresultiterator.h + +/////////////////////////////////////////////////////////////////////// +// File: ltrresultiterator.h +// Description: Iterator for tesseract results in strict left-to-right +// order that avoids using tesseract internal data structures. +// Author: Ray Smith +// Created: Fri Feb 26 11:01:06 PST 2010 +// +// (C) Copyright 2010, Google Inc. +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +/////////////////////////////////////////////////////////////////////// + +// #ifndef TESSERACT_CCMAIN_LTR_RESULT_ITERATOR_H__ +// #define TESSERACT_CCMAIN_LTR_RESULT_ITERATOR_H__ + +// #include "platform.h" +// #include "pageiterator.h" +// #include "unichar.h" + +@Opaque public static class BLOB_CHOICE_IT extends Pointer { + public BLOB_CHOICE_IT() { } + public BLOB_CHOICE_IT(Pointer p) { super(p); } +} +@Opaque public static class WERD_RES extends Pointer { + public WERD_RES() { } + public WERD_RES(Pointer p) { super(p); } +} + +// Class to iterate over tesseract results, providing access to all levels +// of the page hierarchy, without including any tesseract headers or having +// to handle any tesseract structures. +// WARNING! This class points to data held within the TessBaseAPI class, and +// therefore can only be used while the TessBaseAPI class still exists and +// has not been subjected to a call of Init, SetImage, Recognize, Clear, End, +// DetectOS, or anything else that changes the internal PAGE_RES. +// See apitypes.h for the definition of PageIteratorLevel. +// See also base class PageIterator, which contains the bulk of the interface. +// LTRResultIterator adds text-specific methods for access to OCR output. + +@Namespace("tesseract") @NoOffset public static class LTRResultIterator extends PageIterator { + static { Loader.load(); } + public LTRResultIterator() { } + public LTRResultIterator(Pointer p) { super(p); } + + // page_res and tesseract come directly from the BaseAPI. + // The rectangle parameters are copied indirectly from the Thresholder, + // via the BaseAPI. They represent the coordinates of some rectangle in an + // original image (in top-left-origin coordinates) and therefore the top-left + // needs to be added to any output boxes in order to specify coordinates + // in the original image. See TessBaseAPI::SetRectangle. 
+ // The scale and scaled_yres are in case the Thresholder scaled the image + // rectangle prior to thresholding. Any coordinates in tesseract's image + // must be divided by scale before adding (rect_left, rect_top). + // The scaled_yres indicates the effective resolution of the binary image + // that tesseract has been given by the Thresholder. + // After the constructor, Begin has already been called. + public LTRResultIterator(PAGE_RES page_res, Tesseract tesseract, + int scale, int scaled_yres, + int rect_left, int rect_top, + int rect_width, int rect_height) { allocate(page_res, tesseract, scale, scaled_yres, rect_left, rect_top, rect_width, rect_height); } + private native void allocate(PAGE_RES page_res, Tesseract tesseract, + int scale, int scaled_yres, + int rect_left, int rect_top, + int rect_width, int rect_height); + + // LTRResultIterators may be copied! This makes it possible to iterate over + // all the objects at a lower level, while maintaining an iterator to + // objects at a higher level. These constructors DO NOT CALL Begin, so + // iterations will continue from the location of src. + // TODO: For now the copy constructor and operator= only need the base class + // versions, but if new data members are added, don't forget to add them! + + // ============= Moving around within the page ============. + + // See PageIterator. + + // ============= Accessing data ==============. + + // Returns the null terminated UTF-8 encoded text string for the current + // object at the given level. Use delete [] to free after use. + public native @Cast("char*") BytePointer GetUTF8Text(@Cast("tesseract::PageIteratorLevel") int level); + + // Set the string inserted at the end of each text line. "\n" by default. + public native void SetLineSeparator(@Cast("const char*") BytePointer new_line); + public native void SetLineSeparator(String new_line); + + // Set the string inserted at the end of each paragraph. "\n" by default. 
+ public native void SetParagraphSeparator(@Cast("const char*") BytePointer new_para); + public native void SetParagraphSeparator(String new_para); + + // Returns the mean confidence of the current object at the given level. + // The number should be interpreted as a percent probability. (0.0f-100.0f) + public native float Confidence(@Cast("tesseract::PageIteratorLevel") int level); + + // ============= Functions that refer to words only ============. + + // Returns the font attributes of the current word. If iterating at a higher + // level object than words, eg textlines, then this will return the + // attributes of the first word in that textline. + // The actual return value is a string representing a font name. It points + // to an internal table and SHOULD NOT BE DELETED. Lifespan is the same as + // the iterator itself, ie rendered invalid by various members of + // TessBaseAPI, including Init, SetImage, End or deleting the TessBaseAPI. + // Pointsize is returned in printers points (1/72 inch.) 
+ public native @Cast("const char*") BytePointer WordFontAttributes(@Cast("bool*") BoolPointer is_bold, + @Cast("bool*") BoolPointer is_italic, + @Cast("bool*") BoolPointer is_underlined, + @Cast("bool*") BoolPointer is_monospace, + @Cast("bool*") BoolPointer is_serif, + @Cast("bool*") BoolPointer is_smallcaps, + IntPointer pointsize, + IntPointer font_id); + public native String WordFontAttributes(@Cast("bool*") BoolPointer is_bold, + @Cast("bool*") BoolPointer is_italic, + @Cast("bool*") BoolPointer is_underlined, + @Cast("bool*") BoolPointer is_monospace, + @Cast("bool*") BoolPointer is_serif, + @Cast("bool*") BoolPointer is_smallcaps, + IntBuffer pointsize, + IntBuffer font_id); + public native @Cast("const char*") BytePointer WordFontAttributes(@Cast("bool*") BoolPointer is_bold, + @Cast("bool*") BoolPointer is_italic, + @Cast("bool*") BoolPointer is_underlined, + @Cast("bool*") BoolPointer is_monospace, + @Cast("bool*") BoolPointer is_serif, + @Cast("bool*") BoolPointer is_smallcaps, + int[] pointsize, + int[] font_id); + + // Return the name of the language used to recognize this word. + // On error, NULL. Do not delete this pointer. + public native @Cast("const char*") BytePointer WordRecognitionLanguage(); + + // Return the overall directionality of this word. + public native @Cast("StrongScriptDirection") int WordDirection(); + + // Returns true if the current word was found in a dictionary. + public native @Cast("bool") boolean WordIsFromDictionary(); + + // Returns true if the current word is numeric. + public native @Cast("bool") boolean WordIsNumeric(); + + // Returns true if the word contains blamer information. + public native @Cast("bool") boolean HasBlamerInfo(); + + // Returns the pointer to ParamsTrainingBundle stored in the BlamerBundle + // of the current word. + public native @Const Pointer GetParamsTrainingBundle(); + + // Returns a pointer to the string with blamer information for this word. 
+ // Assumes that the word's blamer_bundle is not NULL. + public native @Cast("const char*") BytePointer GetBlamerDebug(); + + // Returns a pointer to the string with misadaption information for this word. + // Assumes that the word's blamer_bundle is not NULL. + public native @Cast("const char*") BytePointer GetBlamerMisadaptionDebug(); + + // Returns true if a truth string was recorded for the current word. + public native @Cast("bool") boolean HasTruthString(); + + // Returns true if the given string is equivalent to the truth string for + // the current word. + public native @Cast("bool") boolean EquivalentToTruth(@Cast("const char*") BytePointer str); + public native @Cast("bool") boolean EquivalentToTruth(String str); + + // Returns a null terminated UTF-8 encoded truth string for the current word. + // Use delete [] to free after use. + public native @Cast("char*") BytePointer WordTruthUTF8Text(); + + // Returns a null terminated UTF-8 encoded normalized OCR string for the + // current word. Use delete [] to free after use. + public native @Cast("char*") BytePointer WordNormedUTF8Text(); + + // Returns a pointer to serialized choice lattice. + // Fills lattice_size with the number of bytes in lattice data. + public native @Cast("const char*") BytePointer WordLattice(IntPointer lattice_size); + public native String WordLattice(IntBuffer lattice_size); + public native @Cast("const char*") BytePointer WordLattice(int[] lattice_size); + + // ============= Functions that refer to symbols only ============. + + // Returns true if the current symbol is a superscript. + // If iterating at a higher level object than symbols, eg words, then + // this will return the attributes of the first symbol in that word. + public native @Cast("bool") boolean SymbolIsSuperscript(); + // Returns true if the current symbol is a subscript. + // If iterating at a higher level object than symbols, eg words, then + // this will return the attributes of the first symbol in that word. 
+ public native @Cast("bool") boolean SymbolIsSubscript(); + // Returns true if the current symbol is a dropcap. + // If iterating at a higher level object than symbols, eg words, then + // this will return the attributes of the first symbol in that word. + public native @Cast("bool") boolean SymbolIsDropcap(); +} + +// Class to iterate over the classifier choices for a single RIL_SYMBOL. +@Namespace("tesseract") @NoOffset public static class ChoiceIterator extends Pointer { + static { Loader.load(); } + public ChoiceIterator() { } + public ChoiceIterator(Pointer p) { super(p); } + + // Construction is from a LTRResultIterator that points to the symbol of + // interest. The ChoiceIterator allows a one-shot iteration over the + // choices for this symbol and after that it is useless. + public ChoiceIterator(@Const @ByRef LTRResultIterator result_it) { allocate(result_it); } + private native void allocate(@Const @ByRef LTRResultIterator result_it); + + // Moves to the next choice for the symbol and returns false if there + // are none left. + public native @Cast("bool") boolean Next(); + + // ============= Accessing data ==============. + + // Returns the null terminated UTF-8 encoded text string for the current + // choice. + // NOTE: Unlike LTRResultIterator::GetUTF8Text, the return points to an + // internal structure and should NOT be delete[]ed to free after use. + public native @Cast("const char*") BytePointer GetUTF8Text(); + + // Returns the confidence of the current choice. + // The number should be interpreted as a percent probability. (0.0f-100.0f) + public native float Confidence(); +} + + // namespace tesseract. 
+ +// #endif // TESSERACT_CCMAIN_LTR_RESULT_ITERATOR_H__ + + // Parsed from tesseract/resultiterator.h /////////////////////////////////////////////////////////////////////// @@ -1899,14 +2138,6 @@ public native void ParagraphInfo(@Cast("tesseract::ParagraphJustification*") int // #include "platform.h" // #include "ltrresultiterator.h" -@Opaque public static class BLOB_CHOICE_IT extends Pointer { - public BLOB_CHOICE_IT() { } - public BLOB_CHOICE_IT(Pointer p) { super(p); } -} -@Opaque public static class WERD_RES extends Pointer { - public WERD_RES() { } - public WERD_RES(Pointer p) { super(p); } -} @Namespace("tesseract") @NoOffset public static class ResultIterator extends LTRResultIterator { static { Loader.load(); } @@ -3009,10 +3240,6 @@ public Dict() { } public EquationDetect() { } public EquationDetect(Pointer p) { super(p); } } -@Namespace("tesseract") @Opaque public static class LTRResultIterator extends Pointer { - public LTRResultIterator() { } - public LTRResultIterator(Pointer p) { super(p); } -} @Namespace("tesseract") @Opaque public static class MutableIterator extends Pointer { public MutableIterator() { } public MutableIterator(Pointer p) { super(p); }