Skip to content

Commit

Permalink
Add ability to analyse layout without running recognition (#770)
Browse files: browse the repository at this point in the history
  • Loading branch information
Balearica authored May 29, 2023
1 parent 15fdd9e commit 6437f28
Show file tree
Hide file tree
Showing 4 changed files with 33 additions and 22 deletions.
1 change: 1 addition & 0 deletions src/index.d.ts
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,7 @@ declare namespace Tesseract {
interface OutputFormats {
text: boolean;
blocks: boolean;
layoutBlocks: boolean;
hocr: boolean;
tsv: boolean;
box: boolean;
Expand Down
2 changes: 2 additions & 0 deletions src/worker-script/constants/defaultOutput.js
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
module.exports = {
text: true,
blocks: true,
layoutBlocks: false,
hocr: true,
tsv: true,
box: false,
Expand All @@ -14,4 +15,5 @@ module.exports = {
imageColor: false,
imageGrey: false,
imageBinary: false,
debug: false,
};
14 changes: 9 additions & 5 deletions src/worker-script/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -255,7 +255,7 @@ const processOutput = (output) => {
if (params.tessjs_create_tsv === '1') workingOutput.tsv = true;
if (params.tessjs_create_unlv === '1') workingOutput.unlv = true;

const nonRecOutputs = ['imageColor', 'imageGrey', 'imageBinary'];
const nonRecOutputs = ['imageColor', 'imageGrey', 'imageBinary', 'layoutBlocks'];
let recOutputCount = 0;
for (const prop of Object.keys(output)) {
workingOutput[prop] = output[prop];
Expand All @@ -267,7 +267,8 @@ const processOutput = (output) => {
}
}
}
return { workingOutput, recOutputCount };
const skipRecognition = recOutputCount === 0;
return { workingOutput, skipRecognition };
};

// List of options for Tesseract.js (rather than passed through to Tesseract),
Expand Down Expand Up @@ -302,7 +303,7 @@ const recognize = async ({
}
}

const { workingOutput, recOutputCount } = processOutput(output);
const { workingOutput, skipRecognition } = processOutput(output);

// When the auto-rotate option is True, setImage is called with no angle,
// then the angle is calculated by Tesseract and then setImage is re-called.
Expand Down Expand Up @@ -352,14 +353,17 @@ const recognize = async ({
api.SetRectangle(rec.left, rec.top, rec.width, rec.height);
}

if (recOutputCount > 0) {
if (!skipRecognition) {
api.Recognize(null);
} else {
if (output.layoutBlocks) {
api.AnalyseLayout();
}
log('Skipping recognition: all output options requiring recognition are disabled.');
}
const { pdfTitle } = options;
const { pdfTextOnly } = options;
const result = dump(TessModule, api, workingOutput, { pdfTitle, pdfTextOnly });
const result = dump(TessModule, api, workingOutput, { pdfTitle, pdfTextOnly, skipRecognition });
result.rotateRadians = rotateRadiansFinal;

if (output.debug) TessModule.FS.unlink('/debugInternal.txt');
Expand Down
38 changes: 21 additions & 17 deletions src/worker-script/utils/dump.js
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,10 @@ module.exports = (TessModule, api, output, options) => {
return TessModule.FS.readFile('/tesseract-ocr.pdf');
};

if (output.blocks) {
// If output.layoutBlocks is true and options.skipRecognition is true,
// the user wants layout data but text recognition has not been run.
// In this case, fields that require text recognition (`text` and
// `confidence`) are set to null instead of being read from the iterator.
if (output.blocks || output.layoutBlocks) {
ri.Begin();
do {
if (ri.IsAtBeginningOf(RIL_BLOCK)) {
Expand All @@ -102,8 +105,8 @@ module.exports = (TessModule, api, output, options) => {

block = {
paragraphs: [],
text: ri.GetUTF8Text(RIL_BLOCK),
confidence: ri.Confidence(RIL_BLOCK),
text: !options.skipRecognition ? ri.GetUTF8Text(RIL_BLOCK) : null,
confidence: !options.skipRecognition ? ri.Confidence(RIL_BLOCK) : null,
baseline: ri.getBaseline(RIL_BLOCK),
bbox: ri.getBoundingBox(RIL_BLOCK),
blocktype: enumToString(ri.BlockType(), 'PT'),
Expand All @@ -114,8 +117,8 @@ module.exports = (TessModule, api, output, options) => {
if (ri.IsAtBeginningOf(RIL_PARA)) {
para = {
lines: [],
text: ri.GetUTF8Text(RIL_PARA),
confidence: ri.Confidence(RIL_PARA),
text: !options.skipRecognition ? ri.GetUTF8Text(RIL_PARA) : null,
confidence: !options.skipRecognition ? ri.Confidence(RIL_PARA) : null,
baseline: ri.getBaseline(RIL_PARA),
bbox: ri.getBoundingBox(RIL_PARA),
is_ltr: !!ri.ParagraphIsLtr(),
Expand All @@ -125,8 +128,8 @@ module.exports = (TessModule, api, output, options) => {
if (ri.IsAtBeginningOf(RIL_TEXTLINE)) {
textline = {
words: [],
text: ri.GetUTF8Text(RIL_TEXTLINE),
confidence: ri.Confidence(RIL_TEXTLINE),
text: !options.skipRecognition ? ri.GetUTF8Text(RIL_TEXTLINE) : null,
confidence: !options.skipRecognition ? ri.Confidence(RIL_TEXTLINE) : null,
baseline: ri.getBaseline(RIL_TEXTLINE),
bbox: ri.getBoundingBox(RIL_TEXTLINE),
};
Expand All @@ -139,8 +142,8 @@ module.exports = (TessModule, api, output, options) => {
symbols: [],
choices: [],

text: ri.GetUTF8Text(RIL_WORD),
confidence: ri.Confidence(RIL_WORD),
text: !options.skipRecognition ? ri.GetUTF8Text(RIL_WORD) : null,
confidence: !options.skipRecognition ? ri.Confidence(RIL_WORD) : null,
baseline: ri.getBaseline(RIL_WORD),
bbox: ri.getBoundingBox(RIL_WORD),

Expand All @@ -162,8 +165,8 @@ module.exports = (TessModule, api, output, options) => {
const wc = new TessModule.WordChoiceIterator(ri);
do {
word.choices.push({
text: wc.GetUTF8Text(),
confidence: wc.Confidence(),
text: !options.skipRecognition ? wc.GetUTF8Text() : null,
confidence: !options.skipRecognition ? wc.Confidence() : null,
});
} while (wc.Next());
TessModule.destroy(wc);
Expand All @@ -179,8 +182,8 @@ module.exports = (TessModule, api, output, options) => {
symbol = {
choices: [],
image: null,
text: ri.GetUTF8Text(RIL_SYMBOL),
confidence: ri.Confidence(RIL_SYMBOL),
text: !options.skipRecognition ? ri.GetUTF8Text(RIL_SYMBOL) : null,
confidence: !options.skipRecognition ? ri.Confidence(RIL_SYMBOL) : null,
baseline: ri.getBaseline(RIL_SYMBOL),
bbox: ri.getBoundingBox(RIL_SYMBOL),
is_superscript: !!ri.SymbolIsSuperscript(),
Expand All @@ -191,8 +194,8 @@ module.exports = (TessModule, api, output, options) => {
const ci = new TessModule.ChoiceIterator(ri);
do {
symbol.choices.push({
text: ci.GetUTF8Text(),
confidence: ci.Confidence(),
text: !options.skipRecognition ? ci.GetUTF8Text() : null,
confidence: !options.skipRecognition ? ci.Confidence() : null,
});
} while (ci.Next());
// TessModule.destroy(i);
Expand All @@ -212,8 +215,9 @@ module.exports = (TessModule, api, output, options) => {
imageColor: output.imageColor ? getImage(imageType.COLOR) : null,
imageGrey: output.imageGrey ? getImage(imageType.GREY) : null,
imageBinary: output.imageBinary ? getImage(imageType.BINARY) : null,
confidence: api.MeanTextConf(),
blocks: output.blocks ? blocks : null,
confidence: !options.skipRecognition ? api.MeanTextConf() : null,
blocks: output.blocks && !options.skipRecognition ? blocks : null,
layoutBlocks: output.layoutBlocks && options.skipRecognition ? blocks : null,
psm: enumToString(api.GetPageSegMode(), 'PSM'),
oem: enumToString(api.oem(), 'OEM'),
version: api.Version(),
Expand Down

0 comments on commit 6437f28

Please sign in to comment.