Skip to content

Commit

Permalink
Remove tesseract engine mode selection (#46)
Browse files Browse the repository at this point in the history
Bug: T285262
  • Loading branch information
Daimona authored Jun 22, 2021
1 parent 1d8d923 commit adb07c3
Show file tree
Hide file tree
Showing 5 changed files with 4 additions and 44 deletions.
5 changes: 0 additions & 5 deletions i18n/en.json
Original file line number Diff line number Diff line change
Expand Up @@ -37,11 +37,6 @@
"tesseract-psm-11": "Sparse text. Find as much text as possible in no particular order.",
"tesseract-psm-12": "Sparse text with OSD.",
"tesseract-psm-13": "Raw line. Treat the image as a single text line, bypassing hacks that are Tesseract-specific.",
"tesseract-oem-label": "OCR Engine modes",
"tesseract-oem-0": "Legacy engine only.",
"tesseract-oem-1": "Neural nets LSTM engine only.",
"tesseract-oem-2": "Legacy + LSTM engines.",
"tesseract-oem-3": "Default, based on what is available.",
"tesseract-param-error": "The '$1' option with a value of $2 is not supported by Tesseract. Maximum value: $3",
"tesseract-internal-error": "The tesseract engine returned an internal error."
}
5 changes: 0 additions & 5 deletions i18n/qqq.json
Original file line number Diff line number Diff line change
Expand Up @@ -41,11 +41,6 @@
"tesseract-psm-11": "Form option for Tesseract page segmentation mode.",
"tesseract-psm-12": "Form option for Tesseract page segmentation mode.",
"tesseract-psm-13": "Form option for Tesseract page segmentation mode.",
"tesseract-oem-label": "Form label for the Tesseract OCR engine mode.",
"tesseract-oem-0": "Form option for Tesseract OCR engine mode.",
"tesseract-oem-1": "Form option for Tesseract OCR engine mode.",
"tesseract-oem-2": "Form option for Tesseract OCR engine mode.",
"tesseract-oem-3": "Form option for Tesseract OCR engine mode.",
"tesseract-param-error": "Error message displayed when invalid values for Tesseract options are submitted.\n\nParameters:\n* $1 – the form label for the option. This will either be {{msg-wm|tesseract-psm-label}} or {{msg-wm|tesseract-oem-label}}.\n* $2 – The value that was given.\n* $3 – the maximum value for the option (this will be an integer).",
"tesseract-internal-error": "Generic error message displayed when the tesseract command fails."
}
12 changes: 4 additions & 8 deletions src/Controller/OcrController.php
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,6 @@ class OcrController extends AbstractController
'engine' => self::DEFAULT_ENGINE,
'langs' => [],
'psm' => TesseractEngine::DEFAULT_PSM,
'oem' => TesseractEngine::DEFAULT_OEM,
];

/** @var string */
Expand Down Expand Up @@ -102,16 +101,14 @@ private function setup(): void
*/
private function setEngineOptions(): void
{
// These are always set, even if Tesseract isn't initially chosen as the engine
// because we want these defaults set if the user changes the engine to Tesseract.
// This is always set, even if Tesseract isn't initially chosen as the engine
// because we want the default set if the user changes the engine to Tesseract.
static::$params['psm'] = (int)$this->request->query->get('psm', (string)static::$params['psm']);
static::$params['oem'] = (int)$this->request->query->get('oem', (string)static::$params['oem']);

// Apply the settings to the Engine itself. This is only done when Tesseract is chosen
// because these setters don't exist for the GoogleCloudVisionEngine.
// Apply the Tesseract-specific settings
// NOTE: Intentionally excluding `oem`, see T285262
if (TesseractEngine::getId() === static::$params['engine']) {
$this->engine->setPsm(static::$params['psm']);
$this->engine->setOem(static::$params['oem']);
}
}

Expand Down Expand Up @@ -216,7 +213,6 @@ private function getText(): string
static::$params['engine'],
implode('|', static::$params['langs']),
static::$params['psm'],
static::$params['oem'],
]
));

Expand Down
16 changes: 0 additions & 16 deletions src/Engine/TesseractEngine.php
Original file line number Diff line number Diff line change
Expand Up @@ -23,12 +23,6 @@ class TesseractEngine extends EngineBase
/** @var int Default value for page segmentation mode. */
public const DEFAULT_PSM = 3;

/** @var int Maximum value for OCR engine mode. */
public const MAX_OEM = 3;

/** @var int Default value for OCR engine mode. */
public const DEFAULT_OEM = 3;

/**
* TesseractEngine constructor.
* @param HttpClientInterface $httpClient
Expand Down Expand Up @@ -98,16 +92,6 @@ public function setPsm(int $psm): void
$this->ocr->psm($psm);
}

/**
* Set the OCR engine mode.
* @param int $oem
*/
public function setOem(int $oem): void
{
$this->validateOption('oem', $oem, self::MAX_OEM);
$this->ocr->oem($oem);
}

/**
* Validates the given option.
* @param string $option
Expand Down
10 changes: 0 additions & 10 deletions templates/_tesseract_options.html.twig
Original file line number Diff line number Diff line change
Expand Up @@ -12,14 +12,4 @@
{% endfor %}
</select>
</div>
<div class="form-group">
<label for="oem">{{ msg('tesseract-oem-label') }}</label>
<select name="oem" id="oem" class="form-control">
{# The following messages are used:
# 'tesseract-oem-0', 'tesseract-oem-1', 'tesseract-oem-2', 'tesseract-oem-3' #}
{% for i in 0..3 %}
<option value="{{ i }}" {% if i == oem %}selected="selected"{% endif %}>{{ msg('tesseract-oem-' ~ i) }}</option>
{% endfor %}
</select>
</div>
</fieldset>

0 comments on commit adb07c3

Please sign in to comment.