Skip to content

Commit

Permalink
Cache OCR transcriptions
Browse files Browse the repository at this point in the history
1 hour by default

Bug: T282837
  • Loading branch information
MusikAnimal authored May 25, 2021
1 parent 2fe43e7 commit 5754d62
Show file tree
Hide file tree
Showing 6 changed files with 54 additions and 11 deletions.
2 changes: 2 additions & 0 deletions .env
Original file line number Diff line number Diff line change
Expand Up @@ -31,3 +31,5 @@ APP_LOG_SUBJECT="[Wikimedia OCR]"

# Comma-separated list of the host names (without protocols) of where images are stored.
APP_IMAGE_HOSTS=upload.wikimedia.org,upload.wikimedia.beta.wmflabs.org

APP_CACHE_TTL=3600 # 1 hour
1 change: 1 addition & 0 deletions composer.json
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
"ext-json": "*",
"google/cloud-vision": "^1.3",
"sensio/framework-extra-bundle": "^6.1",
"symfony/cache": "5.2.*",
"symfony/console": "5.2.*",
"symfony/dotenv": "5.2.*",
"symfony/flex": "^1.3.1",
Expand Down
15 changes: 9 additions & 6 deletions composer.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions config/services.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
# Put parameters here that don't need to change on each machine where the app is deployed
# https://symfony.com/doc/current/best_practices/configuration.html#application-related-configuration
parameters:
cache_ttl: '%env(APP_CACHE_TTL)%'

services:
# default configuration for services in *this* file
Expand Down
42 changes: 38 additions & 4 deletions src/Controller/OcrController.php
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@
use Symfony\Component\HttpFoundation\RequestStack;
use Symfony\Component\HttpFoundation\Response;
use Symfony\Component\Routing\Annotation\Route;
use Symfony\Contracts\Cache\CacheInterface;
use Symfony\Contracts\Cache\ItemInterface;

class OcrController extends AbstractController
{
Expand All @@ -22,6 +24,9 @@ class OcrController extends AbstractController
/** @var TesseractEngine|GoogleCloudVisionEngine */
protected $engine;

/** @var CacheInterface */
protected $cache;

/**
* The output params for the view or API response.
* This also serves as where you define the defaults.
Expand All @@ -43,11 +48,17 @@ class OcrController extends AbstractController
* @param RequestStack $requestStack
* @param Intuition $intuition
* @param EngineFactory $engineFactory
* @param CacheInterface $cache
*/
public function __construct(RequestStack $requestStack, Intuition $intuition, EngineFactory $engineFactory)
{
public function __construct(
RequestStack $requestStack,
Intuition $intuition,
EngineFactory $engineFactory,
CacheInterface $cache
) {
// Dependencies.
$this->intuition = $intuition;
$this->cache = $cache;

$request = $requestStack->getCurrentRequest();

Expand Down Expand Up @@ -118,7 +129,7 @@ public function homeAction(): Response
static::$params['image_hosts'] = $this->intuition->listToText(static::$params['image_hosts']);

if ($this->imageUrl) {
static::$params['text'] = $this->engine->getText($this->imageUrl, static::$params['langs']);
static::$params['text'] = $this->getText();
}

return $this->render('output.html.twig', static::$params);
Expand All @@ -132,7 +143,7 @@ public function homeAction(): Response
public function apiAction(): JsonResponse
{
return $this->getApiResponse(array_merge(static::$params, [
'text' => $this->engine->getText($this->imageUrl, static::$params['langs']),
'text' => $this->getText(),
]));
}

Expand Down Expand Up @@ -165,4 +176,27 @@ private function getApiResponse(array $params): JsonResponse
$response->setData($params);
return $response;
}

/**
 * Fetch the transcription for the current image, going through the cache.
 *
 * The cache key is an MD5 hash of the image URL combined with the engine,
 * languages, PSM and OEM values held in static::$params, so changing any of
 * those options yields a separate cache entry. Language order is preserved
 * in the key. Entries expire after the number of seconds given by the
 * 'cache_ttl' container parameter.
 *
 * @return string
 */
private function getText(): string
{
    // Everything that distinguishes one transcription request from another.
    $keyParts = [
        $this->imageUrl,
        static::$params['engine'],
        implode('|', static::$params['langs']),
        static::$params['psm'],
        static::$params['oem'],
    ];
    $cacheKey = md5(implode('|', $keyParts));

    // Passed to the cache as the value generator: sets the item's lifetime
    // and asks the engine for the transcription.
    $fetchTranscription = function (ItemInterface $cacheItem) {
        $ttl = (int)$this->getParameter('cache_ttl');
        $cacheItem->expiresAfter($ttl);

        return $this->engine->getText($this->imageUrl, static::$params['langs']);
    };

    return $this->cache->get($cacheKey, $fetchTranscription);
}
}
4 changes: 3 additions & 1 deletion tests/Controller/OcrControllerTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
use App\Engine\TesseractEngine;
use Krinkle\Intuition\Intuition;
use PHPUnit\Framework\TestCase;
use Symfony\Component\Cache\Adapter\FilesystemAdapter;
use Symfony\Component\HttpClient\MockHttpClient;
use Symfony\Component\HttpFoundation\Request;
use Symfony\Component\HttpFoundation\RequestStack;
Expand All @@ -32,7 +33,8 @@ public function testGetLang(array $getParams, array $expectedLangs): void
$controller = new OcrController(
$requestStack,
$intuition,
new EngineFactory($gcv, new TesseractEngine(new MockHttpClient(), $intuition, new TesseractOCR()))
new EngineFactory($gcv, new TesseractEngine(new MockHttpClient(), $intuition, new TesseractOCR())),
new FilesystemAdapter()
);
$this->assertSame($expectedLangs, $controller->getLangs($request));
}
Expand Down

0 comments on commit 5754d62

Please sign in to comment.