Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve loadImage performance by between 20% and 100% #726

Closed
wants to merge 8 commits into from
51 changes: 51 additions & 0 deletions examples/browser/canvas.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
<html>

<head>
  <script src="/dist/tesseract.dev.js"></script>
  <style>
    .column {
      float: left;
      width: 20%;
      padding: 5px;
    }
  </style>
</head>

<body>
  <img id="img">
  <canvas id="canvas"></canvas>

  <script defer>
    // Demo: run Tesseract.js recognition on a <canvas> that mirrors an <img>.
    const recognize = async () => {
      console.log('starting');
      const worker = await Tesseract.createWorker({
        workerPath: '/dist/worker.dev.js',
      });
      await worker.loadLanguage('eng');
      // Initialize exactly once, with the language loaded above.
      // (A second, argument-less initialize() call would re-initialize
      // the engine without 'eng'.)
      await worker.initialize('eng');
      console.log('worker initialized');

      const canvas = document.getElementById('canvas');
      const img = document.getElementById('img');

      // Attach the onload handler BEFORE assigning src: a cached image
      // may fire its load event immediately after the assignment, which
      // would otherwise be missed.
      img.onload = async () => {
        console.log('image loaded');
        canvas.width = img.width;
        canvas.height = img.height;
        canvas.getContext('2d').drawImage(img, 0, 0, img.width, img.height);
        console.time('recognize from canvas');
        const ret = await worker.recognize(canvas);
        console.timeEnd('recognize from canvas');
        console.log(ret.data.text);
      };
      img.src = '../data/meditations.jpg';
    };
    recognize();
  </script>
</body>

</html>
6 changes: 4 additions & 2 deletions src/index.d.ts
Original file line number Diff line number Diff line change
Expand Up @@ -144,8 +144,10 @@ declare namespace Tesseract {
GREY = 1,
BINARY = 2
}
type ImageLike = string | HTMLImageElement | HTMLCanvasElement | HTMLVideoElement
| CanvasRenderingContext2D | File | Blob | ImageData | Buffer;
type ImageLike = string | HTMLImageElement | HTMLVideoElement
| HTMLCanvasElement | OffscreenCanvas | ImageData
| CanvasRenderingContext2D | OffscreenCanvasRenderingContext2D
| File | Blob | Buffer;
interface Block {
paragraphs: Paragraph[];
text: string;
Expand Down
65 changes: 58 additions & 7 deletions src/worker/browser/loadImage.js
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,51 @@ const readFromBlobOrFile = (blob) => (
})
);

// Strip the alpha channel from an interleaved RGBA byte array,
// returning a new Uint8Array of packed RGB triples.
const rgba2rgb = (rgbaArray) => {
  const pixelCount = rgbaArray.length / 4;
  const rgbArray = new Uint8Array(pixelCount * 3);

  for (let px = 0; px < pixelCount; px += 1) {
    const src = px * 4;
    const dst = px * 3;
    rgbArray[dst] = rgbaArray[src];
    rgbArray[dst + 1] = rgbaArray[src + 1];
    rgbArray[dst + 2] = rgbaArray[src + 2];
  }

  return rgbArray;
};

/**
 * imageDataToPBM
 *
 * Serializes an ImageData-like object ({ width, height, data }) into a
 * binary Netpbm buffer: a textual "P7" header followed by the raw RGB
 * bytes (the alpha channel is discarded). NOTE(review): despite the
 * name, "P7" is the PAM format rather than PBM; Leptonica's pnmio
 * reader accepts it (see references below).
 *
 * @name imageDataToPBM
 * @function
 * @access private
 *
 * @see https://github.com/DanBloomberg/leptonica/blob/master/src/pnmio.c
 * @see https://netpbm.sourceforge.net/doc/pam.html
 */
const imageDataToPBM = (imageData) => {
  const { width, height, data } = imageData;

  // Drop the alpha channel in place of calling a helper:
  // RGBA (4 bytes per pixel) -> RGB (3 bytes per pixel).
  const pixelCount = data.length / 4;
  const rgb = new Uint8Array(pixelCount * 3);
  for (let px = 0; px < pixelCount; px += 1) {
    rgb[px * 3] = data[px * 4];
    rgb[px * 3 + 1] = data[px * 4 + 1];
    rgb[px * 3 + 2] = data[px * 4 + 2];
  }

  // PAM header: DEPTH 3 channels per pixel, each channel in 0-255.
  const header = [
    'P7',
    `WIDTH ${width}`,
    `HEIGHT ${height}`,
    'DEPTH 3',
    'MAXVAL 255',
    'TUPLTYPE RGB',
    'ENDHDR',
    '', // yields the trailing newline after ENDHDR
  ].join('\n');

  const headerBytes = new TextEncoder().encode(header);
  const binary = new Uint8Array(headerBytes.length + rgb.length);
  binary.set(headerBytes);
  binary.set(rgb, headerBytes.length);
  return binary;
};

/**
* loadImage
*
Expand All @@ -28,6 +73,7 @@ const readFromBlobOrFile = (blob) => (
* @access private
*/
const loadImage = async (image) => {
console.time('loadImage');
let data = image;
if (typeof image === 'undefined') {
return 'undefined';
Expand All @@ -43,25 +89,30 @@ const loadImage = async (image) => {
const resp = await fetch(resolveURL(image));
data = await resp.arrayBuffer();
}
} else if (image instanceof HTMLElement) {
} else if (ImageData && image instanceof ImageData) {
data = imageDataToPBM(image);
} else if (
(CanvasRenderingContext2D && image instanceof CanvasRenderingContext2D)
|| (OffscreenCanvasRenderingContext2D && image instanceof OffscreenCanvasRenderingContext2D)) {
const imageData = image.getImageData(0, 0, image.canvas.width, image.canvas.height);
data = await loadImage(imageData);
} else if (OffscreenCanvas && image instanceof OffscreenCanvas) {
data = await loadImage(image.getContext('2d'));
} else if (HTMLElement && image instanceof HTMLElement) {
if (image.tagName === 'IMG') {
data = await loadImage(image.src);
}
if (image.tagName === 'VIDEO') {
data = await loadImage(image.poster);
}
if (image.tagName === 'CANVAS') {
await new Promise((resolve) => {
image.toBlob(async (blob) => {
data = await readFromBlobOrFile(blob);
resolve();
});
});
data = await loadImage(image.getContext('2d'));
}
} else if (image instanceof File || image instanceof Blob) {
data = await readFromBlobOrFile(image);
}

console.timeEnd('loadImage');
return new Uint8Array(data);
};

Expand Down
32 changes: 32 additions & 0 deletions tests/recognize.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -194,4 +194,36 @@ describe('recognize()', () => {
}).timeout(TIMEOUT)
));
});

// Verifies the OffscreenCanvas input path of recognize(): an image is
// loaded into an <img>, painted onto an OffscreenCanvas, and recognized.
// (The original description said "video", but this test reads images.)
(IS_BROWSER ? describe : describe.skip)('should read image from OffscreenCanvas (browser only)', () => {
  // img tag is unable to render pbm, so let's skip it.
  const formats = FORMATS.filter(f => f !== 'pbm');
  let offscreenCanvas = null;
  let imageDOM = null;
  // Index into `formats`; incremented per test, so this relies on the
  // generated `it` cases running in declaration order.
  let idx = 0;
  beforeEach((done) => {
    imageDOM = document.createElement('img');
    imageDOM.setAttribute('crossOrigin', 'Anonymous');
    // Handler is attached before src is set so a fast/cached load
    // cannot be missed.
    imageDOM.onload = () => {
      // Mirror the loaded <img> onto an OffscreenCanvas so recognize()
      // exercises the OffscreenCanvas branch of loadImage.
      offscreenCanvas = new OffscreenCanvas(imageDOM.width, imageDOM.height);
      offscreenCanvas.getContext('2d').drawImage(imageDOM, 0, 0);
      done();
    };
    imageDOM.setAttribute('src', `${IMAGE_PATH}/simple.${formats[idx]}`);
    idx += 1;
  });

  afterEach(() => {
    offscreenCanvas = null;
    imageDOM.remove();
  });

  formats.forEach(format => (
    it(`support ${format} format`, async () => {
      await worker.initialize('eng');
      const { data: { text } } = await worker.recognize(offscreenCanvas);
      expect(text).to.be(SIMPLE_TEXT);
    }).timeout(TIMEOUT)
  ));
});
});