-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathpdf_parser.js
46 lines (34 loc) · 1.88 KB
/
pdf_parser.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
import * as pdfjsLib from './pdf.mjs';
// Tell pdf.js which script to load as its worker; the path is given relative
// (NOTE(review): confirm it resolves correctly from the page that hosts this module).
pdfjsLib.GlobalWorkerOptions.workerSrc = './pdf.worker.mjs';
/**
 * Extracts the text of one page as a single space-separated string.
 *
 * @param {object} pdf - Loaded pdf.js document (result of `getDocument(...).promise`).
 * @param {number} pageNumber - 1-based page number.
 * @returns {Promise<string>} The page's text items joined with single spaces.
 */
async function extractPageText(pdf, pageNumber) {
  const page = await pdf.getPage(pageNumber);
  const { items } = await page.getTextContent();
  return items.map((item) => item.str).join(' ');
}

/**
 * Loads a PDF from raw bytes and returns the text of all pages, in page order,
 * joined with single spaces.
 *
 * @param {Uint8Array} data - Raw bytes of the PDF file.
 * @returns {Promise<string>} Concatenated text of every page.
 * @throws Rejects if pdf.js cannot load the document or read a page.
 */
async function extractPdfText(data) {
  const loadingTask = pdfjsLib.getDocument({ data });
  try {
    const pdf = await loadingTask.promise;
    // pdf.js pages are 1-based.
    const pageNumbers = Array.from({ length: pdf.numPages }, (_, i) => i + 1);
    // Pages are fetched in parallel; Promise.all keeps the results in page order.
    const pagesText = await Promise.all(
      pageNumbers.map((pageNumber) => extractPageText(pdf, pageNumber)),
    );
    return pagesText.join(' ');
  } finally {
    // Release the document and its worker so repeated uploads do not leak resources.
    await loadingTask.destroy();
  }
}

// When the user picks a PDF in the #pdf-file input, extract its text, derive
// the relevant fields and fill the form with them.
document.getElementById('pdf-file').addEventListener('change', async (event) => {
  const file = event.target.files[0];
  // Nothing selected, or the selection is not a PDF: ignore it.
  if (!file || file.type !== 'application/pdf') {
    return;
  }

  try {
    const bytes = new Uint8Array(await file.arrayBuffer());
    const totalText = await extractPdfText(bytes);
    console.log('Extracted text from all pages:', totalText);
    const relevantInfo = extractRelevantInfo(totalText);
    fillForm(relevantInfo);
  } catch (err) {
    // Covers file read failures, corrupt/encrypted PDFs and form-filling errors,
    // which previously surfaced only as unhandled promise rejections.
    console.error('Failed to process PDF:', err);
  }
});