Skip to content

Commit

Permalink
Got drag and drop of PDF and files working, closes #2
Browse files Browse the repository at this point in the history
  • Loading branch information
simonw committed Feb 26, 2024
1 parent 7789c6a commit 47d7225
Show file tree
Hide file tree
Showing 2 changed files with 82 additions and 2 deletions.
3 changes: 3 additions & 0 deletions datasette_extract/static/extract.css
Original file line number Diff line number Diff line change
@@ -1,4 +1,7 @@
/* Labels inside the extract form: size to their content and add a little space above.
   NOTE(review): width: auto presumably overrides a fixed label width set by a base stylesheet — confirm. */
form.extract-form label {
width: auto;
padding-top: 0.3em;
}
/* Visual cue while something is being dragged over the textarea; the
   drag-over class is added and removed by the drag-and-drop handlers in the
   extract_create_table.html template's script. */
textarea.drag-over {
background-color: pink;
}
81 changes: 79 additions & 2 deletions datasette_extract/templates/extract_create_table.html
Original file line number Diff line number Diff line change
Expand Up @@ -29,12 +29,89 @@ <h1>Extract data and create a new table in {{ database }}</h1>
</label>
</p>
{% endfor %}
<p><label for="id_content">Paste data here, or drag and drop text or PDF files:</label></p>
<p>
<textarea name="content" style="width: 100%; height: 20em;" placeholder="Paste content here"></textarea>
<textarea name="content" id="id_content" style="width: 100%; height: 20em;" placeholder="Paste content here"></textarea>
</p>
<p>
<input type="submit" value="Extract">
</p>
</form>

{% endblock %}
<script type="module">
// pdf.js is loaded from the jsDelivr CDN, pinned to pdfjs-dist 4.0.379.
// NOTE(review): the default import `pdfjs` is never referenced; everything
// below uses `pdfjsLib`, presumably a global that loading this module
// installs as a side effect — confirm before renaming or removing either.
import pdfjs from 'https://cdn.jsdelivr.net/npm/pdfjs-dist@4.0.379/+esm';
// Worker script URL, pinned to the same pdfjs-dist version as the import above.
pdfjsLib.GlobalWorkerOptions.workerSrc = 'https://cdn.jsdelivr.net/npm/pdfjs-dist@4.0.379/build/pdf.worker.mjs';

/**
 * Extract the text of every page of a PDF document.
 *
 * @param {*} pdf - Source handed straight to pdfjsLib.getDocument(); the drop
 *   handler in this file passes a Uint8Array holding the file's bytes.
 * @returns {Promise<string>} The text of all pages. Within a page the `str`
 *   of each text item is joined with a single space; pages are separated by
 *   a blank line.
 */
async function extractText(pdf) {
  const pdfDocument = await pdfjsLib.getDocument(pdf).promise;

  // Request every page up front so they load concurrently; Promise.all
  // returns the results in page order regardless of completion order.
  const pendingPages = Array.from(
    { length: pdfDocument.numPages },
    async (_, index) => {
      // Pages are requested by 1-based page number.
      const page = await pdfDocument.getPage(index + 1);
      return page.getTextContent();
    }
  );

  const pageContents = await Promise.all(pendingPages);

  const pageStrings = [];
  for (const { items } of pageContents) {
    pageStrings.push(items.map(({ str }) => str).join(" "));
  }
  return pageStrings.join("\n\n");
}

// The content textarea (id="id_content"); the drag-and-drop handlers below
// toggle its drag-over class and write the dropped files' text into it.
const textarea = document.getElementById('id_content');

/**
 * dragover listener for the content textarea: cancels the event and turns
 * on the drag-over highlight.
 *
 * @param {DragEvent} event - The dragover event.
 */
function dragOverHandler(event) {
  // Cancelling dragover is what tells the browser this element accepts drops.
  event.preventDefault();
  event.stopPropagation();

  const { classList } = textarea;
  classList.add('drag-over');
}
/**
 * dragleave listener for the content textarea: cancels the event and turns
 * the drag-over highlight back off.
 *
 * @param {DragEvent} event - The dragleave event.
 */
function dragLeaveHandler(event) {
  event.preventDefault();
  event.stopPropagation();

  const { classList } = textarea;
  classList.remove('drag-over');
}
/**
 * drop listener for the content textarea.
 *
 * Every dropped file is converted to text: files whose MIME type is
 * application/pdf go through extractText(), anything else is decoded as
 * text. The results, separated by blank lines, replace the textarea's
 * value. When more than one file is dropped, each file's text is preceded
 * by its filename so the pieces can be told apart.
 *
 * A drop that carries no files (for example a dragged text selection) is
 * not cancelled, so the browser's own handling of the drop still applies
 * and the textarea is not blanked.
 *
 * @param {DragEvent} event - The drop event.
 * @returns {Promise<void>} Resolves once the textarea has been updated, or
 *   once a read/extraction failure has been logged. Never rejects, because
 *   nothing awaits an event listener's return value.
 */
async function dropHandler(event) {
  textarea.classList.remove('drag-over');

  const files = Array.from(event.dataTransfer.files);
  if (files.length === 0) {
    // Nothing for us to read. Returning before preventDefault() leaves the
    // drop to the browser instead of overwriting the textarea with ''.
    return;
  }

  // Must happen synchronously (before the first await) to stop the browser
  // from handling the dropped files itself.
  event.preventDefault();
  event.stopPropagation();

  const includeFilenames = files.length > 1;

  try {
    // All files are read concurrently; Promise.all keeps the drop order.
    const contents = await Promise.all(
      files.map(async (file) => {
        const header = includeFilenames ? `${file.name}\n\n` : '';
        if (file.type === 'application/pdf') {
          const bytes = new Uint8Array(await file.arrayBuffer());
          return header + (await extractText(bytes));
        }
        // Try to read the file as text
        return header + (await file.text());
      })
    );
    textarea.value = contents.join('\n\n');
  } catch (err) {
    // Leave the existing textarea content alone when any file fails, and
    // surface the reason rather than hanging or rejecting unobserved.
    console.error('Failed to read dropped files', err);
  }
}

// Attach the drag-and-drop handlers to the content textarea.
for (const [eventName, listener] of [
  ['dragover', dragOverHandler],
  ['dragleave', dragLeaveHandler],
  ['drop', dropHandler],
]) {
  textarea.addEventListener(eventName, listener);
}

</script>

{% endblock %}

0 comments on commit 47d7225

Please sign in to comment.