Skip to content

Commit

Permalink
Combined drop zone, refs #16
Browse files Browse the repository at this point in the history
  • Loading branch information
simonw committed Mar 7, 2024
1 parent eb8bb20 commit bdf356c
Show file tree
Hide file tree
Showing 5 changed files with 152 additions and 120 deletions.
143 changes: 143 additions & 0 deletions datasette_extract/templates/_extract_drop_handler.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,143 @@
<script type="module">
// Form controls shared by the handlers in this module: the content textarea
// (also the drop zone), the image file input, and the "Processing..." notice.
const [textarea, imageInput, processingMessage] = [
  "id_content",
  "id_image",
  "processing_message",
].map((elementId) => document.getElementById(elementId));

/**
 * Extract the text of every page of a PDF using the bundled PDF.js build.
 *
 * @param {Uint8Array} pdf - PDF data, passed straight to `getDocument()`.
 * @returns {Promise<string>} Text of all pages. Text items within a page are
 *   joined with a single space; pages are separated by a blank line.
 */
async function extractText(pdf) {
  const pdfjsModule = await import(
    "/-/static-plugins/datasette_extract/pdfjs-dist-4-0-379.js"
  );
  // Resolve the library explicitly rather than relying on a bare, undeclared
  // `pdfjsLib` identifier. NOTE(review): the bundle presumably registers
  // `globalThis.pdfjsLib` when loaded - confirm. If it does not, fall back to
  // the module's default export, then to its named exports.
  const pdfjsLib = globalThis.pdfjsLib ?? pdfjsModule.default ?? pdfjsModule;
  pdfjsLib.GlobalWorkerOptions.workerSrc =
    "/-/static-plugins/datasette_extract/pdf.worker.mjs";

  const pdfDocument = await pdfjsLib.getDocument(pdf).promise;

  // PDF.js page numbers are 1-based. Request every page up front so the
  // text-content lookups run concurrently; Promise.all keeps page order.
  const pageNumbers = Array.from(
    { length: pdfDocument.numPages },
    (_, index) => index + 1,
  );
  const pageTexts = await Promise.all(
    pageNumbers.map(async (pageNumber) => {
      const page = await pdfDocument.getPage(pageNumber);
      return page.getTextContent();
    }),
  );

  return pageTexts
    .map((content) => content.items.map((item) => item.str).join(" "))
    .join("\n\n");
}

/**
 * Convert a HEIC image into a JPEG file using the bundled heic2any library.
 *
 * The library is loaded on demand with a dynamic import.
 *
 * @param {File} file - The HEIC image to convert.
 * @returns {Promise<File|null>} A JPEG `File` named
 *   `converted-<original name>.jpeg`, or `null` when the library could not be
 *   loaded or the conversion failed (the error is logged to the console).
 */
async function convertHeicToJpeg(file) {
  try {
    // The import lives inside the try block so that a failure to load the
    // library is handled exactly like a failed conversion: log, return null.
    // Callers only check for null and do not expect a rejection.
    const { default: heic2any } = await import(
      "/-/static-plugins/datasette_extract/heic2any-0.0.4.min.js"
    );
    const blob = await heic2any({
      blob: file,
      toType: "image/jpeg",
      quality: 0.8,
    });
    return new File([blob], `converted-${file.name}.jpeg`, {
      type: "image/jpeg",
    });
  } catch (error) {
    console.error("Conversion error:", error);
    return null;
  }
}

/**
 * `dragover` listener for the drop zone.
 *
 * Cancels the event's default action (per the HTML drag-and-drop model this
 * is what allows a subsequent drop), stops it bubbling, and switches the
 * "drag-over" highlight class on for the textarea.
 *
 * @param {DragEvent} event - The dragover event.
 */
function dragOverHandler(event) {
  event.preventDefault();
  event.stopPropagation();
  // A forced toggle only ever adds the class, it never removes it.
  textarea.classList.toggle("drag-over", true);
}

/**
 * `dragleave` listener for the drop zone.
 *
 * Cancels the event's default action, stops it bubbling, and switches the
 * "drag-over" highlight class off for the textarea.
 *
 * @param {DragEvent} event - The dragleave event.
 */
function dragLeaveHandler(event) {
  event.preventDefault();
  event.stopPropagation();
  // A forced toggle with `false` only ever removes the class.
  textarea.classList.toggle("drag-over", false);
}

/**
 * Read a file as text with a FileReader.
 *
 * @param {File} file - The file to read.
 * @returns {Promise<string>} Resolves with the reader's result; rejects with
 *   `Error("Failed to read file")` if the reader reports an error.
 */
function readFileAsText(file) {
  return new Promise((resolve, reject) => {
    const reader = new FileReader();
    reader.onload = (e) => {
      resolve(e.target.result);
    };
    reader.onerror = () => {
      reject(new Error("Failed to read file"));
    };
    reader.readAsText(file);
  });
}

/**
 * `drop` listener for the textarea drop zone.
 *
 * Dropped files are routed by MIME type:
 *   - `image/*` files are placed in the image file input, with `image/heic`
 *     files converted to JPEG first (failed conversions are dropped);
 *   - `application/pdf` files have their text extracted, and that text
 *     REPLACES the textarea content;
 *   - everything else is read as text and APPENDED to the textarea.
 *
 * The "Processing..." message is shown while work is in progress and is
 * always hidden again afterwards, including when a step fails. Failures are
 * logged to the console and abort the remaining steps.
 *
 * @param {DragEvent} event - The drop event.
 * @returns {Promise<void>}
 */
async function dropHandler(event) {
  event.preventDefault();
  event.stopPropagation();
  textarea.classList.remove("drag-over");

  // Partition the dropped files in a single pass.
  const imageFiles = [];
  const pdfFiles = [];
  const otherFiles = [];
  for (const file of Array.from(event.dataTransfer.files)) {
    if (file.type.startsWith("image/")) {
      imageFiles.push(file);
    } else if (file.type === "application/pdf") {
      pdfFiles.push(file);
    } else {
      otherFiles.push(file);
    }
  }

  processingMessage.style.display = "block";
  try {
    if (imageFiles.length > 0) {
      const convertedImages = await Promise.all(
        imageFiles.map((file) =>
          file.type === "image/heic" ? convertHeicToJpeg(file) : file,
        ),
      );
      // convertHeicToJpeg signals failure with null; skip those files.
      const validImages = convertedImages.filter((file) => file !== null);
      if (validImages.length > 0) {
        // A file input's `files` can only be assigned a FileList, so build
        // one through a DataTransfer.
        const dataTransfer = new DataTransfer();
        validImages.forEach((file) => dataTransfer.items.add(file));
        imageInput.files = dataTransfer.files;
      }
    }

    if (pdfFiles.length > 0) {
      const pdfTexts = await Promise.all(
        pdfFiles.map(async (file) =>
          extractText(new Uint8Array(await file.arrayBuffer())),
        ),
      );
      textarea.value = pdfTexts.join("\n\n");
    }

    if (otherFiles.length > 0) {
      const otherContents = await Promise.all(otherFiles.map(readFileAsText));
      textarea.value +=
        (textarea.value ? "\n\n" : "") + otherContents.join("\n\n");
    }
  } catch (error) {
    // Without this, a corrupt PDF or unreadable file would surface only as
    // an unhandled promise rejection from the event listener.
    console.error("Failed to process dropped files:", error);
  } finally {
    // Always hide the notice so a failure cannot leave it stuck on screen.
    processingMessage.style.display = "none";
  }
}

// When a HEIC image is chosen through the file input, replace the selection
// with a JPEG conversion of it. Only the first selected file is examined.
imageInput.addEventListener("change", async (event) => {
  const file = event.target.files[0];
  if (!file || file.type !== "image/heic") {
    return;
  }

  processingMessage.style.display = "block";
  try {
    const convertedFile = await convertHeicToJpeg(file);
    // A null result means the conversion failed; keep the original selection.
    if (convertedFile) {
      // A file input's `files` can only be assigned a FileList, so build one
      // through a DataTransfer.
      const dataTransfer = new DataTransfer();
      dataTransfer.items.add(convertedFile);
      imageInput.files = dataTransfer.files;
    }
  } catch (error) {
    console.error("Failed to replace HEIC image:", error);
  } finally {
    // Always hide the notice so a failure cannot leave it stuck on screen.
    processingMessage.style.display = "none";
  }
});

// Register the textarea as the drop zone: one listener per drag event type.
const dropZoneListeners = [
  ["dragover", dragOverHandler],
  ["dragleave", dragLeaveHandler],
  ["drop", dropHandler],
];
for (const [eventName, listener] of dropZoneListeners) {
  textarea.addEventListener(eventName, listener);
}
</script>
36 changes: 0 additions & 36 deletions datasette_extract/templates/_extract_heic.html

This file was deleted.

75 changes: 0 additions & 75 deletions datasette_extract/templates/_extract_pdf_drop.html

This file was deleted.

9 changes: 5 additions & 4 deletions datasette_extract/templates/extract_create_table.html
Original file line number Diff line number Diff line change
Expand Up @@ -33,14 +33,15 @@ <h1>Extract data and create a new table in {{ database }}</h1>
<p>
<textarea name="content" id="id_content" style="width: 100%; height: 20em;" placeholder="Paste content here"></textarea>
</p>
<p><label>Or upload an image: <input type="file" id="id_image" name="image"></label></p>
<p id="processing_message" style="display: none;">Processing...</p>
<p>
<label>Or upload an image: <input type="file" id="id_image" name="image"></label>
</p>
<p>
<input type="submit" value="Extract">
</p>
</form>

{% include "_extract_heic.html" %}

{% include "_extract_pdf_drop.html" %}
{% include "_extract_drop_handler.html" %}

{% endblock %}
9 changes: 4 additions & 5 deletions datasette_extract/templates/extract_to_table.html
Original file line number Diff line number Diff line change
Expand Up @@ -40,15 +40,14 @@ <h1>Extract data into {{ database }} / {{ table }}</h1>
<p>
<textarea name="content" id="id_content" style="width: 100%; height: 20em;" placeholder="Paste content here"></textarea>
</p>
<p><label>Or upload an image: <input type="file" id="id_image" name="image"></label></p>
<p id="processing_message" style="display: none;">Processing...</p>
<p>
<input type="submit" value="Extract">
<label>Or upload an image: <input type="file" id="id_image" name="image"></label>
</p>
<p><input type="submit" value="Extract"></p>
</form>

{% include "_extract_heic.html" %}

{% include "_extract_pdf_drop.html" %}
{% include "_extract_drop_handler.html" %}

{% if previous_runs %}
<h2>Previous extraction tasks</h2>
Expand Down

0 comments on commit bdf356c

Please sign in to comment.