Skip to content

Commit

Permalink
fix(routes/pdf): limit outputEncoding qs params to poppler charsets (#1353)
Browse files Browse the repository at this point in the history

* fix(routes/pdf/txt): limit charsets to poppler supported ones only

* fix(routes/pdf/html): limit charsets to poppler supported ones only

* fix(routes/pdf): add a few extra charsets
  • Loading branch information
Fdawgs authored Mar 24, 2023
1 parent 3eb0d71 commit 92500d6
Show file tree
Hide file tree
Showing 3 changed files with 51 additions and 7 deletions.
6 changes: 1 addition & 5 deletions src/plugins/pdf-to-html/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -150,11 +150,7 @@ async function plugin(server, options) {
* Remove excess title and meta elements left behind by Poppler;
* Poppler appends `-html` to the file name
*/
- const dom = new JSDOM(
- await fs.readFile(`${tempFile}-html.html`, {
- encoding: config.pdfToHtmlOptions.outputEncoding,
- })
- );
+ const dom = new JSDOM(await fs.readFile(`${tempFile}-html.html`));
const titles = dom.window.document.querySelectorAll("title");

// Overwrite title set by Poppler, which reveals directories
Expand Down
26 changes: 25 additions & 1 deletion src/routes/pdf/html/schema.js
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,31 @@ const pdfToHtmlPostSchema = {
S.string()
.default("UTF-8")
.description("Sets the encoding to use for text output")
- .pattern(/^[-\w]+$/m)
+ // Encodings supported by Poppler
+ .enum([
+ "ASCII7",
+ "Big5",
+ "Big5ascii",
+ "EUC-CN",
+ "EUC-JP",
+ "GBK",
+ "ISO-2022-CN",
+ "ISO-2022-JP",
+ "ISO-2022-KR",
+ "ISO-8859-6",
+ "ISO-8859-7",
+ "ISO-8859-8",
+ "ISO-8859-9",
+ "KOI8-R",
+ "Latin1",
+ "Latin2",
+ "Shift-JIS",
+ "TIS-620",
+ "UTF-8",
+ "UTF-16",
+ "Windows-1255",
+ "ZapfDingbats",
+ ])
)
.prop(
"ownerPassword",
Expand Down
26 changes: 25 additions & 1 deletion src/routes/pdf/txt/schema.js
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,31 @@ const pdfToTxtPostSchema = {
S.string()
.default("UTF-8")
.description("Sets the encoding to use for text output")
- .pattern(/^[-\w]+$/m)
+ // Encodings supported by Poppler
+ .enum([
+ "ASCII7",
+ "Big5",
+ "Big5ascii",
+ "EUC-CN",
+ "EUC-JP",
+ "GBK",
+ "ISO-2022-CN",
+ "ISO-2022-JP",
+ "ISO-2022-KR",
+ "ISO-8859-6",
+ "ISO-8859-7",
+ "ISO-8859-8",
+ "ISO-8859-9",
+ "KOI8-R",
+ "Latin1",
+ "Latin2",
+ "Shift-JIS",
+ "TIS-620",
+ "UTF-8",
+ "UTF-16",
+ "Windows-1255",
+ "ZapfDingbats",
+ ])
)
.prop(
"ownerPassword",
Expand Down

0 comments on commit 92500d6

Please sign in to comment.