-
Notifications
You must be signed in to change notification settings - Fork 0
/
readability_cli.js
63 lines (47 loc) · 1.76 KB
/
readability_cli.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
import { parse } from "https://deno.land/std@0.83.0/flags/mod.ts";
import { readFileSync, writeFile } from "node:fs";
import { Readability } from "./readability";
// import {JSDOM} from "npm:jsdom";
import { DOMParser } from "https://deno.land/x/deno_dom/deno-dom-wasm.ts"
// Command-line options: --sourcefile is the HTML file to read and
// --outputfile is the path the ".html" / ".txt" results are derived from.
const { sourcefile, outputfile } = parse(Deno.args);

// Fail early with a clear message. A missing flag is undefined, a bare flag
// parses as `true`, and a numeric value parses as a number (which
// readFileSync would treat as a file descriptor), so only strings are valid.
if (typeof sourcefile !== "string" || typeof outputfile !== "string") {
  console.error(
    "Both --sourcefile <path> and --outputfile <path> must be provided.",
  );
  Deno.exit(1);
}
/**
 * Synchronously loads a UTF-8 text file and strips leading and trailing
 * whitespace from its contents.
 *
 * @param {string} filePath - Path of the file to read.
 * @returns {string} The trimmed file contents.
 */
function readFile(filePath) {
  const contents = readFileSync(filePath, "utf-8");
  return contents.trim();
}
// The complete HTML of the input file, held in memory as a single string.
const source = readFile(sourcefile);
/**
 * Parses an HTML string into a document and strips every comment node from
 * it before handing it back.
 *
 * Parsing goes through the imported DOMParser; a JSDOM-based variant was
 * tried earlier and is no longer used.
 *
 * @param {string} source - Full HTML markup to parse.
 * @returns {object} The document produced by `parseFromString`, with comment
 *   nodes removed.
 */
function sourceToDoc(source) {
  const parser = new DOMParser();
  const parsedDocument = parser.parseFromString(source, "text/html");
  removeCommentNodesRecursively(parsedDocument);
  return parsedDocument;
}
/**
 * Removes all comment nodes beneath `node`, descending into element children.
 * The tree is modified in place.
 *
 * @param {object} node - Node whose `childNodes` are scanned; comment
 *   children are detached via `node.removeChild`.
 * @returns {void}
 */
function removeCommentNodesRecursively(node) {
  // Walk from the last child to the first so that removing a child never
  // shifts the position of a child that has not been visited yet.
  let index = node.childNodes.length;
  while (index-- > 0) {
    const current = node.childNodes[index];
    switch (current.nodeType) {
      case current.COMMENT_NODE:
        node.removeChild(current);
        break;
      case current.ELEMENT_NODE:
        removeCommentNodesRecursively(current);
        break;
      default:
        // Text and other node kinds are left untouched.
        break;
    }
  }
}
// Extract the article from the parsed document. Readability is instantiated
// with `new`: called as a plain function in a strict-mode module it has no
// `this` to initialise.
const rd = new Readability(sourceToDoc(source)).parse();

// NOTE(review): Mozilla's Readability returns null from parse() when it cannot
// extract an article; confirm the local ./readability behaves the same. The
// guard turns that case into a clear error instead of a TypeError below.
if (rd == null) {
  throw new Error(`Readability could not extract an article from ${sourcefile}`);
}

// Remove the file extension, but only a trailing one on the final path
// segment, so "./out" or "dir.v2/page" are not truncated at an earlier dot.
const fileroot = outputfile.replace(/([^/\\])\.[^./\\]*$/, "$1");

/**
 * Escapes the characters that are significant in HTML element content.
 *
 * @param {unknown} text - Value to render as text; coerced with String().
 * @returns {string} The escaped text.
 */
function escapeHtml(text) {
  return String(text)
    .replaceAll("&", "&amp;")
    .replaceAll("<", "&lt;")
    .replaceAll(">", "&gt;");
}

// Insert the title as an <h1> right after the wrapper that opens the content.
// A replacer function is used so that "$" sequences in the title (e.g. "$&",
// "$$") are inserted literally rather than being read as replacement patterns.
const wrapperOpen = `<div id="readability-page-1" class="page"><div>`;
const content = rd.content.replace(
  wrapperOpen,
  () => `${wrapperOpen}\n<h1>${escapeHtml(rd.title)}</h1>`,
);

// Write the HTML rendition; failures are reported on stderr.
writeFile(`${fileroot}.html`, content, (err) => {
  if (err) {
    console.error(err);
  }
});

// Write the plain-text rendition: title on the first line, then the body text.
writeFile(`${fileroot}.txt`, `${rd.title}\n${rd.textContent}`, (err) => {
  if (err) {
    console.error(err);
  }
});