-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathindex.js
executable file
·118 lines (104 loc) · 3.03 KB
/
index.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
#!/usr/bin/env node
/**
* External dependencies
*/
const fs = require( 'fs' );
const path = require( 'path' );
const mammoth = require( 'mammoth' );
/**
* Constants
*/
const INPUT_DIR = 'input';
const OUTPUT_DIR = 'output';
/**
 * Converts a docx file to HTML using Mammoth.
 *
 * Paragraphs styled "Quotations" or "Quote" are mapped to a placeholder
 * <gblockquote> element.
 *
 * @param {string} filePath Path of the .docx file to convert.
 * @return {Promise<string>} Resolves with the generated HTML string; rejects
 *                           with Mammoth's error when the conversion fails.
 */
const convert = ( filePath ) => {
	// @TODO determine if Word uses the same style name for block quotations.
	const options = {
		styleMap: [
			"p[style-name='Quotations'] => gblockquote",
			"p[style-name='Quote'] => gblockquote",
		],
	};

	// Chain on Mammoth's promise rather than wrapping it in `new Promise`, so a
	// failed conversion rejects the returned promise and reaches the caller's
	// error handling. Promise.resolve() guarantees a native Promise whatever
	// promise implementation Mammoth hands back.
	return Promise.resolve( mammoth.convertToHtml( { path: filePath }, options ) )
		.then( ( result ) => result.value );
};
/**
 * Cleans and formats the raw HTML for Nursing Clio WP.
 *
 * Applies a fixed, ordered list of text replacements to the markup. The order
 * matters: later rules operate on the output of earlier ones.
 *
 * @param {string} rawHtml HTML string to clean up.
 * @return {Promise<string>} Resolves with the cleaned markup.
 */
const cleanHtml = async ( rawHtml ) => {
	// Each entry is [ pattern, replacement ], applied top to bottom.
	const rules = [
		// Wrap the footnote list in a "sources" section with a heading.
		[ /<\/ol>$/, '</ol>\n</section>' ],
		[ /<ol><li id="footnote/, '<section id="sources">\n<h4>Notes</h4><ol class="footnotes"><li id="footnote' ],
		// Unwrap the paragraph inside each footnote list item.
		[ /<li id="footnote-(\d+)">.*?<p>/g, '<li id="footnote-$1">' ],
		[ /<\/a><\/p><\/li>/g, '</a></li>' ],
		// Drop paragraph tags; a blank line takes the place of each closing tag.
		[ /<p>/g, '' ],
		[ /<\/p>/g, '\n\n' ],
		// Collapse doubled superscript tags.
		[ /<sup><sup>/g, '<sup>' ],
		[ /<\/sup><\/sup>/g, '</sup>' ],
		// Add classes to footnote references and relabel the return links.
		[ /id="footnote-ref/g, 'class="footnote-ref" id="footnote-ref' ],
		[ /">↑/g, '" class="return-link">Return to text.' ],
		// Turn the placeholder block quote element into bracketed tags.
		[ /<gblockquote>/g, '[gblockquote]' ],
		[ /<\/gblockquote>/g, '[/gblockquote]\n\n' ],
		// Whitespace normalisation.
		[ /\t/g, ' ' ],
		[ / \n/g, '' ],
		[ /\n+/g, '\n\n' ],
		// Line breaks after list items and headings.
		[ /<\/li>/g, '</li>\n' ],
		[ /<\/h(\d)>/g, '</h$1>\n\n' ],
	];

	return rules.reduce(
		( markup, [ pattern, replacement ] ) => markup.replace( pattern, replacement ),
		rawHtml
	);
};
/**
 * Writes the HTML content to the output directory.
 *
 * The output file is named after the source document, with its `.docx`
 * extension replaced by `.html`. The output directory is created when it does
 * not exist yet. Failures are reported on stderr and never rejected, so
 * callers that ignore the returned promise are unaffected.
 *
 * @param {string} content HTML to write.
 * @param {string} file    Name (or path) of the source .docx file.
 * @return {Promise<void>} Settles once the write has finished or failed.
 */
const writeFile = ( content, file ) => {
	const outputPath = path.format( {
		dir: OUTPUT_DIR,
		name: path.basename( file, '.docx' ),
		ext: '.html',
	} );

	return fs.promises.mkdir( OUTPUT_DIR, { recursive: true } )
		.then( () => fs.promises.writeFile( outputPath, content ) )
		.catch( ( err ) => {
			// Log with the target path so a failed write can be traced to its file.
			console.error( `Could not write "${ outputPath }":`, err );
		} );
};
/**
 * Converts every .docx file found directly inside INPUT_DIR to HTML.
 *
 * Each document is converted, cleaned and written out in turn. Entries that
 * are not .docx files are reported and skipped (a `.gitkeep` entry is skipped
 * silently). A failure on one file is logged and does not stop the remaining
 * files from being processed.
 *
 * @return {Promise<void>} Settles once every directory entry has been handled.
 */
const init = async function() {
	let files;
	try {
		files = await fs.promises.readdir( INPUT_DIR );
	} catch ( e ) {
		// Without a directory listing there is nothing to process.
		console.error( 'Oops', e );
		return;
	}

	for ( const file of files ) {
		// Handle errors per file so one bad document does not abort the batch.
		try {
			const filePath = path.join( INPUT_DIR, file );
			const stat = await fs.promises.stat( filePath );

			// Only process docx files.
			if ( stat.isFile() && '.docx' === path.extname( file ) ) {
				// Convert the docx to html using Mammoth.
				const rawHtml = await convert( filePath );

				// Run text replacements.
				const html = await cleanHtml( rawHtml );

				// Write content to output; awaiting is harmless when nothing is returned.
				await writeFile( html, file );
			} else if ( '.gitkeep' !== file ) {
				console.log( `Ignored "${file}": Only docx files can be converted. Other files and folders will be skipped.` );
			}
		} catch ( e ) {
			console.error( `Oops, could not process "${ file }"`, e );
		}
	}
};
init();