From 553fdfc32cc41b4c2f77e061b6957703958ca575 Mon Sep 17 00:00:00 2001 From: Jordan Harband Date: Mon, 30 Jan 2023 14:22:58 -0800 Subject: [PATCH] [New] extract `parse` and `quote` to their own deep imports --- README.md | 16 ++--- index.js | 203 +----------------------------------------------------- parse.js | 190 ++++++++++++++++++++++++++++++++++++++++++++++++++ quote.js | 16 +++++ 4 files changed, 216 insertions(+), 209 deletions(-) create mode 100644 parse.js create mode 100644 quote.js diff --git a/README.md b/README.md index 3d86a2e..c2f6e08 100644 --- a/README.md +++ b/README.md @@ -14,7 +14,7 @@ Parse and quote shell commands. ## quote ``` js -var quote = require('shell-quote').quote; +var quote = require('shell-quote/quote'); var s = quote([ 'a', 'b c d', '$f', '"g"' ]); console.log(s); ``` @@ -28,7 +28,7 @@ a 'b c d' \$f '"g"' ## parse ``` js -var parse = require('shell-quote').parse; +var parse = require('shell-quote/parse'); var xs = parse('a "b c" \\$def \'it\\\'s great\''); console.dir(xs); ``` @@ -42,7 +42,7 @@ output ## parse with an environment variable ``` js -var parse = require('shell-quote').parse; +var parse = require('shell-quote/parse'); var xs = parse('beep --boop="$PWD"', { PWD: '/home/robot' }); console.dir(xs); ``` @@ -56,7 +56,7 @@ output ## parse with custom escape character ``` js -var parse = require('shell-quote').parse; +var parse = require('shell-quote/parse'); var xs = parse('beep --boop="$PWD"', { PWD: '/home/robot' }, { escape: '^' }); console.dir(xs); ``` @@ -70,7 +70,7 @@ output ## parsing shell operators ``` js -var parse = require('shell-quote').parse; +var parse = require('shell-quote/parse'); var xs = parse('beep || boop > /byte'); console.dir(xs); ``` @@ -84,7 +84,7 @@ output: ## parsing shell comment ``` js -var parse = require('shell-quote').parse; +var parse = require('shell-quote/parse'); var xs = parse('beep > boop # > kaboom'); console.dir(xs); ``` @@ -98,8 +98,8 @@ output: # methods ``` js -var quote = require('shell-quote').quote; -var parse = require('shell-quote').parse; +var quote = require('shell-quote/quote'); +var parse = require('shell-quote/parse'); ``` ## quote(args) diff --git a/index.js b/index.js index 0a9ae2d..28fb42d 100644 --- a/index.js +++ b/index.js @@ -1,203 +1,4 @@ 'use strict'; -exports.quote = function (xs) { - return xs.map(function (s) { - if (s && typeof s === 'object') { - return s.op.replace(/(.)/g, '\\$1'); - } else if ((/["\s]/).test(s) && !(/'/).test(s)) { - return "'" + s.replace(/(['\\])/g, '\\$1') + "'"; - } else if ((/["'\s]/).test(s)) { - return '"' + s.replace(/(["\\$`!])/g, '\\$1') + '"'; - } - return String(s).replace(/([A-Za-z]:)?([#!"$&'()*,:;<=>?@[\\\]^`{|}])/g, '$1\\$2'); - }).join(' '); -}; - -// '<(' is process substitution operator and -// can be parsed the same as control operator -var CONTROL = '(?:' + [ - '\\|\\|', '\\&\\&', ';;', '\\|\\&', '\\<\\(', '>>', '>\\&', '[&;()|<>]' -].join('|') + ')'; -var META = '|&;()<> \\t'; -var BAREWORD = '(\\\\[\'"' + META + ']|[^\\s\'"' + META + '])+'; -var SINGLE_QUOTE = '"((\\\\"|[^"])*?)"'; -var DOUBLE_QUOTE = '\'((\\\\\'|[^\'])*?)\''; - -var TOKEN = ''; -for (var i = 0; i < 4; i++) { - TOKEN += (Math.pow(16, 8) * Math.random()).toString(16); -} - -function parse(s, env, opts) { - var chunker = new RegExp([ - '(' + CONTROL + ')', // control chars - '(' + BAREWORD + '|' + SINGLE_QUOTE + '|' + DOUBLE_QUOTE + ')*' - ].join('|'), 'g'); - var match = s.match(chunker).filter(Boolean); - - if (!match) { - return []; - } - if (!env) { - env = {}; - } - if (!opts) { - opts = {}; - } - - var commented = false; - - function getVar(_, pre, key) { - var r = typeof env === 'function' ? env(key) : env[key]; - if (r === undefined && key != '') { - r = ''; - } else if (r === undefined) { - r = '$'; - } - - if (typeof r === 'object') { - return pre + TOKEN + JSON.stringify(r) + TOKEN; - } - return pre + r; - } - - return match.map(function (s, j) { - if (commented) { - return void undefined; - } - if (RegExp('^' + CONTROL + '$').test(s)) { - return { op: s }; - } - - // Hand-written scanner/parser for Bash quoting rules: - // - // 1. inside single quotes, all characters are printed literally. - // 2. inside double quotes, all characters are printed literally - // except variables prefixed by '$' and backslashes followed by - // either a double quote or another backslash. - // 3. outside of any quotes, backslashes are treated as escape - // characters and not printed (unless they are themselves escaped) - // 4. quote context can switch mid-token if there is no whitespace - // between the two quote contexts (e.g. all'one'"token" parses as - // "allonetoken") - var SQ = "'"; - var DQ = '"'; - var DS = '$'; - var BS = opts.escape || '\\'; - var quote = false; - var esc = false; - var out = ''; - var isGlob = false; - var i; - - function parseEnvVar() { - i += 1; - var varend; - var varname; - // debugger - if (s.charAt(i) === '{') { - i += 1; - if (s.charAt(i) === '}') { - throw new Error('Bad substitution: ' + s.substr(i - 2, 3)); - } - varend = s.indexOf('}', i); - if (varend < 0) { - throw new Error('Bad substitution: ' + s.substr(i)); - } - varname = s.substr(i, varend - i); - i = varend; - } else if ((/[*@#?$!_-]/).test(s.charAt(i))) { - varname = s.charAt(i); - i += 1; - } else { - varend = s.substr(i).match(/[^\w\d_]/); - if (!varend) { - varname = s.substr(i); - i = s.length; - } else { - varname = s.substr(i, varend.index); - i += varend.index - 1; - } - } - return getVar(null, '', varname); - } - - for (i = 0; i < s.length; i++) { - var c = s.charAt(i); - isGlob = isGlob || (!quote && (c === '*' || c === '?')); - if (esc) { - out += c; - esc = false; - } else if (quote) { - if (c === quote) { - quote = false; - } else if (quote == SQ) { - out += c; - } else { // Double quote - if (c === BS) { - i += 1; - c = s.charAt(i); - if (c === DQ || c === BS || c === DS) { - out += c; - } else { - out += BS + c; - } - } else if (c === DS) { - out += parseEnvVar(); - } else { - out += c; - } - } - } else if (c === DQ || c === SQ) { - quote = c; - } else if (RegExp('^' + CONTROL + '$').test(c)) { - return { op: s }; - } else if ((/^#$/).test(c)) { - commented = true; - if (out.length) { - return [out, { comment: s.slice(i + 1) + match.slice(j + 1).join(' ') }]; - } - return [{ comment: s.slice(i + 1) + match.slice(j + 1).join(' ') }]; - } else if (c === BS) { - esc = true; - } else if (c === DS) { - out += parseEnvVar(); - } else { - out += c; - } - } - - if (isGlob) { - return { op: 'glob', pattern: out }; - } - - return out; - }).reduce(function (prev, arg) { // finalize parsed aruments - if (arg === undefined) { - return prev; - } - return prev.concat(arg); - }, []); -} - -exports.parse = function (s, env, opts) { - var mapped = parse(s, env, opts); - if (typeof env !== 'function') { - return mapped; - } - return mapped.reduce(function (acc, s) { - if (typeof s === 'object') { - return acc.concat(s); - } - var xs = s.split(RegExp('(' + TOKEN + '.*?' + TOKEN + ')', 'g')); - if (xs.length === 1) { - return acc.concat(xs[0]); - } - return acc.concat(xs.filter(Boolean).map(function (x) { - if (RegExp('^' + TOKEN).test(x)) { - return JSON.parse(x.split(TOKEN)[1]); - } - return x; - })); - }, []); -}; +exports.quote = require('./quote'); +exports.parse = require('./parse'); diff --git a/parse.js b/parse.js new file mode 100644 index 0000000..3ea9c1b --- /dev/null +++ b/parse.js @@ -0,0 +1,190 @@ +'use strict'; + +// '<(' is process substitution operator and +// can be parsed the same as control operator +var CONTROL = '(?:' + [ + '\\|\\|', '\\&\\&', ';;', '\\|\\&', '\\<\\(', '>>', '>\\&', '[&;()|<>]' +].join('|') + ')'; +var META = '|&;()<> \\t'; +var BAREWORD = '(\\\\[\'"' + META + ']|[^\\s\'"' + META + '])+'; +var SINGLE_QUOTE = '"((\\\\"|[^"])*?)"'; +var DOUBLE_QUOTE = '\'((\\\\\'|[^\'])*?)\''; + +var TOKEN = ''; +for (var i = 0; i < 4; i++) { + TOKEN += (Math.pow(16, 8) * Math.random()).toString(16); +} + +function parseInternal(s, env, opts) { + var chunker = new RegExp([ + '(' + CONTROL + ')', // control chars + '(' + BAREWORD + '|' + SINGLE_QUOTE + '|' + DOUBLE_QUOTE + ')*' + ].join('|'), 'g'); + var match = s.match(chunker).filter(Boolean); + + if (!match) { + return []; + } + if (!env) { + env = {}; + } + if (!opts) { + opts = {}; + } + + var commented = false; + + function getVar(_, pre, key) { + var r = typeof env === 'function' ? env(key) : env[key]; + if (r === undefined && key != '') { + r = ''; + } else if (r === undefined) { + r = '$'; + } + + if (typeof r === 'object') { + return pre + TOKEN + JSON.stringify(r) + TOKEN; + } + return pre + r; + } + + return match.map(function (s, j) { + if (commented) { + return void undefined; + } + if (RegExp('^' + CONTROL + '$').test(s)) { + return { op: s }; + } + + // Hand-written scanner/parser for Bash quoting rules: + // + // 1. inside single quotes, all characters are printed literally. + // 2. inside double quotes, all characters are printed literally + // except variables prefixed by '$' and backslashes followed by + // either a double quote or another backslash. + // 3. outside of any quotes, backslashes are treated as escape + // characters and not printed (unless they are themselves escaped) + // 4. quote context can switch mid-token if there is no whitespace + // between the two quote contexts (e.g. all'one'"token" parses as + // "allonetoken") + var SQ = "'"; + var DQ = '"'; + var DS = '$'; + var BS = opts.escape || '\\'; + var quote = false; + var esc = false; + var out = ''; + var isGlob = false; + var i; + + function parseEnvVar() { + i += 1; + var varend; + var varname; + // debugger + if (s.charAt(i) === '{') { + i += 1; + if (s.charAt(i) === '}') { + throw new Error('Bad substitution: ' + s.substr(i - 2, 3)); + } + varend = s.indexOf('}', i); + if (varend < 0) { + throw new Error('Bad substitution: ' + s.substr(i)); + } + varname = s.substr(i, varend - i); + i = varend; + } else if ((/[*@#?$!_-]/).test(s.charAt(i))) { + varname = s.charAt(i); + i += 1; + } else { + varend = s.substr(i).match(/[^\w\d_]/); + if (!varend) { + varname = s.substr(i); + i = s.length; + } else { + varname = s.substr(i, varend.index); + i += varend.index - 1; + } + } + return getVar(null, '', varname); + } + + for (i = 0; i < s.length; i++) { + var c = s.charAt(i); + isGlob = isGlob || (!quote && (c === '*' || c === '?')); + if (esc) { + out += c; + esc = false; + } else if (quote) { + if (c === quote) { + quote = false; + } else if (quote == SQ) { + out += c; + } else { // Double quote + if (c === BS) { + i += 1; + c = s.charAt(i); + if (c === DQ || c === BS || c === DS) { + out += c; + } else { + out += BS + c; + } + } else if (c === DS) { + out += parseEnvVar(); + } else { + out += c; + } + } + } else if (c === DQ || c === SQ) { + quote = c; + } else if (RegExp('^' + CONTROL + '$').test(c)) { + return { op: s }; + } else if ((/^#$/).test(c)) { + commented = true; + if (out.length) { + return [out, { comment: s.slice(i + 1) + match.slice(j + 1).join(' ') }]; + } + return [{ comment: s.slice(i + 1) + match.slice(j + 1).join(' ') }]; + } else if (c === BS) { + esc = true; + } else if (c === DS) { + out += parseEnvVar(); + } else { + out += c; + } + } + + if (isGlob) { + return { op: 'glob', pattern: out }; + } + + return out; + }).reduce(function (prev, arg) { // finalize parsed aruments + if (arg === undefined) { + return prev; + } + return prev.concat(arg); + }, []); +} + +module.exports = function parse(s, env, opts) { + var mapped = parseInternal(s, env, opts); + if (typeof env !== 'function') { + return mapped; + } + return mapped.reduce(function (acc, s) { + if (typeof s === 'object') { + return acc.concat(s); + } + var xs = s.split(RegExp('(' + TOKEN + '.*?' + TOKEN + ')', 'g')); + if (xs.length === 1) { + return acc.concat(xs[0]); + } + return acc.concat(xs.filter(Boolean).map(function (x) { + if (RegExp('^' + TOKEN).test(x)) { + return JSON.parse(x.split(TOKEN)[1]); + } + return x; + })); + }, []); +}; diff --git a/quote.js b/quote.js new file mode 100644 index 0000000..afbccf0 --- /dev/null +++ b/quote.js @@ -0,0 +1,16 @@ +'use strict'; + +module.exports = function quote(xs) { + return xs.map(function (s) { + if (s && typeof s === 'object') { + return s.op.replace(/(.)/g, '\\$1'); + } + if ((/["\s]/).test(s) && !(/'/).test(s)) { + return "'" + s.replace(/(['\\])/g, '\\$1') + "'"; + } + if ((/["'\s]/).test(s)) { + return '"' + s.replace(/(["\\$`!])/g, '\\$1') + '"'; + } + return String(s).replace(/([A-Za-z]:)?([#!"$&'()*,:;<=>?@[\\\]^`{|}])/g, '$1\\$2'); + }).join(' '); +};