From c7ca9a200350c02fb86c4222b63f15b54a0a0226 Mon Sep 17 00:00:00 2001 From: Stephen Sugden Date: Thu, 17 Oct 2013 17:58:42 -0700 Subject: [PATCH] Rewrite parser as a character based scanner This may be kind of dumb, but it fixes the quote handling behaviour, and can be rewritten as regexes later. --- index.js | 126 ++++++++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 106 insertions(+), 20 deletions(-) diff --git a/index.js b/index.js index 604c51a..7ecd705 100644 --- a/index.js +++ b/index.js @@ -54,29 +54,115 @@ function parse (s, env) { if (!match) return []; if (!env) env = {}; return match.map(function (s) { - if (/^'/.test(s)) { - return s - .replace(/^'|'$/g, '') - .replace(/\\(["'\\$`(){}!#&*|])/g, '$1') - ; - } - else if (/^"/.test(s)) { - return s.replace(/^"|"$/g, '') - .replace(/(^|[^\\])\$(\w+|[*@#?$!0_-])/g, getVar) - .replace(/(^|[^\\])\${(\w+|[*@#?$!0_-])}/g, getVar) - .replace(/\\([ "'\\$`(){}!#&*|])/g, '$1') - ; - } - else if (RegExp('^' + CONTROL + '$').test(s)) { + if (RegExp('^' + CONTROL + '$').test(s)) { return { op: s }; } - else return s.replace( - /(['"])((\\\1|[^\1])*?)\1|[^'"]+/g, - function (s, q) { - if (/^['"]/.test(s)) return parse(s, env); - return parse('"' + s + '"', env); + + // Hand-written scanner/parser for Bash quoting rules: + // + // 1. inside single quotes, all characters are printed literally. + // 2. inside double quotes, all characters are printed literally + // except variables prefixed by '$' and double quotes prefixed by + // a backslash. + // 3. outside of any quotes, backslashes are not printed unless they + // are escaped. + // 4. quote context can switch mid-token if there is no whitespace + // between the two quote contexts (e.g. all'one'"token" parses as + // "allonetoken") + + var SQ = "'"; + var DQ = '"'; + var BS = '\\'; + var DS = '$'; + var quote = false; + var varname = false; + var esc = false; + var out = ''; + var isGlob = false; + + for (var i = 0, len = s.length; i < len; i++) { + var c = s.charAt(i); + isGlob = isGlob || (!quote && (c === '*' || c === '?')) + if (esc) { + out += c; + esc = false; + } + else if (quote) { + if (c === quote) { + quote = false; + } + else if (quote == SQ) { + out += c; + } + else { // Double quote + if (c === BS) { + i += 1; + c = s.charAt(i); + if (c === DQ || c === BS || c === DS) { + out += c; + } else { + out += BS + c; + } + } + else if (c === DS) { + out += parseEnvVar(); + } + else { + out += c + } + } + } + else if (c === DQ || c === SQ) { + quote = c + } + else if (RegExp('^' + CONTROL + '$').test(c)) { + return { op: s }; + } + else if (c === BS) { + esc = true + } + else if (c === DS) { + out += parseEnvVar(); + } + else out += c; + } + + if (isGlob) return {op: 'glob', pattern: out}; + + return out + + function parseEnvVar() { + i += 1; + var varend, varname; + //debugger + if (s.charAt(i) === '{') { + i += 1 + if (s.charAt(i) === '}') { + throw new Error("Bad substitution: " + s.substr(i - 2, 3)); + } + varend = s.indexOf('}', i); + if (varend < 0) { + throw new Error("Bad substitution: " + s.substr(i)); + } + varname = s.substr(i, varend - i); + i = varend; + } + else if (/[*@#?$!_\-]/.test(s.charAt(i))) { + varname = s.charAt(i); + i += 1; } - ); + else { + varend = s.substr(i).match(/[^\w\d_]/); + if (!varend) { + varname = s.substr(i); + i = s.length; + } else { + varname = s.substr(i, varend.index) + i += varend.index - 1; + } + } + return getVar(null, '', varname); + } }); function getVar (_, pre, key) {