Skip to content

Commit

Permalink
Rewrite parser as a character based scanner
Browse files Browse the repository at this point in the history
This may be kind of dumb, but it fixes the quote handling behaviour, and can be
rewritten as regexes later.
  • Loading branch information
grncdr authored and James Halliday committed Oct 18, 2013
1 parent 3418892 commit c7ca9a2
Showing 1 changed file with 106 additions and 20 deletions.
126 changes: 106 additions & 20 deletions index.js
Original file line number Diff line number Diff line change
Expand Up @@ -54,29 +54,115 @@ function parse (s, env) {
if (!match) return [];
if (!env) env = {};
return match.map(function (s) {
if (/^'/.test(s)) {
return s
.replace(/^'|'$/g, '')
.replace(/\\(["'\\$`(){}!#&*|])/g, '$1')
;
}
else if (/^"/.test(s)) {
return s.replace(/^"|"$/g, '')
.replace(/(^|[^\\])\$(\w+|[*@#?$!0_-])/g, getVar)
.replace(/(^|[^\\])\${(\w+|[*@#?$!0_-])}/g, getVar)
.replace(/\\([ "'\\$`(){}!#&*|])/g, '$1')
;
}
else if (RegExp('^' + CONTROL + '$').test(s)) {
if (RegExp('^' + CONTROL + '$').test(s)) {
return { op: s };
}
else return s.replace(
/(['"])((\\\1|[^\1])*?)\1|[^'"]+/g,
function (s, q) {
if (/^['"]/.test(s)) return parse(s, env);
return parse('"' + s + '"', env);

// Hand-written scanner/parser for Bash quoting rules:
//
// 1. inside single quotes, all characters are printed literally.
// 2. inside double quotes, all characters are printed literally
// except variables prefixed by '$' and double quotes prefixed by
// a backslash.
// 3. outside of any quotes, backslashes are not printed unless they
// are escaped.
// 4. quote context can switch mid-token if there is no whitespace
// between the two quote contexts (e.g. all'one'"token" parses as
// "allonetoken")

var SQ = "'";
var DQ = '"';
var BS = '\\';
var DS = '$';
var quote = false;
var varname = false;
var esc = false;
var out = '';
var isGlob = false;

for (var i = 0, len = s.length; i < len; i++) {
var c = s.charAt(i);
isGlob = isGlob || (!quote && (c === '*' || c === '?'))
if (esc) {
out += c;
esc = false;
}
else if (quote) {
if (c === quote) {
quote = false;
}
else if (quote == SQ) {
out += c;
}
else { // Double quote
if (c === BS) {
i += 1;
c = s.charAt(i);
if (c === DQ || c === BS || c === DS) {
out += c;
} else {
out += BS + c;
}
}
else if (c === DS) {
out += parseEnvVar();
}
else {
out += c
}
}
}
else if (c === DQ || c === SQ) {
quote = c
}
else if (RegExp('^' + CONTROL + '$').test(c)) {
return { op: s };
}
else if (c === BS) {
esc = true
}
else if (c === DS) {
out += parseEnvVar();
}
else out += c;
}

if (isGlob) return {op: 'glob', pattern: out};

return out

function parseEnvVar() {
i += 1;
var varend, varname;
//debugger
if (s.charAt(i) === '{') {
i += 1
if (s.charAt(i) === '}') {
throw new Error("Bad substitution: " + s.substr(i - 2, 3));
}
varend = s.indexOf('}', i);
if (varend < 0) {
throw new Error("Bad substitution: " + s.substr(i));
}
varname = s.substr(i, varend - i);
i = varend;
}
else if (/[*@#?$!_\-]/.test(s.charAt(i))) {
varname = s.charAt(i);
i += 1;
}
);
else {
varend = s.substr(i).match(/[^\w\d_]/);
if (!varend) {
varname = s.substr(i);
i = s.length;
} else {
varname = s.substr(i, varend.index)
i += varend.index - 1;
}
}
return getVar(null, '', varname);
}
});

function getVar (_, pre, key) {
Expand Down

0 comments on commit c7ca9a2

Please sign in to comment.