Skip to content

Commit

Permalink
tokenizer: add support for using unimplemented nodes for array assignment (fixes idank#88)
Browse files Browse the repository at this point in the history
  • Loading branch information
Tom O'Hara committed Jun 24, 2023
1 parent 81a0580 commit dcf9315
Show file tree
Hide file tree
Showing 4 changed files with 41 additions and 5 deletions.
1 change: 1 addition & 0 deletions bashlex/flags.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,4 +52,5 @@
'ASSNGLOBAL', # word is a global assignment to declare (declare/typeset -g)
'NOBRACE', # Don't perform brace expansion
'ASSIGNINT', # word is an integer assignment to declare
'UNIMPLEMENTED', # word uses unimplemented feature (e.g., array)
])
3 changes: 3 additions & 0 deletions bashlex/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -173,6 +173,8 @@ def p_simple_command_element(p):
# change the word node to an assignment if necessary
if p.slice[1].ttype == tokenizer.tokentype.ASSIGNMENT_WORD:
p[0][0].kind = 'assignment'
if (p.slice[1].flags & flags.word.UNIMPLEMENTED):
p[0][0].kind = 'unimplemented'

def p_redirection_list(p):
'''redirection_list : redirection
Expand Down Expand Up @@ -720,6 +722,7 @@ def __init__(self, s, strictmode=True, expansionlimit=None, tokenizerargs=None,
self.tok = tokenizer.tokenizer(s,
parserstate=self.parserstate,
strictmode=strictmode,
proceedonerror=proceedonerror,
**tokenizerargs)

self.redirstack = self.tok.redirstack
Expand Down
27 changes: 24 additions & 3 deletions bashlex/tokenizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -199,7 +199,8 @@ def nopos(self):

class tokenizer(object):
def __init__(self, s, parserstate, strictmode=True, eoftoken=None,
lastreadtoken=None, tokenbeforethat=None, twotokensago=None):
lastreadtoken=None, tokenbeforethat=None, twotokensago=None,
proceedonerror=None):
self._shell_eof_token = eoftoken
self._shell_input_line = s
self._added_newline = False
Expand Down Expand Up @@ -232,6 +233,7 @@ def __init__(self, s, parserstate, strictmode=True, eoftoken=None,
self._positions = []

self._strictmode = strictmode
self._proceedonerror = proceedonerror

# hack: the tokenizer needs access to the stack of redirection
# nodes when it reads heredocs. this instance is shared between
Expand Down Expand Up @@ -391,7 +393,7 @@ def _readtoken(self):
def _readtokenword(self, c):
d = {}
d['all_digit_token'] = c.isdigit()
d['dollar_present'] = d['quoted'] = d['pass_next_character'] = d['compound_assignment'] = False
d['dollar_present'] = d['quoted'] = d['pass_next_character'] = d['compound_assignment'] = d['unimplemented'] = False

tokenword = []

Expand Down Expand Up @@ -467,6 +469,19 @@ def handleshellexp():

# bashlex/parse.y L4699 ARRAY_VARS

def handlecompoundassignment():
    # Recognize `name=(...)` compound (array) assignment.  We only
    # locate the matching closing parenthesis so tokenizing can
    # continue past the construct; the contents are not parsed
    # (arrays are an unimplemented feature).  Returns True when the
    # construct was consumed, False otherwise.
    if not self._proceedonerror:
        return False
    ttok = self._parse_matched_pair(None, '(', ')')
    if not ttok:
        return False
    tokenword.append(c)
    tokenword.extend(ttok)
    d['compound_assignment'] = True
    d['unimplemented'] = True
    return True

def handleescapedchar():
tokenword.append(c)
d['all_digit_token'] &= c.isdigit()
Expand Down Expand Up @@ -512,6 +527,8 @@ def handleescapedchar():
elif _shellexp(c):
gotonext = not handleshellexp()
# bashlex/parse.y L4699
elif c == '(' and handlecompoundassignment():
gotonext = True
if not gotonext:
if _shellbreak(c):
self._ungetc(c)
Expand Down Expand Up @@ -573,14 +590,18 @@ def handleescapedchar():
tokenword.flags.add(wordflags.HASDOLLAR)
if d['quoted']:
tokenword.flags.add(wordflags.QUOTED)
if d['compound_assignment'] and tokenword[-1] == ')':
if d['compound_assignment'] and tokenword.value[-1] == ')':
tokenword.flags.add(wordflags.COMPASSIGN)
if self._is_assignment(tokenword.value, bool(self._parserstate & parserflags.COMPASSIGN)):
tokenword.flags.add(wordflags.ASSIGNMENT)
if self._assignment_acceptable(self._last_read_token):
tokenword.flags.add(wordflags.NOSPLIT)
if self._parserstate & parserflags.COMPASSIGN:
tokenword.flags.add(wordflags.NOGLOB)
if d['compound_assignment']:
tokenword.flags.add(wordflags.ASSIGNARRAY)
if d['unimplemented']:
tokenword.flags.add(wordflags.UNIMPLEMENTED)

# bashlex/parse.y L4865
if self._command_token_position(self._last_read_token):
Expand Down
15 changes: 13 additions & 2 deletions tests/test_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,8 +91,8 @@ def patternnode(s, *parts):
def functionnode(s, name, body, *parts):
return ast.node(kind='function', name=name, body=body, parts=list(parts), s=s)

def unimplementednode(s, *parts):
return ast.node(kind='unimplemented', parts=list(parts), s=s)
def unimplementednode(s, *parts, **kwargs):
    """Build an AST node of kind 'unimplemented' spanning *s*.

    Any extra keyword arguments (e.g. word=...) are forwarded to the
    node constructor unchanged.
    """
    part_list = list(parts)
    return ast.node(kind='unimplemented', s=s, parts=part_list, **kwargs)

class test_parser(unittest.TestCase):

Expand Down Expand Up @@ -1250,3 +1250,14 @@ def test_unimplemented(self):
proceedonerror=True)
with self.assertRaises(NotImplementedError):
parse(s, proceedonerror=False)

def test_array_assignemnt(self):
    # NOTE(review): "assignemnt" is a typo but the method name is kept
    # so the public test identifier does not change.
    # With proceedonerror=True the array assignment becomes an
    # 'unimplemented' node; without it, parsing must fail.
    script = "num1=2 arr=(1 2 3) num2=3"
    expected = commandnode(
        script,
        assignmentnode('num1=2', 'num1=2'),
        unimplementednode('arr=(1 2 3)', word='arr=(1 2 3)'),
        assignmentnode('num2=3', 'num2=3'))
    self.assertASTEquals(script, expected, proceedonerror=True)
    with self.assertRaises(errors.ParsingError):
        parse(script, proceedonerror=False)

0 comments on commit dcf9315

Please sign in to comment.