Skip to content

Commit

Permalink
regex
Browse files Browse the repository at this point in the history
  • Loading branch information
Kodiologist committed Jul 19, 2017
1 parent 9e71c00 commit a621754
Showing 1 changed file with 34 additions and 23 deletions.
57 changes: 34 additions & 23 deletions hy/lex/lexer.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,23 @@
# This file is part of Hy, which is free software licensed under the Expat
# license. See the LICENSE.

import re
import regex
from rply import LexerGenerator
import rply.lexergenerator


# Module-level rply lexer generator. The add()/ignore() helpers in this
# file append rules to it, and the final lexer is built from it.
lg = LexerGenerator()

class RegexRule(rply.lexergenerator.Rule):
    """A variant of rply.lexergenerator.Rule whose pattern is compiled
    by the `regex` module rather than Python's `re`."""

    def __init__(self, name, pattern, flags=0):
        # The base-class initializer is not called; both attributes are
        # set directly, with the pattern compiled by `regex`.
        self.name = name
        compiled = regex.compile(pattern, flags=flags)
        self.re = compiled
def add(name, pattern, flags=0):
    """Append a token rule named `name` to the module-level lexer
    generator, with `pattern` compiled through RegexRule."""
    rule = RegexRule(name, pattern, flags=flags)
    lg.rules.append(rule)
def ignore(pattern, flags=0):
    """Append an ignore rule for `pattern` to the module-level lexer
    generator, compiled through RegexRule."""
    # Ignore rules are registered under an empty name.
    rule = RegexRule("", pattern, flags=flags)
    lg.ignore_rules.append(rule)

# A regexp for something that should end a quoting/unquoting operator
# i.e. a space or a closing brace/paren/curly
Expand All @@ -21,26 +32,26 @@
# BidiBrackets.txt
("(", ")"), ("[", "]"), ("{", "}"), ("༺", "༻"), ("༼", "༽"), ("᚛", "᚜"), ("⁅", "⁆"), ("⁽", "⁾"), ("₍", "₎"), ("⌈", "⌉"), ("⌊", "⌋"), ("〈", "〉"), ("❨", "❩"), ("❪", "❫"), ("❬", "❭"), ("❮", "❯"), ("❰", "❱"), ("❲", "❳"), ("❴", "❵"), ("⟅", "⟆"), ("⟦", "⟧"), ("⟨", "⟩"), ("⟪", "⟫"), ("⟬", "⟭"), ("⟮", "⟯"), ("⦃", "⦄"), ("⦅", "⦆"), ("⦇", "⦈"), ("⦉", "⦊"), ("⦋", "⦌"), ("⦍", "⦐"), ("⦏", "⦎"), ("⦑", "⦒"), ("⦓", "⦔"), ("⦕", "⦖"), ("⦗", "⦘"), ("⧘", "⧙"), ("⧚", "⧛"), ("⧼", "⧽"), ("⸢", "⸣"), ("⸤", "⸥"), ("⸦", "⸧"), ("⸨", "⸩"), ("〈", "〉"), ("《", "》"), ("「", "」"), ("『", "』"), ("【", "】"), ("〔", "〕"), ("〖", "〗"), ("〘", "〙"), ("〚", "〛"), ("﹙", "﹚"), ("﹛", "﹜"), ("﹝", "﹞"), ("(", ")"), ("[", "]"), ("{", "}"), ("⦅", "⦆"), ("「", "」")) # noqa

# Pre-change side of the diff: delimiter, quoting and hashbang token rules
# registered directly through the rply LexerGenerator instance.
lg.add('LPAREN', r'\(')
lg.add('RPAREN', r'\)')
lg.add('LBRACKET', r'\[')
lg.add('RBRACKET', r'\]')
lg.add('LCURLY', r'\{')
lg.add('RCURLY', r'\}')
lg.add('HLCURLY', r'#\{')
# Each quoting operator pattern is suffixed with end_quote.
lg.add('QUOTE', r'\'%s' % end_quote)
lg.add('QUASIQUOTE', r'`%s' % end_quote)
# NOTE(review): '~@' is registered before '~' -- presumably rule order
# decides which one matches first; confirm against rply.
lg.add('UNQUOTESPLICE', r'~@%s' % end_quote)
lg.add('UNQUOTE', r'~%s' % end_quote)
lg.add('HASHBANG', r'#!.*[^\r\n]')
# Post-change side of the diff: the same rules with the same patterns,
# registered through the module-level add() helper instead of lg.add.
add('LPAREN', r'\(')
add('RPAREN', r'\)')
add('LBRACKET', r'\[')
add('RBRACKET', r'\]')
add('LCURLY', r'\{')
add('RCURLY', r'\}')
add('HLCURLY', r'#\{')
add('QUOTE', r'\'%s' % end_quote)
add('QUASIQUOTE', r'`%s' % end_quote)
add('UNQUOTESPLICE', r'~@%s' % end_quote)
add('UNQUOTE', r'~%s' % end_quote)
add('HASHBANG', r'#!.*[^\r\n]')

# One HASHSTRING rule per (opener, closer) pair: '#q', the escaped opener,
# a lazy run of any characters (newlines included), then the escaped closer.
# The loop body appears twice because this is a diff view: first the
# pre-change form (lg.add / re.escape), then the post-change form
# (add / regex.escape).
for opener, closer in hashstring_paired_delims:
lg.add('HASHSTRING', r'#q{}(?:.|\n)*?{}'.format(
re.escape(opener), re.escape(closer)))
# Fallback HASHSTRING rule: '#q' followed by one character that is not any
# of the paired openers, then a lazy run up to the next occurrence of that
# same character (backreference \1). The {} inside the character class is
# the str.format placeholder filled with the escaped openers.
lg.add('HASHSTRING', r'#q([^{}])(?:.|\n)*?\1'.format(''.join(
re.escape(opener) for opener, _ in hashstring_paired_delims)))
# Post-change versions of the two HASHSTRING registrations above.
add('HASHSTRING', r'#q{}(?:.|\n)*?{}'.format(
regex.escape(opener), regex.escape(closer)))
add('HASHSTRING', r'#q([^{}])(?:.|\n)*?\1'.format(''.join(
regex.escape(opener) for opener, _ in hashstring_paired_delims)))

# HASHOTHER: '#' followed by the identifier pattern (old form, then new).
lg.add('HASHOTHER', r'#%s' % identifier)
add('HASHOTHER', r'#%s' % identifier)

# A regexp which matches incomplete strings, used to support
# multi-line strings in the interpreter
Expand All @@ -56,14 +67,14 @@
)* # one or more times
'''

# STRING is the partial_string pattern followed by a closing double quote;
# PARTIAL_STRING is the same pattern without the closing quote.
# Pre-change side of the diff (lg.add), then post-change side (add).
lg.add('STRING', r'%s"' % partial_string)
lg.add('PARTIAL_STRING', partial_string)
add('STRING', r'%s"' % partial_string)
add('PARTIAL_STRING', partial_string)

# IDENTIFIER rule (old form, then new form).
lg.add('IDENTIFIER', identifier)
add('IDENTIFIER', identifier)


# Ignored input: a ';' comment running up to (not including) a CR, LF or
# end of input, and any run of whitespace. Old form (lg.ignore), then the
# new form using the module-level ignore() helper.
lg.ignore(r';.*(?=\r|\n|$)')
lg.ignore(r'\s+')
ignore(r';.*(?=\r|\n|$)')
ignore(r'\s+')


# Build the module-level lexer from every rule registered on lg.
lexer = lg.build()

0 comments on commit a621754

Please sign in to comment.