diff --git a/.coveragerc b/.coveragerc new file mode 100644 index 0000000..684a473 --- /dev/null +++ b/.coveragerc @@ -0,0 +1,15 @@ +[run] +relative_files = True +branch = True +omit = + test_*.py + src/parsec/tests/* + +# coverage.py does not currently handle @overload decorated methods gracefully. +# overloaded methods should be ignored because they are not allowed to contain code +[report] +exclude_lines = + pragma: not covered + @overload + \.\.\. + if TYPE_CHECKING: diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 4b30583..311e62d 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -11,7 +11,7 @@ jobs: runs-on: ubuntu-20.04 strategy: matrix: - python-version: ['3.5', '3.6', '3.7', '3.8', '3.9', '3.10'] + python-version: ['3.6', '3.7', '3.8', '3.9', '3.10', '3.11', '3.12'] steps: - uses: actions/checkout@v2 @@ -19,4 +19,11 @@ jobs: with: python-version: ${{ matrix.python-version }} - - run: python setup.py test + - run: pip install -e .[dev] + + - run: coverage run setup.py test + - run: coverage report -m + + - if: ${{ matrix.python-version != '3.6' && matrix.python-version != '3.7' }} + run: mypy . 
+ diff --git a/.gitignore b/.gitignore index db4561e..9332007 100644 --- a/.gitignore +++ b/.gitignore @@ -52,3 +52,6 @@ docs/_build/ # PyBuilder target/ + +# venv +.venv/ diff --git a/mypy.ini b/mypy.ini new file mode 100644 index 0000000..d794d83 --- /dev/null +++ b/mypy.ini @@ -0,0 +1,15 @@ +[mypy] +pretty = True +mypy_path = $MYPY_CONFIG_FILE_DIR/src +packages = parsec +exclude = docs/|examples/|build/lib|src/parsec/tests + +explicit_package_bases = True +check_untyped_defs = True +implicit_reexport = True +show_error_codes = True +show_column_numbers = True +follow_imports = silent + +warn_redundant_casts = True +warn_unused_ignores = True diff --git a/setup.py b/setup.py index 97c2b94..cb9a35d 100644 --- a/setup.py +++ b/setup.py @@ -1,11 +1,11 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- -from setuptools import setup, find_packages +from setuptools import setup, find_packages # type: ignore[import-untyped] setup( name = 'parsec', - version = '3.15', + version = '3.16', description = 'parser combinator.', long_description = 'A universal Python parser combinator library inspired by Parsec library of Haskell.', author = 'He Tao', @@ -33,6 +33,7 @@ 'Programming Language :: Python :: 3.9', 'Programming Language :: Python :: 3.10', 'Programming Language :: Python :: 3.11', + 'Programming Language :: Python :: 3.12', 'License :: OSI Approved :: MIT License', ], platforms = 'any', @@ -40,7 +41,14 @@ install_requires = [ 'enum34; python_version < "3.5"', + 'setuptools', ], + extras_require={ + 'dev': [ + 'mypy', + 'coverage', + ], + }, package_dir = {'': 'src'}, packages = find_packages('src'), package_data = {'': ('py.typed', '*.pyi')}, diff --git a/src/parsec/__init__.py b/src/parsec/__init__.py index 698a4f8..69ef13d 100644 --- a/src/parsec/__init__.py +++ b/src/parsec/__init__.py @@ -7,15 +7,27 @@ __author__ = 'He Tao, sighingnow@gmail.com' +try: + from inspect import getfullargspec as getargspec +except ImportError: + from inspect import getargspec as 
getargspec + +import operator import re +import inspect import warnings -from functools import wraps +from functools import reduce, wraps from collections import namedtuple ########################################################################## # Text.Parsec.Error ########################################################################## +def expected_arguments(callable): + if inspect.isbuiltin(callable): + # NOTE: we cannot perform introspection on builtins + return 1 + return len(getargspec(callable).args) class ParseError(RuntimeError): '''Type for parse error.''' @@ -74,7 +86,7 @@ def aggregate(self, other=None): return self if not other.status: return other - return Value(True, other.index, self.value + other.value, None) + return Value.success(other.index, self.value + other.value) def update_index(self, index=None): if index is None: @@ -85,15 +97,14 @@ def update_index(self, index=None): @staticmethod def combinate(values): '''Aggregate multiple values into tuple''' - prev_v = None + if not values: + raise TypeError("cannot call combinate without any value") + for v in values: - if prev_v: - if not v: - return prev_v if not v.status: return v out_values = tuple([v.value for v in values]) - return Value(True, values[-1].index, out_values, None) + return Value.success(values[-1].index, out_values) def __str__(self): return 'Value: state: {}, @index: {}, values: {}, expected: {}'.format( @@ -122,6 +133,11 @@ def __call__(self, text, index): '''call wrapped function.''' return self.fn(text, index) + def __repr__(self): + if hasattr(self.fn, "__name__"): + return self.fn.__name__ + return super().__repr__() + def parse(self, text): '''Parses a given string `text`.''' return self.parse_partial(text)[0] @@ -155,10 +171,17 @@ def bind(self, fn): parser is successful, passes the result to fn, and continues with the parser returned from fn. 
''' + args_count = expected_arguments(fn) + if not 1 <= args_count <= 2: + raise TypeError("can only bind on a function with one or two arguments, fn/{}".format(args_count)) + @Parser def bind_parser(text, index): res = self(text, index) - return res if not res.status else fn(res.value)(text, res.index) + if not res.status: + return res + + return (fn(res.value, index) if args_count == 2 else fn(res.value))(text, res.index) return bind_parser def compose(self, other): @@ -244,36 +267,51 @@ def excepts_parser(text, index): return res return excepts_parser - def parsecmap(self, fn): + def parsecmap(self, fn, star=False): '''Returns a parser that transforms the produced value of parser with `fn`.''' - return self.bind(lambda res: Parser(lambda _, index: Value.success(index, fn(res)))) + def mapper(res): + # unpack tuple + result = fn(*res) if star else fn(res) + return success_with(result, advance=False) + return self.bind(mapper) + + def map(self, fn, star=False): + '''Functor map on the parsed value with `fn`. + Alias to parsecmap + ''' + return self.parsecmap(fn, star=star) def parsecapp(self, other): '''Returns a parser that applies the produced value of this parser to the produced value of `other`.''' # pylint: disable=unnecessary-lambda return self.bind(lambda res: other.parsecmap(lambda x: res(x))) + def apply(self, other): + '''Apply the function produced by self on the result of other. 
+ Alias to parsecapp + ''' + return self.parsecapp(other) + def result(self, res): '''Return a value according to the parameter `res` when parse successfully.''' - return self >> Parser(lambda _, index: Value.success(index, res)) + return self >> success_with(res, advance=False) def mark(self): '''Mark the line and column information of the result of this parser.''' def pos(text, index): return ParseError.loc_info(text, index) - @Parser - def mark_parser(text, index): - res = self(text, index) - if res.status: - return Value.success(res.index, (pos(text, index), res.value, pos(text, res.index))) - else: - return res # failed. - return mark_parser + def mark(value, index): + @Parser + def mark(text, resultant_index): + return Value.success(resultant_index, (pos(text, index), value, pos(text, resultant_index))) + return mark + + return self >= mark def desc(self, description): '''Describe a parser, when it failed, print out the description text.''' - return self | Parser(lambda _, index: Value.failure(index, description)) + return self | fail_with(description) def __or__(self, other): '''Implements the `(|)` operator, means `choice`.''' @@ -357,9 +395,29 @@ def choice(pa, pb): def try_choice(pa, pb): - '''Choice one from two parsers with backtrack, implements the operator of `(^)`.''' + '''Choose one from two parsers with backtrack, implements the operator of `(^)`.''' return pa.try_choice(pb) +def try_choices(*choices): + '''Choose one from the choices''' + return reduce(try_choice, choices) + +def try_choices_longest(*choices): + if not choices: + raise TypeError("choices cannot be empty") + + if not all(isinstance(choice, Parser) for choice in choices): + raise TypeError("choices can only be Parsers") + + @Parser + def longest(text, index): + results = list(map(lambda choice: choice(text, index), choices)) + if all(not result.status for result in results): + return Value.failure(index, 'does not match with any choices {}'.format(list(zip(choices, results)))) + + 
successful_results = list(filter(lambda result: result.status, results)) + return max(successful_results, key=lambda result: result.index) + return longest def skip(pa, pb): '''Ends with a specified parser, and at the end parser consumed the end flag. @@ -431,8 +489,8 @@ def generate(fn): @wraps(fn) @Parser def generated(text, index): - iterator, value = fn(), None try: + iterator, value = fn(), None while True: parser = iterator.send(value) res = parser(text, index) @@ -447,11 +505,14 @@ def generated(text, index): return Value.success(index, endval) except RuntimeError as error: stop = error.__cause__ - endval = stop.value - if isinstance(endval, Parser): - return endval(text, index) - else: - return Value.success(index, endval) + if isinstance(stop, StopIteration) and hasattr(stop, "value"): + endval = stop.value + if isinstance(endval, Parser): + return endval(text, index) + else: + return Value.success(index, endval) + # not what we want + raise error from None return generated.desc(fn.__name__) @@ -472,6 +533,7 @@ def times_parser(text, index): res = p(text, index) if res.status: if maxt == float('inf') and res.index == index: + # TODO: check whether it reaches mint # prevent infinite loop, see GH-43 break values.append(res.value) @@ -628,77 +690,42 @@ def sepEndBy1(p, sep): # Text.Parsec.Char ########################################################################## - -def any(): - '''Parses a arbitrary character.''' +def satisfy(predicate, failure=None): @Parser - def any_parser(text, index=0): - if index < len(text): + def satisfy_parser(text, index=0): + if index < len(text) and predicate(text[index]): return Value.success(index + 1, text[index]) else: - return Value.failure(index, 'a random char') - return any_parser + return Value.failure(index, failure or "does not satisfy predicate") + return satisfy_parser +def any(): + '''Parses a arbitrary character.''' + return satisfy(lambda _: True, 'a random char') def one_of(s): '''Parses a char from specified 
string.''' - @Parser - def one_of_parser(text, index=0): - if index < len(text) and text[index] in s: - return Value.success(index + 1, text[index]) - else: - return Value.failure(index, 'one of {}'.format(s)) - return one_of_parser - + return satisfy(lambda c: c in s, 'one of {}'.format(s)) def none_of(s): '''Parses a char NOT from specified string.''' - @Parser - def none_of_parser(text, index=0): - if index < len(text) and text[index] not in s: - return Value.success(index + 1, text[index]) - else: - return Value.failure(index, 'none of {}'.format(s)) - return none_of_parser - + return satisfy(lambda c: c not in s, 'none of {}'.format(s)) def space(): '''Parses a whitespace character.''' - @Parser - def space_parser(text, index=0): - if index < len(text) and text[index].isspace(): - return Value.success(index + 1, text[index]) - else: - return Value.failure(index, 'one space') - return space_parser - + return satisfy(str.isspace, 'one space') def spaces(): '''Parses zero or more whitespace characters.''' return many(space()) - def letter(): '''Parse a letter in alphabet.''' - @Parser - def letter_parser(text, index=0): - if index < len(text) and text[index].isalpha(): - return Value.success(index + 1, text[index]) - else: - return Value.failure(index, 'a letter') - return letter_parser - + return satisfy(str.isalpha, 'a letter') def digit(): '''Parse a digit.''' - @Parser - def digit_parser(text, index=0): - if index < len(text) and text[index].isdigit(): - return Value.success(index + 1, text[index]) - else: - return Value.failure(index, 'a digit') - return digit_parser - + return satisfy(str.isdigit, 'a digit') def eof(): '''Parses EOF flag of a string.''' @@ -710,7 +737,6 @@ def eof_parser(text, index=0): return Value.failure(index, 'EOF') return eof_parser - def string(s): '''Parses a string.''' @Parser @@ -744,17 +770,26 @@ def regex_parser(text, index): return Value.failure(index, exp.pattern) return regex_parser +def newline(): + return 
string("\n").desc("LF") + +def crlf(): + return (string("\r") >> newline()).desc("CRLF") + +def end_of_line(): + return (newline() | crlf()).desc("EOL") ########################################################################## # Useful utility parsers ########################################################################## +def success_with(value, advance=False): + return Parser(lambda _, index: Value.success(index + int(advance), value)) def fail_with(message): return Parser(lambda _, index: Value.failure(index, message)) - -def exclude(p: Parser, exclude: Parser): +def exclude(p, exclude): '''Fails parser p if parser `exclude` matches''' @Parser def exclude_parser(text, index): @@ -765,8 +800,7 @@ def exclude_parser(text, index): return p(text, index) return exclude_parser - -def lookahead(p: Parser): +def lookahead(p): '''Parses without consuming''' @Parser def lookahead_parser(text, index): @@ -778,7 +812,7 @@ def lookahead_parser(text, index): return lookahead_parser -def unit(p: Parser): +def unit(p): '''Converts a parser into a single unit. Only consumes input if the parser succeeds''' @Parser def unit_parser(text, index): @@ -789,7 +823,14 @@ def unit_parser(text, index): return Value.failure(index, res.expected) return unit_parser - +def between(open, close, parser): + @generate + def between_parser(): + yield open + results = yield parser + yield close + return results + return between_parser def fix(fn): '''Allow recursive parser using the Y combinator trick. @@ -800,3 +841,41 @@ def fix(fn): See also: https://github.com/sighingnow/parsec.py/issues/39. 
''' return (lambda x: x(x))(lambda y: fn(lambda *args: y(y)(*args))) + +def validate(predicate): + def validator(value): + if predicate(value): + return success_with(value, advance=False) + else: + return fail_with(f"{value} does not satisfy the given predicate {predicate}") + return validator + +########################################################################## +# Text.Parsec.Number +########################################################################## + +sign = string("-").result(operator.neg).desc("'-'") | optional(string("+").desc("'+'")).result(lambda x: x) + +def number(base, digit): + return many1(digit).parsecmap( + lambda digits: reduce(lambda accumulation, digit: accumulation * base + int(digit, base), digits, 0), + ) + +binary_digit = one_of("01").desc("binary_digit") +binary_number = number(2, binary_digit).desc("binary_number") +binary = (one_of("bB") >> binary_number).desc("binary") + +octal_digit = one_of("01234567").desc("octal_digit") +octal_number = number(8, octal_digit).desc("octal_number") +octal = (one_of("oO") >> octal_number).desc("octal") + +hexadecimal_digit = one_of("0123456789ABCDEFabcdef").desc("hexadecimal_digit") +hexadecimal_number = number(16, hexadecimal_digit).desc("hexadecimal_number") +hexadecimal = (one_of("xX") >> hexadecimal_number).desc("hexadecimal") + +decimal_number = number(10, digit()).desc("decimal_number") +decimal = decimal_number + +zero_number = string("0") >> (hexadecimal | octal | binary | decimal | success_with(0)) +natural = zero_number | decimal +integer = sign.apply(natural) diff --git a/src/parsec/__init__.pyi b/src/parsec/__init__.pyi index 0c4a74f..827022b 100644 --- a/src/parsec/__init__.pyi +++ b/src/parsec/__init__.pyi @@ -44,7 +44,10 @@ class Parser(T.Generic[_U]): def parse(self, text: Text) -> _U: ... def parse_partial(self, text: Text) -> tuple[_U, Text]: ... def parse_strict(self, text: Text) -> _U: ... + @T.overload def bind(self, fn: CA.Callable[[_U], Parser[_V]]) -> Parser[_V]: ... 
+ @T.overload + def bind(self, fn: CA.Callable[[_U, int], Parser[_V]]) -> Parser[_V]: ... def compose(self, other: Parser[_V]) -> Parser[_V]: ... def joint(self, *parsers: Parser[_U]) -> Parser[tuple[_U, ...]]: ... def choice(self, other: Parser[_V]) -> Parser[_U | _V]: ... @@ -52,10 +55,14 @@ class Parser(T.Generic[_U]): def skip(self, other: Parser[_V]) -> Parser[_U]: ... def ends_with(self, other: Parser[_V]) -> Parser[_U]: ... def excepts(self, ohter: Parser[_V]) -> Parser[_U]: ... - def parsecmap(self, fn: CA.Callable[[_U], _V]) -> Parser[_V]: ... + def parsecmap(self, fn: CA.Callable[[_U], _V], star: bool = False) -> Parser[_V]: ... + def map(self, fn: CA.Callable[[_U], _V], star: bool = False) -> Parser[_V]: ... def parsecapp( self: Parser[CA.Callable[[_V], _W]], other: Parser[_V] ) -> Parser[_W]: ... + def apply( + self: Parser[CA.Callable[[_V], _W]], other: Parser[_V] + ) -> Parser[_W]: ... def result(self, res: _V) -> Parser[_V]: ... def mark(self) -> Parser[tuple[_LocInfo, _U, _LocInfo]]: ... def desc(self, description: str) -> Parser[_U]: ... @@ -64,18 +71,23 @@ class Parser(T.Generic[_U]): def __add__(self, other: Parser[_V]) -> Parser[tuple[_U, _V]]: ... def __rshift__(self, other: Parser[_V]) -> Parser[_V]: ... def __gt__(self, other: Parser[_V]) -> Parser[_V]: ... - def __irshift__(self, other: CA.Callable[[_U], Parser[_V]]) -> Parser[_V]: ... + def __irshift__(self, other: CA.Callable[[_U], Parser[_V]]) -> Parser[_V]: ... # type: ignore[misc] def __ge__(self, other: Parser[_V]) -> Parser[_V]: ... def __lshift__(self, other: Parser[_V]) -> Parser[_U]: ... def __lt__(self, other: Parser[_V]) -> Parser[_U]: ... def __truediv__(self, other: Parser[_V]) -> Parser[_U]: ... def parse(p: Parser[_V], text: Text, index: int) -> _V: ... +@T.overload def bind(p: Parser[_U], fn: CA.Callable[[_U], Parser[_V]]) -> Parser[_V]: ... +@T.overload +def bind(p: Parser[_U], fn: CA.Callable[[_U, int], Parser[_V]]) -> Parser[_V]: ... 
def compose(pa: Parser, pb: Parser[_V]) -> Parser[_V]: ... def joint(*parsers: Parser[_U]) -> Parser[tuple[_U, ...]]: ... def choice(pa: Parser[_U], pb: Parser[_V]) -> Parser[_U | _V]: ... def try_choice(pa: Parser[_U], pb: Parser[_V]) -> Parser[_U | _V]: ... +def try_choices(*parsers: Parser[_U]) -> Parser[_U]: ... +def try_choices_longest(*parsers: Parser[_U]) -> Parser[_U]: ... def skip(pa: Parser[_U], pb: Parser) -> Parser[_U]: ... def ends_with(pa: Parser[_U], pb: Parser) -> Parser[_U]: ... def excepts(pa: Parser[_U], pb: Parser) -> Parser[_U]: ... @@ -116,6 +128,7 @@ def endBy(p: Parser[_U], sep: Parser) -> Parser[list[_U]]: ... def endBy1(p: Parser[_U], sep: Parser) -> Parser[list[_U]]: ... def sepEndBy(p: Parser[_U], sep: Parser) -> Parser[list[_U]]: ... def sepEndBy1(p: Parser[_U], sep: Parser) -> Parser[list[_U]]: ... +def satisfy(predicate: CA.Callable[[_U], bool]) -> Parser[_U]: ... def any() -> Parser: ... def one_of(s: CA.Container[_U]) -> Parser[_U]: ... def none_of(s: CA.Container[_U]) -> Parser[_U]: ... @@ -126,7 +139,33 @@ def digit() -> Parser[str]: ... def eof() -> Parser[None]: ... def string(s: _VS) -> Parser[_VS]: ... def regex(exp: str | re.Pattern, flags: re.RegexFlag = ...) -> Parser[str]: ... +def success_with(value: _U, advance: bool = False) -> Parser[_U]: ... def fail_with(message: str) -> Parser: ... def exclude(p: Parser[_U], exclude: Parser) -> Parser[_U]: ... def lookahead(p: Parser[_U]) -> Parser[_U]: ... def unit(p: Parser[_U]) -> Parser[_U]: ... +def between(open: Parser[_U], close: Parser[_U], parser: Parser[_U]) -> Parser[_U]: ... +def validate(predicate: CA.Callable[[_U], bool]) -> Parser[_U]: ... + +sign: Parser[CA.Callable[[_U], _U]] + +def number(base: int, digit: Parser[str]) -> Parser[int]: ... 
+ +binary_digit: Parser[str] +binary_number: Parser[int] +binary: Parser[int] + +octal_digit: Parser[str] +octal_number: Parser[int] +octal: Parser[int] + +hexadecimal_digit: Parser[str] +hexadecimal_number: Parser[int] +hexadecimal: Parser[int] + +decimal_number: Parser[int] +decimal: Parser[int] + +zero_number: Parser[int] +natural: Parser[int] +integer: Parser[int] diff --git a/src/parsec/tests/test_parsec.py b/src/parsec/tests/test_parsec.py index b6b00e0..dee5515 100644 --- a/src/parsec/tests/test_parsec.py +++ b/src/parsec/tests/test_parsec.py @@ -9,13 +9,57 @@ __author__ = 'He Tao, sighingnow@gmail.com' +import re import random import unittest from parsec import * +class ParseErrorTest(unittest.TestCase): + def test_loc_info_should_throw_on_invalid_index(self): + with self.assertRaises(ValueError): + ParseError.loc_info("", 1) + + def test_loc_info_should_use_default_values_when_text_is_not_str(self): + self.assertEqual(ParseError.loc_info([0], 0), (0, -1)) + + def test_str(self): + self.assertTrue(str(ParseError("foo bar", "test", 0))) + # trigger ValueError + self.assertTrue(str(ParseError("foo bar", "", 1))) + +class ValueTest(unittest.TestCase): + def test_aggregate(self): + value = Value.failure(-1, "this") + self.assertEqual(value.aggregate(), value) + + value = Value.success(-1, ["foo"]) + self.assertEqual(value.aggregate(), value) + + other = Value.failure(-1, "that") + self.assertEqual(value.aggregate(other), other) + + other = Value.success(0, ["bar"]) + self.assertEqual(value.aggregate(other), Value.success(0, ["foo", "bar"])) + + def test_update_index(self): + value = Value.success(0, None) + self.assertEqual(value.update_index(), value) + self.assertEqual(value.update_index(1), Value.success(1, None)) + + def test_combinate(self): + with self.assertRaisesRegex(TypeError, "cannot call combinate without any value"): + Value.combinate([]) + + self.assertEqual(Value.combinate([Value.success(0, None)]), Value.success(0, (None,))) + 
self.assertEqual(Value.combinate([Value.failure(0, "expect to fail")]), Value.failure(0, "expect to fail")) + self.assertEqual(Value.combinate([Value.success(0, None), Value.failure(0, "expect to fail")]), Value.failure(0, "expect to fail")) + class ParsecTest(unittest.TestCase): '''Test the implementation of Text.Parsec. (The final test for all apis)''' + def test_repr(self): + self.assertIsNotNone(repr(any())) + def test_times_with_then(self): parser = times(letter(), 3) >> digit() self.assertEqual(parser.parse('xyz1'), '1') @@ -23,6 +67,11 @@ def test_times_with_then(self): self.assertRaises(ParseError, parser.parse, 'xyz') self.assertRaises(ParseError, parser.parse, 'xyzw') + def test_times_inf_maxt(self): + parser = times(eof(), 1, float('inf')) + self.assertEqual(parser.parse(''), []) + # self.assertEqual(parser.parse('abc'), ['a', 'b', 'c']) + def test_many_with_then(self): parser = many(string('x')) >> string('y') self.assertEqual(parser.parse('y'), 'y') @@ -57,11 +106,14 @@ def binder(x): nonlocals['piped'] = x return string('y') - parser = string('x').bind(binder) + parser = string('x') >= binder self.assertEqual(parser.parse('xy'), 'y') self.assertEqual(nonlocals['piped'], 'x') self.assertRaises(ParseError, parser.parse, 'x') + with self.assertRaises(TypeError): + parser >= (lambda x, y, z: any()) + def test_compose(self): parser = string('x') >> string('y') self.assertEqual(parser.parse('xy'), 'y') @@ -105,25 +157,59 @@ def test_try_choice(self): self.assertEqual(parser.parse('xy'), 'xy') self.assertEqual(parser.parse('xz'), 'xz') + def test_try_choices(self): + # cannot try_choices without choices + with self.assertRaisesRegex(TypeError, r"reduce\(\) of empty \w+ with no initial value"): + try_choices() + + parser = try_choices(string('x')) + self.assertEqual(parser.parse('x'), 'x') + + parser = try_choices(string('yz'), string('y')) + self.assertEqual(parser.parse('yz'), 'yz') + self.assertEqual(parser.parse('y'), 'y') + + parser = 
try_choices(string('x'), string('yz'), string('y')) + self.assertEqual(parser.parse('x'), 'x') + self.assertEqual(parser.parse('yz'), 'yz') + self.assertEqual(parser.parse('y'), 'y') + + def test_try_choices_longest(self): + with self.assertRaisesRegex(TypeError, "choices cannot be empty"): + try_choices_longest() + + with self.assertRaisesRegex(TypeError, "choices can only be Parsers"): + try_choices_longest(None) + + parser = try_choices_longest(string("x"), string("xyz")) + self.assertEqual(parser.parse("x"), "x") + self.assertEqual(parser.parse("xyz"), "xyz") + + with self.assertRaisesRegex(ParseError, r"does not match with any choices .*"): + parser.parse("y") + def test_ends_with(self): parser = string('x') < string('y') self.assertEqual(parser.parse('xy'), 'x') self.assertRaises(ParseError, parser.parse, 'xx') - def test_parsecmap(self): + with self.assertRaises(ParseError): + parser.parse('y') + + def test_map(self): def mapfn(p): return p + p - parser = string('x').parsecmap(mapfn) + parser = string('x').map(mapfn) self.assertEqual(parser.parse('x'), 'xx') - def test_parsecapp(self): + def test_apply(self): def genfn(p): return lambda c: 'fn:' + p + c + c - parser = string('x').parsecmap(genfn).parsecapp(string('y')) + parser = string('x').map(genfn).apply(string('y')) self.assertEqual(parser.parse('xy'), 'fn:xyy') def test_desc(self): @@ -132,7 +218,7 @@ def test_desc(self): self.assertRaises(ParseError, parser.parse, 'y') def test_mark(self): - parser = many(mark(many(letter())) << string("\n")) + parser = many1(mark(many(letter())) << string("\n")) lines = parser.parse("asdf\nqwer\n") @@ -148,6 +234,9 @@ def test_mark(self): self.assertEqual(letters, ['q', 'w', 'e', 'r']) self.assertEqual(end, (1, 4)) + with self.assertRaises(ParseError): + parser.parse("1") + def test_choice_with_compose(self): parser = (string('\\') >> string('y')) | string('z') self.assertEqual(parser.parse('\\y'), 'y') @@ -318,10 +407,22 @@ def test_excepts(self): 
self.assertEqual(parser.parse('<'), "<") self.assertEqual(parser.parse('<='), "<=") + with self.assertRaises(ParseError): + parser.parse('>') + parser = string('<') ^ string('<=') self.assertEqual(parser.parse('<'), "<") self.assertEqual(parser.parse('<='), "<") + def test_between(self): + parser = between(string("("), string(")"), many(none_of(")"))) + self.assertEqual(parser.parse("()"), []) + self.assertEqual(parser.parse("(abc)"), ["a", "b", "c"]) + self.assertRaises(ParseError, parser.parse, "") + self.assertRaises(ParseError, parser.parse, "(") + self.assertRaises(ParseError, parser.parse, ")") + self.assertRaises(ParseError, parser.parse, ")(") + def test_fix(self): @Parser @fix @@ -330,6 +431,10 @@ def bracketed_expr(recur): self.assertEqual(bracketed_expr.parse("((x))"), 'x') + def test_validate(self): + parser = any() >= validate(str.isalpha) + self.assertEqual(parser.parse("a"), "a") + self.assertRaises(ParseError, parser.parse, "1") class ParsecCharTest(unittest.TestCase): '''Test the implementation of Text.Parsec.Char.''' @@ -344,6 +449,95 @@ def test_regex(self): self.assertEqual(parser.parse('1'), '1') self.assertEqual(parser.parse('4'), '4') self.assertRaises(ParseError, parser.parse, 'x') + # combinator only accepts string as input + self.assertRaises(ParseError, parser.parse, [1]) + + parser = regex(re.compile(r'[0-9]')) + self.assertEqual(parser.parse('1'), '1') + + def test_one_of(self): + parser = one_of('abc') + self.assertEqual(parser.parse('a'), 'a') + self.assertEqual(parser.parse('b'), 'b') + self.assertEqual(parser.parse('c'), 'c') + self.assertRaises(ParseError, parser.parse, 'd') + + def test_none_of(self): + parser = none_of('abc') + self.assertRaises(ParseError, parser.parse, 'a') + self.assertRaises(ParseError, parser.parse, 'b') + self.assertRaises(ParseError, parser.parse, 'c') + self.assertEqual(parser.parse('d'), 'd') + + def test_exclude(self): + parser = exclude(string("test"), string("should-be-excluded")) + 
self.assertEqual(parser.parse("test"), "test") + self.assertRaises(ParseError, parser.parse, "should-be-excluded") + + def test_lookahead(self): + parser = lookahead(string("test")) + string("test") + self.assertEqual(parser.parse("test"), ("test", "test")) + self.assertRaises(ParseError, parser.parse, "tes") + + def test_unit(self): + parser = unit(string("abc")) | one_of("a") + self.assertEqual(parser.parse("abc"), "abc") + self.assertEqual(parser.parse("a"), "a") + +class ParsecNumberTest(unittest.TestCase): + '''Test the implementation of Text.Parsec.Number.''' + + def test_decimal(self): + parser = decimal + self.assertEqual(parser.parse('0'), 0) + self.assertEqual(parser.parse('1'), 1) + self.assertEqual(parser.parse('10'), 10) + self.assertEqual(parser.parse('9999'), 9999) + + def test_binary(self): + parser = binary + self.assertEqual(parser.parse('b0'), 0b0) + self.assertEqual(parser.parse('b1'), 0b1) + self.assertEqual(parser.parse('B1'), 0b1) + self.assertEqual(parser.parse('b10'), 0b10) + self.assertEqual(parser.parse('B10'), 0b10) + self.assertEqual(parser.parse('b1111'), 0b1111) + self.assertEqual(parser.parse('B1111'), 0b1111) + + def test_octal(self): + parser = octal + self.assertEqual(parser.parse('o0'), 0o0) + self.assertEqual(parser.parse('o1'), 0o1) + self.assertEqual(parser.parse('O1'), 0o1) + self.assertEqual(parser.parse('o10'), 0o10) + self.assertEqual(parser.parse('O10'), 0o10) + self.assertEqual(parser.parse('o7777'), 0o7777) + self.assertEqual(parser.parse('O7777'), 0o7777) + + def test_hexadecimal(self): + parser = hexadecimal + self.assertEqual(parser.parse('x0'), 0x0) + self.assertEqual(parser.parse('x1'), 0x1) + self.assertEqual(parser.parse('X1'), 0x1) + self.assertEqual(parser.parse('x10'), 0x10) + self.assertEqual(parser.parse('X10'), 0x10) + self.assertEqual(parser.parse('xffff'), 0xffff) + self.assertEqual(parser.parse('Xffff'), 0xffff) + + def test_integer(self): + parser = integer + self.assertEqual(parser.parse('0'), 0) + 
self.assertEqual(parser.parse('-1'), -1) + self.assertEqual(parser.parse('+1'), 1) + self.assertEqual(parser.parse('0b10'), 0b10) + self.assertEqual(parser.parse('-0b10'), -0b10) + self.assertEqual(parser.parse('+0b10'), 0b10) + self.assertEqual(parser.parse('0o10'), 0o10) + self.assertEqual(parser.parse('+0o10'), 0o10) + self.assertEqual(parser.parse('-0o10'), -0o10) + self.assertEqual(parser.parse('0x10'), 0x10) + self.assertEqual(parser.parse('+0x10'), 0x10) + self.assertEqual(parser.parse('-0x10'), -0x10) class ParserGeneratorTest(unittest.TestCase): '''Test the implementation of Parser Generator.(generate)''' @@ -387,6 +581,8 @@ def test_generate_raise(self): def xy(): yield string('x') yield string('y') + + # NOTE: this will appear in the form of a RuntimeError caused by StopIteration r = StopIteration('success') r.value = 'success' # for pre-3.3 Python raise r @@ -394,5 +590,40 @@ def xy(): parser = xy self.assertEqual(parser.parse('xy'), 'success') + @generate + def yz(): + r = StopIteration() + r.value = string("yz") + raise r + + parser = yz + self.assertEqual(parser.parse('yz'), 'yz') + + @generate + def stop_iteration_without_value(): + # simulate python 2 + r = StopIteration() + delattr(r, "value") + raise RuntimeError from r + + parser = stop_iteration_without_value + self.assertEqual(parser.parse("whatever"), None) + + @generate + def stop_iteration_with_parser_as_value(): + raise RuntimeError from StopIteration(string("yz")) + + parser = stop_iteration_with_parser_as_value + self.assertEqual(parser.parse("yz"), "yz") + + @generate + def runtime_error(): + r = RuntimeError + raise r + + parser = runtime_error + with self.assertRaises(RuntimeError): + parser.parse("whatever") + if __name__ == '__main__': unittest.main()