diff --git a/pyhocon/config_parser.py b/pyhocon/config_parser.py index 9241de87..b1899961 100644 --- a/pyhocon/config_parser.py +++ b/pyhocon/config_parser.py @@ -16,7 +16,7 @@ ParserElement, ParseSyntaxException, QuotedString, Regex, SkipTo, StringEnd, Suppress, TokenConverter, Word, ZeroOrMore, alphanums, alphas8bit, col, lineno, - replaceWith) + replaceWith, Or, nums, White, WordEnd) # Fix deepcopy issue with pyparsing if sys.version_info >= (3, 8): @@ -295,7 +295,6 @@ def convert_number(tokens): return float(n) def convert_period(tokens): - period_value = int(tokens.value) period_identifier = tokens.unit @@ -431,14 +430,17 @@ def set_default_white_spaces(): comment_no_comma_eol = (comment | eol).suppress() number_expr = Regex(r'[+-]?(\d*\.\d+|\d+(\.\d+)?)([eE][+\-]?\d+)?(?=$|[ \t]*([\$\}\],#\n\r]|//))', re.DOTALL).setParseAction(convert_number) - # Must be sorted from longest to shortest otherwise 'weeks' will match 'w' and 'eeks' - # will be parsed as a general string. - period_types = sorted( - itertools.chain.from_iterable(cls.get_supported_period_type_map().values()), - key=lambda x: len(x), reverse=True) - period_expr = Regex( - r'(?P<value>\d+)\s*(?P<unit>' + '|'.join(period_types) + ')$', - flags=re.MULTILINE, + + # Flatten the list of lists with unit strings. + period_types = list(itertools.chain(*cls.get_supported_period_type_map().values())) + # `Or()` tries to match the longest expression if more expressions + # are matching. We employ this to match e.g.: 'weeks' so that we + # don't end up with 'w' and 'eeks'. Note that 'weeks' but also 'w' + # are valid unit identifiers. + # Allow only spaces as a valid separator between value and unit. + # E.g. \t as a separator is invalid: '10<TAB>weeks'. 
+ period_expr = ( + Word(nums)('value') + ZeroOrMore(White(ws=' ')).suppress() + Or(period_types)('unit') + WordEnd(alphanums).suppress() ).setParseAction(convert_period) # multi line string using """ @@ -748,9 +750,26 @@ def postParse(self, instring, loc, token_list): :param token_list: :return: """ - cleaned_token_list = [token for tokens in (token.tokens if isinstance(token, ConfigInclude) else [token] - for token in token_list if token != '') - for token in tokens] + cleaned_token_list = [] + # Note that a token can be a duration value object: + # >>> relativedelta(hours = 1) == '' + # False + # >>> relativedelta(hours = 1) != '' + # False + # relativedelta.__eq__() returns NotImplemented if it is compared with + # a different object type so Python falls back to identity comparison. + # We cannot compare this object to a string object. + for token in token_list: + if isinstance(token, str) and token == '': + # This is the case when there was a trailing comma in the list. + # The last token is just an empty string so we can safely ignore + # it. + continue + if isinstance(token, ConfigInclude): + cleaned_token_list.extend(token.tokens) + else: + cleaned_token_list.append(token) + config_list = ConfigList(cleaned_token_list) return [config_list] diff --git a/tests/test_config_parser.py b/tests/test_config_parser.py index a33e8029..10e7932e 100644 --- a/tests/test_config_parser.py +++ b/tests/test_config_parser.py @@ -148,6 +148,16 @@ def test_parse_string_with_duration_with_long_unit_name(self): ) assert config['b'] == period(weeks=10) + def test_parse_with_list_mixed_types_with_durations_and_trailing_comma(self): + config = ConfigFactory.parse_string( + """ + a: foo + b: [a, 1, 10 weeks, 5 minutes,] + c: bar + """ + ) + assert config['b'] == ['a', 1, period(weeks=10), period(minutes=5)] + def test_parse_with_enclosing_square_bracket(self): config = ConfigFactory.parse_string("[1, 2, 3]") assert config == [1, 2, 3]