Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix duration parsing in lists #255

Merged
merged 2 commits into from
Jul 11, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
45 changes: 32 additions & 13 deletions pyhocon/config_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
ParserElement, ParseSyntaxException, QuotedString,
Regex, SkipTo, StringEnd, Suppress, TokenConverter,
Word, ZeroOrMore, alphanums, alphas8bit, col, lineno,
replaceWith)
replaceWith, Or, nums, White, WordEnd)

# Fix deepcopy issue with pyparsing
if sys.version_info >= (3, 8):
Expand Down Expand Up @@ -295,7 +295,6 @@ def convert_number(tokens):
return float(n)

def convert_period(tokens):

period_value = int(tokens.value)
period_identifier = tokens.unit

Expand Down Expand Up @@ -431,14 +430,17 @@ def set_default_white_spaces():
comment_no_comma_eol = (comment | eol).suppress()
number_expr = Regex(r'[+-]?(\d*\.\d+|\d+(\.\d+)?)([eE][+\-]?\d+)?(?=$|[ \t]*([\$\}\],#\n\r]|//))',
re.DOTALL).setParseAction(convert_number)
# Must be sorted from longest to shortest otherwise 'weeks' will match 'w' and 'eeks'
# will be parsed as a general string.
period_types = sorted(
itertools.chain.from_iterable(cls.get_supported_period_type_map().values()),
key=lambda x: len(x), reverse=True)
period_expr = Regex(
r'(?P<value>\d+)\s*(?P<unit>' + '|'.join(period_types) + ')$',
flags=re.MULTILINE,

# Flatten the list of lists with unit strings.
period_types = list(itertools.chain(*cls.get_supported_period_type_map().values()))
# `Or()` picks the longest match when more than one alternative
# matches. We rely on this to match e.g. 'weeks' as a whole, so that
# we don't end up with 'w' followed by 'eeks'. Note that both 'weeks'
# and 'w' are valid unit identifiers.
# Allow only spaces as a valid separator between value and unit.
# E.g. \t as a separator is invalid: '10<TAB>weeks'.
period_expr = (
Word(nums)('value') + ZeroOrMore(White(ws=' ')).suppress() + Or(period_types)('unit') + WordEnd(alphanums).suppress()
).setParseAction(convert_period)

# multi line string using """
Expand Down Expand Up @@ -748,9 +750,26 @@ def postParse(self, instring, loc, token_list):
:param token_list:
:return:
"""
cleaned_token_list = [token for tokens in (token.tokens if isinstance(token, ConfigInclude) else [token]
for token in token_list if token != '')
for token in tokens]
cleaned_token_list = []
# Note that a token can be a duration value object:
# >>> relativedelta(hours = 1) == ''
# False
# >>> relativedelta(hours = 1) != ''
# False
# relativedelta.__eq__() returns NotImplemented when it is compared with
# an object of a different type, so Python falls back to identity comparison.
# We therefore cannot compare this object to a string object.
for token in token_list:
if isinstance(token, str) and token == '':
# This is the case when there was a trailing comma in the list.
# The last token is just an empty string so we can safely ignore
# it.
continue
if isinstance(token, ConfigInclude):
cleaned_token_list.extend(token.tokens)
else:
cleaned_token_list.append(token)

config_list = ConfigList(cleaned_token_list)
return [config_list]

Expand Down
10 changes: 10 additions & 0 deletions tests/test_config_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -148,6 +148,16 @@ def test_parse_string_with_duration_with_long_unit_name(self):
)
assert config['b'] == period(weeks=10)

def test_parse_with_list_mixed_types_with_durations_and_trailing_comma(self):
config = ConfigFactory.parse_string(
"""
a: foo
b: [a, 1, 10 weeks, 5 minutes,]
c: bar
"""
)
assert config['b'] == ['a', 1, period(weeks=10), period(minutes=5)]

def test_parse_with_enclosing_square_bracket(self):
config = ConfigFactory.parse_string("[1, 2, 3]")
assert config == [1, 2, 3]
Expand Down