Skip to content

Commit

Permalink
Allow single value parsing in CSVParameter and MultiCSVParameter.
Browse files Browse the repository at this point in the history
  • Loading branch information
riga committed Dec 4, 2022
1 parent 54cd3e4 commit cc2aa0a
Showing 1 changed file with 41 additions and 10 deletions.
51 changes: 41 additions & 10 deletions law/parameter.py
Original file line number Diff line number Diff line change
Expand Up @@ -201,7 +201,7 @@ def serialize(self, value):

class CSVParameter(luigi.Parameter):
r""" __init__(*args, cls=luigi.Parameter, unique=False, sort=False, min_len=None, \
max_len=None, choices=None, brace_expand=False, escape_sep=True, **kwargs)
max_len=None, choices=None, brace_expand=False, escape_sep=True, force_tuple=True, **kwargs)
Parameter that parses a comma-separated value (CSV) and produces a tuple. *cls* can refer to
another parameter class that will be used to parse and serialize the particular items.
Expand All @@ -219,6 +219,11 @@ class CSVParameter(luigi.Parameter):
Unless *escape_sep* is *False*, escaped separators (comma) are not split when parsing
strings and, likewise, separators contained in values to serialize are escaped.
By default, single values are parsed such that they result in a tuple containing a single item.
However, when *force_tuple* is *False*, single values that do not end with a comma are not
wrapped by a tuple. Likewise, during serialization they are converted to a string as is, whereas
a tuple containing only a single item is serialized with a trailing comma.
Example:
.. code-block:: python
Expand Down Expand Up @@ -258,6 +263,20 @@ class CSVParameter(luigi.Parameter):
p.parse("1{2,3,4}9")
# => (129, 139, 149)
# do not force tuples to wrap single values
p = CSVParameter(cls=luigi.IntParameter, force_tuple=False)
p.parse("1")
# => 1
# note: the result would be (1,) with force_tuple left at its default value (True)
p.parse("1,")
# => (1,)
p.serialize(1)
# => "1"
p.serialize((1,))
# => "1,"
p.serialize((1, 2))
# => "1,2"
.. note::
Due to the way `instance caching
Expand All @@ -283,6 +302,7 @@ def __init__(self, *args, **kwargs):
self._choices = kwargs.pop("choices", None)
self._brace_expand = kwargs.pop("brace_expand", False)
self._escape_sep = kwargs.pop("escape_sep", True)
self._force_tuple = kwargs.pop("force_tuple", True)

# ensure that the default value is a tuple
if "default" in kwargs:
Expand Down Expand Up @@ -335,11 +355,15 @@ def _check_choices(self, value):

def parse(self, inp):
""""""
return_single_value = False
if not inp or inp == NO_STR:
value = tuple()
elif isinstance(inp, (tuple, list)) or is_lazy_iterable(inp):
value = make_tuple(inp)
elif isinstance(inp, six.string_types):
if not self._force_tuple:
ended_with_comma = inp.endswith(",")
inp = inp.rstrip(",")
if self._brace_expand:
elems = brace_expand(inp, split_csv=True, escape_csv_sep=self._escape_sep)
else:
Expand All @@ -353,6 +377,7 @@ def parse(self, inp):
if self._escape_sep:
elems = [elem.replace(escaped_sep, ",") for elem in elems]
value = tuple(map(self._inst.parse, elems))
return_single_value = len(value) == 1 and not self._force_tuple and not ended_with_comma
else:
value = (inp,)

Expand All @@ -362,13 +387,15 @@ def parse(self, inp):
self._check_len(value)
self._check_choices(value)

return value
return value[0] if return_single_value else value

def serialize(self, value):
""""""
if not value:
value = tuple()

# ensure value is a tuple
was_sequence = isinstance(value, (tuple, list))
value = make_tuple(value)

# apply uniqueness, sort, length and choices checks
Expand All @@ -377,20 +404,27 @@ def serialize(self, value):
self._check_len(value)
self._check_choices(value)

return ",".join(str(self._inst.serialize(elem)) for elem in value)
# convert to string
s = ",".join(str(self._inst.serialize(elem)) for elem in value)

# add a trailing comma if necessary
if len(value) == 1 and not self._force_tuple and was_sequence:
s += ","

return s


class MultiCSVParameter(CSVParameter):
r""" __init__(*args, cls=luigi.Parameter, unique=False, sort=False, min_len=None, \
max_len=None, choices=None, brace_expand=False, escape_sep=True, **kwargs)
max_len=None, choices=None, brace_expand=False, escape_sep=True, force_tuple=True, **kwargs)
Parameter that parses several comma-separated values (CSV), separated by colons, and produces a
nested tuple. *cls* can refer to another parameter class that will be used to parse and
serialize the particular items.
Except for the additional support for multiple CSV sequences, the parsing and serialization
implementation is based on :py:class:`CSVParameter`, which also handles the features controlled
by *unique*, *sort*, *max_len*, *min_len*, *choices*, *brace_expand* and *escape_sep* per
sequence of values.
by *unique*, *sort*, *max_len*, *min_len*, *choices*, *brace_expand*, *escape_sep* and
*force_tuple* per sequence of values.
However, note that in this case colon characters that are not meant to act as a delimiter cannot
be quoted in csv-style with double quotes, but they should rather be backslash-escaped instead.
Expand Down Expand Up @@ -484,10 +518,7 @@ def serialize(self, value):
if not value:
return ""

# ensure that value is a nested tuple
value = tuple(map(make_tuple, make_tuple(value)))

return ":".join(super(MultiCSVParameter, self).serialize(v) for v in value)
return ":".join(super(MultiCSVParameter, self).serialize(v) for v in make_tuple(value))


class RangeParameter(luigi.Parameter):
Expand Down

0 comments on commit cc2aa0a

Please sign in to comment.