From f6cd94f86ac1e19de8f711c0ca0942ab69f2161e Mon Sep 17 00:00:00 2001 From: David Shrewsbury Date: Thu, 2 May 2024 13:42:21 -0400 Subject: [PATCH] Allow regex matching in excludes --- docs/definition.rst | 47 ++++++++++----- .../_target_scripts/introspect.py | 42 ++++++++++---- test/unit/test_introspect.py | 58 ++++++++++++++++++- 3 files changed, 122 insertions(+), 25 deletions(-) diff --git a/docs/definition.rst b/docs/definition.rst index 7087e82a..caa7c6ea 100644 --- a/docs/definition.rst +++ b/docs/definition.rst @@ -261,20 +261,39 @@ The following keys are valid for this section: be a filename, or a list of requirements (see below for an example). ``exclude`` - A list of Python or system requirements to be excluded from the top-level dependency requirements - of referenced collections. These exclusions will not apply to the user supplied Python or system - dependencies, nor will they apply to dependencies of dependencies (top-level only). Python dependency - exclusions should be a list of package names appearing under the ``python`` key name. System dependency - exclusions should be a list of system package names appearing under the ``system`` key name. If you - want to exclude *all* Python and system dependencies from one or more collections, supply the list - of collection names under the ``all_from_collections`` key. + A dictionary defining the Python or system requirements to be excluded from the top-level dependency + requirements of referenced collections. These exclusions will not apply to the user supplied Python or + system dependencies, nor will they apply to dependencies of dependencies (top-level only). - The exclusion string should be the simple name of the requirement you want excluded. For example, - if you need to exclude the system requirement that appears as ``foo [!platform:gentoo]`` within - an included collection, then your exclusion string should be ``foo``. To exclude the Python - requirement ``bar == 1.0.0``, your exclusion string would be ``bar``. + The following keys are valid for this section: - Example: + * ``python`` - A list of Python dependencies to be excluded. + * ``system`` - A list of system dependencies to be excluded. + * ``all_from_collections`` - If you want to exclude *all* Python and system dependencies from one or + more collections, supply a list of collection names under this key. + + The exclusion feature supports two forms of matching: + + * Simple name matching. + * Advanced name matching using regular expressions. + + For simple name matching, you need only supply the name of the requirement/collection to match. + All values will be compared in a case-insensitive manner. + + For advanced name matching, begin the exclusion string with the tilde (``~``) character to + indicate that the remaining portion of the string is a regular expression to be used to match + a requirement/collection name. The regex should be considered case-insensitive. + + .. note:: + The regular expression must match the full requirement/collection name. For example, ``~foo.`` + does not fully match the name ``foobar``, but ``~foo.+`` does. + + With both forms of matching, the exclusion string will be compared against the *simple* name of + any Python or system requirement. For example, if you need to exclude the system requirement that + appears as ``foo [!platform:gentoo]`` within an included collection, then your exclusion string should be + ``foo``. To exclude the Python requirement ``bar == 1.0.0``, your exclusion string would be ``bar``. + + Example using both simple and advanced matching: .. code:: yaml @@ -285,8 +304,8 @@ The following keys are valid for this section: system: - python3-Cython all_from_collections: - - community.crypto - - community.docker + # Regular expression to exclude all from community collections + - ~community\..+ .. note:: The ``exclude`` option requires ``ansible-builder`` version ``3.1`` or newer. diff --git a/src/ansible_builder/_target_scripts/introspect.py b/src/ansible_builder/_target_scripts/introspect.py index 66a87617..43c97825 100644 --- a/src/ansible_builder/_target_scripts/introspect.py +++ b/src/ansible_builder/_target_scripts/introspect.py @@ -276,6 +276,26 @@ def strip_comments(reqs: dict[str, list]) -> dict[str, list]: return result +def should_be_excluded(value: str, exclusion_list: list[str]) -> bool: + """ + Test if `value` matches against any value in `exclusion_list`. + + The exclusion_list values are either strings to be compared in a case-insensitive + manner against value, OR, they are regular expressions to be tested against the + value. A regular expression will contain '~' as the first character. + + :return: True if the value should be excluded, False otherwise. + """ + for exclude_value in exclusion_list: + if exclude_value[0] == "~": + pattern = exclude_value[1:] + if re.fullmatch(pattern.lower(), value.lower()): + return True + elif exclude_value.lower() == value.lower(): + return True + return False + + def filter_requirements(reqs: dict[str, list], exclude: list[str] | None = None, exclude_collections: list[str] | None = None, @@ -302,16 +322,16 @@ def filter_requirements(reqs: dict[str, list], collection_ignore_list: list[str] = [] if exclude: - exclusions = [r.lower() for r in exclude] + exclusions = exclude.copy() if exclude_collections: - collection_ignore_list = [c.lower() for c in exclude_collections] + collection_ignore_list = exclude_collections.copy() annotated_lines: list[str] = [] uncommented_reqs = strip_comments(reqs) for collection, lines in uncommented_reqs.items(): # Bypass this collection if we've been told to ignore all requirements from it. - if collection.lower() in collection_ignore_list: + if should_be_excluded(collection, collection_ignore_list): logger.debug("# Excluding all requirements from collection '%s'", collection) continue @@ -332,14 +352,16 @@ def filter_requirements(reqs: dict[str, list], # bindep system requirements have the package name as the first "word" on the line name = line.split(maxsplit=1)[0] - lower_name = name.lower() + if collection.lower() not in {'user', 'exclude'}: + lower_name = name.lower() - if lower_name in exclusions and collection not in {'user', 'exclude'}: - logger.debug("# Explicitly excluding requirement '%s' from '%s'", name, collection) - continue - if lower_name in EXCLUDE_REQUIREMENTS and collection not in {'user', 'exclude'}: - logger.debug("# Excluding requirement '%s' from '%s'", name, collection) - continue + if lower_name in EXCLUDE_REQUIREMENTS: + logger.debug("# Excluding requirement '%s' from '%s'", name, collection) + continue + + if should_be_excluded(lower_name, exclusions): + logger.debug("# Explicitly excluding requirement '%s' from '%s'", name, collection) + continue annotated_lines.append(f'{line} # from collection {collection}') diff --git a/test/unit/test_introspect.py b/test/unit/test_introspect.py index 74ab4b6b..3e57cc63 100644 --- a/test/unit/test_introspect.py +++ b/test/unit/test_introspect.py @@ -123,7 +123,7 @@ def test_yaml_extension(data_dir): } -def test_sanitize_pep508(): +def test_filter_requirements_pep508(): reqs = { 'a.b': [ 'foo[ext1,ext3] == 1', @@ -370,3 +370,59 @@ def test_filter_requirements_excludes_collections(): ] assert filter_requirements(reqs, exclude_collections=excluded_collections) == expected + + +def test_requirement_regex_exclusions(): + reqs = { + "a.b": [ + "foo", + "shimmy", + "kungfoo", + "aaab", + ], + "c.d": [ + "foobar", + "shake", + "ab", + ] + } + + excluded = [ + "Foo", # straight string comparison (case shouldn't matter) + "foo.", # straight string comparison (shouldn't match) + "~foo.", # regex (shouldn't match b/c not full string match) + "~Sh.*", # regex (case shouldn't matter) + "~^.+ab", # regex + ] + + expected = [ + "kungfoo # from collection a.b", + "foobar # from collection c.d", + "ab # from collection c.d" + ] + + assert filter_requirements(reqs, excluded) == expected + + +def test_collection_regex_exclusions(): + reqs = { + "a.b": ["foo"], + "c.d": ["bar"], + "ab.cd": ["foobar"], + "e.f": ["baz"], + "be.fun": ["foobaz"], + } + + excluded_collections = [ + r"~A\..+", # regex (case shouldn't matter) + "E.F", # straight string comparison (case shouldn't matter) + "~b.c", # regex (shouldn't match b/c not full string match) + ] + + expected = [ + "bar # from collection c.d", + "foobar # from collection ab.cd", + "foobaz # from collection be.fun", + ] + + assert filter_requirements(reqs, exclude_collections=excluded_collections) == expected