From 555008a829cc69a262a4b9e30aa897f498780a2b Mon Sep 17 00:00:00 2001 From: Matt Martz Date: Mon, 29 Jan 2024 18:00:19 -0600 Subject: [PATCH 01/28] Drop requirements-parser in favor of pep508 --- setup.cfg | 2 +- .../_target_scripts/introspect.py | 62 ++++++++----------- src/ansible_builder/containerfile.py | 2 +- .../test/reqfile/extra_req.txt | 2 - .../test/reqfile/requirements.txt | 3 +- test/integration/test_introspect_cli.py | 4 +- test/unit/test_introspect.py | 4 +- 7 files changed, 34 insertions(+), 45 deletions(-) delete mode 100644 test/data/ansible_collections/test/reqfile/extra_req.txt diff --git a/setup.cfg b/setup.cfg index e513eeb7..8ca6c698 100644 --- a/setup.cfg +++ b/setup.cfg @@ -36,7 +36,7 @@ max-line-length=160 include_package_data = true install_requires = PyYAML - requirements_parser + packaging bindep jsonschema setuptools; python_version >= "3.12" diff --git a/src/ansible_builder/_target_scripts/introspect.py b/src/ansible_builder/_target_scripts/introspect.py index bbd9c95e..b18adce5 100644 --- a/src/ansible_builder/_target_scripts/introspect.py +++ b/src/ansible_builder/_target_scripts/introspect.py @@ -1,16 +1,19 @@ import argparse -import importlib.metadata import logging import os +import re import sys import yaml -import requirements - +from packaging.requirements import InvalidRequirement, Requirement +from packaging.specifiers import SpecifierSet +from packaging.utils import canonicalize_name base_collections_path = '/usr/share/ansible/collections' logger = logging.getLogger(__name__) +COMMENT_RE = re.compile(r'\s*#.*$') + def line_is_empty(line): return bool((not line.strip()) or line.startswith('#')) @@ -330,47 +333,34 @@ def sanitize_requirements(collection_py_reqs): :returns: A finalized list of sanitized Python requirements. """ # de-duplication - consolidated = [] - seen_pkgs = set() + consolidated = {} for collection, lines in collection_py_reqs.items(): - try: - for req in requirements.parse('\n'.join(lines)): - if req.specifier: - req.name = importlib.metadata.Prepared(req.name).normalized - req.collections = [collection] # add backref for later - if req.name is None: - consolidated.append(req) - continue - if req.name in seen_pkgs: - for prior_req in consolidated: - if req.name == prior_req.name: - prior_req.specs.extend(req.specs) - prior_req.collections.append(collection) - break - continue - consolidated.append(req) - seen_pkgs.add(req.name) - except Exception as e: - logger.warning('Warning: failed to parse requirements from %s, error: %s', collection, e) + for line in lines: + if not (line := COMMENT_RE.sub('', line.strip())): + continue + try: + req = Requirement(line) + except InvalidRequirement as e: + logger.warning('Warning: failed to parse requirements from %s, error: %s', collection, e) + continue + req.name = canonicalize_name(req.name) + req.collections = [collection] # add backref for later + if (prior_req := consolidated.get(req.name)) and prior_req.marker == req.marker: + specifiers = f'{prior_req.specifier},{req.specifier}' + prior_req.specifier = SpecifierSet(specifiers) + prior_req.collections.append(collection) + continue + consolidated[req.name] = req # removal of unwanted packages sanitized = [] - for req in consolidated: + for name, req in consolidated.items(): # Exclude packages, unless it was present in the user supplied requirements. - if req.name and req.name.lower() in EXCLUDE_REQUIREMENTS and 'user' not in req.collections: + if name.lower() in EXCLUDE_REQUIREMENTS and 'user' not in req.collections: logger.debug('# Excluding requirement %s from %s', req.name, req.collections) continue - if req.vcs or req.uri: - # Requirement like git+ or http return as-is - new_line = req.line - elif req.name: - specs = [f'{cmp}{ver}' for cmp, ver in req.specs] - new_line = req.name + ','.join(specs) - else: - raise RuntimeError(f'Could not process {req.line}') - - sanitized.append(f'{new_line} # from collection {",".join(req.collections)}') + sanitized.append(f'{req} # from collection {",".join(req.collections)}') return sanitized diff --git a/src/ansible_builder/containerfile.py b/src/ansible_builder/containerfile.py index d9092bfd..df02225f 100644 --- a/src/ansible_builder/containerfile.py +++ b/src/ansible_builder/containerfile.py @@ -144,7 +144,7 @@ def prepare(self) -> None: self._insert_global_args() if image == "base": - self.steps.append("RUN $PYCMD -m pip install --no-cache-dir bindep pyyaml requirements-parser") + self.steps.append("RUN $PYCMD -m pip install --no-cache-dir bindep pyyaml packaging") else: # For an EE schema earlier than v3 with a custom builder image, we always make sure pip is available. context_dir = Path(self.build_outputs_dir).stem diff --git a/test/data/ansible_collections/test/reqfile/extra_req.txt b/test/data/ansible_collections/test/reqfile/extra_req.txt deleted file mode 100644 index 81a47c4b..00000000 --- a/test/data/ansible_collections/test/reqfile/extra_req.txt +++ /dev/null @@ -1,2 +0,0 @@ -tacacs_plus -pyvcloud>=18.0.10 # duplicate with test.metadata collection diff --git a/test/data/ansible_collections/test/reqfile/requirements.txt b/test/data/ansible_collections/test/reqfile/requirements.txt index 6141c4ce..96b9a3fa 100644 --- a/test/data/ansible_collections/test/reqfile/requirements.txt +++ b/test/data/ansible_collections/test/reqfile/requirements.txt @@ -1,4 +1,5 @@ pytz python-dateutil>=2.8.2 # intentional dash jinja2>=3.0 # intentional lowercase --r extra_req.txt +tacacs_plus +pyvcloud>=18.0.10 # duplicate with test.metadata collection diff --git a/test/integration/test_introspect_cli.py b/test/integration/test_introspect_cli.py index 72449600..6d472767 100644 --- a/test/integration/test_introspect_cli.py +++ b/test/integration/test_introspect_cli.py @@ -50,9 +50,9 @@ def test_introspect_write_python_and_sanitize(cli, data_dir, tmp_path): assert dest_file.read_text() == '\n'.join([ 'pyvcloud>=14,>=18.0.10 # from collection test.metadata,test.reqfile', 'pytz # from collection test.reqfile', - 'python_dateutil>=2.8.2 # from collection test.reqfile', + 'python-dateutil>=2.8.2 # from collection test.reqfile', 'jinja2>=3.0 # from collection test.reqfile', - 'tacacs_plus # from collection test.reqfile', + 'tacacs-plus # from collection test.reqfile', '', ]) diff --git a/test/unit/test_introspect.py b/test/unit/test_introspect.py index 36ea823e..870be283 100644 --- a/test/unit/test_introspect.py +++ b/test/unit/test_introspect.py @@ -17,11 +17,11 @@ def test_multiple_collection_metadata(data_dir): 'pytz # from collection test.reqfile', # python-dateutil should appear only once even though referenced in # multiple places, once with a dash and another with an underscore in the name. - 'python_dateutil>=2.8.2 # from collection test.reqfile', + 'python-dateutil>=2.8.2 # from collection test.reqfile', # jinja2 should appear only once even though referenced in multiple # places, once with uppercase and another with lowercase in the name. 'jinja2>=3.0 # from collection test.reqfile', - 'tacacs_plus # from collection test.reqfile' + 'tacacs-plus # from collection test.reqfile' ], 'system': [ 'subversion [platform:rpm] # from collection test.bindep', 'subversion [platform:dpkg] # from collection test.bindep' From fa9f5e5aa8439159274d760641011fed59b129c7 Mon Sep 17 00:00:00 2001 From: Matt Martz Date: Tue, 30 Jan 2024 12:11:01 -0600 Subject: [PATCH 02/28] key using name and marker --- src/ansible_builder/_target_scripts/introspect.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/ansible_builder/_target_scripts/introspect.py b/src/ansible_builder/_target_scripts/introspect.py index b18adce5..a8096c76 100644 --- a/src/ansible_builder/_target_scripts/introspect.py +++ b/src/ansible_builder/_target_scripts/introspect.py @@ -346,16 +346,17 @@ def sanitize_requirements(collection_py_reqs): continue req.name = canonicalize_name(req.name) req.collections = [collection] # add backref for later - if (prior_req := consolidated.get(req.name)) and prior_req.marker == req.marker: + key = (req.name, req.marker) + if (prior_req := consolidated.get(key)): specifiers = f'{prior_req.specifier},{req.specifier}' prior_req.specifier = SpecifierSet(specifiers) prior_req.collections.append(collection) continue - consolidated[req.name] = req + consolidated[key] = req # removal of unwanted packages sanitized = [] - for name, req in consolidated.items(): + for (name, _marker), req in consolidated.items(): # Exclude packages, unless it was present in the user supplied requirements. if name.lower() in EXCLUDE_REQUIREMENTS and 'user' not in req.collections: logger.debug('# Excluding requirement %s from %s', req.name, req.collections) From f5d8fabadfe18a9cc0131b2438f7dd9a8111a9ca Mon Sep 17 00:00:00 2001 From: Matt Martz Date: Tue, 30 Jan 2024 13:40:30 -0600 Subject: [PATCH 03/28] Handle urls --- src/ansible_builder/_target_scripts/introspect.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/ansible_builder/_target_scripts/introspect.py b/src/ansible_builder/_target_scripts/introspect.py index a8096c76..cf6c3089 100644 --- a/src/ansible_builder/_target_scripts/introspect.py +++ b/src/ansible_builder/_target_scripts/introspect.py @@ -350,6 +350,10 @@ def sanitize_requirements(collection_py_reqs): if (prior_req := consolidated.get(key)): specifiers = f'{prior_req.specifier},{req.specifier}' prior_req.specifier = SpecifierSet(specifiers) + if not prior_req.url and req.url: + # An explicit install URL is preferred over none + # The first URL seen wins + prior_req.url = req.url prior_req.collections.append(collection) continue consolidated[key] = req From 10f9abe672572b9b4017b59ebf01fdf1af7bf30e Mon Sep 17 00:00:00 2001 From: Matt Martz Date: Tue, 30 Jan 2024 13:45:56 -0600 Subject: [PATCH 04/28] update extras --- src/ansible_builder/_target_scripts/introspect.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/ansible_builder/_target_scripts/introspect.py b/src/ansible_builder/_target_scripts/introspect.py index cf6c3089..5dc79153 100644 --- a/src/ansible_builder/_target_scripts/introspect.py +++ b/src/ansible_builder/_target_scripts/introspect.py @@ -354,6 +354,7 @@ def sanitize_requirements(collection_py_reqs): # An explicit install URL is preferred over none # The first URL seen wins prior_req.url = req.url + prior_req.extras.update(req.extras) prior_req.collections.append(collection) continue consolidated[key] = req From 3559cac75adb4f2f34c09bf3ae86430531ed0575 Mon Sep 17 00:00:00 2001 From: Matt Martz Date: Tue, 30 Jan 2024 15:33:30 -0600 Subject: [PATCH 05/28] Add some tests for pep508 stuff --- test/unit/test_introspect.py | 44 ++++++++++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) diff --git a/test/unit/test_introspect.py b/test/unit/test_introspect.py index 870be283..02cfb014 100644 --- a/test/unit/test_introspect.py +++ b/test/unit/test_introspect.py @@ -83,3 +83,47 @@ def test_yaml_extension(data_dir): 'python': {'test_collection.test_yaml_extension': ['python-six']}, 'system': {}, } + + +def test_sanitize_pep508(): + reqs = { + 'a.b': [ + 'foo[ext1,ext3] == 1', + 'bar; python_version < "2.7"', + 'A', + "name", + ], + 'c.d': [ + 'FOO >= 1', + 'bar; python_version < "3.6"', + "name<=1", + ], + 'e.f': [ + 'foo[ext2] @ git+http://github.com/foo/foo.git', + "name>=3", + ], + 'g.h': [ + "name>=3,<2", + ], + 'i.j': [ + "name@http://foo.com", + ], + 'k.l': [ + "name [fred,bar] @ http://foo.com ; python_version=='2.7'", + ], + 'm.n': [ + "name[quux, strange];python_version<'2.7' and platform_version=='2'", + ], + } + + expected = [ + 'foo[ext1,ext2,ext3]==1,>=1@ git+http://github.com/foo/foo.git # from collection a.b,c.d,e.f', + 'bar; python_version < "2.7" # from collection a.b', + 'a # from collection a.b', + 'name<2,<=1,>=3@ http://foo.com # from collection a.b,c.d,e.f,g.h,i.j', + 'bar; python_version < "3.6" # from collection c.d', + 'name[bar,fred]@ http://foo.com ; python_version == "2.7" # from collection k.l', + 'name[quux,strange]; python_version < "2.7" and platform_version == "2" # from collection m.n' + ] + + assert sanitize_requirements(reqs) == expected From 2210359295b666758cce39e558139247de2e97eb Mon Sep 17 00:00:00 2001 From: Matt Martz Date: Tue, 30 Jan 2024 15:42:12 -0600 Subject: [PATCH 06/28] ordered, unique collection backrefs --- src/ansible_builder/_target_scripts/introspect.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/ansible_builder/_target_scripts/introspect.py b/src/ansible_builder/_target_scripts/introspect.py index 5dc79153..2d9ac9af 100644 --- a/src/ansible_builder/_target_scripts/introspect.py +++ b/src/ansible_builder/_target_scripts/introspect.py @@ -345,7 +345,7 @@ def sanitize_requirements(collection_py_reqs): logger.warning('Warning: failed to parse requirements from %s, error: %s', collection, e) continue req.name = canonicalize_name(req.name) - req.collections = [collection] # add backref for later + req.collections = {collection: None} # add backref for later key = (req.name, req.marker) if (prior_req := consolidated.get(key)): specifiers = f'{prior_req.specifier},{req.specifier}' @@ -355,7 +355,7 @@ def sanitize_requirements(collection_py_reqs): # The first URL seen wins prior_req.url = req.url prior_req.extras.update(req.extras) - prior_req.collections.append(collection) + prior_req.collections.update({collection: None}) continue consolidated[key] = req From 33c237920ca3c24b4c94f504708ba75dfc7106d6 Mon Sep 17 00:00:00 2001 From: Matt Martz Date: Mon, 5 Feb 2024 15:15:39 -0600 Subject: [PATCH 07/28] Nuke --sanitize --- docs/collection_metadata.rst | 8 +- setup.cfg | 1 - .../_target_scripts/introspect.py | 86 +++---------------- src/ansible_builder/containerfile.py | 4 +- test/integration/test_help.py | 2 +- test/integration/test_introspect_cli.py | 35 ++------ test/unit/test_introspect.py | 34 ++++---- test/unit/test_requirements.py | 58 ------------- 8 files changed, 44 insertions(+), 184 deletions(-) delete mode 100644 test/unit/test_requirements.py diff --git a/docs/collection_metadata.rst b/docs/collection_metadata.rst index 3732ba1d..b0db4000 100644 --- a/docs/collection_metadata.rst +++ b/docs/collection_metadata.rst @@ -35,7 +35,7 @@ If the ``meta/execution-environment.yml`` file is not present, by default, Ansib Dependency introspection ======================== -If any dependencies are given, the introspection is run by Ansible Builder so that the requirements are found and sanitized (deduped) before container image assembly. +If any dependencies are given, the introspection is run by Ansible Builder so that the requirements are found before container image assembly. A user can see the introspection output during the builder intermediate phase using the ``build -v3`` option. @@ -63,13 +63,11 @@ Run the ``introspect`` command against your collection path: :: - ansible-builder introspect --sanitize COLLECTION_PATH + ansible-builder introspect COLLECTION_PATH The default collection path used by the ``ansible-galaxy`` command is ``~/.ansible/collections/``. Read more about collection paths in the `Ansible configuration settings `_ guide. -The ``--sanitize`` option reviews all of the collection requirements and removes duplicates. It also removes any Python requirements that should normally be excluded (see :ref:`python_deps` below). - .. note:: Use the ``-v3`` option to ``introspect`` to see logging messages about requirements that are being excluded. @@ -94,7 +92,7 @@ Then, if the ``ansible_collection`` directory is in your home directory, you can :: - ansible-builder introspect --sanitize ~/ + ansible-builder introspect ~/ .. _python_deps: diff --git a/setup.cfg b/setup.cfg index 8ca6c698..add84c0e 100644 --- a/setup.cfg +++ b/setup.cfg @@ -36,7 +36,6 @@ max-line-length=160 include_package_data = true install_requires = PyYAML - packaging bindep jsonschema setuptools; python_version >= "3.12" diff --git a/src/ansible_builder/_target_scripts/introspect.py b/src/ansible_builder/_target_scripts/introspect.py index 2d9ac9af..45a809d7 100644 --- a/src/ansible_builder/_target_scripts/introspect.py +++ b/src/ansible_builder/_target_scripts/introspect.py @@ -5,14 +5,11 @@ import sys import yaml -from packaging.requirements import InvalidRequirement, Requirement -from packaging.specifiers import SpecifierSet -from packaging.utils import canonicalize_name - base_collections_path = '/usr/share/ansible/collections' logger = logging.getLogger(__name__) -COMMENT_RE = re.compile(r'\s*#.*$') +REQ_NORM_RE = re.compile(r'[-_.]+') +REQ_NAME_RE = re.compile(r'^([-\w.]+)') def line_is_empty(line): @@ -206,6 +203,12 @@ def simple_combine(reqs): continue base_line = line.split('#')[0].strip() + name_match = REQ_NAME_RE.match(base_line) + name = REQ_NORM_RE.sub('-', name_match.group(1)) + if name in EXCLUDE_REQUIREMENTS and collection != 'user': + logger.debug('# Excluding requirement %s from %s', name, collection) + continue + if base_line in consolidated: i = consolidated.index(base_line) fancy_lines[i] += f', {collection}' @@ -239,24 +242,17 @@ def parse_args(args=None): def run_introspect(args, log): data = process(args.folder, user_pip=args.user_pip, user_bindep=args.user_bindep) - if args.sanitize: - log.info('# Sanitized dependencies for %s', args.folder) - data_for_write = data - data['python'] = sanitize_requirements(data['python']) - data['system'] = simple_combine(data['system']) - else: - log.info('# Dependency data for %s', args.folder) - data_for_write = data.copy() - data_for_write['python'] = simple_combine(data['python']) - data_for_write['system'] = simple_combine(data['system']) + log.info('# Dependency data for %s', args.folder) + data['python'] = simple_combine(data['python']) + data['system'] = simple_combine(data['system']) print('---') print(yaml.dump(data, default_flow_style=False)) if args.write_pip and data.get('python'): - write_file(args.write_pip, data_for_write.get('python') + ['']) + write_file(args.write_pip, data.get('python') + ['']) if args.write_bindep and data.get('system'): - write_file(args.write_bindep, data_for_write.get('system') + ['']) + write_file(args.write_bindep, data.get('system') + ['']) sys.exit(0) @@ -272,9 +268,7 @@ def create_introspect_parser(parser): ) ) introspect_parser.add_argument('--sanitize', action='store_true', - help=('Sanitize and de-duplicate requirements. ' - 'This is normally done separately from the introspect script, but this ' - 'option is given to more accurately test collection content.')) + help=argparse.SUPPRESS) introspect_parser.add_argument( 'folder', default=base_collections_path, nargs='?', @@ -319,58 +313,6 @@ def create_introspect_parser(parser): )) -def sanitize_requirements(collection_py_reqs): - """ - Cleanup Python requirements by removing duplicates and excluded packages. - - The user requirements file will go through the deduplication process, but - skips the special package exclusion process. - - :param dict collection_py_reqs: A dict of lists of Python requirements, keyed - by fully qualified collection name. The special key `user` holds requirements - from the user specified requirements file from the ``--user-pip`` CLI option. - - :returns: A finalized list of sanitized Python requirements. - """ - # de-duplication - consolidated = {} - - for collection, lines in collection_py_reqs.items(): - for line in lines: - if not (line := COMMENT_RE.sub('', line.strip())): - continue - try: - req = Requirement(line) - except InvalidRequirement as e: - logger.warning('Warning: failed to parse requirements from %s, error: %s', collection, e) - continue - req.name = canonicalize_name(req.name) - req.collections = {collection: None} # add backref for later - key = (req.name, req.marker) - if (prior_req := consolidated.get(key)): - specifiers = f'{prior_req.specifier},{req.specifier}' - prior_req.specifier = SpecifierSet(specifiers) - if not prior_req.url and req.url: - # An explicit install URL is preferred over none - # The first URL seen wins - prior_req.url = req.url - prior_req.extras.update(req.extras) - prior_req.collections.update({collection: None}) - continue - consolidated[key] = req - - # removal of unwanted packages - sanitized = [] - for (name, _marker), req in consolidated.items(): - # Exclude packages, unless it was present in the user supplied requirements. - if name.lower() in EXCLUDE_REQUIREMENTS and 'user' not in req.collections: - logger.debug('# Excluding requirement %s from %s', req.name, req.collections) - continue - sanitized.append(f'{req} # from collection {",".join(req.collections)}') - - return sanitized - - def write_file(filename: str, lines: list) -> bool: parent_dir = os.path.dirname(filename) if parent_dir and not os.path.exists(parent_dir): diff --git a/src/ansible_builder/containerfile.py b/src/ansible_builder/containerfile.py index df02225f..080ca962 100644 --- a/src/ansible_builder/containerfile.py +++ b/src/ansible_builder/containerfile.py @@ -144,7 +144,7 @@ def prepare(self) -> None: self._insert_global_args() if image == "base": - self.steps.append("RUN $PYCMD -m pip install --no-cache-dir bindep pyyaml packaging") + self.steps.append("RUN $PYCMD -m pip install --no-cache-dir bindep pyyaml") else: # For an EE schema earlier than v3 with a custom builder image, we always make sure pip is available. context_dir = Path(self.build_outputs_dir).stem @@ -425,7 +425,7 @@ def _prepare_introspect_assemble_steps(self) -> None: # The introspect/assemble block is valid if there are any form of requirements if any(self.definition.get_dep_abs_path(thing) for thing in ('galaxy', 'system', 'python')): - introspect_cmd = "RUN $PYCMD /output/scripts/introspect.py introspect --sanitize" + introspect_cmd = "RUN $PYCMD /output/scripts/introspect.py introspect" requirements_file_exists = os.path.exists(os.path.join( self.build_outputs_dir, constants.CONTEXT_FILES['python'] diff --git a/test/integration/test_help.py b/test/integration/test_help.py index 73c349d0..1748d437 100644 --- a/test/integration/test_help.py +++ b/test/integration/test_help.py @@ -38,5 +38,5 @@ def test_create_help(cli): def test_introspect_help(cli): result = cli('ansible-builder introspect --help', check=False) help_text = result.stdout - assert 'usage: ansible-builder introspect [-h] [--sanitize]' in help_text + assert 'usage: ansible-builder introspect [-h]' in help_text assert re.search(r'Loops over collections in folder', help_text) diff --git a/test/integration/test_introspect_cli.py b/test/integration/test_introspect_cli.py index 6d472767..13919fa2 100644 --- a/test/integration/test_introspect_cli.py +++ b/test/integration/test_introspect_cli.py @@ -6,15 +6,7 @@ def test_introspect_write(cli, data_dir): data = yaml.safe_load(r.stdout) # assure that output is valid YAML assert 'python' in data assert 'system' in data - assert 'pytz' in data['python']['test.reqfile'] - - -def test_introspect_with_sanitize(cli, data_dir): - r = cli(f'ansible-builder introspect --sanitize {data_dir}') - data = yaml.safe_load(r.stdout) # assure that output is valid YAML - assert 'python' in data - assert 'system' in data - assert '# from collection test.bindep' in r.stdout # should have comments + assert 'pytz # from collection test.reqfile' in r.stdout def test_introspect_write_bindep(cli, data_dir, tmp_path): @@ -43,31 +35,16 @@ def test_introspect_write_python(cli, data_dir, tmp_path): ]) -def test_introspect_write_python_and_sanitize(cli, data_dir, tmp_path): - dest_file = tmp_path / 'req.txt' - cli(f'ansible-builder introspect {data_dir} --write-pip={dest_file} --sanitize') - - assert dest_file.read_text() == '\n'.join([ - 'pyvcloud>=14,>=18.0.10 # from collection test.metadata,test.reqfile', - 'pytz # from collection test.reqfile', - 'python-dateutil>=2.8.2 # from collection test.reqfile', - 'jinja2>=3.0 # from collection test.reqfile', - 'tacacs-plus # from collection test.reqfile', - '', - ]) - - -def test_introspect_with_sanitize_user_reqs(cli, data_dir, tmp_path): +def test_introspect_with_user_reqs(cli, data_dir, tmp_path): user_file = tmp_path / 'requirements.txt' user_file.write_text("ansible\npytest\n") - r = cli(f'ansible-builder introspect --sanitize --user-pip={user_file} {data_dir}') + r = cli(f'ansible-builder introspect --user-pip={user_file} {data_dir}') data = yaml.safe_load(r.stdout) # assure that output is valid YAML assert 'python' in data assert 'system' in data - assert 'pytz # from collection test.reqfile' in data['python'] - + assert 'pytz # from collection test.reqfile' in r.stdout # 'ansible' allowed in user requirements - assert 'ansible # from collection user' in data['python'] + assert 'ansible # from collection user' in r.stdout # 'pytest' allowed in user requirements - assert 'pytest # from collection user' in data['python'] + assert 'pytest # from collection user' in r.stdout diff --git a/test/unit/test_introspect.py b/test/unit/test_introspect.py index 02cfb014..8a4b067d 100644 --- a/test/unit/test_introspect.py +++ b/test/unit/test_introspect.py @@ -2,26 +2,23 @@ import pytest from ansible_builder._target_scripts.introspect import process, process_collection -from ansible_builder._target_scripts.introspect import simple_combine, sanitize_requirements +from ansible_builder._target_scripts.introspect import simple_combine from ansible_builder._target_scripts.introspect import parse_args def test_multiple_collection_metadata(data_dir): files = process(data_dir) - files['python'] = sanitize_requirements(files['python']) + files['python'] = simple_combine(files['python']) files['system'] = simple_combine(files['system']) assert files == {'python': [ - 'pyvcloud>=14,>=18.0.10 # from collection test.metadata,test.reqfile', + 'pyvcloud>=14 # from collection test.metadata', 'pytz # from collection test.reqfile', - # python-dateutil should appear only once even though referenced in - # multiple places, once with a dash and another with an underscore in the name. 'python-dateutil>=2.8.2 # from collection test.reqfile', - # jinja2 should appear only once even though referenced in multiple - # places, once with uppercase and another with lowercase in the name. 'jinja2>=3.0 # from collection test.reqfile', - 'tacacs-plus # from collection test.reqfile' + 'tacacs_plus # from collection test.reqfile', + 'pyvcloud>=18.0.10 # from collection test.reqfile' ], 'system': [ 'subversion [platform:rpm] # from collection test.bindep', 'subversion [platform:dpkg] # from collection test.bindep' @@ -53,7 +50,7 @@ def test_parse_args_default_action(): parser = parse_args( [ - action, '--sanitize', + action, f'--user-pip={user_pip}', f'--user-bindep={user_bindep}', f'--write-pip={write_pip}', @@ -62,7 +59,6 @@ def test_parse_args_default_action(): ) assert parser.action == action - assert parser.sanitize assert parser.user_pip == user_pip assert parser.user_bindep == user_bindep assert parser.write_pip == write_pip @@ -117,13 +113,19 @@ def test_sanitize_pep508(): } expected = [ - 'foo[ext1,ext2,ext3]==1,>=1@ git+http://github.com/foo/foo.git # from collection a.b,c.d,e.f', + 'foo[ext1,ext3] == 1 # from collection a.b', 'bar; python_version < "2.7" # from collection a.b', - 'a # from collection a.b', - 'name<2,<=1,>=3@ http://foo.com # from collection a.b,c.d,e.f,g.h,i.j', + 'A # from collection a.b', + 'name # from collection a.b', + 'FOO >= 1 # from collection c.d', 'bar; python_version < "3.6" # from collection c.d', - 'name[bar,fred]@ http://foo.com ; python_version == "2.7" # from collection k.l', - 'name[quux,strange]; python_version < "2.7" and platform_version == "2" # from collection m.n' + 'name<=1 # from collection c.d', + 'foo[ext2] @ git+http://github.com/foo/foo.git # from collection e.f', + 'name>=3 # from collection e.f', + 'name>=3,<2 # from collection g.h', + 'name@http://foo.com # from collection i.j', + "name [fred,bar] @ http://foo.com ; python_version=='2.7' # from collection k.l", + "name[quux, strange];python_version<'2.7' and platform_version=='2' # from collection m.n" ] - assert sanitize_requirements(reqs) == expected + assert simple_combine(reqs) == expected diff --git a/test/unit/test_requirements.py b/test/unit/test_requirements.py deleted file mode 100644 index 42cb017c..00000000 --- a/test/unit/test_requirements.py +++ /dev/null @@ -1,58 +0,0 @@ -from ansible_builder._target_scripts.introspect import sanitize_requirements - - -def test_combine_entries(): - assert sanitize_requirements({ - 'foo.bar': ['foo>1.0'], - 'bar.foo': ['foo>=2.0'] - }) == ['foo>1.0,>=2.0 # from collection foo.bar,bar.foo'] - - -def test_remove_unwanted_requirements(): - assert sanitize_requirements({ - 'foo.bar': [ - 'foo', - 'ansible', - 'bar', - ], - 'bar.foo': [ - 'pytest', - 'bar', - 'zoo' - ] - }) == [ - 'foo # from collection foo.bar', - 'bar # from collection foo.bar,bar.foo', - 'zoo # from collection bar.foo' - ] - - -def test_skip_bad_formats(): - """A single incorrectly formatted requirement should warn, but not block other reqs""" - assert sanitize_requirements({'foo.bar': [ - 'foo', - 'bar' - ], 'foo.bad': ['zizzer zazzer zuzz'] # not okay - }) == ['foo # from collection foo.bar', 'bar # from collection foo.bar'] - - -def test_sanitize_requirements_do_not_exclude(): - py_reqs = { - 'foo.bar': [ - 'foo', - 'ansible', # should not appear - 'bar', - ], - 'user': [ - 'pytest', # should appear - 'bar', - 'zoo' - ] - } - - assert sanitize_requirements(py_reqs) == [ - 'foo # from collection foo.bar', - 'bar # from collection foo.bar,user', - 'pytest # from collection user', - 'zoo # from collection user', - ] From d1904b0c30152867b3072230b05b9e4048913c34 Mon Sep 17 00:00:00 2001 From: Matt Martz Date: Thu, 21 Mar 2024 09:47:22 -0500 Subject: [PATCH 08/28] Expose a way to exclude deps - option 2 --- .../_target_scripts/introspect.py | 50 +++++++++++++--- src/ansible_builder/containerfile.py | 59 +++++++++++++++---- src/ansible_builder/ee_schema.py | 22 ++++++- src/ansible_builder/user_definition.py | 20 +++++-- 4 files changed, 123 insertions(+), 28 deletions(-) diff --git a/src/ansible_builder/_target_scripts/introspect.py b/src/ansible_builder/_target_scripts/introspect.py index 45a809d7..365e799f 100644 --- a/src/ansible_builder/_target_scripts/introspect.py +++ b/src/ansible_builder/_target_scripts/introspect.py @@ -75,7 +75,8 @@ def process_collection(path): return (pip_lines, bindep_lines) -def process(data_dir=base_collections_path, user_pip=None, user_bindep=None): +def process(data_dir=base_collections_path, user_pip=None, user_bindep=None, + user_pip_exclude=None, user_bindep_exclude=None): paths = [] path_root = os.path.join(data_dir, 'ansible_collections') @@ -112,10 +113,18 @@ def process(data_dir=base_collections_path, user_pip=None, user_bindep=None): col_pip_lines = pip_file_data(user_pip) if col_pip_lines: py_req['user'] = col_pip_lines + if user_pip_exclude: + col_pip_exclude_lines = pip_file_data(user_pip_exclude) + if col_pip_exclude_lines: + py_req['exclude'] = col_pip_exclude_lines if user_bindep: col_sys_lines = bindep_file_data(user_bindep) if col_sys_lines: sys_req['user'] = col_sys_lines + if user_bindep_exclude: + col_sys_exclude_lines = bindep_file_data(user_bindep_exclude) + if col_sys_exclude_lines: + sys_req['exclude'] = col_sys_exclude_lines return { 'python': py_req, @@ -190,11 +199,14 @@ def get_dependency(self, entry): return req_file -def simple_combine(reqs): +def simple_combine(reqs, exclude=None, name_only=False): """Given a dictionary of requirement lines keyed off collections, return a list with the most basic of de-duplication logic, and comments indicating the sources based off the collection keys """ + if exclude is None: + exclude = [] + consolidated = [] fancy_lines = [] for collection, lines in reqs.items(): @@ -205,15 +217,22 @@ def simple_combine(reqs): base_line = line.split('#')[0].strip() name_match = REQ_NAME_RE.match(base_line) name = REQ_NORM_RE.sub('-', name_match.group(1)) - if name in EXCLUDE_REQUIREMENTS and collection != 'user': + if name in exclude and collection not in {'user', 'exclude'}: + logger.debug('# Explicitly excluding requirement %s from %s', name, collection) + continue + if name in EXCLUDE_REQUIREMENTS and collection not in {'user', 'exclude'}: logger.debug('# Excluding requirement %s from %s', name, collection) continue if base_line in consolidated: i = consolidated.index(base_line) - fancy_lines[i] += f', {collection}' + if not name_only: + fancy_lines[i] += f', {collection}' else: - fancy_line = f'{base_line} # from collection {collection}' + if name_only: + fancy_line = name + else: + fancy_line = f'{base_line} # from collection {collection}' consolidated.append(base_line) fancy_lines.append(fancy_line) @@ -241,10 +260,17 @@ def parse_args(args=None): def run_introspect(args, log): - data = process(args.folder, user_pip=args.user_pip, user_bindep=args.user_bindep) + data = process(args.folder, user_pip=args.user_pip, user_bindep=args.user_bindep, + user_pip_exclude=args.user_pip_exclude, user_bindep_exclude=args.user_bindep_exclude) log.info('# Dependency data for %s', args.folder) - data['python'] = simple_combine(data['python']) - data['system'] = simple_combine(data['system']) + data['python'] = simple_combine( + data['python'], + exclude=simple_combine({'exclude': data['python'].pop('exclude', {})}, name_only=True) + ) + data['system'] = simple_combine( + data['system'], + exclude=simple_combine({'exclude': data['system'].pop('exclude', {})}, name_only=True) + ) print('---') print(yaml.dump(data, default_flow_style=False)) @@ -284,10 +310,18 @@ def create_introspect_parser(parser): '--user-pip', dest='user_pip', help='An additional file to combine with collection pip requirements.' ) + introspect_parser.add_argument( + '--user-pip-exclude', dest='user_pip_exclude', + help='An additional file to exclude specific pip requirements.' + ) introspect_parser.add_argument( '--user-bindep', dest='user_bindep', help='An additional file to combine with collection bindep requirements.' ) + introspect_parser.add_argument( + '--user-bindep-exclude', dest='user_bindep_exclude', + help='An additional file to exclude specific bindep requirements.' + ) introspect_parser.add_argument( '--write-pip', dest='write_pip', help='Write the combined pip requirements file to this location.' diff --git a/src/ansible_builder/containerfile.py b/src/ansible_builder/containerfile.py index 080ca962..14462278 100644 --- a/src/ansible_builder/containerfile.py +++ b/src/ansible_builder/containerfile.py @@ -259,16 +259,19 @@ def _create_folder_copy_files(self) -> None: if not new_name: continue - requirement_path = self.definition.get_dep_abs_path(item) - if requirement_path is None: - continue - dest = os.path.join( - self.build_context, constants.user_content_subfolder, new_name) + for exclude in (False, True): + if exclude is True: + new_name = f'exclude-{new_name}' + requirement_path = self.definition.get_dep_abs_path(item, exclude=exclude) + if requirement_path is None: + continue + dest = os.path.join( + self.build_context, constants.user_content_subfolder, new_name) - # Ignore modification time of the requirement file because we could - # be writing it out dynamically (inline EE reqs), and we only care - # about the contents anyway. - copy_file(requirement_path, dest, ignore_mtime=True) + # Ignore modification time of the requirement file because we could + # be writing it out dynamically (inline EE reqs), and we only care + # about the contents anyway. + copy_file(requirement_path, dest, ignore_mtime=True) if self.original_galaxy_keyring: copy_file( @@ -384,7 +387,12 @@ def _prepare_label_steps(self) -> None: ]) def _prepare_build_context(self) -> None: - if any(self.definition.get_dep_abs_path(thing) for thing in ('galaxy', 'system', 'python')): + deps: list[str] = [] + for exclude in (False, True): + deps.extend( + self.definition.get_dep_abs_path(thing, exclude=exclude) for thing in ('galaxy', 'system', 'python') + ) + if any(deps): self.steps.extend([ f"COPY {constants.user_content_subfolder} /build", "WORKDIR /build", @@ -423,8 +431,12 @@ def _prepare_galaxy_install_steps(self) -> None: def _prepare_introspect_assemble_steps(self) -> None: # The introspect/assemble block is valid if there are any form of requirements - if any(self.definition.get_dep_abs_path(thing) for thing in ('galaxy', 'system', 'python')): - + deps: list[str] = [] + for exclude in (False, True): + deps.extend( + self.definition.get_dep_abs_path(thing, exclude=exclude) for thing in ('galaxy', 'system', 'python') + ) + if any(deps): introspect_cmd = "RUN $PYCMD /output/scripts/introspect.py introspect" requirements_file_exists = os.path.exists(os.path.join( @@ -439,12 +451,35 @@ def _prepare_introspect_assemble_steps(self) -> None: self.steps.append(f"COPY {relative_requirements_path} {constants.CONTEXT_FILES['python']}") # WORKDIR is /build, so we use the (shorter) relative paths there introspect_cmd += f" --user-pip={constants.CONTEXT_FILES['python']}" + + pip_exclude_exists = os.path.exists(os.path.join( + self.build_outputs_dir, f"exclude-{constants.CONTEXT_FILES['python']}" + )) + if pip_exclude_exists: + relative_pip_exclude_path = os.path.join( + constants.user_content_subfolder, + f"exclude-{constants.CONTEXT_FILES['python']}" + ) + self.steps.append(f"COPY {relative_pip_exclude_path} exclude-{constants.CONTEXT_FILES['python']}") + introspect_cmd += f" --user-pip-exclude=exclude-{constants.CONTEXT_FILES['python']}" + bindep_exists = os.path.exists(os.path.join(self.build_outputs_dir, constants.CONTEXT_FILES['system'])) if bindep_exists: relative_bindep_path = os.path.join(constants.user_content_subfolder, constants.CONTEXT_FILES['system']) self.steps.append(f"COPY {relative_bindep_path} {constants.CONTEXT_FILES['system']}") introspect_cmd += f" --user-bindep={constants.CONTEXT_FILES['system']}" + exclude_bindep_exists = os.path.exists(os.path.join( + self.build_outputs_dir, f"exclude-{constants.CONTEXT_FILES['system']}" + )) + if exclude_bindep_exists: + relative_exclude_bindep_path = os.path.join( + constants.user_content_subfolder, + f"exclude-{constants.CONTEXT_FILES['system']}" + ) + self.steps.append(f"COPY {relative_exclude_bindep_path} exclude-{constants.CONTEXT_FILES['system']}") + introspect_cmd += f" --user-bindep-exclude=exclude-{constants.CONTEXT_FILES['system']}" + introspect_cmd += " --write-bindep=/tmp/src/bindep.txt --write-pip=/tmp/src/requirements.txt" self.steps.append(introspect_cmd) diff --git a/src/ansible_builder/ee_schema.py b/src/ansible_builder/ee_schema.py index 9cdda75b..d9004c78 100644 --- a/src/ansible_builder/ee_schema.py +++ b/src/ansible_builder/ee_schema.py @@ -1,3 +1,4 @@ +from copy import deepcopy import os from jsonschema import validate, SchemaError, ValidationError @@ -390,16 +391,31 @@ }, } +schema_v31: dict = deepcopy(schema_v3) +schema_v31['properties']['dependencies']['properties']['exclude'] = { + "python": TYPE_StringOrListOfStrings, + "system": TYPE_StringOrListOfStrings, +} + + +def float_or_int(v): + if isinstance(v, (float, int)): + return v + if isinstance(v, str): + if '.' in v: + return float(v) + return int(v) + def validate_schema(ee_def: dict): schema_version = 1 if 'version' in ee_def: try: - schema_version = int(ee_def['version']) + schema_version = float_or_int(ee_def['version']) except ValueError as e: raise DefinitionError(f"Schema version not an integer: {ee_def['version']}") from e - if schema_version not in (1, 2, 3): + if schema_version not in (1, 2, 3, 3.1): raise DefinitionError(f"Unsupported schema version: {schema_version}") try: @@ -409,6 +425,8 @@ def validate_schema(ee_def: dict): validate(instance=ee_def, schema=schema_v2) elif schema_version == 3: validate(instance=ee_def, schema=schema_v3) + elif schema_version == 3.1: + validate(instance=ee_def, schema=schema_v31) except (SchemaError, ValidationError) as e: raise DefinitionError(msg=e.message, path=e.absolute_schema_path) from e diff --git a/src/ansible_builder/user_definition.py b/src/ansible_builder/user_definition.py index 399283d7..ba04eb47 100644 --- a/src/ansible_builder/user_definition.py +++ b/src/ansible_builder/user_definition.py @@ -1,5 +1,6 @@ from __future__ import annotations +import functools import logging import os import textwrap @@ -179,12 +180,18 @@ def container_init(self): def options(self): return self.raw.get('options', {}) - def get_dep_abs_path(self, entry): + # This is the size of galaxy, python, system * exclude=True/False + @functools.lru_cache(maxsize=6) + def get_dep_abs_path(self, entry, exclude=False): """Unique to the user EE definition, files can be referenced by either an absolute path or a path relative to the EE definition folder This method will return the absolute path. """ - req_file = self.raw.get('dependencies', {}).get(entry) + deps = self.raw.get('dependencies', {}) + if exclude: + deps = deps.get('exclude', {}) + + req_file = deps.get(entry) if not req_file: return None @@ -240,10 +247,11 @@ def validate(self): # HACK: non-file deps for dynamic base/builder if not value: continue - requirement_path = self.get_dep_abs_path(item) - if requirement_path: - if not os.path.exists(requirement_path): - raise DefinitionError(f"Dependency file {requirement_path} does not exist.") + for exclude in (False, True): + requirement_path = self.get_dep_abs_path(item, exclude=exclude) + if requirement_path: + if not os.path.exists(requirement_path): + raise DefinitionError(f"Dependency file {requirement_path} does not exist.") # Validate and set any user-specified build arguments build_arg_defaults = self.raw.get('build_arg_defaults') From 0da816a45b72b408d2c41a2d8bf3b944469745d4 Mon Sep 17 00:00:00 2001 From: Matt Martz Date: Thu, 21 Mar 2024 09:54:48 -0500 Subject: [PATCH 09/28] Comment about names and normalization --- src/ansible_builder/_target_scripts/introspect.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/ansible_builder/_target_scripts/introspect.py b/src/ansible_builder/_target_scripts/introspect.py index 365e799f..553cf811 100644 --- a/src/ansible_builder/_target_scripts/introspect.py +++ b/src/ansible_builder/_target_scripts/introspect.py @@ -8,6 +8,7 @@ base_collections_path = '/usr/share/ansible/collections' logger = logging.getLogger(__name__) +# https://peps.python.org/pep-0503/#normalized-names REQ_NORM_RE = re.compile(r'[-_.]+') REQ_NAME_RE = re.compile(r'^([-\w.]+)') From 149bdfc4964305d67e7cd6d101526f10eb6e1468 Mon Sep 17 00:00:00 2001 From: David Shrewsbury Date: Tue, 26 Mar 2024 11:12:33 -0400 Subject: [PATCH 10/28] rebase and remove setuptools dep --- setup.cfg | 1 - 1 file changed, 1 deletion(-) diff --git a/setup.cfg b/setup.cfg index add84c0e..da5c90ce 100644 --- a/setup.cfg +++ b/setup.cfg @@ -38,7 +38,6 @@ install_requires = PyYAML bindep jsonschema - setuptools; python_version >= "3.12" python_requires = >=3.9 From 207a59c26534710c6e6c3253c9f0bb48cb9680a7 Mon Sep 17 00:00:00 2001 From: David Shrewsbury Date: Wed, 27 Mar 2024 09:47:10 -0400 Subject: [PATCH 11/28] Revert change to remove -r in unit test --- test/data/ansible_collections/test/reqfile/extra_req.txt | 2 ++ test/data/ansible_collections/test/reqfile/requirements.txt | 3 +-- 2 files changed, 3 insertions(+), 2 deletions(-) create mode 100644 test/data/ansible_collections/test/reqfile/extra_req.txt diff --git a/test/data/ansible_collections/test/reqfile/extra_req.txt b/test/data/ansible_collections/test/reqfile/extra_req.txt new file mode 100644 index 00000000..81a47c4b --- /dev/null +++ b/test/data/ansible_collections/test/reqfile/extra_req.txt @@ -0,0 +1,2 @@ +tacacs_plus +pyvcloud>=18.0.10 # duplicate with test.metadata collection diff --git a/test/data/ansible_collections/test/reqfile/requirements.txt b/test/data/ansible_collections/test/reqfile/requirements.txt index 96b9a3fa..6141c4ce 100644 --- a/test/data/ansible_collections/test/reqfile/requirements.txt +++ b/test/data/ansible_collections/test/reqfile/requirements.txt @@ -1,5 +1,4 @@ pytz python-dateutil>=2.8.2 # intentional dash jinja2>=3.0 # intentional lowercase -tacacs_plus -pyvcloud>=18.0.10 # duplicate with test.metadata collection +-r extra_req.txt From aaf742f9ceb770cd9c23890950d198d120d422ad Mon Sep 17 00:00:00 2001 From: David Shrewsbury Date: Thu, 28 Mar 2024 12:19:22 -0400 Subject: [PATCH 12/28] Revert new 3.1 schema, more docs --- docs/definition.rst | 27 +++++++++++++++++++++++++++ src/ansible_builder/ee_schema.py | 26 ++++++-------------------- 2 files changed, 33 insertions(+), 20 deletions(-) diff --git a/docs/definition.rst b/docs/definition.rst index 22576854..9a36382c 100644 --- a/docs/definition.rst +++ b/docs/definition.rst @@ -50,6 +50,11 @@ Here is a sample version 3 EE file. To use Ansible Builder 3.x, you must specify - six - psutil system: bindep.txt + exclude: + python: + - docker + system: + - python3-Cython images: base_image: @@ -245,6 +250,28 @@ The following keys are valid for this section: The system packages to be installed, in bindep format. This may either be a filename, or a list of requirements (see below for an example). + ``exclude`` + A list of Python or system requirements to be excluded from the top-level dependency requirements + of referenced collections. These exclusions will not apply to the user supplied Python or system + dependencies, nor will they apply to dependencies of dependencies (top-level only). Python dependency + exclusions should be a list of package names appearing under the ``python`` key name. System dependency + exclusions should be a list of system package names appearing under the ``system`` key name. + + Example: + + .. code:: yaml + + dependencies: + exclude: + python: + - docker + system: + - python3-Cython + + .. note:: + The ``exclude`` option requires ``ansible-builder`` version ``3.1`` or newer. + + The following example uses filenames that contain various dependencies: .. code:: yaml diff --git a/src/ansible_builder/ee_schema.py b/src/ansible_builder/ee_schema.py index d9004c78..08c88579 100644 --- a/src/ansible_builder/ee_schema.py +++ b/src/ansible_builder/ee_schema.py @@ -1,4 +1,3 @@ -from copy import deepcopy import os from jsonschema import validate, SchemaError, ValidationError @@ -273,6 +272,10 @@ {"required": ["package_pip"]}, ], }, + "exclude": { + "python": TYPE_StringOrListOfStrings, + "system": TYPE_StringOrListOfStrings, + }, }, }, @@ -391,31 +394,16 @@ }, } -schema_v31: dict = deepcopy(schema_v3) -schema_v31['properties']['dependencies']['properties']['exclude'] = { - "python": TYPE_StringOrListOfStrings, - "system": TYPE_StringOrListOfStrings, -} - - -def float_or_int(v): - if isinstance(v, (float, int)): - return v - if isinstance(v, str): - if '.' in v: - return float(v) - return int(v) - def validate_schema(ee_def: dict): schema_version = 1 if 'version' in ee_def: try: - schema_version = float_or_int(ee_def['version']) + schema_version = int(ee_def['version']) except ValueError as e: raise DefinitionError(f"Schema version not an integer: {ee_def['version']}") from e - if schema_version not in (1, 2, 3, 3.1): + if schema_version not in (1, 2, 3): raise DefinitionError(f"Unsupported schema version: {schema_version}") try: @@ -425,8 +413,6 @@ def validate_schema(ee_def: dict): validate(instance=ee_def, schema=schema_v2) elif schema_version == 3: validate(instance=ee_def, schema=schema_v3) - elif schema_version == 3.1: - validate(instance=ee_def, schema=schema_v31) except (SchemaError, ValidationError) as e: raise DefinitionError(msg=e.message, path=e.absolute_schema_path) from e From b69e27b54bca9ccfd79ea4c63df0701ce95a8e6e Mon Sep 17 00:00:00 2001 From: David Shrewsbury Date: Mon, 1 Apr 2024 16:04:33 -0400 Subject: [PATCH 13/28] Better comment handling to not lose URL anchors, pass thru non-pep508 data --- .../_target_scripts/introspect.py | 26 +++++--- test/unit/test_introspect.py | 62 +++++++++++++++++++ 2 files changed, 81 insertions(+), 7 deletions(-) diff --git a/src/ansible_builder/_target_scripts/introspect.py b/src/ansible_builder/_target_scripts/introspect.py index 553cf811..163a1739 100644 --- a/src/ansible_builder/_target_scripts/introspect.py +++ b/src/ansible_builder/_target_scripts/introspect.py @@ -8,9 +8,12 @@ base_collections_path = '/usr/share/ansible/collections' logger = logging.getLogger(__name__) +# regex for a comment at the start of a line, or embedded with leading space(s) +COMMENT_RE = re.compile(r'(?:^|\s+)#.*$') + # https://peps.python.org/pep-0503/#normalized-names REQ_NORM_RE = re.compile(r'[-_.]+') -REQ_NAME_RE = re.compile(r'^([-\w.]+)') +REQ_NAME_RE = re.compile(r'^([^-][-\w.]+)') def line_is_empty(line): @@ -201,9 +204,12 @@ def get_dependency(self, entry): def simple_combine(reqs, exclude=None, name_only=False): - """Given a dictionary of requirement lines keyed off collections, - return a list with the most basic of de-duplication logic, - and comments indicating the sources based off the collection keys + """ + Given a dictionary of Python requirement lines keyed off collections, + return a list a cleaned up (no source comments) requirements annotated + with comments indicating the sources based off the collection keys. + + Currently, non-pep508 compliant entries are passed through. """ if exclude is None: exclude = [] @@ -212,11 +218,17 @@ def simple_combine(reqs, exclude=None, name_only=False): fancy_lines = [] for collection, lines in reqs.items(): for line in lines: - if line_is_empty(line): + # strip comments + if not (base_line := COMMENT_RE.sub('', line.strip())): + continue + + # Did not match expected name format (maybe a pip option?), just pass thru + if not (name_match := REQ_NAME_RE.match(base_line)): + logger.warning("Passing through unparsable requirement: %s", base_line) + fancy_line = f'{base_line} # from collection {collection}' + fancy_lines.append(fancy_line) continue - base_line = line.split('#')[0].strip() - name_match = REQ_NAME_RE.match(base_line) name = REQ_NORM_RE.sub('-', name_match.group(1)) if name in exclude and collection not in {'user', 'exclude'}: logger.debug('# Explicitly excluding requirement %s from %s', name, collection) diff --git a/test/unit/test_introspect.py b/test/unit/test_introspect.py index 8a4b067d..dca9c086 100644 --- a/test/unit/test_introspect.py +++ b/test/unit/test_introspect.py @@ -129,3 +129,65 @@ def test_sanitize_pep508(): ] assert simple_combine(reqs) == expected + + +def test_comment_parsing(): + """ + Test that simple_combine() does not remove embedded URL anchors due to comment parsing. + """ + reqs = { + 'a.b': [ + '# comment 1', + 'git+https://git.repo/some_pkg.git#egg=SomePackage', + 'git+https://git.repo/some_pkg.git#egg=SomeOtherPackage # inline comment', + 'git+https://git.repo/some_pkg.git#egg=AlsoSomePackage #inline comment that hates leading spaces', + ' # crazy indented comment (waka waka!)', + '####### something informative' + ' ', + '', + ] + } + + expected = [ + 'git+https://git.repo/some_pkg.git#egg=SomePackage # from collection a.b', + 'git+https://git.repo/some_pkg.git#egg=SomeOtherPackage # from collection a.b', + 'git+https://git.repo/some_pkg.git#egg=AlsoSomePackage # from collection a.b', + ] + + assert simple_combine(reqs) == expected + + +def test_pass_thru(): + """ + Test that simple_combine() will pass through non-pep508 data. + """ + reqs = { + # various VCS and URL options + 'a.b': [ + 'git+https://git.repo/some_pkg.git#egg=SomePackage', + 'svn+svn://svn.repo/some_pkg/trunk/#egg=SomePackage', + 'https://example.com/foo/foo-0.26.0-py2.py3-none-any.whl', + 'http://my.package.repo/SomePackage-1.0.4.zip', + ], + + # various 'pip install' options + 'c.d': [ + '-i https://pypi.org/simple', + '--extra-index-url http://my.package.repo/simple', + '--no-clean', + '-e svn+http://svn.example.com/svn/MyProject/trunk@2019#egg=MyProject', + ] + } + + expected = [ + 'git+https://git.repo/some_pkg.git#egg=SomePackage # from collection a.b', + 'svn+svn://svn.repo/some_pkg/trunk/#egg=SomePackage # from collection a.b', + 'https://example.com/foo/foo-0.26.0-py2.py3-none-any.whl # from collection a.b', + 'http://my.package.repo/SomePackage-1.0.4.zip # from collection a.b', + '-i https://pypi.org/simple # from collection c.d', + '--extra-index-url http://my.package.repo/simple # from collection c.d', + '--no-clean # from collection c.d', + '-e svn+http://svn.example.com/svn/MyProject/trunk@2019#egg=MyProject # from collection c.d', + ] + + assert simple_combine(reqs) == expected From 6ae9fb417b1d2981bb51a5241e15603bff712648 Mon Sep 17 00:00:00 2001 From: David Shrewsbury Date: Fri, 5 Apr 2024 11:33:57 -0400 Subject: [PATCH 14/28] Add test for excludes and some comments --- .../_target_scripts/introspect.py | 37 +++++++++++++++++-- test/unit/test_introspect.py | 29 +++++++++++++++ 2 files changed, 62 insertions(+), 4 deletions(-) diff --git a/src/ansible_builder/_target_scripts/introspect.py b/src/ansible_builder/_target_scripts/introspect.py index 163a1739..35201577 100644 --- a/src/ansible_builder/_target_scripts/introspect.py +++ b/src/ansible_builder/_target_scripts/introspect.py @@ -13,6 +13,8 @@ # https://peps.python.org/pep-0503/#normalized-names REQ_NORM_RE = re.compile(r'[-_.]+') + +# match anything that doesn't start with a '-' REQ_NAME_RE = re.compile(r'^([^-][-\w.]+)') @@ -79,8 +81,28 @@ def process_collection(path): return (pip_lines, bindep_lines) -def process(data_dir=base_collections_path, user_pip=None, user_bindep=None, - user_pip_exclude=None, user_bindep_exclude=None): +def process(data_dir=base_collections_path, + user_pip=None, + user_bindep=None, + user_pip_exclude=None, + user_bindep_exclude=None): + """ + Build a dictionary of Python and system requirements from any collections + installed in data_dir, and any user specified requirements. + + Example return dict: + { + 'python': { + 'collection.a': ['abc', 'def'], + 'collection.b': ['ghi'], + 'user': ['jkl'], + }, + 'system': { + 'collection.a': ['ZYX'], + 'user': ['WVU'], + }, + } + """ paths = [] path_root = os.path.join(data_dir, 'ansible_collections') @@ -210,6 +232,10 @@ def simple_combine(reqs, exclude=None, name_only=False): with comments indicating the sources based off the collection keys. Currently, non-pep508 compliant entries are passed through. + + :param dict reqs: A dict of Python requirements, keyed by collection name. + :param exclude: + :param bool name_only: If true, requirements will be only the simple name (no versions or annotation). """ if exclude is None: exclude = [] @@ -273,8 +299,11 @@ def parse_args(args=None): def run_introspect(args, log): - data = process(args.folder, user_pip=args.user_pip, user_bindep=args.user_bindep, - user_pip_exclude=args.user_pip_exclude, user_bindep_exclude=args.user_bindep_exclude) + data = process(args.folder, + user_pip=args.user_pip, + user_bindep=args.user_bindep, + user_pip_exclude=args.user_pip_exclude, + user_bindep_exclude=args.user_bindep_exclude) log.info('# Dependency data for %s', args.folder) data['python'] = simple_combine( data['python'], diff --git a/test/unit/test_introspect.py b/test/unit/test_introspect.py index dca9c086..99eb5c7e 100644 --- a/test/unit/test_introspect.py +++ b/test/unit/test_introspect.py @@ -191,3 +191,32 @@ def test_pass_thru(): ] assert simple_combine(reqs) == expected + + +def test_excluded_requirements(): + reqs = { + 'a.b': [ + 'req1', + 'req2==0.1.0', + 'req4 ; python_version<=3.9', + 'git+https://git.repo/some_pkg.git#egg=SomePackage', + ], + 'c.d': [ + 'req1<=2.0.0', + 'req3', + ] + } + + excluded = [ + 'req1', + 'req4', + 'git', # This currently breaks this test since it matches the git+https url after regex parsing + ] + + expected = [ + 'req2==0.1.0 # from collection a.b', + 'git+https://git.repo/some_pkg.git#egg=SomePackage # from collection a.b', + 'req3 # from collection c.d', + ] + + assert simple_combine(reqs, excluded) == expected From 1d55d2956e269a6aafc44a7f01df55612f326fb1 Mon Sep 17 00:00:00 2001 From: David Shrewsbury Date: Mon, 8 Apr 2024 15:26:23 -0400 Subject: [PATCH 15/28] Use packaging to identify non-PEP508 input --- setup.cfg | 1 + .../_target_scripts/introspect.py | 74 +++++++++++--- test/unit/test_introspect.py | 97 +++++++++++++------ 3 files changed, 126 insertions(+), 46 deletions(-) diff --git a/setup.cfg b/setup.cfg index da5c90ce..c0ebe410 100644 --- a/setup.cfg +++ b/setup.cfg @@ -38,6 +38,7 @@ install_requires = PyYAML bindep jsonschema + packaging python_requires = >=3.9 diff --git a/src/ansible_builder/_target_scripts/introspect.py b/src/ansible_builder/_target_scripts/introspect.py index 35201577..5bfd4fce 100644 --- a/src/ansible_builder/_target_scripts/introspect.py +++ b/src/ansible_builder/_target_scripts/introspect.py @@ -5,6 +5,9 @@ import sys import yaml +from packaging.requirements import InvalidRequirement, Requirement + + base_collections_path = '/usr/share/ansible/collections' logger = logging.getLogger(__name__) @@ -225,7 +228,33 @@ def get_dependency(self, entry): return req_file -def simple_combine(reqs, exclude=None, name_only=False): +def is_pep508_compliant(req: str): + try: + Requirement(req) + except InvalidRequirement: + return False + return True + + +def strip_comments(reqs: dict[str, list]) -> dict[str, list]: + """ + Filter any comments out of the Python collection requirements input. + + :param dict reqs: A dict of Python requirements, keyed by collection name. + + :return: Same as the input parameter, except with no comment lines. + """ + result: dict[str, list] = {} + for collection, lines in reqs.items(): + for line in lines: + # strip comments + if (base_line := COMMENT_RE.sub('', line.strip())): + result.setdefault(collection, []).append(base_line) + + return result + + +def simple_combine(reqs, exclude=None, name_only=False, test_pep508=True): """ Given a dictionary of Python requirement lines keyed off collections, return a list a cleaned up (no source comments) requirements annotated @@ -236,43 +265,55 @@ def simple_combine(reqs, exclude=None, name_only=False): :param dict reqs: A dict of Python requirements, keyed by collection name. :param exclude: :param bool name_only: If true, requirements will be only the simple name (no versions or annotation). + :param bool test_pep508: If true, test each line for PEP508 compliance. + + :return: A list of (possibly) annotated requirements. """ if exclude is None: exclude = [] consolidated = [] fancy_lines = [] - for collection, lines in reqs.items(): + + uncommented_reqs = strip_comments(reqs) + + for collection, lines in uncommented_reqs.items(): for line in lines: - # strip comments - if not (base_line := COMMENT_RE.sub('', line.strip())): + if test_pep508 and not is_pep508_compliant(line): + logger.warning( + "Passing through non-PEP508 compliant line '%s' from collection '%s'", + line, collection + ) + fancy_lines.append(line) # We intentionally won't annotate these lines (multi-line?) continue - # Did not match expected name format (maybe a pip option?), just pass thru - if not (name_match := REQ_NAME_RE.match(base_line)): - logger.warning("Passing through unparsable requirement: %s", base_line) - fancy_line = f'{base_line} # from collection {collection}' - fancy_lines.append(fancy_line) + # Did not match expected name format, just pass thru + if not (name_match := REQ_NAME_RE.match(line)): + logger.warning( + "Passing through unparsable requirement name '%s' from collection '%s'", + line, collection + ) + fancy_lines.append(line) continue name = REQ_NORM_RE.sub('-', name_match.group(1)) if name in exclude and collection not in {'user', 'exclude'}: - logger.debug('# Explicitly excluding requirement %s from %s', name, collection) + logger.debug("# Explicitly excluding requirement '%s' from '%s'", name, collection) continue if name in EXCLUDE_REQUIREMENTS and collection not in {'user', 'exclude'}: - logger.debug('# Excluding requirement %s from %s', name, collection) + logger.debug("# Excluding requirement '%s' from '%s'", name, collection) continue - if base_line in consolidated: - i = consolidated.index(base_line) + if line in consolidated: + i = consolidated.index(line) if not name_only: fancy_lines[i] += f', {collection}' else: if name_only: fancy_line = name else: - fancy_line = f'{base_line} # from collection {collection}' - consolidated.append(base_line) + fancy_line = f'{line} # from collection {collection}' + consolidated.append(line) fancy_lines.append(fancy_line) return fancy_lines @@ -311,7 +352,8 @@ def run_introspect(args, log): ) data['system'] = simple_combine( data['system'], - exclude=simple_combine({'exclude': data['system'].pop('exclude', {})}, name_only=True) + exclude=simple_combine({'exclude': data['system'].pop('exclude', {})}, name_only=True), + test_pep508=False ) print('---') diff --git a/test/unit/test_introspect.py b/test/unit/test_introspect.py index 99eb5c7e..052a0d03 100644 --- a/test/unit/test_introspect.py +++ b/test/unit/test_introspect.py @@ -1,16 +1,18 @@ import os import pytest -from ansible_builder._target_scripts.introspect import process, process_collection -from ansible_builder._target_scripts.introspect import simple_combine -from ansible_builder._target_scripts.introspect import parse_args +from ansible_builder._target_scripts.introspect import (parse_args, + process, + process_collection, + simple_combine, + strip_comments) def test_multiple_collection_metadata(data_dir): files = process(data_dir) files['python'] = simple_combine(files['python']) - files['system'] = simple_combine(files['system']) + files['system'] = simple_combine(files['system'], test_pep508=False) assert files == {'python': [ 'pyvcloud>=14 # from collection test.metadata', @@ -115,7 +117,7 @@ def test_sanitize_pep508(): expected = [ 'foo[ext1,ext3] == 1 # from collection a.b', 'bar; python_version < "2.7" # from collection a.b', - 'A # from collection a.b', + 'A', 'name # from collection a.b', 'FOO >= 1 # from collection c.d', 'bar; python_version < "3.6" # from collection c.d', @@ -149,14 +151,49 @@ def test_comment_parsing(): } expected = [ - 'git+https://git.repo/some_pkg.git#egg=SomePackage # from collection a.b', - 'git+https://git.repo/some_pkg.git#egg=SomeOtherPackage # from collection a.b', - 'git+https://git.repo/some_pkg.git#egg=AlsoSomePackage # from collection a.b', + 'git+https://git.repo/some_pkg.git#egg=SomePackage', + 'git+https://git.repo/some_pkg.git#egg=SomeOtherPackage', + 'git+https://git.repo/some_pkg.git#egg=AlsoSomePackage', ] assert simple_combine(reqs) == expected +def test_strip_comments(): + """ + Test that strip_comments() properly removes comments from Python requirements input. + """ + reqs = { + 'a.b': [ + '# comment 1', + 'git+https://git.repo/some_pkg.git#egg=SomePackage', + 'git+https://git.repo/some_pkg.git#egg=SomeOtherPackage # inline comment', + 'git+https://git.repo/some_pkg.git#egg=AlsoSomePackage #inline comment that hates leading spaces', + ' # crazy indented comment (waka waka!)', + '####### something informative' + ' ', + '', + ], + 'c.d': [ + '# comment 2', + 'git', + ] + } + + expected = { + 'a.b': [ + 'git+https://git.repo/some_pkg.git#egg=SomePackage', + 'git+https://git.repo/some_pkg.git#egg=SomeOtherPackage', + 'git+https://git.repo/some_pkg.git#egg=AlsoSomePackage', + ], + 'c.d': [ + 'git', + ] + } + + assert strip_comments(reqs) == expected + + def test_pass_thru(): """ Test that simple_combine() will pass through non-pep508 data. @@ -180,14 +217,14 @@ def test_pass_thru(): } expected = [ - 'git+https://git.repo/some_pkg.git#egg=SomePackage # from collection a.b', - 'svn+svn://svn.repo/some_pkg/trunk/#egg=SomePackage # from collection a.b', - 'https://example.com/foo/foo-0.26.0-py2.py3-none-any.whl # from collection a.b', - 'http://my.package.repo/SomePackage-1.0.4.zip # from collection a.b', - '-i https://pypi.org/simple # from collection c.d', - '--extra-index-url http://my.package.repo/simple # from collection c.d', - '--no-clean # from collection c.d', - '-e svn+http://svn.example.com/svn/MyProject/trunk@2019#egg=MyProject # from collection c.d', + 'git+https://git.repo/some_pkg.git#egg=SomePackage', + 'svn+svn://svn.repo/some_pkg/trunk/#egg=SomePackage', + 'https://example.com/foo/foo-0.26.0-py2.py3-none-any.whl', + 'http://my.package.repo/SomePackage-1.0.4.zip', + '-i https://pypi.org/simple', + '--extra-index-url http://my.package.repo/simple', + '--no-clean', + '-e svn+http://svn.example.com/svn/MyProject/trunk@2019#egg=MyProject', ] assert simple_combine(reqs) == expected @@ -195,28 +232,28 @@ def test_pass_thru(): def test_excluded_requirements(): reqs = { - 'a.b': [ - 'req1', - 'req2==0.1.0', - 'req4 ; python_version<=3.9', - 'git+https://git.repo/some_pkg.git#egg=SomePackage', + "a.b": [ + "req1", + "req2==0.1.0", + "req4 ; python_version<='3.9'", + "git+https://git.repo/some_pkg.git#egg=SomePackage", ], - 'c.d': [ - 'req1<=2.0.0', - 'req3', + "c.d": [ + "req1<=2.0.0", + "req3", ] } excluded = [ - 'req1', - 'req4', - 'git', # This currently breaks this test since it matches the git+https url after regex parsing + "req1", + "req4", + "git", ] expected = [ - 'req2==0.1.0 # from collection a.b', - 'git+https://git.repo/some_pkg.git#egg=SomePackage # from collection a.b', - 'req3 # from collection c.d', + "req2==0.1.0 # from collection a.b", + "git+https://git.repo/some_pkg.git#egg=SomePackage", + "req3 # from collection c.d", ] assert simple_combine(reqs, excluded) == expected From 5814a17b45448b8a0f34a33c53aec7d45d5b71d6 Mon Sep 17 00:00:00 2001 From: David Shrewsbury Date: Thu, 11 Apr 2024 11:57:47 -0400 Subject: [PATCH 16/28] More work around system req exclusions --- docs/definition.rst | 5 ++ .../_target_scripts/introspect.py | 90 +++++++++---------- test/unit/test_introspect.py | 43 ++++++++- 3 files changed, 89 insertions(+), 49 deletions(-) diff --git a/docs/definition.rst b/docs/definition.rst index 9a36382c..59389e7c 100644 --- a/docs/definition.rst +++ b/docs/definition.rst @@ -257,6 +257,11 @@ The following keys are valid for this section: exclusions should be a list of package names appearing under the ``python`` key name. System dependency exclusions should be a list of system package names appearing under the ``system`` key name. + The exclusion string should be the simple name of the requirement you want excluded. For example, + if you need to exclude the system requirement that appears as ``foo [!platform:gentoo]`` within + an included collection, then your exclusion string should be ``foo``. To exclude the Python + requirement ``bar == 1.0.0``, your exclusion string would be ``bar``. + Example: .. code:: yaml diff --git a/src/ansible_builder/_target_scripts/introspect.py b/src/ansible_builder/_target_scripts/introspect.py index 5bfd4fce..e8c59b6c 100644 --- a/src/ansible_builder/_target_scripts/introspect.py +++ b/src/ansible_builder/_target_scripts/introspect.py @@ -1,3 +1,5 @@ +from __future__ import annotations + import argparse import logging import os @@ -93,16 +95,20 @@ def process(data_dir=base_collections_path, Build a dictionary of Python and system requirements from any collections installed in data_dir, and any user specified requirements. + Excluded requirements, if any, will be inserted into the return dict. + Example return dict: { 'python': { 'collection.a': ['abc', 'def'], 'collection.b': ['ghi'], 'user': ['jkl'], + 'exclude: ['abc'], }, 'system': { 'collection.a': ['ZYX'], 'user': ['WVU'], + 'exclude': ['ZYX'], }, } """ @@ -228,14 +234,6 @@ def get_dependency(self, entry): return req_file -def is_pep508_compliant(req: str): - try: - Requirement(req) - except InvalidRequirement: - return False - return True - - def strip_comments(reqs: dict[str, list]) -> dict[str, list]: """ Filter any comments out of the Python collection requirements input. @@ -254,65 +252,67 @@ def strip_comments(reqs: dict[str, list]) -> dict[str, list]: return result -def simple_combine(reqs, exclude=None, name_only=False, test_pep508=True): +def simple_combine(reqs: dict[str, list], exclude: list[str] | None = None, is_python: bool = True) -> list[str]: """ Given a dictionary of Python requirement lines keyed off collections, - return a list a cleaned up (no source comments) requirements annotated - with comments indicating the sources based off the collection keys. + return a consolidated list of cleaned up (no source comments) requirements + annotated with comments indicating the sources based off the collection keys. - Currently, non-pep508 compliant entries are passed through. + Currently, non-pep508 compliant Python entries are passed through. We also no + longer attempt to normalize names (replace '_' with '-', etc), other than + lowercasing it for exclusion matching, since we no longer are attempting + to combine similar entries. - :param dict reqs: A dict of Python requirements, keyed by collection name. - :param exclude: - :param bool name_only: If true, requirements will be only the simple name (no versions or annotation). - :param bool test_pep508: If true, test each line for PEP508 compliance. + :param dict reqs: A dict of either Python or system requirements, keyed by collection name. + :param list exclude: A list of requirements to be excluded from the output. + :param bool is_python: This should be set to True for Python requirements, as each + will be tested for PEP508 compliance. This should be set to False for system requirements. :return: A list of (possibly) annotated requirements. """ if exclude is None: exclude = [] + else: + exclude = [r.lower() for r in exclude] - consolidated = [] - fancy_lines = [] + consolidated: list[str] = [] + fancy_lines: list[str] = [] uncommented_reqs = strip_comments(reqs) for collection, lines in uncommented_reqs.items(): for line in lines: - if test_pep508 and not is_pep508_compliant(line): - logger.warning( - "Passing through non-PEP508 compliant line '%s' from collection '%s'", - line, collection - ) - fancy_lines.append(line) # We intentionally won't annotate these lines (multi-line?) - continue - # Did not match expected name format, just pass thru - if not (name_match := REQ_NAME_RE.match(line)): - logger.warning( - "Passing through unparsable requirement name '%s' from collection '%s'", - line, collection - ) - fancy_lines.append(line) - continue + # Determine the simple name based on type of requirement + if is_python: + try: + parsed_req = Requirement(line) + name = parsed_req.name + except InvalidRequirement: + logger.warning( + "Passing through non-PEP508 compliant line '%s' from collection '%s'", + line, collection + ) + fancy_lines.append(line) # We intentionally won't annotate these lines (multi-line?) + continue + else: + # bindep system requirements have the package name as the first "word" on the line + name = line.split(maxsplit=1)[0] + + lower_name = name.lower() - name = REQ_NORM_RE.sub('-', name_match.group(1)) - if name in exclude and collection not in {'user', 'exclude'}: + if lower_name in exclude and collection not in {'user', 'exclude'}: logger.debug("# Explicitly excluding requirement '%s' from '%s'", name, collection) continue - if name in EXCLUDE_REQUIREMENTS and collection not in {'user', 'exclude'}: + if lower_name in EXCLUDE_REQUIREMENTS and collection not in {'user', 'exclude'}: logger.debug("# Excluding requirement '%s' from '%s'", name, collection) continue if line in consolidated: i = consolidated.index(line) - if not name_only: - fancy_lines[i] += f', {collection}' + fancy_lines[i] += f', {collection}' else: - if name_only: - fancy_line = name - else: - fancy_line = f'{line} # from collection {collection}' + fancy_line = f'{line} # from collection {collection}' consolidated.append(line) fancy_lines.append(fancy_line) @@ -348,12 +348,12 @@ def run_introspect(args, log): log.info('# Dependency data for %s', args.folder) data['python'] = simple_combine( data['python'], - exclude=simple_combine({'exclude': data['python'].pop('exclude', {})}, name_only=True) + exclude=data['python'].pop('exclude', []), ) data['system'] = simple_combine( data['system'], - exclude=simple_combine({'exclude': data['system'].pop('exclude', {})}, name_only=True), - test_pep508=False + exclude=data['system'].pop('exclude', []), + is_python=False ) print('---') diff --git a/test/unit/test_introspect.py b/test/unit/test_introspect.py index 052a0d03..99d26ed5 100644 --- a/test/unit/test_introspect.py +++ b/test/unit/test_introspect.py @@ -12,7 +12,7 @@ def test_multiple_collection_metadata(data_dir): files = process(data_dir) files['python'] = simple_combine(files['python']) - files['system'] = simple_combine(files['system'], test_pep508=False) + files['system'] = simple_combine(files['system'], is_python=False) assert files == {'python': [ 'pyvcloud>=14 # from collection test.metadata', @@ -117,7 +117,7 @@ def test_sanitize_pep508(): expected = [ 'foo[ext1,ext3] == 1 # from collection a.b', 'bar; python_version < "2.7" # from collection a.b', - 'A', + 'A # from collection a.b', 'name # from collection a.b', 'FOO >= 1 # from collection c.d', 'bar; python_version < "3.6" # from collection c.d', @@ -194,7 +194,7 @@ def test_strip_comments(): assert strip_comments(reqs) == expected -def test_pass_thru(): +def test_python_pass_thru(): """ Test that simple_combine() will pass through non-pep508 data. """ @@ -230,7 +230,38 @@ def test_pass_thru(): assert simple_combine(reqs) == expected -def test_excluded_requirements(): +def test_excluded_system_requirements(): + reqs = { + 'a.b': [ + 'libxml2-dev [platform:dpkg]', + 'dev-libs/libxml2', + 'python3-lxml [(platform:redhat platform:base-py3)]', + 'foo [platform:bar]', + ], + 'c.d': [ + '# python is in EXCLUDED_REQUIREMENTS', + 'python [platform:brew] ==3.7.3', + 'libxml2-dev [platform:dpkg]', + 'python3-all-dev [platform:dpkg !platform:ubuntu-precise]', + ], + 'user': [ + 'foo', # should never exclude from user reqs + ] + } + + excluded = ['python3-lxml', 'foo'] + + expected = [ + 'libxml2-dev [platform:dpkg] # from collection a.b, c.d', + 'dev-libs/libxml2 # from collection a.b', + 'python3-all-dev [platform:dpkg !platform:ubuntu-precise] # from collection c.d', + 'foo # from collection user', + ] + + assert simple_combine(reqs, exclude=excluded, is_python=False) == expected + + +def test_excluded_python_requirements(): reqs = { "a.b": [ "req1", @@ -241,6 +272,9 @@ def test_excluded_requirements(): "c.d": [ "req1<=2.0.0", "req3", + ], + "user": [ + "req1" # should never exclude from user reqs ] } @@ -254,6 +288,7 @@ def test_excluded_requirements(): "req2==0.1.0 # from collection a.b", "git+https://git.repo/some_pkg.git#egg=SomePackage", "req3 # from collection c.d", + "req1 # from collection user", ] assert simple_combine(reqs, excluded) == expected From ef73758da9dd18dc89b1952d131daf5502d17b88 Mon Sep 17 00:00:00 2001 From: David Shrewsbury Date: Thu, 11 Apr 2024 13:09:13 -0400 Subject: [PATCH 17/28] No longer consolidate duplicate requirement entries --- .../_target_scripts/introspect.py | 34 ++++++------------- test/unit/test_introspect.py | 3 +- 2 files changed, 12 insertions(+), 25 deletions(-) diff --git a/src/ansible_builder/_target_scripts/introspect.py b/src/ansible_builder/_target_scripts/introspect.py index e8c59b6c..e441a063 100644 --- a/src/ansible_builder/_target_scripts/introspect.py +++ b/src/ansible_builder/_target_scripts/introspect.py @@ -16,12 +16,6 @@ # regex for a comment at the start of a line, or embedded with leading space(s) COMMENT_RE = re.compile(r'(?:^|\s+)#.*$') -# https://peps.python.org/pep-0503/#normalized-names -REQ_NORM_RE = re.compile(r'[-_.]+') - -# match anything that doesn't start with a '-' -REQ_NAME_RE = re.compile(r'^([^-][-\w.]+)') - def line_is_empty(line): return bool((not line.strip()) or line.startswith('#')) @@ -255,7 +249,7 @@ def strip_comments(reqs: dict[str, list]) -> dict[str, list]: def simple_combine(reqs: dict[str, list], exclude: list[str] | None = None, is_python: bool = True) -> list[str]: """ Given a dictionary of Python requirement lines keyed off collections, - return a consolidated list of cleaned up (no source comments) requirements + return a list of cleaned up (no source comments) requirements annotated with comments indicating the sources based off the collection keys. Currently, non-pep508 compliant Python entries are passed through. We also no @@ -270,14 +264,11 @@ def simple_combine(reqs: dict[str, list], exclude: list[str] | None = None, is_p :return: A list of (possibly) annotated requirements. """ - if exclude is None: - exclude = [] - else: - exclude = [r.lower() for r in exclude] - - consolidated: list[str] = [] - fancy_lines: list[str] = [] + exclusions: list[str] = [] + if exclude: + exclusions = [r.lower() for r in exclude] + annotated_lines: list[str] = [] uncommented_reqs = strip_comments(reqs) for collection, lines in uncommented_reqs.items(): @@ -293,7 +284,7 @@ def simple_combine(reqs: dict[str, list], exclude: list[str] | None = None, is_p "Passing through non-PEP508 compliant line '%s' from collection '%s'", line, collection ) - fancy_lines.append(line) # We intentionally won't annotate these lines (multi-line?) + annotated_lines.append(line) # We intentionally won't annotate these lines (multi-line?) continue else: # bindep system requirements have the package name as the first "word" on the line @@ -301,22 +292,17 @@ def simple_combine(reqs: dict[str, list], exclude: list[str] | None = None, is_p lower_name = name.lower() - if lower_name in exclude and collection not in {'user', 'exclude'}: + if lower_name in exclusions and collection not in {'user', 'exclude'}: logger.debug("# Explicitly excluding requirement '%s' from '%s'", name, collection) continue if lower_name in EXCLUDE_REQUIREMENTS and collection not in {'user', 'exclude'}: logger.debug("# Excluding requirement '%s' from '%s'", name, collection) continue - if line in consolidated: - i = consolidated.index(line) - fancy_lines[i] += f', {collection}' - else: - fancy_line = f'{line} # from collection {collection}' - consolidated.append(line) - fancy_lines.append(fancy_line) + annotated_line = f'{line} # from collection {collection}' + annotated_lines.append(annotated_line) - return fancy_lines + return annotated_lines def parse_args(args=None): diff --git a/test/unit/test_introspect.py b/test/unit/test_introspect.py index 99d26ed5..12a7f1b4 100644 --- a/test/unit/test_introspect.py +++ b/test/unit/test_introspect.py @@ -252,8 +252,9 @@ def test_excluded_system_requirements(): excluded = ['python3-lxml', 'foo'] expected = [ - 'libxml2-dev [platform:dpkg] # from collection a.b, c.d', + 'libxml2-dev [platform:dpkg] # from collection a.b', 'dev-libs/libxml2 # from collection a.b', + 'libxml2-dev [platform:dpkg] # from collection c.d', 'python3-all-dev [platform:dpkg !platform:ubuntu-precise] # from collection c.d', 'foo # from collection user', ] From 1afc7972397aeab08f114feda8c2e2712ab33d6d Mon Sep 17 00:00:00 2001 From: David Shrewsbury Date: Thu, 11 Apr 2024 13:30:03 -0400 Subject: [PATCH 18/28] Minor code reorg for introspect.py --- .../_target_scripts/introspect.py | 169 +++++++++--------- 1 file changed, 88 insertions(+), 81 deletions(-) diff --git a/src/ansible_builder/_target_scripts/introspect.py b/src/ansible_builder/_target_scripts/introspect.py index e441a063..74db13f8 100644 --- a/src/ansible_builder/_target_scripts/introspect.py +++ b/src/ansible_builder/_target_scripts/introspect.py @@ -10,13 +10,85 @@ from packaging.requirements import InvalidRequirement, Requirement -base_collections_path = '/usr/share/ansible/collections' -logger = logging.getLogger(__name__) +BASE_COLLECTIONS_PATH = '/usr/share/ansible/collections' + # regex for a comment at the start of a line, or embedded with leading space(s) COMMENT_RE = re.compile(r'(?:^|\s+)#.*$') +EXCLUDE_REQUIREMENTS = frozenset(( + # obviously already satisfied or unwanted + 'ansible', 'ansible-base', 'python', 'ansible-core', + # general python test requirements + 'tox', 'pycodestyle', 'yamllint', 'pylint', + 'flake8', 'pytest', 'pytest-xdist', 'coverage', 'mock', 'testinfra', + # test requirements highly specific to Ansible testing + 'ansible-lint', 'molecule', 'galaxy-importer', 'voluptuous', + # already present in image for py3 environments + 'yaml', 'pyyaml', 'json', +)) + + +logger = logging.getLogger(__name__) + + +class CollectionDefinition: + """ + This class represents the dependency metadata for a collection + should be replaced by logic to hit the Galaxy API if made available + """ + + def __init__(self, collection_path): + self.reference_path = collection_path + + # NOTE: Filenames should match constants.DEAFULT_EE_BASENAME and constants.YAML_FILENAME_EXTENSIONS. + meta_file_base = os.path.join(collection_path, 'meta', 'execution-environment') + ee_exists = False + for ext in ('yml', 'yaml'): + meta_file = f"{meta_file_base}.{ext}" + if os.path.exists(meta_file): + with open(meta_file, 'r') as f: + self.raw = yaml.safe_load(f) + ee_exists = True + break + + if not ee_exists: + self.raw = {'version': 1, 'dependencies': {}} + # Automatically infer requirements for collection + for entry, filename in [('python', 'requirements.txt'), ('system', 'bindep.txt')]: + candidate_file = os.path.join(collection_path, filename) + if has_content(candidate_file): + self.raw['dependencies'][entry] = filename + + def target_dir(self): + namespace, name = self.namespace_name() + return os.path.join( + BASE_COLLECTIONS_PATH, 'ansible_collections', + namespace, name + ) + + def namespace_name(self): + "Returns 2-tuple of namespace and name" + path_parts = [p for p in self.reference_path.split(os.path.sep) if p] + return tuple(path_parts[-2:]) + + def get_dependency(self, entry): + """A collection is only allowed to reference a file by a relative path + which is relative to the collection root + """ + req_file = self.raw.get('dependencies', {}).get(entry) + if req_file is None: + return None + if os.path.isabs(req_file): + raise RuntimeError( + 'Collections must specify relative paths for requirements files. ' + f'The file {req_file} specified by {self.reference_path} violates this.' + ) + + return req_file + + def line_is_empty(line): return bool((not line.strip()) or line.startswith('#')) @@ -65,14 +137,14 @@ def process_collection(path): :param str path: root directory of collection (this would contain galaxy.yml file) """ - CD = CollectionDefinition(path) + col_def = CollectionDefinition(path) - py_file = CD.get_dependency('python') + py_file = col_def.get_dependency('python') pip_lines = [] if py_file: pip_lines = pip_file_data(os.path.join(path, py_file)) - sys_file = CD.get_dependency('system') + sys_file = col_def.get_dependency('system') bindep_lines = [] if sys_file: bindep_lines = bindep_file_data(os.path.join(path, sys_file)) @@ -80,7 +152,7 @@ def process_collection(path): return (pip_lines, bindep_lines) -def process(data_dir=base_collections_path, +def process(data_dir=BASE_COLLECTIONS_PATH, user_pip=None, user_bindep=None, user_pip_exclude=None, @@ -127,8 +199,8 @@ def process(data_dir=base_collections_path, sys_req = {} for path in paths: col_pip_lines, col_sys_lines = process_collection(path) - CD = CollectionDefinition(path) - namespace, name = CD.namespace_name() + col_def = CollectionDefinition(path) + namespace, name = col_def.namespace_name() key = f'{namespace}.{name}' if col_pip_lines: @@ -173,61 +245,6 @@ def has_content(candidate_file): return bool(content.strip().strip('\n')) -class CollectionDefinition: - """This class represents the dependency metadata for a collection - should be replaced by logic to hit the Galaxy API if made available - """ - - def __init__(self, collection_path): - self.reference_path = collection_path - - # NOTE: Filenames should match constants.DEAFULT_EE_BASENAME and constants.YAML_FILENAME_EXTENSIONS. - meta_file_base = os.path.join(collection_path, 'meta', 'execution-environment') - ee_exists = False - for ext in ('yml', 'yaml'): - meta_file = f"{meta_file_base}.{ext}" - if os.path.exists(meta_file): - with open(meta_file, 'r') as f: - self.raw = yaml.safe_load(f) - ee_exists = True - break - - if not ee_exists: - self.raw = {'version': 1, 'dependencies': {}} - # Automatically infer requirements for collection - for entry, filename in [('python', 'requirements.txt'), ('system', 'bindep.txt')]: - candidate_file = os.path.join(collection_path, filename) - if has_content(candidate_file): - self.raw['dependencies'][entry] = filename - - def target_dir(self): - namespace, name = self.namespace_name() - return os.path.join( - base_collections_path, 'ansible_collections', - namespace, name - ) - - def namespace_name(self): - "Returns 2-tuple of namespace and name" - path_parts = [p for p in self.reference_path.split(os.path.sep) if p] - return tuple(path_parts[-2:]) - - def get_dependency(self, entry): - """A collection is only allowed to reference a file by a relative path - which is relative to the collection root - """ - req_file = self.raw.get('dependencies', {}).get(entry) - if req_file is None: - return None - if os.path.isabs(req_file): - raise RuntimeError( - 'Collections must specify relative paths for requirements files. ' - f'The file {req_file} specified by {self.reference_path} violates this.' - ) - - return req_file - - def strip_comments(reqs: dict[str, list]) -> dict[str, list]: """ Filter any comments out of the Python collection requirements input. @@ -246,7 +263,9 @@ def strip_comments(reqs: dict[str, list]) -> dict[str, list]: return result -def simple_combine(reqs: dict[str, list], exclude: list[str] | None = None, is_python: bool = True) -> list[str]: +def simple_combine(reqs: dict[str, list], + exclude: list[str] | None = None, + is_python: bool = True) -> list[str]: """ Given a dictionary of Python requirement lines keyed off collections, return a list of cleaned up (no source comments) requirements @@ -262,7 +281,7 @@ def simple_combine(reqs: dict[str, list], exclude: list[str] | None = None, is_p :param bool is_python: This should be set to True for Python requirements, as each will be tested for PEP508 compliance. This should be set to False for system requirements. - :return: A list of (possibly) annotated requirements. + :return: A list of annotated requirements. """ exclusions: list[str] = [] if exclude: @@ -299,8 +318,7 @@ def simple_combine(reqs: dict[str, list], exclude: list[str] | None = None, is_p logger.debug("# Excluding requirement '%s' from '%s'", name, collection) continue - annotated_line = f'{line} # from collection {collection}' - annotated_lines.append(annotated_line) + annotated_lines.append(f'{line} # from collection {collection}') return annotated_lines @@ -332,10 +350,12 @@ def run_introspect(args, log): user_pip_exclude=args.user_pip_exclude, user_bindep_exclude=args.user_bindep_exclude) log.info('# Dependency data for %s', args.folder) + data['python'] = simple_combine( data['python'], exclude=data['python'].pop('exclude', []), ) + data['system'] = simple_combine( data['system'], exclude=data['system'].pop('exclude', []), @@ -367,7 +387,7 @@ def create_introspect_parser(parser): help=argparse.SUPPRESS) introspect_parser.add_argument( - 'folder', default=base_collections_path, nargs='?', + 'folder', default=BASE_COLLECTIONS_PATH, nargs='?', help=( 'Ansible collections path(s) to introspect. ' 'This should have a folder named ansible_collections inside of it.' @@ -404,19 +424,6 @@ def create_introspect_parser(parser): return introspect_parser -EXCLUDE_REQUIREMENTS = frozenset(( - # obviously already satisfied or unwanted - 'ansible', 'ansible-base', 'python', 'ansible-core', - # general python test requirements - 'tox', 'pycodestyle', 'yamllint', 'pylint', - 'flake8', 'pytest', 'pytest-xdist', 'coverage', 'mock', 'testinfra', - # test requirements highly specific to Ansible testing - 'ansible-lint', 'molecule', 'galaxy-importer', 'voluptuous', - # already present in image for py3 environments - 'yaml', 'pyyaml', 'json', -)) - - def write_file(filename: str, lines: list) -> bool: parent_dir = os.path.dirname(filename) if parent_dir and not os.path.exists(parent_dir): From 392dfb5fe024e780b229b6121400bd15e08d1c42 Mon Sep 17 00:00:00 2001 From: David Shrewsbury Date: Mon, 15 Apr 2024 15:50:07 -0400 Subject: [PATCH 19/28] rename options, change schema data types, eliminate extraneous CONTEXT_FILES keys --- .../_target_scripts/introspect.py | 32 +++++++++---------- src/ansible_builder/constants.py | 5 --- src/ansible_builder/containerfile.py | 10 +++--- src/ansible_builder/ee_schema.py | 23 +++++++++++-- src/ansible_builder/user_definition.py | 5 +-- 5 files changed, 41 insertions(+), 34 deletions(-) diff --git a/src/ansible_builder/_target_scripts/introspect.py b/src/ansible_builder/_target_scripts/introspect.py index 74db13f8..41a5aad1 100644 --- a/src/ansible_builder/_target_scripts/introspect.py +++ b/src/ansible_builder/_target_scripts/introspect.py @@ -155,8 +155,8 @@ def process_collection(path): def process(data_dir=BASE_COLLECTIONS_PATH, user_pip=None, user_bindep=None, - user_pip_exclude=None, - user_bindep_exclude=None): + exclude_pip=None, + exclude_bindep=None): """ Build a dictionary of Python and system requirements from any collections installed in data_dir, and any user specified requirements. @@ -214,16 +214,16 @@ def process(data_dir=BASE_COLLECTIONS_PATH, col_pip_lines = pip_file_data(user_pip) if col_pip_lines: py_req['user'] = col_pip_lines - if user_pip_exclude: - col_pip_exclude_lines = pip_file_data(user_pip_exclude) + if exclude_pip: + col_pip_exclude_lines = pip_file_data(exclude_pip) if col_pip_exclude_lines: py_req['exclude'] = col_pip_exclude_lines if user_bindep: col_sys_lines = bindep_file_data(user_bindep) if col_sys_lines: sys_req['user'] = col_sys_lines - if user_bindep_exclude: - col_sys_exclude_lines = bindep_file_data(user_bindep_exclude) + if exclude_bindep: + col_sys_exclude_lines = bindep_file_data(exclude_bindep) if col_sys_exclude_lines: sys_req['exclude'] = col_sys_exclude_lines @@ -347,8 +347,8 @@ def run_introspect(args, log): data = process(args.folder, user_pip=args.user_pip, user_bindep=args.user_bindep, - user_pip_exclude=args.user_pip_exclude, - user_bindep_exclude=args.user_bindep_exclude) + exclude_pip=args.exclude_pip, + exclude_bindep=args.exclude_bindep) log.info('# Dependency data for %s', args.folder) data['python'] = simple_combine( @@ -393,24 +393,22 @@ def create_introspect_parser(parser): 'This should have a folder named ansible_collections inside of it.' ) ) - # Combine user requirements and collection requirements into single file - # in the future, could look into passing multilple files to - # python-builder scripts to be fed multiple files as opposed to this + introspect_parser.add_argument( '--user-pip', dest='user_pip', help='An additional file to combine with collection pip requirements.' ) - introspect_parser.add_argument( - '--user-pip-exclude', dest='user_pip_exclude', - help='An additional file to exclude specific pip requirements.' - ) introspect_parser.add_argument( '--user-bindep', dest='user_bindep', help='An additional file to combine with collection bindep requirements.' ) introspect_parser.add_argument( - '--user-bindep-exclude', dest='user_bindep_exclude', - help='An additional file to exclude specific bindep requirements.' + '--exclude-bindep-reqs', dest='exclude_bindep', + help='An additional file to exclude specific bindep requirements from collections.' + ) + introspect_parser.add_argument( + '--exclude-pip-reqs', dest='exclude_pip', + help='An additional file to exclude specific pip requirements from collections.' ) introspect_parser.add_argument( '--write-pip', dest='write_pip', diff --git a/src/ansible_builder/constants.py b/src/ansible_builder/constants.py index da37ab3c..92309324 100644 --- a/src/ansible_builder/constants.py +++ b/src/ansible_builder/constants.py @@ -34,11 +34,6 @@ # Files that need to be moved into the build context, and their naming inside the context CONTEXT_FILES = { - # HACK: hacking in prototype other kinds of deps for dynamic builder - 'python_interpreter': '', - 'ansible_core': '', - 'ansible_runner': '', - 'galaxy': 'requirements.yml', 'python': 'requirements.txt', 'system': 'bindep.txt', diff --git a/src/ansible_builder/containerfile.py b/src/ansible_builder/containerfile.py index 14462278..9535d0c3 100644 --- a/src/ansible_builder/containerfile.py +++ b/src/ansible_builder/containerfile.py @@ -254,11 +254,9 @@ def _create_folder_copy_files(self) -> None: scripts_dir = str(Path(self.build_outputs_dir) / 'scripts') os.makedirs(scripts_dir, exist_ok=True) + # For the python, system, and galaxy requirements, get a file path to the contents and copy + # it into the context directory with an expected name to later be used during the container builds. for item, new_name in constants.CONTEXT_FILES.items(): - # HACK: new dynamic base/builder - if not new_name: - continue - for exclude in (False, True): if exclude is True: new_name = f'exclude-{new_name}' @@ -461,7 +459,7 @@ def _prepare_introspect_assemble_steps(self) -> None: f"exclude-{constants.CONTEXT_FILES['python']}" ) self.steps.append(f"COPY {relative_pip_exclude_path} exclude-{constants.CONTEXT_FILES['python']}") - introspect_cmd += f" --user-pip-exclude=exclude-{constants.CONTEXT_FILES['python']}" + introspect_cmd += f" --exclude-pip=exclude-{constants.CONTEXT_FILES['python']}" bindep_exists = os.path.exists(os.path.join(self.build_outputs_dir, constants.CONTEXT_FILES['system'])) if bindep_exists: @@ -478,7 +476,7 @@ def _prepare_introspect_assemble_steps(self) -> None: f"exclude-{constants.CONTEXT_FILES['system']}" ) self.steps.append(f"COPY {relative_exclude_bindep_path} exclude-{constants.CONTEXT_FILES['system']}") - introspect_cmd += f" --user-bindep-exclude=exclude-{constants.CONTEXT_FILES['system']}" + introspect_cmd += f" --exclude-bindep=exclude-{constants.CONTEXT_FILES['system']}" introspect_cmd += " --write-bindep=/tmp/src/bindep.txt --write-pip=/tmp/src/requirements.txt" diff --git a/src/ansible_builder/ee_schema.py b/src/ansible_builder/ee_schema.py index 08c88579..b1239c1a 100644 --- a/src/ansible_builder/ee_schema.py +++ b/src/ansible_builder/ee_schema.py @@ -273,8 +273,27 @@ ], }, "exclude": { - "python": TYPE_StringOrListOfStrings, - "system": TYPE_StringOrListOfStrings, + "python": { + "description": "A list of python package names", + "type": "array", + "items": { + "type: string" + } + }, + "system": { + "description": "A list of system package names", + "type": "array", + "items": { + "type: string" + } + }, + "all_from_collections": { + "description": "A list of collection names", + "type": "array", + "items": { + "type": "string" + } + }, }, }, }, diff --git a/src/ansible_builder/user_definition.py b/src/ansible_builder/user_definition.py index ba04eb47..9452f60f 100644 --- a/src/ansible_builder/user_definition.py +++ b/src/ansible_builder/user_definition.py @@ -243,10 +243,7 @@ def validate(self): """ validate_schema(self.raw) - for item, value in constants.CONTEXT_FILES.items(): - # HACK: non-file deps for dynamic base/builder - if not value: - continue + for item in constants.CONTEXT_FILES: for exclude in (False, True): requirement_path = self.get_dep_abs_path(item, exclude=exclude) if requirement_path: From 2ebb72c9e0417f528168cd89341d5994932ef98b Mon Sep 17 00:00:00 2001 From: David Shrewsbury Date: Tue, 16 Apr 2024 15:22:21 -0400 Subject: [PATCH 20/28] Add support to exclude collection requirements --- .../_target_scripts/introspect.py | 42 ++++++++-- src/ansible_builder/containerfile.py | 26 +++++- src/ansible_builder/user_definition.py | 6 +- test/integration/test_introspect_cli.py | 38 +++++++++ test/unit/test_introspect.py | 79 ++++++++++++++++++- test/unit/test_user_definition.py | 21 +++++ 6 files changed, 203 insertions(+), 9 deletions(-) diff --git a/src/ansible_builder/_target_scripts/introspect.py b/src/ansible_builder/_target_scripts/introspect.py index 41a5aad1..da94e817 100644 --- a/src/ansible_builder/_target_scripts/introspect.py +++ b/src/ansible_builder/_target_scripts/introspect.py @@ -156,7 +156,8 @@ def process(data_dir=BASE_COLLECTIONS_PATH, user_pip=None, user_bindep=None, exclude_pip=None, - exclude_bindep=None): + exclude_bindep=None, + exclude_collections=None): """ Build a dictionary of Python and system requirements from any collections installed in data_dir, and any user specified requirements. @@ -176,6 +177,9 @@ def process(data_dir=BASE_COLLECTIONS_PATH, 'user': ['WVU'], 'exclude': ['ZYX'], }, + 'excluded_collections': [ + 'a.b', + ] } """ paths = [] @@ -227,11 +231,20 @@ def process(data_dir=BASE_COLLECTIONS_PATH, if col_sys_exclude_lines: sys_req['exclude'] = col_sys_exclude_lines - return { + retval = { 'python': py_req, - 'system': sys_req + 'system': sys_req, } + if exclude_collections: + # This file should just be a newline separated list of collection names, + # so reusing bindep_file_data() to read it should work fine. + excluded_collection_list = bindep_file_data(exclude_collections) + if excluded_collection_list: + retval['excluded_collections'] = excluded_collection_list + + return retval + def has_content(candidate_file): """Beyond checking that the candidate exists, this also assures @@ -265,6 +278,7 @@ def strip_comments(reqs: dict[str, list]) -> dict[str, list]: def simple_combine(reqs: dict[str, list], exclude: list[str] | None = None, + exclude_collections: list[str] | None = None, is_python: bool = True) -> list[str]: """ Given a dictionary of Python requirement lines keyed off collections, @@ -278,21 +292,30 @@ def simple_combine(reqs: dict[str, list], :param dict reqs: A dict of either Python or system requirements, keyed by collection name. :param list exclude: A list of requirements to be excluded from the output. + :param list exclude_collections: A list of collection names from which to exclude all requirements. :param bool is_python: This should be set to True for Python requirements, as each will be tested for PEP508 compliance. This should be set to False for system requirements. :return: A list of annotated requirements. """ exclusions: list[str] = [] + collection_ignore_list: list[str] = [] + if exclude: exclusions = [r.lower() for r in exclude] + if exclude_collections: + collection_ignore_list = [c.lower() for c in exclude_collections] annotated_lines: list[str] = [] uncommented_reqs = strip_comments(reqs) for collection, lines in uncommented_reqs.items(): - for line in lines: + # Bypass this collection if we've been told to ignore all requirements from it. + if collection.lower() in collection_ignore_list: + logger.debug("# Excluding all requirements from collection '%s'", collection) + continue + for line in lines: # Determine the simple name based on type of requirement if is_python: try: @@ -348,17 +371,22 @@ def run_introspect(args, log): user_pip=args.user_pip, user_bindep=args.user_bindep, exclude_pip=args.exclude_pip, - exclude_bindep=args.exclude_bindep) + exclude_bindep=args.exclude_bindep, + exclude_collections=args.exclude_collections) log.info('# Dependency data for %s', args.folder) + excluded_collections = data.pop('excluded_collections', None) + data['python'] = simple_combine( data['python'], exclude=data['python'].pop('exclude', []), + exclude_collections=excluded_collections, ) data['system'] = simple_combine( data['system'], exclude=data['system'].pop('exclude', []), + exclude_collections=excluded_collections, is_python=False ) @@ -410,6 +438,10 @@ def create_introspect_parser(parser): '--exclude-pip-reqs', dest='exclude_pip', help='An additional file to exclude specific pip requirements from collections.' ) + introspect_parser.add_argument( + '--exclude-collection-reqs', dest='exclude_collections', + help='An additional file to exclude all requirements from the listed collections.' + ) introspect_parser.add_argument( '--write-pip', dest='write_pip', help='Write the combined pip requirements file to this location.' diff --git a/src/ansible_builder/containerfile.py b/src/ansible_builder/containerfile.py index 9535d0c3..1a1ad490 100644 --- a/src/ansible_builder/containerfile.py +++ b/src/ansible_builder/containerfile.py @@ -3,6 +3,7 @@ import importlib.resources import logging import os +import tempfile from pathlib import Path @@ -256,6 +257,7 @@ def _create_folder_copy_files(self) -> None: # For the python, system, and galaxy requirements, get a file path to the contents and copy # it into the context directory with an expected name to later be used during the container builds. + # The get_dep_abs_path() handles parsing the various requirements and any exclusions, if any. for item, new_name in constants.CONTEXT_FILES.items(): for exclude in (False, True): if exclude is True: @@ -271,6 +273,17 @@ def _create_folder_copy_files(self) -> None: # about the contents anyway. copy_file(requirement_path, dest, ignore_mtime=True) + # We need to handle dependencies.exclude.all_from_collections independently since + # it doesn't follow the same model as the other dependency requirements. + exclude_deps = self.definition.dependencies.get('exclude') + if exclude_deps and 'all_from_collections' in exclude_deps: + collection_ignore_list = exclude_deps['all_from_collections'] + dest = os.path.join(self.build_context, constants.user_content_subfolder, "exclude-collections.txt") + with tempfile.NamedTemporaryFile('w') as fp: + fp.write('\n'.join(collection_ignore_list)) + fp.flush() + copy_file(fp.name, dest, ignore_mtime=True) + if self.original_galaxy_keyring: copy_file( self.original_galaxy_keyring, @@ -459,7 +472,7 @@ def _prepare_introspect_assemble_steps(self) -> None: f"exclude-{constants.CONTEXT_FILES['python']}" ) self.steps.append(f"COPY {relative_pip_exclude_path} exclude-{constants.CONTEXT_FILES['python']}") - introspect_cmd += f" --exclude-pip=exclude-{constants.CONTEXT_FILES['python']}" + introspect_cmd += f" --exclude-pip-reqs=exclude-{constants.CONTEXT_FILES['python']}" bindep_exists = os.path.exists(os.path.join(self.build_outputs_dir, constants.CONTEXT_FILES['system'])) if bindep_exists: @@ -476,7 +489,16 @@ def _prepare_introspect_assemble_steps(self) -> None: f"exclude-{constants.CONTEXT_FILES['system']}" ) self.steps.append(f"COPY {relative_exclude_bindep_path} exclude-{constants.CONTEXT_FILES['system']}") - introspect_cmd += f" --exclude-bindep=exclude-{constants.CONTEXT_FILES['system']}" + introspect_cmd += f" --exclude-bindep-reqs=exclude-{constants.CONTEXT_FILES['system']}" + + exclude_collections_exists = os.path.exists(os.path.join( + self.build_outputs_dir, "exclude-collections.txt" + )) + if exclude_collections_exists: + relative_exclude_collections_path = os.path.join( + constants.user_content_subfolder, "exclude-collections.txt") + self.steps.append(f"COPY {relative_exclude_collections_path} exclude-collections.txt") + introspect_cmd += " --exclude-collection-reqs=exclude-collections.txt" introspect_cmd += " --write-bindep=/tmp/src/bindep.txt --write-pip=/tmp/src/requirements.txt" diff --git a/src/ansible_builder/user_definition.py b/src/ansible_builder/user_definition.py index 9452f60f..eb78b521 100644 --- a/src/ansible_builder/user_definition.py +++ b/src/ansible_builder/user_definition.py @@ -180,6 +180,10 @@ def container_init(self): def options(self): return self.raw.get('options', {}) + @property + def dependencies(self): + return self.raw.get('dependencies', {}) + # This is the size of galaxy, python, system * exclude=True/False @functools.lru_cache(maxsize=6) def get_dep_abs_path(self, entry, exclude=False): @@ -187,7 +191,7 @@ def get_dep_abs_path(self, entry, exclude=False): an absolute path or a path relative to the EE definition folder This method will return the absolute path. """ - deps = self.raw.get('dependencies', {}) + deps = self.dependencies if exclude: deps = deps.get('exclude', {}) diff --git a/test/integration/test_introspect_cli.py b/test/integration/test_introspect_cli.py index 13919fa2..18716e89 100644 --- a/test/integration/test_introspect_cli.py +++ b/test/integration/test_introspect_cli.py @@ -48,3 +48,41 @@ def test_introspect_with_user_reqs(cli, data_dir, tmp_path): assert 'ansible # from collection user' in r.stdout # 'pytest' allowed in user requirements assert 'pytest # from collection user' in r.stdout + + +def test_introspect_exclude_python(cli, data_dir, tmp_path): + exclude_file = tmp_path / 'exclude.txt' + exclude_file.write_text("pytz\npython-dateutil\n") + + r = cli(f'ansible-builder introspect {data_dir} --exclude-pip-reqs={exclude_file}') + data = yaml.safe_load(r.stdout) + + assert 'python' in data + assert 'system' in data + assert 'pytz' not in r.stdout + assert 'python-dateutil' not in r.stdout + + +def test_introspect_exclude_system(cli, data_dir, tmp_path): + exclude_file = tmp_path / 'exclude.txt' + exclude_file.write_text("subversion\n") + + r = cli(f'ansible-builder introspect {data_dir} --exclude-bindep-reqs={exclude_file}') + data = yaml.safe_load(r.stdout) + + assert 'python' in data + assert 'system' in data + assert 'subversion' not in r.stdout + + +def test_introspect_exclude_collections(cli, data_dir, tmp_path): + exclude_file = tmp_path / 'exclude.txt' + exclude_file.write_text("test.reqfile\ntest.bindep\n") + + r = cli(f'ansible-builder introspect {data_dir} --exclude-collection-reqs={exclude_file}') + data = yaml.safe_load(r.stdout) + + assert 'python' in data + assert 'system' in data + assert 'from collection test.reqfile' not in r.stdout + assert 'from collection test.bindep' not in r.stdout diff --git a/test/unit/test_introspect.py b/test/unit/test_introspect.py index 12a7f1b4..e026e63a 100644 --- a/test/unit/test_introspect.py +++ b/test/unit/test_introspect.py @@ -9,7 +9,6 @@ def test_multiple_collection_metadata(data_dir): - files = process(data_dir) files['python'] = simple_combine(files['python']) files['system'] = simple_combine(files['system'], is_python=False) @@ -27,6 +26,47 @@ def test_multiple_collection_metadata(data_dir): ]} +def test_process_returns_excluded_python(data_dir, tmp_path): + """ + Test that process() return value is properly formatted for excluded Python reqs. + """ + pip_ignore_file = tmp_path / "exclude-requirements.txt" + pip_ignore_file.write_text("req1\nreq2") + + retval = process(data_dir, exclude_pip=str(pip_ignore_file)) + + assert 'python' in retval + assert 'exclude' in retval['python'] + assert retval['python']['exclude'] == ['req1', 'req2'] + + +def test_process_returns_excluded_system(data_dir, tmp_path): + """ + Test that process() return value is properly formatted for excluded system reqs. + """ + bindep_ignore_file = tmp_path / "exclude-bindep.txt" + bindep_ignore_file.write_text("req1\nreq2") + + retval = process(data_dir, exclude_bindep=str(bindep_ignore_file)) + + assert 'system' in retval + assert 'exclude' in retval['system'] + assert retval['system']['exclude'] == ['req1', 'req2'] + + +def test_process_returns_excluded_collections(data_dir, tmp_path): + """ + Test that process() return value is properly formatted for excluded collections. + """ + col_ignore_file = tmp_path / "ignored_collections" + col_ignore_file.write_text("a.b\nc.d") + + retval = process(data_dir, exclude_collections=str(col_ignore_file)) + + assert 'excluded_collections' in retval + assert retval['excluded_collections'] == ['a.b', 'c.d'] + + def test_single_collection_metadata(data_dir): col_path = os.path.join(data_dir, 'ansible_collections', 'test', 'metadata') @@ -293,3 +333,40 @@ def test_excluded_python_requirements(): ] assert simple_combine(reqs, excluded) == expected + + +def test_simple_combine_excludes_collections(): + """ + Test that excluding all requirements from a list of collections works in simple_combine(). + """ + reqs = { + "a.b": [ + "req1", + "req2==0.1.0", + "req4 ; python_version<='3.9'", + "git+https://git.repo/some_pkg.git#egg=SomePackage", + ], + "c.d": [ + "req1<=2.0.0", + "req3", + ], + "e.f": [ + "req5", + ], + "user": [ + "req1" # should never exclude from user reqs + ] + } + + excluded_collections = [ + 'a.b', + 'e.f', + ] + + expected = [ + "req1<=2.0.0 # from collection c.d", + "req3 # from collection c.d", + "req1 # from collection user", + ] + + assert simple_combine(reqs, exclude_collections=excluded_collections) == expected diff --git a/test/unit/test_user_definition.py b/test/unit/test_user_definition.py index db21b9bd..3928f862 100644 --- a/test/unit/test_user_definition.py +++ b/test/unit/test_user_definition.py @@ -267,6 +267,27 @@ def test_v3_set_tags(self, exec_env_definition_file): value = definition.raw.get('options', {}).get('tags') assert value == ['ee_test:latest'] + def test_v3_dependencies_property(self, exec_env_definition_file): + deps = { + 'python': ['req1', 'req2'], + 'system': ['sysreq1', 'sysreq2'], + 'exclude': { + 'all_from_collections': ['a.b', 'c.d'], + } + } + + ee_content = { + 'version': 3, + 'dependencies': deps, + } + + path = exec_env_definition_file(content=ee_content) + definition = UserDefinition(path) + definition.validate() + + value = definition.dependencies + assert value == deps + class TestImageDescription: From 4c12bdcd5f281746e9fee622b85736fadd866350 Mon Sep 17 00:00:00 2001 From: David Shrewsbury Date: Tue, 16 Apr 2024 16:53:29 -0400 Subject: [PATCH 21/28] Rename simple_combine to filter_requirements --- .../_target_scripts/introspect.py | 14 +++++----- test/unit/test_introspect.py | 26 +++++++++---------- 2 files changed, 20 insertions(+), 20 deletions(-) diff --git a/src/ansible_builder/_target_scripts/introspect.py b/src/ansible_builder/_target_scripts/introspect.py index da94e817..66a87617 100644 --- a/src/ansible_builder/_target_scripts/introspect.py +++ b/src/ansible_builder/_target_scripts/introspect.py @@ -276,10 +276,10 @@ def strip_comments(reqs: dict[str, list]) -> dict[str, list]: return result -def simple_combine(reqs: dict[str, list], - exclude: list[str] | None = None, - exclude_collections: list[str] | None = None, - is_python: bool = True) -> list[str]: +def filter_requirements(reqs: dict[str, list], + exclude: list[str] | None = None, + exclude_collections: list[str] | None = None, + is_python: bool = True) -> list[str]: """ Given a dictionary of Python requirement lines keyed off collections, return a list of cleaned up (no source comments) requirements @@ -296,7 +296,7 @@ def simple_combine(reqs: dict[str, list], :param bool is_python: This should be set to True for Python requirements, as each will be tested for PEP508 compliance. This should be set to False for system requirements. - :return: A list of annotated requirements. + :return: A list of filtered and annotated requirements. """ exclusions: list[str] = [] collection_ignore_list: list[str] = [] @@ -377,13 +377,13 @@ def run_introspect(args, log): excluded_collections = data.pop('excluded_collections', None) - data['python'] = simple_combine( + data['python'] = filter_requirements( data['python'], exclude=data['python'].pop('exclude', []), exclude_collections=excluded_collections, ) - data['system'] = simple_combine( + data['system'] = filter_requirements( data['system'], exclude=data['system'].pop('exclude', []), exclude_collections=excluded_collections, diff --git a/test/unit/test_introspect.py b/test/unit/test_introspect.py index e026e63a..74ab4b6b 100644 --- a/test/unit/test_introspect.py +++ b/test/unit/test_introspect.py @@ -4,14 +4,14 @@ from ansible_builder._target_scripts.introspect import (parse_args, process, process_collection, - simple_combine, + filter_requirements, strip_comments) def test_multiple_collection_metadata(data_dir): files = process(data_dir) - files['python'] = simple_combine(files['python']) - files['system'] = simple_combine(files['system'], is_python=False) + files['python'] = filter_requirements(files['python']) + files['system'] = filter_requirements(files['system'], is_python=False) assert files == {'python': [ 'pyvcloud>=14 # from collection test.metadata', @@ -170,12 +170,12 @@ def test_sanitize_pep508(): "name[quux, strange];python_version<'2.7' and platform_version=='2' # from collection m.n" ] - assert simple_combine(reqs) == expected + assert filter_requirements(reqs) == expected def test_comment_parsing(): """ - Test that simple_combine() does not remove embedded URL anchors due to comment parsing. + Test that filter_requirements() does not remove embedded URL anchors due to comment parsing. """ reqs = { 'a.b': [ @@ -196,7 +196,7 @@ def test_comment_parsing(): 'git+https://git.repo/some_pkg.git#egg=AlsoSomePackage', ] - assert simple_combine(reqs) == expected + assert filter_requirements(reqs) == expected def test_strip_comments(): @@ -236,7 +236,7 @@ def test_strip_comments(): def test_python_pass_thru(): """ - Test that simple_combine() will pass through non-pep508 data. + Test that filter_requirements() will pass through non-pep508 data. """ reqs = { # various VCS and URL options @@ -267,7 +267,7 @@ def test_python_pass_thru(): '-e svn+http://svn.example.com/svn/MyProject/trunk@2019#egg=MyProject', ] - assert simple_combine(reqs) == expected + assert filter_requirements(reqs) == expected def test_excluded_system_requirements(): @@ -299,7 +299,7 @@ def test_excluded_system_requirements(): 'foo # from collection user', ] - assert simple_combine(reqs, exclude=excluded, is_python=False) == expected + assert filter_requirements(reqs, exclude=excluded, is_python=False) == expected def test_excluded_python_requirements(): @@ -332,12 +332,12 @@ def test_excluded_python_requirements(): "req1 # from collection user", ] - assert simple_combine(reqs, excluded) == expected + assert filter_requirements(reqs, excluded) == expected -def test_simple_combine_excludes_collections(): +def test_filter_requirements_excludes_collections(): """ - Test that excluding all requirements from a list of collections works in simple_combine(). + Test that excluding all requirements from a list of collections works in filter_requirements(). """ reqs = { "a.b": [ @@ -369,4 +369,4 @@ def test_simple_combine_excludes_collections(): "req1 # from collection user", ] - assert simple_combine(reqs, exclude_collections=excluded_collections) == expected + assert filter_requirements(reqs, exclude_collections=excluded_collections) == expected From aacad0326f7919a46e8fd937b74e9a3b4ca035d4 Mon Sep 17 00:00:00 2001 From: David Shrewsbury Date: Wed, 17 Apr 2024 10:05:41 -0400 Subject: [PATCH 22/28] Use constants for standard requirements file names --- src/ansible_builder/constants.py | 11 +++++-- src/ansible_builder/containerfile.py | 48 +++++++++++++++------------- test/unit/test_containerfile.py | 16 +++++----- 3 files changed, 41 insertions(+), 34 deletions(-) diff --git a/src/ansible_builder/constants.py b/src/ansible_builder/constants.py index 92309324..87088b32 100644 --- a/src/ansible_builder/constants.py +++ b/src/ansible_builder/constants.py @@ -32,11 +32,16 @@ default_keyring_name = 'keyring.gpg' default_policy_file_name = 'policy.json' +EXCL_COLLECTIONS_FILENAME = 'exclude-collections.txt' +STD_BINDEP_FILENAME = 'bindep.txt' +STD_GALAXY_FILENAME = 'requirements.yml' +STD_PIP_FILENAME = 'requirements.txt' + # Files that need to be moved into the build context, and their naming inside the context CONTEXT_FILES = { - 'galaxy': 'requirements.yml', - 'python': 'requirements.txt', - 'system': 'bindep.txt', + 'galaxy': STD_GALAXY_FILENAME, + 'python': STD_PIP_FILENAME, + 'system': STD_BINDEP_FILENAME, } FINAL_IMAGE_BIN_PATH = "/opt/builder/bin" diff --git a/src/ansible_builder/containerfile.py b/src/ansible_builder/containerfile.py index 1a1ad490..71781ab1 100644 --- a/src/ansible_builder/containerfile.py +++ b/src/ansible_builder/containerfile.py @@ -278,7 +278,9 @@ def _create_folder_copy_files(self) -> None: exclude_deps = self.definition.dependencies.get('exclude') if exclude_deps and 'all_from_collections' in exclude_deps: collection_ignore_list = exclude_deps['all_from_collections'] - dest = os.path.join(self.build_context, constants.user_content_subfolder, "exclude-collections.txt") + dest = os.path.join(self.build_context, + constants.user_content_subfolder, + constants.EXCL_COLLECTIONS_FILENAME) with tempfile.NamedTemporaryFile('w') as fp: fp.write('\n'.join(collection_ignore_list)) fp.flush() @@ -412,7 +414,7 @@ def _prepare_build_context(self) -> None: def _prepare_galaxy_install_steps(self) -> None: env = "" - install_opts = (f"-r {constants.CONTEXT_FILES['galaxy']} " + install_opts = (f"-r {constants.STD_GALAXY_FILENAME} " f"--collections-path \"{constants.base_collections_path}\"") if self.galaxy_ignore_signature_status_codes: @@ -434,7 +436,7 @@ def _prepare_galaxy_install_steps(self) -> None: self.steps.append( f"RUN ansible-galaxy role install $ANSIBLE_GALAXY_CLI_ROLE_OPTS " - f"-r {constants.CONTEXT_FILES['galaxy']}" + f"-r {constants.STD_GALAXY_FILENAME}" f" --roles-path \"{constants.base_roles_path}\"", ) step = f"RUN {env}ansible-galaxy collection install $ANSIBLE_GALAXY_CLI_COLLECTION_OPTS {install_opts}" @@ -451,54 +453,54 @@ def _prepare_introspect_assemble_steps(self) -> None: introspect_cmd = "RUN $PYCMD /output/scripts/introspect.py introspect" requirements_file_exists = os.path.exists(os.path.join( - self.build_outputs_dir, constants.CONTEXT_FILES['python'] + self.build_outputs_dir, constants.STD_PIP_FILENAME )) if requirements_file_exists: relative_requirements_path = os.path.join( constants.user_content_subfolder, - constants.CONTEXT_FILES['python'] + constants.STD_PIP_FILENAME ) - self.steps.append(f"COPY {relative_requirements_path} {constants.CONTEXT_FILES['python']}") + self.steps.append(f"COPY {relative_requirements_path} {constants.STD_PIP_FILENAME}") # WORKDIR is /build, so we use the (shorter) relative paths there - introspect_cmd += f" --user-pip={constants.CONTEXT_FILES['python']}" + introspect_cmd += f" --user-pip={constants.STD_PIP_FILENAME}" pip_exclude_exists = os.path.exists(os.path.join( - self.build_outputs_dir, f"exclude-{constants.CONTEXT_FILES['python']}" + self.build_outputs_dir, f"exclude-{constants.STD_PIP_FILENAME}" )) if pip_exclude_exists: relative_pip_exclude_path = os.path.join( constants.user_content_subfolder, - f"exclude-{constants.CONTEXT_FILES['python']}" + f"exclude-{constants.STD_PIP_FILENAME}" ) - self.steps.append(f"COPY {relative_pip_exclude_path} exclude-{constants.CONTEXT_FILES['python']}") - introspect_cmd += f" --exclude-pip-reqs=exclude-{constants.CONTEXT_FILES['python']}" + self.steps.append(f"COPY {relative_pip_exclude_path} exclude-{constants.STD_PIP_FILENAME}") + introspect_cmd += f" --exclude-pip-reqs=exclude-{constants.STD_PIP_FILENAME}" - bindep_exists = os.path.exists(os.path.join(self.build_outputs_dir, constants.CONTEXT_FILES['system'])) + bindep_exists = os.path.exists(os.path.join(self.build_outputs_dir, constants.STD_BINDEP_FILENAME)) if bindep_exists: - relative_bindep_path = os.path.join(constants.user_content_subfolder, constants.CONTEXT_FILES['system']) - self.steps.append(f"COPY {relative_bindep_path} {constants.CONTEXT_FILES['system']}") - introspect_cmd += f" --user-bindep={constants.CONTEXT_FILES['system']}" + relative_bindep_path = os.path.join(constants.user_content_subfolder, constants.STD_BINDEP_FILENAME) + self.steps.append(f"COPY {relative_bindep_path} {constants.STD_BINDEP_FILENAME}") + introspect_cmd += f" --user-bindep={constants.STD_BINDEP_FILENAME}" exclude_bindep_exists = os.path.exists(os.path.join( - self.build_outputs_dir, f"exclude-{constants.CONTEXT_FILES['system']}" + self.build_outputs_dir, f"exclude-{constants.STD_BINDEP_FILENAME}" )) if exclude_bindep_exists: relative_exclude_bindep_path = os.path.join( constants.user_content_subfolder, - f"exclude-{constants.CONTEXT_FILES['system']}" + f"exclude-{constants.STD_BINDEP_FILENAME}" ) - self.steps.append(f"COPY {relative_exclude_bindep_path} exclude-{constants.CONTEXT_FILES['system']}") - introspect_cmd += f" --exclude-bindep-reqs=exclude-{constants.CONTEXT_FILES['system']}" + self.steps.append(f"COPY {relative_exclude_bindep_path} exclude-{constants.STD_BINDEP_FILENAME}") + introspect_cmd += f" --exclude-bindep-reqs=exclude-{constants.STD_BINDEP_FILENAME}" exclude_collections_exists = os.path.exists(os.path.join( - self.build_outputs_dir, "exclude-collections.txt" + self.build_outputs_dir, constants.EXCL_COLLECTIONS_FILENAME )) if exclude_collections_exists: relative_exclude_collections_path = os.path.join( - constants.user_content_subfolder, "exclude-collections.txt") - self.steps.append(f"COPY {relative_exclude_collections_path} exclude-collections.txt") - introspect_cmd += " --exclude-collection-reqs=exclude-collections.txt" + constants.user_content_subfolder, constants.EXCL_COLLECTIONS_FILENAME) + self.steps.append(f"COPY {relative_exclude_collections_path} {constants.EXCL_COLLECTIONS_FILENAME}") + introspect_cmd += f" --exclude-collection-reqs={constants.EXCL_COLLECTIONS_FILENAME}" introspect_cmd += " --write-bindep=/tmp/src/bindep.txt --write-pip=/tmp/src/requirements.txt" diff --git a/test/unit/test_containerfile.py b/test/unit/test_containerfile.py index d988b4e8..2267f47d 100644 --- a/test/unit/test_containerfile.py +++ b/test/unit/test_containerfile.py @@ -51,9 +51,9 @@ def test_prepare_galaxy_install_steps(build_dir_and_ee_yml): c._prepare_galaxy_install_steps() expected = [ f"RUN ansible-galaxy role install $ANSIBLE_GALAXY_CLI_ROLE_OPTS " - f"-r {constants.CONTEXT_FILES['galaxy']} --roles-path \"{constants.base_roles_path}\"", + f"-r {constants.STD_GALAXY_FILENAME} --roles-path \"{constants.base_roles_path}\"", f"RUN ANSIBLE_GALAXY_DISABLE_GPG_VERIFY=1 ansible-galaxy collection install " - f"$ANSIBLE_GALAXY_CLI_COLLECTION_OPTS -r {constants.CONTEXT_FILES['galaxy']} " + f"$ANSIBLE_GALAXY_CLI_COLLECTION_OPTS -r {constants.STD_GALAXY_FILENAME} " f"--collections-path \"{constants.base_collections_path}\"" ] assert c.steps == expected @@ -69,9 +69,9 @@ def test_prepare_galaxy_install_steps_with_keyring(build_dir_and_ee_yml): c._prepare_galaxy_install_steps() expected = [ f"RUN ansible-galaxy role install $ANSIBLE_GALAXY_CLI_ROLE_OPTS " - f"-r {constants.CONTEXT_FILES['galaxy']} --roles-path \"{constants.base_roles_path}\"", + f"-r {constants.STD_GALAXY_FILENAME} --roles-path \"{constants.base_roles_path}\"", f"RUN ansible-galaxy collection install $ANSIBLE_GALAXY_CLI_COLLECTION_OPTS " - f"-r {constants.CONTEXT_FILES['galaxy']} " + f"-r {constants.STD_GALAXY_FILENAME} " f"--collections-path \"{constants.base_collections_path}\" --keyring \"{constants.default_keyring_name}\"" ] assert c.steps == expected @@ -90,9 +90,9 @@ def test_prepare_galaxy_install_steps_with_sigcount(build_dir_and_ee_yml): c._prepare_galaxy_install_steps() expected = [ f"RUN ansible-galaxy role install $ANSIBLE_GALAXY_CLI_ROLE_OPTS " - f"-r {constants.CONTEXT_FILES['galaxy']} --roles-path \"{constants.base_roles_path}\"", + f"-r {constants.STD_GALAXY_FILENAME} --roles-path \"{constants.base_roles_path}\"", f"RUN ansible-galaxy collection install $ANSIBLE_GALAXY_CLI_COLLECTION_OPTS " - f"-r {constants.CONTEXT_FILES['galaxy']} " + f"-r {constants.STD_GALAXY_FILENAME} " f"--collections-path \"{constants.base_collections_path}\" " f"--required-valid-signature-count {sig_count} --keyring \"{constants.default_keyring_name}\"" ] @@ -112,9 +112,9 @@ def test_prepare_galaxy_install_steps_with_ignore_code(build_dir_and_ee_yml): c._prepare_galaxy_install_steps() expected = [ f"RUN ansible-galaxy role install $ANSIBLE_GALAXY_CLI_ROLE_OPTS " - f"-r {constants.CONTEXT_FILES['galaxy']} --roles-path \"{constants.base_roles_path}\"", + f"-r {constants.STD_GALAXY_FILENAME} --roles-path \"{constants.base_roles_path}\"", f"RUN ansible-galaxy collection install $ANSIBLE_GALAXY_CLI_COLLECTION_OPTS " - f"-r {constants.CONTEXT_FILES['galaxy']} " + f"-r {constants.STD_GALAXY_FILENAME} " f"--collections-path \"{constants.base_collections_path}\" " f"--ignore-signature-status-code {codes[0]} --ignore-signature-status-code {codes[1]} " f"--keyring \"{constants.default_keyring_name}\"" From bf23263aaa649698deed9bd6b3d2a6cba7204fa4 Mon Sep 17 00:00:00 2001 From: David Shrewsbury Date: Wed, 17 Apr 2024 11:00:26 -0400 Subject: [PATCH 23/28] Simplify introspect command generation --- src/ansible_builder/containerfile.py | 79 +++++++++++----------------- test/unit/test_containerfile.py | 42 +++++++++++++++ 2 files changed, 73 insertions(+), 48 deletions(-) diff --git a/src/ansible_builder/containerfile.py b/src/ansible_builder/containerfile.py index 71781ab1..82b9756a 100644 --- a/src/ansible_builder/containerfile.py +++ b/src/ansible_builder/containerfile.py @@ -442,6 +442,23 @@ def _prepare_galaxy_install_steps(self) -> None: step = f"RUN {env}ansible-galaxy collection install $ANSIBLE_GALAXY_CLI_COLLECTION_OPTS {install_opts}" self.steps.append(step) + def _add_copy_for_file(self, filename: str) -> bool: + """ + If the given file exists within the context build directory, add a COPY command to the + instruction file steps. + + :param str filename: The base requirement filename to check. + + :return: True if file exists and COPY command was added, False otherwise. + """ + file_exists = os.path.exists(os.path.join(self.build_outputs_dir, filename)) + if file_exists: + relative_path = os.path.join(constants.user_content_subfolder, filename) + # WORKDIR is /build, so we use the (shorter) relative paths there + self.steps.append(f"COPY {relative_path} {filename}") + return True + return False + def _prepare_introspect_assemble_steps(self) -> None: # The introspect/assemble block is valid if there are any form of requirements deps: list[str] = [] @@ -449,57 +466,23 @@ def _prepare_introspect_assemble_steps(self) -> None: deps.extend( self.definition.get_dep_abs_path(thing, exclude=exclude) for thing in ('galaxy', 'system', 'python') ) + if any(deps): introspect_cmd = "RUN $PYCMD /output/scripts/introspect.py introspect" - requirements_file_exists = os.path.exists(os.path.join( - self.build_outputs_dir, constants.STD_PIP_FILENAME - )) - - if requirements_file_exists: - relative_requirements_path = os.path.join( - constants.user_content_subfolder, - constants.STD_PIP_FILENAME - ) - self.steps.append(f"COPY {relative_requirements_path} {constants.STD_PIP_FILENAME}") - # WORKDIR is /build, so we use the (shorter) relative paths there - introspect_cmd += f" --user-pip={constants.STD_PIP_FILENAME}" - - pip_exclude_exists = os.path.exists(os.path.join( - self.build_outputs_dir, f"exclude-{constants.STD_PIP_FILENAME}" - )) - if pip_exclude_exists: - relative_pip_exclude_path = os.path.join( - constants.user_content_subfolder, - f"exclude-{constants.STD_PIP_FILENAME}" - ) - self.steps.append(f"COPY {relative_pip_exclude_path} exclude-{constants.STD_PIP_FILENAME}") - introspect_cmd += f" --exclude-pip-reqs=exclude-{constants.STD_PIP_FILENAME}" - - bindep_exists = os.path.exists(os.path.join(self.build_outputs_dir, constants.STD_BINDEP_FILENAME)) - if bindep_exists: - relative_bindep_path = os.path.join(constants.user_content_subfolder, constants.STD_BINDEP_FILENAME) - self.steps.append(f"COPY {relative_bindep_path} {constants.STD_BINDEP_FILENAME}") - introspect_cmd += f" --user-bindep={constants.STD_BINDEP_FILENAME}" - - exclude_bindep_exists = os.path.exists(os.path.join( - self.build_outputs_dir, f"exclude-{constants.STD_BINDEP_FILENAME}" - )) - if exclude_bindep_exists: - relative_exclude_bindep_path = os.path.join( - constants.user_content_subfolder, - f"exclude-{constants.STD_BINDEP_FILENAME}" - ) - self.steps.append(f"COPY {relative_exclude_bindep_path} exclude-{constants.STD_BINDEP_FILENAME}") - introspect_cmd += f" --exclude-bindep-reqs=exclude-{constants.STD_BINDEP_FILENAME}" - - exclude_collections_exists = os.path.exists(os.path.join( - self.build_outputs_dir, constants.EXCL_COLLECTIONS_FILENAME - )) - if exclude_collections_exists: - relative_exclude_collections_path = os.path.join( - constants.user_content_subfolder, constants.EXCL_COLLECTIONS_FILENAME) - self.steps.append(f"COPY {relative_exclude_collections_path} {constants.EXCL_COLLECTIONS_FILENAME}") + for option, exc_option, req_file in ( + ('--user-pip', '--exclude-pip-reqs', constants.STD_PIP_FILENAME), + ('--user-bindep', '--exclude-bindep-reqs', constants.STD_BINDEP_FILENAME) + ): + if self._add_copy_for_file(req_file): + introspect_cmd += f" {option}={req_file}" + + exclude_req_file = f"exclude-{req_file}" + + if self._add_copy_for_file(exclude_req_file): + introspect_cmd += f" {exc_option}={exclude_req_file}" + + if self._add_copy_for_file(constants.EXCL_COLLECTIONS_FILENAME): introspect_cmd += f" --exclude-collection-reqs={constants.EXCL_COLLECTIONS_FILENAME}" introspect_cmd += " --write-bindep=/tmp/src/bindep.txt --write-pip=/tmp/src/requirements.txt" diff --git a/test/unit/test_containerfile.py b/test/unit/test_containerfile.py index 2267f47d..5cf37cbe 100644 --- a/test/unit/test_containerfile.py +++ b/test/unit/test_containerfile.py @@ -329,3 +329,45 @@ def test_v2_builder_image_default(build_dir_and_ee_yml): c.prepare() assert "FROM base as builder" in c.steps assert "COPY _build/scripts/pip_install /output/scripts/pip_install" not in c.steps + + +def test_prepare_introspect_assemble_steps(build_dir_and_ee_yml): + """ + Test that the introspect command is built as expected. + """ + + ee_data = """ + version: 3 + images: + base_image: + name: quay.io/user/mycustombaseimage:latest + dependencies: + python: + - six + system: + - git + galaxy: + collections: + - name: community.windows + exclude: + python: + - aaa + system: + - bbb + all_from_collections: + - a.b + """ + + tmpdir, ee_path = build_dir_and_ee_yml(ee_data) + c = make_containerfile(tmpdir, ee_path, run_validate=True) + c._create_folder_copy_files() + c._prepare_introspect_assemble_steps() + + expected_introspect_command = "RUN $PYCMD /output/scripts/introspect.py introspect" \ + f" --user-pip={constants.STD_PIP_FILENAME}" \ + f" --exclude-pip-reqs=exclude-{constants.STD_PIP_FILENAME}" \ + f" --user-bindep={constants.STD_BINDEP_FILENAME}" \ + f" --exclude-bindep-reqs=exclude-{constants.STD_BINDEP_FILENAME}" \ + f" --exclude-collection-reqs={constants.EXCL_COLLECTIONS_FILENAME}" \ + " --write-bindep=/tmp/src/bindep.txt --write-pip=/tmp/src/requirements.txt" + assert expected_introspect_command in c.steps From 73d72ec9d38a5d2a991c368aaeb7df13193056d3 Mon Sep 17 00:00:00 2001 From: David Shrewsbury Date: Fri, 19 Apr 2024 10:57:00 -0400 Subject: [PATCH 24/28] Add note about pep 508 and python requirements --- docs/collection_metadata.rst | 16 ++++++++++++---- docs/definition.rst | 10 ++++++++++ 2 files changed, 22 insertions(+), 4 deletions(-) diff --git a/docs/collection_metadata.rst b/docs/collection_metadata.rst index b0db4000..9865b245 100644 --- a/docs/collection_metadata.rst +++ b/docs/collection_metadata.rst @@ -99,13 +99,21 @@ Then, if the ``ansible_collection`` directory is in your home directory, you can Python Dependencies ^^^^^^^^^^^^^^^^^^^ -Ansible Builder combines all the Python requirements files from all collections into a single file using the ``requirements-parser`` library. This library supports complex syntax, including references to other files. +Ansible Builder combines all the Python requirements files from all collections into a single file. -If multiple collections require the same *package name*, Ansible Builder combines them into a single entry and combines the constraints. +Certain package names are specifically *ignored* by ``ansible-builder``, meaning that Ansible Builder +does not include them in the combined file of Python dependencies, even if a collection lists them as +dependencies. These include test packages and packages that provide Ansible itself. The full list can +be found in ``EXCLUDE_REQUIREMENTS`` in ``src/ansible_builder/_target_scripts/introspect.py``. -Certain package names are specifically *ignored* by ``ansible-builder``, meaning that Ansible Builder does not include them in the combined file of Python dependencies, even if a collection lists them as dependencies. These include test packages and packages that provide Ansible itself. The full list can be found in ``EXCLUDE_REQUIREMENTS`` in ``src/ansible_builder/_target_scripts/introspect.py``. +If you need to include one of these ignored package names, use the ``--user-pip`` option of the +``introspect`` command to list it in the user requirements file. Packages supplied this way are +not processed against the list of excluded Python packages. -If you need to include one of these ignored package names, use the ``--user-pip`` option of the ``introspect`` command to list it in the user requirements file. Packages supplied this way are not processed against the list of excluded Python packages. +.. note:: + + These dependencies are subject to the same `PEP 508 `_ format + restrictions described for Python requirements in the :ref:`EE definition specification `. System-level Dependencies ^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/docs/definition.rst b/docs/definition.rst index 59389e7c..281c6fb9 100644 --- a/docs/definition.rst +++ b/docs/definition.rst @@ -237,10 +237,20 @@ The following keys are valid for this section: ``requirements.yml`` file (see below for examples). Read more about the requirements file format in the `Galaxy user guide `_. + .. _python-pep508: + ``python`` The Python installation requirements. This may either be a filename, or a list of requirements (see below for an example). + .. note:: + + Python requirement specifications are expected to be limited to features defined by + `PEP 508 `_. Hash tag comments will always be allowed. + Any deviation from this specification will be passed through to pip unverified and unaltered, + although this is considered undefined and unsupported behavior. It is not recommended that + you depend on this behavior. + ``python_interpreter`` A dictionary that defines the Python system package name to be installed by ``dnf`` (``package_system``) and/or a path to the Python interpreter to be used From 6c3651b1e2a4807fa49edc5ecc436c741da34134 Mon Sep 17 00:00:00 2001 From: David Shrewsbury Date: Mon, 22 Apr 2024 14:43:42 -0400 Subject: [PATCH 25/28] Fix schema and add new integration test for exclude files --- src/ansible_builder/ee_schema.py | 44 +++++++++++++------------ test/data/v3/complete/ee.yml | 7 ++++ test/integration/test_create.py | 56 ++++++++++++++++++++++++++++++++ 3 files changed, 87 insertions(+), 20 deletions(-) diff --git a/src/ansible_builder/ee_schema.py b/src/ansible_builder/ee_schema.py index b1239c1a..dcb71439 100644 --- a/src/ansible_builder/ee_schema.py +++ b/src/ansible_builder/ee_schema.py @@ -273,26 +273,30 @@ ], }, "exclude": { - "python": { - "description": "A list of python package names", - "type": "array", - "items": { - "type: string" - } - }, - "system": { - "description": "A list of system package names", - "type": "array", - "items": { - "type: string" - } - }, - "all_from_collections": { - "description": "A list of collection names", - "type": "array", - "items": { - "type": "string" - } + "type": "object", + "additionalProperties": False, + "properties": { + "python": { + "description": "A list of python package names", + "type": "array", + "items": { + "type": "string" + } + }, + "system": { + "description": "A list of system package names", + "type": "array", + "items": { + "type": "string" + } + }, + "all_from_collections": { + "description": "A list of collection names", + "type": "array", + "items": { + "type": "string" + } + }, }, }, }, diff --git a/test/data/v3/complete/ee.yml b/test/data/v3/complete/ee.yml index a0bf3ee6..534002d5 100644 --- a/test/data/v3/complete/ee.yml +++ b/test/data/v3/complete/ee.yml @@ -32,6 +32,13 @@ dependencies: system: - python311 - mysql + exclude: + python: + - foo + system: + - bar + all_from_collections: + - a.b additional_build_files: - src: files/random.cfg diff --git a/test/integration/test_create.py b/test/integration/test_create.py index 155ce974..7e1f7dbf 100644 --- a/test/integration/test_create.py +++ b/test/integration/test_create.py @@ -542,3 +542,59 @@ def test_v3_additional_build_files(cli, build_dir_and_ee_yml): cfg.touch() cli(f'ansible-builder create -c {tmpdir} -f {eeyml} --output-filename Containerfile') + + +def test_exclude_files_created(cli, build_dir_and_ee_yml): + """ + Test that the bindep and Python requirement exclude files are created. + """ + tmpdir, eeyml = build_dir_and_ee_yml( + """ + version: 3 + images: + base_image: + name: quay.io/ansible/awx-ee:latest + dependencies: + galaxy: + collections: + - name: community.crypto + exclude: + python: + - pyyaml + system: + - openssh-client + all_from_collections: + - a.b + """ + ) + + cli(f'ansible-builder create -c {tmpdir} -f {eeyml} --output-filename Containerfile') + + # Validate that the bindep exclude file is created with proper content + bindep_exclude = tmpdir / constants.user_content_subfolder / f"exclude-{constants.STD_BINDEP_FILENAME}" + assert bindep_exclude.exists() + text = bindep_exclude.read_text() + assert text == "openssh-client" + + # Validate that the Python requirements exclude file is created with proper content + pyreq_exclude = tmpdir / constants.user_content_subfolder / f"exclude-{constants.STD_PIP_FILENAME}" + assert pyreq_exclude.exists() + text = pyreq_exclude.read_text() + assert text == "pyyaml" + + # Validate that the collections exclude file is created with proper content + coll_exclude = tmpdir / constants.user_content_subfolder / f"{constants.EXCL_COLLECTIONS_FILENAME}" + assert coll_exclude.exists() + text = coll_exclude.read_text() + assert text == "a.b" + + # Validate that the EE will use the exclude files + containerfile = tmpdir / "Containerfile" + assert containerfile.exists() + text = containerfile.read_text() + expected_introspect_command = "RUN $PYCMD /output/scripts/introspect.py introspect" \ + f" --exclude-pip-reqs=exclude-{constants.STD_PIP_FILENAME}" \ + f" --exclude-bindep-reqs=exclude-{constants.STD_BINDEP_FILENAME}" \ + f" --exclude-collection-reqs={constants.EXCL_COLLECTIONS_FILENAME}" \ + " --write-bindep=/tmp/src/bindep.txt --write-pip=/tmp/src/requirements.txt\n" + assert expected_introspect_command in text From a438f944044c9a21ad4beb7e2bf3098fadffc063 Mon Sep 17 00:00:00 2001 From: David Shrewsbury Date: Mon, 22 Apr 2024 15:57:04 -0400 Subject: [PATCH 26/28] Add all_from_collections to documentation --- docs/definition.rst | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/docs/definition.rst b/docs/definition.rst index 281c6fb9..7087e82a 100644 --- a/docs/definition.rst +++ b/docs/definition.rst @@ -265,7 +265,9 @@ The following keys are valid for this section: of referenced collections. These exclusions will not apply to the user supplied Python or system dependencies, nor will they apply to dependencies of dependencies (top-level only). Python dependency exclusions should be a list of package names appearing under the ``python`` key name. System dependency - exclusions should be a list of system package names appearing under the ``system`` key name. + exclusions should be a list of system package names appearing under the ``system`` key name. If you + want to exclude *all* Python and system dependencies from one or more collections, supply the list + of collection names under the ``all_from_collections`` key. The exclusion string should be the simple name of the requirement you want excluded. For example, if you need to exclude the system requirement that appears as ``foo [!platform:gentoo]`` within @@ -282,6 +284,9 @@ The following keys are valid for this section: - docker system: - python3-Cython + all_from_collections: + - community.crypto + - community.docker .. note:: The ``exclude`` option requires ``ansible-builder`` version ``3.1`` or newer. From 17e795d60820e0122d46b1317b2fb35293045058 Mon Sep 17 00:00:00 2001 From: David Shrewsbury Date: Thu, 2 May 2024 13:42:21 -0400 Subject: [PATCH 27/28] Allow regex matching in excludes --- docs/definition.rst | 47 ++++++++++----- .../_target_scripts/introspect.py | 42 ++++++++++---- test/unit/test_introspect.py | 58 ++++++++++++++++++- 3 files changed, 122 insertions(+), 25 deletions(-) diff --git a/docs/definition.rst b/docs/definition.rst index 7087e82a..caa7c6ea 100644 --- a/docs/definition.rst +++ b/docs/definition.rst @@ -261,20 +261,39 @@ The following keys are valid for this section: be a filename, or a list of requirements (see below for an example). ``exclude`` - A list of Python or system requirements to be excluded from the top-level dependency requirements - of referenced collections. These exclusions will not apply to the user supplied Python or system - dependencies, nor will they apply to dependencies of dependencies (top-level only). Python dependency - exclusions should be a list of package names appearing under the ``python`` key name. System dependency - exclusions should be a list of system package names appearing under the ``system`` key name. If you - want to exclude *all* Python and system dependencies from one or more collections, supply the list - of collection names under the ``all_from_collections`` key. + A dictionary defining the Python or system requirements to be excluded from the top-level dependency + requirements of referenced collections. These exclusions will not apply to the user supplied Python or + system dependencies, nor will they apply to dependencies of dependencies (top-level only). - The exclusion string should be the simple name of the requirement you want excluded. For example, - if you need to exclude the system requirement that appears as ``foo [!platform:gentoo]`` within - an included collection, then your exclusion string should be ``foo``. To exclude the Python - requirement ``bar == 1.0.0``, your exclusion string would be ``bar``. + The following keys are valid for this section: - Example: + * ``python`` - A list of Python dependencies to be excluded. + * ``system`` - A list of system dependencies to be excluded. + * ``all_from_collections`` - If you want to exclude *all* Python and system dependencies from one or + more collections, supply a list of collection names under this key. + + The exclusion feature supports two forms of matching: + + * Simple name matching. + * Advanced name matching using regular expressions. + + For simple name matching, you need only supply the name of the requirement/collection to match. + All values will be compared in a case-insensitive manner. + + For advanced name matching, begin the exclusion string with the tilde (``~``) character to + indicate that the remaining portion of the string is a regular expression to be used to match + a requirement/collection name. The regex should be considered case-insensitive. + + .. note:: + The regular expression must match the full requirement/collection name. For example, ``~foo.`` + does not fully match the name ``foobar``, but ``~foo.+`` does. + + With both forms of matching, the exclusion string will be compared against the *simple* name of + any Python or system requirement. For example, if you need to exclude the system requirement that + appears as ``foo [!platform:gentoo]`` within an included collection, then your exclusion string should be + ``foo``. To exclude the Python requirement ``bar == 1.0.0``, your exclusion string would be ``bar``. + + Example using both simple and advanced matching: .. code:: yaml @@ -285,8 +304,8 @@ The following keys are valid for this section: system: - python3-Cython all_from_collections: - - community.crypto - - community.docker + # Regular expression to exclude all from community collections + - ~community\..+ .. note:: The ``exclude`` option requires ``ansible-builder`` version ``3.1`` or newer. diff --git a/src/ansible_builder/_target_scripts/introspect.py b/src/ansible_builder/_target_scripts/introspect.py index 66a87617..43c97825 100644 --- a/src/ansible_builder/_target_scripts/introspect.py +++ b/src/ansible_builder/_target_scripts/introspect.py @@ -276,6 +276,26 @@ def strip_comments(reqs: dict[str, list]) -> dict[str, list]: return result +def should_be_excluded(value: str, exclusion_list: list[str]) -> bool: + """ + Test if `value` matches against any value in `exclusion_list`. + + The exclusion_list values are either strings to be compared in a case-insensitive + manner against value, OR, they are regular expressions to be tested against the + value. A regular expression will contain '~' as the first character. + + :return: True if the value should be excluded, False otherwise. + """ + for exclude_value in exclusion_list: + if exclude_value[0] == "~": + pattern = exclude_value[1:] + if re.fullmatch(pattern.lower(), value.lower()): + return True + elif exclude_value.lower() == value.lower(): + return True + return False + + def filter_requirements(reqs: dict[str, list], exclude: list[str] | None = None, exclude_collections: list[str] | None = None, @@ -302,16 +322,16 @@ def filter_requirements(reqs: dict[str, list], collection_ignore_list: list[str] = [] if exclude: - exclusions = [r.lower() for r in exclude] + exclusions = exclude.copy() if exclude_collections: - collection_ignore_list = [c.lower() for c in exclude_collections] + collection_ignore_list = exclude_collections.copy() annotated_lines: list[str] = [] uncommented_reqs = strip_comments(reqs) for collection, lines in uncommented_reqs.items(): # Bypass this collection if we've been told to ignore all requirements from it. - if collection.lower() in collection_ignore_list: + if should_be_excluded(collection, collection_ignore_list): logger.debug("# Excluding all requirements from collection '%s'", collection) continue @@ -332,14 +352,16 @@ def filter_requirements(reqs: dict[str, list], # bindep system requirements have the package name as the first "word" on the line name = line.split(maxsplit=1)[0] - lower_name = name.lower() + if collection.lower() not in {'user', 'exclude'}: + lower_name = name.lower() - if lower_name in exclusions and collection not in {'user', 'exclude'}: - logger.debug("# Explicitly excluding requirement '%s' from '%s'", name, collection) - continue - if lower_name in EXCLUDE_REQUIREMENTS and collection not in {'user', 'exclude'}: - logger.debug("# Excluding requirement '%s' from '%s'", name, collection) - continue + if lower_name in EXCLUDE_REQUIREMENTS: + logger.debug("# Excluding requirement '%s' from '%s'", name, collection) + continue + + if should_be_excluded(lower_name, exclusions): + logger.debug("# Explicitly excluding requirement '%s' from '%s'", name, collection) + continue annotated_lines.append(f'{line} # from collection {collection}') diff --git a/test/unit/test_introspect.py b/test/unit/test_introspect.py index 74ab4b6b..3e57cc63 100644 --- a/test/unit/test_introspect.py +++ b/test/unit/test_introspect.py @@ -123,7 +123,7 @@ def test_yaml_extension(data_dir): } -def test_sanitize_pep508(): +def test_filter_requirements_pep508(): reqs = { 'a.b': [ 'foo[ext1,ext3] == 1', @@ -370,3 +370,59 @@ def test_filter_requirements_excludes_collections(): ] assert filter_requirements(reqs, exclude_collections=excluded_collections) == expected + + +def test_requirement_regex_exclusions(): + reqs = { + "a.b": [ + "foo", + "shimmy", + "kungfoo", + "aaab", + ], + "c.d": [ + "foobar", + "shake", + "ab", + ] + } + + excluded = [ + "Foo", # straight string comparison (case shouldn't matter) + "foo.", # straight string comparison (shouldn't match) + "~foo.", # regex (shouldn't match b/c not full string match) + "~Sh.*", # regex (case shouldn't matter) + "~^.+ab", # regex + ] + + expected = [ + "kungfoo # from collection a.b", + "foobar # from collection c.d", + "ab # from collection c.d" + ] + + assert filter_requirements(reqs, excluded) == expected + + +def test_collection_regex_exclusions(): + reqs = { + "a.b": ["foo"], + "c.d": ["bar"], + "ab.cd": ["foobar"], + "e.f": ["baz"], + "be.fun": ["foobaz"], + } + + excluded_collections = [ + r"~A\..+", # regex (case shouldn't matter) + "E.F", # straight string comparison (case shouldn't matter) + "~b.c", # regex (shouldn't match b/c not full string match) + ] + + expected = [ + "bar # from collection c.d", + "foobar # from collection ab.cd", + "foobaz # from collection be.fun", + ] + + assert filter_requirements(reqs, exclude_collections=excluded_collections) == expected From 865cb9c4170dfe58e95b43a807a454c636b0e277 Mon Sep 17 00:00:00 2001 From: David Shrewsbury Date: Tue, 4 Jun 2024 10:01:59 -0400 Subject: [PATCH 28/28] Add packaging to base install --- src/ansible_builder/containerfile.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ansible_builder/containerfile.py b/src/ansible_builder/containerfile.py index 82b9756a..c0c70513 100644 --- a/src/ansible_builder/containerfile.py +++ b/src/ansible_builder/containerfile.py @@ -145,7 +145,7 @@ def prepare(self) -> None: self._insert_global_args() if image == "base": - self.steps.append("RUN $PYCMD -m pip install --no-cache-dir bindep pyyaml") + self.steps.append("RUN $PYCMD -m pip install --no-cache-dir bindep pyyaml packaging") else: # For an EE schema earlier than v3 with a custom builder image, we always make sure pip is available. context_dir = Path(self.build_outputs_dir).stem