From 2e66eb7147ae1e8a799b2276c7340f48f63a7220 Mon Sep 17 00:00:00 2001 From: Anderson Bravalheri Date: Mon, 1 Nov 2021 19:47:33 +0000 Subject: [PATCH] Fix 1461: Better loop breaker for `manifest_maker` The inconsistency for the `package_data` configuration in sdists when `include_package_data=True` in #1461 have been causing some problems for the community for a while, as also shown in #2835. As pointed out by [@jaraco](https://github.com/pypa/setuptools/issues/1461#issuecomment-749092366), this was being caused by a mechanism to break the recursion between the `egg_info` and `sdist` commands. In summary the loop is caused by the following behaviour: - the `egg_info` command uses a subclass of `sdist` (`manifest_maker`) to calculate the MANIFEST, - the `sdist` class needs to know the MANIFEST to calculate the data files when `include_package_data=True` Previously, the mechanism to break this loop was to simply ignore the data files in `sdist` when `include_package_data=True`. The approach implemented in this change was to replace this mechanism, by allowing `manifest_maker` to override the `_safe_data_files` method from `sdist`. --- Please notice [an extensive experiment] (https://github.com/abravalheri/experiment-setuptools-package-data) was carried out to investigate the previous confusing behaviour. There is also [a simplified theoretical analysis] (https://github.com/pyscaffold/pyscaffold/pull/535#issuecomment-956296407) comparing the observed behavior in the experiment and the expected one. This analysis point out to the same offender indicated by [@jaraco](https://github.com/pypa/setuptools/issues/1461#issuecomment-749092366) (which is being replaced in this change). --- changelog.d/1461.change.rst | 3 +++ setuptools/command/build_py.py | 13 +++++++++++++ setuptools/command/egg_info.py | 11 +++++++++++ setuptools/command/sdist.py | 13 ++++++++----- setuptools/tests/test_sdist.py | 24 ++++++++++++++++++++++++ 5 files changed, 59 insertions(+), 5 deletions(-) create mode 100644 changelog.d/1461.change.rst diff --git a/changelog.d/1461.change.rst b/changelog.d/1461.change.rst new file mode 100644 index 0000000000..fa940dbfff --- /dev/null +++ b/changelog.d/1461.change.rst @@ -0,0 +1,3 @@ +Fix inconsistency with ``include_package_data`` and ``packages_data`` in sdist +by replacing the loop breaking mechanism between the ``sdist`` and +``egg_info`` commands -- by :user:`abravalheri` diff --git a/setuptools/command/build_py.py b/setuptools/command/build_py.py index 6a61543342..f71c5a569f 100644 --- a/setuptools/command/build_py.py +++ b/setuptools/command/build_py.py @@ -67,6 +67,19 @@ def _get_data_files(self): self.analyze_manifest() return list(map(self._get_pkg_data_files, self.packages or ())) + def get_data_files_without_manifest(self): + """ + Generate list of ``(package,src_dir,build_dir,filenames)`` tuples, + but without triggering any attempt to analyze or build the manifest. + """ + # Avoid triggering dynamic behavior in __getattr__ + if 'data_files' in self.__dict__: + return self.data_files + # Prevent eventual errors from unset `manifest_files` + # (that would otherwise be set by `analyze_manifest`) + self.__dict__.setdefault('manifest_files', {}) + return list(map(self._get_pkg_data_files, self.packages or ())) + def _get_pkg_data_files(self, package): # Locate package source directory src_dir = self.get_package_dir(package) diff --git a/setuptools/command/egg_info.py b/setuptools/command/egg_info.py index 18b81340a7..ff009861c1 100644 --- a/setuptools/command/egg_info.py +++ b/setuptools/command/egg_info.py @@ -608,6 +608,17 @@ def prune_file_list(self): self.filelist.exclude_pattern(r'(^|' + sep + r')(RCS|CVS|\.svn)' + sep, is_regex=1) + def _safe_data_files(self, build_py): + """ + The parent class implementation of this method (``sdist``) will try to + include data files, which might cause recursion problems, when + ``include_package_data=True``. + + Therefore we have to avoid triggering any attempt of analyzing/building + the manifest again. + """ + return build_py.get_data_files_without_manifest() + def write_file(filename, contents): """Create a file with the specified name and write 'contents' (a diff --git a/setuptools/command/sdist.py b/setuptools/command/sdist.py index e8062f2e41..0285b690fc 100644 --- a/setuptools/command/sdist.py +++ b/setuptools/command/sdist.py @@ -114,12 +114,15 @@ def _add_defaults_python(self): def _safe_data_files(self, build_py): """ - Extracting data_files from build_py is known to cause - infinite recursion errors when `include_package_data` - is enabled, so suppress it in that case. + Since the ``sdist`` class is also used to compute the MANIFEST + (via :obj:`setuptools.command.egg_info.manifest_maker`), + there might be recursion problems when trying to obtain the list of + data_files and ``include_package_data=True`` (which in turn depends on + the files included in the MANIFEST). + + To avoid that, ``manifest_maker`` should be able to overwrite this + method and avoid recursive attempts to build/analyze the MANIFEST. """ - if self.distribution.include_package_data: - return () return build_py.data_files def _add_data_files(self, data_files): diff --git a/setuptools/tests/test_sdist.py b/setuptools/tests/test_sdist.py index 049fdcc05a..98eb1a4f24 100644 --- a/setuptools/tests/test_sdist.py +++ b/setuptools/tests/test_sdist.py @@ -126,6 +126,30 @@ def test_package_data_in_sdist(self): assert os.path.join('sdist_test', 'c.rst') not in manifest assert os.path.join('d', 'e.dat') in manifest + def test_package_data_and_include_package_data_in_sdist(self): + """Make sure there is no recursion when ``include_package_data=True``. + + The mention to a recursion can be first found in: + https://github.com/pypa/setuptools/commit/f6cb3d29d919ff6b9313b8a3281c66cfb368c29b + """ + + setup_attrs = {**SETUP_ATTRS, 'include_package_data': True} + assert setup_attrs['package_data'] # make sure we have both options + + dist = Distribution(setup_attrs) + dist.script_name = 'setup.py' + cmd = sdist(dist) + cmd.ensure_finalized() + + with quiet(): + cmd.run() + + manifest = cmd.filelist.files + assert os.path.join('sdist_test', 'a.txt') in manifest + assert os.path.join('sdist_test', 'b.txt') in manifest + assert os.path.join('sdist_test', 'c.rst') not in manifest + assert os.path.join('d', 'e.dat') in manifest + def test_setup_py_exists(self): dist = Distribution(SETUP_ATTRS) dist.script_name = 'foo.py'