From d4b4d6d9d3eea3a470b649ac16dbda38fba76d98 Mon Sep 17 00:00:00 2001 From: "Brian M. Carr" Date: Tue, 19 Feb 2019 14:43:24 -0600 Subject: [PATCH] Add support for data_files #890 --- docs/docs/pyproject.md | 53 +++++++++++++++++++ poetry/masonry/builders/builder.py | 1 + poetry/masonry/builders/sdist.py | 18 ++++++- poetry/masonry/builders/wheel.py | 18 +++++-- poetry/masonry/utils/data_file_include.py | 16 ++++++ poetry/masonry/utils/include.py | 2 +- poetry/masonry/utils/module.py | 10 +++- poetry/masonry/utils/package_include.py | 2 +- poetry/packages/project_package.py | 1 + poetry/poetry.py | 3 ++ .../builders/fixtures/data_files/a.txt | 0 .../data_files/data_files_example/__init__.py | 0 .../fixtures/data_files/pyproject.toml | 10 ++++ .../data_files/subdir/subsubdir/b.txt | 0 .../data_files/subdir/subsubdir/c.csv | 0 tests/masonry/builders/test_sdist.py | 35 ++++++++++++ tests/masonry/builders/test_wheel.py | 25 +++++++++ 17 files changed, 186 insertions(+), 8 deletions(-) create mode 100644 poetry/masonry/utils/data_file_include.py create mode 100644 tests/masonry/builders/fixtures/data_files/a.txt create mode 100644 tests/masonry/builders/fixtures/data_files/data_files_example/__init__.py create mode 100644 tests/masonry/builders/fixtures/data_files/pyproject.toml create mode 100644 tests/masonry/builders/fixtures/data_files/subdir/subsubdir/b.txt create mode 100644 tests/masonry/builders/fixtures/data_files/subdir/subsubdir/c.csv diff --git a/docs/docs/pyproject.md b/docs/docs/pyproject.md index ced3a0b9a09..88ee00cd525 100644 --- a/docs/docs/pyproject.md +++ b/docs/docs/pyproject.md @@ -133,6 +133,59 @@ packages = [ Poetry is clever enough to detect Python subpackages. Thus, you only have to specify the directory where your root package resides. + +## data_files + +A list of files to be installed using the [data_files](https://docs.python.org/2/distutils/setupscript.html#installing-additional-files) +installation mechanism. 
Data files are particularly useful when shipping non-code artifacts that may need to be used +by other packages at a well-known location. Example uses include distribution of Protobuf proto definition files and +Avro avsc schemas. + +```toml +[tool.poetry.data_files] +my_package_name = ["a.txt", "subdir/subsubdir/b.txt"] +"my_target_dir.has.dots" = ["**/*.txt"] +"my_package_name/with/subdirectories" = ["subdir/subsubdir/b.txt"] +``` + +### Effect on produced wheel archives + +The above TOML snippet will result in the addition of a [.data](https://www.python.org/dev/peps/pep-0427/#the-data-directory) +directory to your wheel. For example, given my package, `my_package_name`, and a version of `2.3.4`, the wheel will now +contain the following: + +``` +my_package_name-2.3.4.data/my_package_name/a.txt +my_package_name-2.3.4.data/my_package_name/b.txt +my_package_name-2.3.4.data/my_target_dir.has.dots/a.txt +my_package_name-2.3.4.data/my_target_dir.has.dots/b.txt +my_package_name-2.3.4.data/my_package_name/with/subdirectories/b.txt +``` + +All of the entries added to the wheel are also added to the RECORD with their appropriate secure hashes. + +### Effect on produced sdist archives + +The above TOML snippet will result in the addition of the following `data_files` element to the generated setup.py: + +```python +# [...] +data_files = \ +[('my_package_name', ['a.txt', 'subdir/subsubdir/b.txt']), + ('my_target_dir.has.dots', ['a.txt', 'subdir/subsubdir/b.txt']), + ('my_package_name/with/subdirectories', ['subdir/subsubdir/b.txt'])] + +setup_kwargs = { + # [...] + 'data_files': data_files, +} +``` + + +!!!note + + The path information in the files or globs is discarded during installation. If you need your files to be placed + in a nested directory, it must be a part of the "name" of the `data_files` element. 
## include and exclude diff --git a/poetry/masonry/builders/builder.py b/poetry/masonry/builders/builder.py index d0494726e7c..aa47c7b436c 100644 --- a/poetry/masonry/builders/builder.py +++ b/poetry/masonry/builders/builder.py @@ -38,6 +38,7 @@ def __init__(self, poetry, env, io): self._path.as_posix(), packages=self._package.packages, includes=self._package.include, + data_files=self._package.data_files, ) self._meta = Metadata.from_package(self._package) diff --git a/poetry/masonry/builders/sdist.py b/poetry/masonry/builders/sdist.py index cb0959f115b..5ae289dae52 100644 --- a/poetry/masonry/builders/sdist.py +++ b/poetry/masonry/builders/sdist.py @@ -15,6 +15,7 @@ from poetry.utils._compat import to_str from ..utils.helpers import normalize_file_permissions +from ..utils.data_file_include import DataFileInclude from ..utils.package_include import PackageInclude from .builder import Builder @@ -119,6 +120,7 @@ def build_setup(self): # type: () -> bytes modules = [] packages = [] package_data = {} + data_files = {} for include in self._module.includes: if isinstance(include, PackageInclude): if include.is_package(): @@ -137,6 +139,10 @@ def build_setup(self): # type: () -> bytes if module not in modules: modules.append(module) + elif isinstance(include, DataFileInclude): + data_files.setdefault(include.data_file_path_prefix, []).extend( + str(element.relative_to(self._path)) for element in include.elements + ) else: pass @@ -153,8 +159,16 @@ def build_setup(self): # type: () -> bytes extra.append("'package_data': package_data,") if modules: - before.append("modules = \\\n{}".format(pformat(modules))) - extra.append("'py_modules': modules,".format()) + before.append("modules = \\\n{}\n".format(pformat(modules))) + extra.append("'py_modules': modules,") + + if data_files: + before.append( + "data_files = \\\n{}\n".format( + pformat([(k, v) for k, v in data_files.items()]) + ) + ) + extra.append("'data_files': data_files,") dependencies, extras = 
self.convert_dependencies( self._package, self._package.requires diff --git a/poetry/masonry/builders/wheel.py b/poetry/masonry/builders/wheel.py index fd7382480cd..cd6c6800701 100644 --- a/poetry/masonry/builders/wheel.py +++ b/poetry/masonry/builders/wheel.py @@ -17,6 +17,7 @@ from poetry.semver import parse_constraint from ..utils.helpers import normalize_file_permissions +from ..utils.data_file_include import DataFileInclude from ..utils.package_include import PackageInclude from ..utils.tags import get_abbr_impl from ..utils.tags import get_abi_tag @@ -24,6 +25,7 @@ from ..utils.tags import get_platform from .builder import Builder +from poetry.utils._compat import Path wheel_file_template = """\ Wheel-Version: 1.0 @@ -136,6 +138,14 @@ def _copy_module(self, wheel): if isinstance(include, PackageInclude) and include.source: rel_file = file.relative_to(include.base) + elif isinstance(include, DataFileInclude): + rel_file = Path( + self.wheel_meta_dir_name( + self._package.name, self._meta.version, "data" + ), + include.data_file_path_prefix, + file.name, + ) else: rel_file = file.relative_to(self._path) @@ -192,7 +202,7 @@ def find_excluded_files(self): # type: () -> Set @property def dist_info(self): # type: () -> str - return self.dist_info_name(self._package.name, self._meta.version) + return self.wheel_meta_dir_name(self._package.name, self._meta.version) @property def wheel_filename(self): # type: () -> str @@ -207,11 +217,13 @@ def supports_python2(self): parse_constraint(">=2.0.0 <3.0.0") ) - def dist_info_name(self, distribution, version): # type: (...) -> str + def wheel_meta_dir_name( + self, distribution, version, suffix="dist-info" + ): # type: (...) 
-> str escaped_name = re.sub(r"[^\w\d.]+", "_", distribution, flags=re.UNICODE) escaped_version = re.sub(r"[^\w\d.]+", "_", version, flags=re.UNICODE) - return "{}-{}.dist-info".format(escaped_name, escaped_version) + return "{}-{}.{}".format(escaped_name, escaped_version, suffix) @property def tag(self): diff --git a/poetry/masonry/utils/data_file_include.py b/poetry/masonry/utils/data_file_include.py new file mode 100644 index 00000000000..7b316ac4667 --- /dev/null +++ b/poetry/masonry/utils/data_file_include.py @@ -0,0 +1,16 @@ +from .include import Include + +# noinspection PyProtectedMember +from poetry.utils._compat import Path + + +class DataFileInclude(Include): + def __init__( + self, base, include, data_file_path_prefix + ): # type: (Path, str, str) -> None + super(DataFileInclude, self).__init__(base, include) + self._data_file_path_prefix = data_file_path_prefix + + @property + def data_file_path_prefix(self): # type: () -> str + return self._data_file_path_prefix diff --git a/poetry/masonry/utils/include.py b/poetry/masonry/utils/include.py index a5978407fba..dff05fc795e 100644 --- a/poetry/masonry/utils/include.py +++ b/poetry/masonry/utils/include.py @@ -21,7 +21,7 @@ def __init__(self, base, include): # type: (Path, str) -> None self._base = base self._include = str(include) - self._elements = sorted(list(self._base.glob(str(self._include)))) + self._elements = sorted(list(self._base.glob(self._include))) @property def base(self): # type: () -> Path diff --git a/poetry/masonry/utils/module.py b/poetry/masonry/utils/module.py index 07c1422c7ef..525988bc3dd 100644 --- a/poetry/masonry/utils/module.py +++ b/poetry/masonry/utils/module.py @@ -2,6 +2,7 @@ from poetry.utils.helpers import module_name from .include import Include +from .data_file_include import DataFileInclude from .package_include import PackageInclude @@ -11,7 +12,9 @@ class ModuleOrPackageNotFound(ValueError): class Module: - def __init__(self, name, directory=".", packages=None, 
includes=None): + def __init__( + self, name, directory=".", packages=None, includes=None, data_files=None + ): self._name = module_name(name) self._in_src = False self._is_package = False @@ -19,6 +22,7 @@ def __init__(self, name, directory=".", packages=None, includes=None): self._includes = [] packages = packages or [] includes = includes or [] + data_files = data_files or {} if not packages: # It must exist either as a .py file or a directory, but not both @@ -62,6 +66,10 @@ def __init__(self, name, directory=".", packages=None, includes=None): PackageInclude(self._path, package["include"], package.get("from")) ) + for base, globs in data_files.items(): + for glob in globs: + self._includes.append(DataFileInclude(self._path, glob, base)) + for include in includes: self._includes.append(Include(self._path, include)) diff --git a/poetry/masonry/utils/package_include.py b/poetry/masonry/utils/package_include.py index 5fe597bf430..2fcf51236bf 100644 --- a/poetry/masonry/utils/package_include.py +++ b/poetry/masonry/utils/package_include.py @@ -26,7 +26,7 @@ def source(self): # type: () -> str def is_package(self): # type: () -> bool return self._is_package - def is_module(self): # type: () + def is_module(self): # type: () -> bool return self._is_module def refresh(self): # type: () -> PackageInclude diff --git a/poetry/packages/project_package.py b/poetry/packages/project_package.py index d24ef11f359..cd645b267c0 100644 --- a/poetry/packages/project_package.py +++ b/poetry/packages/project_package.py @@ -14,6 +14,7 @@ def __init__(self, name, version, pretty_version=None): self.packages = [] self.include = [] self.exclude = [] + self.data_files = {} if self._python_versions == "*": self._python_constraint = parse_constraint("~2.7 || >=3.4") diff --git a/poetry/poetry.py b/poetry/poetry.py index f9b93cad4a9..6d07215fb23 100644 --- a/poetry/poetry.py +++ b/poetry/poetry.py @@ -184,6 +184,9 @@ def create(cls, cwd): # type: (Path) -> Poetry if "packages" in 
local_config: package.packages = local_config["packages"] + if "data_files" in local_config: + package.data_files = local_config["data_files"] + # Moving lock if necessary (pyproject.lock -> poetry.lock) lock = poetry_file.parent / "poetry.lock" if not lock.exists(): diff --git a/tests/masonry/builders/fixtures/data_files/a.txt b/tests/masonry/builders/fixtures/data_files/a.txt new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/masonry/builders/fixtures/data_files/data_files_example/__init__.py b/tests/masonry/builders/fixtures/data_files/data_files_example/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/masonry/builders/fixtures/data_files/pyproject.toml b/tests/masonry/builders/fixtures/data_files/pyproject.toml new file mode 100644 index 00000000000..d27e6a34c0e --- /dev/null +++ b/tests/masonry/builders/fixtures/data_files/pyproject.toml @@ -0,0 +1,10 @@ +[tool.poetry] +name = "data_files_example" +version = "0.1.0" +description = "An example TOML file describing a package with data_files" +authors = ["delphyne "] + +[tool.poetry.data_files] +easy = ["a.txt", "**/c.csv"] +"a.little.more.difficult" = ["**/*.txt"] +"nested/directories" = ["subdir/subsubdir/b.txt"] \ No newline at end of file diff --git a/tests/masonry/builders/fixtures/data_files/subdir/subsubdir/b.txt b/tests/masonry/builders/fixtures/data_files/subdir/subsubdir/b.txt new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/masonry/builders/fixtures/data_files/subdir/subsubdir/c.csv b/tests/masonry/builders/fixtures/data_files/subdir/subsubdir/c.csv new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/masonry/builders/test_sdist.py b/tests/masonry/builders/test_sdist.py index 3afee699c88..082f0274100 100644 --- a/tests/masonry/builders/test_sdist.py +++ b/tests/masonry/builders/test_sdist.py @@ -510,3 +510,38 @@ def test_proper_python_requires_if_three_digits_precision_version_specified(): parsed = 
p.parsestr(to_str(pkg_info)) assert parsed["Requires-Python"] == "==2.7.15" + + +def test_with_data_files(): + poetry = Poetry.create(project("data_files")) + builder = SdistBuilder(poetry, NullEnv(), NullIO()) + + # Check setup.py + setup = builder.build_setup() + setup_ast = ast.parse(setup) + + setup_ast.body = [n for n in setup_ast.body if isinstance(n, ast.Assign)] + ns = {} + exec(compile(setup_ast, filename="setup.py", mode="exec"), ns) + + assert [ + ("easy", ["a.txt", "subdir/subsubdir/c.csv"]), + ("a.little.more.difficult", ["a.txt", "subdir/subsubdir/b.txt"]), + ("nested/directories", ["subdir/subsubdir/b.txt"]), + ] == ns.get("data_files") + + assert ns.get("setup_kwargs", {}).get("data_files") == ns.get("data_files") + + builder.build() + + sdist = fixtures_dir / "data_files" / "dist" / "data_files_example-0.1.0.tar.gz" + + assert sdist.exists() + + with tarfile.open(str(sdist), "r") as tar: + names = tar.getnames() + assert "data_files_example-0.1.0/data_files_example/__init__.py" in names + assert "data_files_example-0.1.0/a.txt" in names + assert "data_files_example-0.1.0/subdir/subsubdir/b.txt" in names + assert "data_files_example-0.1.0/subdir/subsubdir/c.csv" in names + assert "data_files_example-0.1.0/setup.py" in names diff --git a/tests/masonry/builders/test_wheel.py b/tests/masonry/builders/test_wheel.py index d6aa6c03e9e..8e6d466f417 100644 --- a/tests/masonry/builders/test_wheel.py +++ b/tests/masonry/builders/test_wheel.py @@ -144,3 +144,28 @@ def test_write_metadata_file_license_homepage_default(mocker): # Assertion mocked_file.write.assert_any_call("Home-page: UNKNOWN\n") mocked_file.write.assert_any_call("License: UNKNOWN\n") + + +def test_with_data_files(): + module_path = fixtures_dir / "data_files" + p = Poetry.create(str(module_path)) + WheelBuilder.make(p, NullEnv(), NullIO()) + whl = module_path / "dist" / "data_files_example-0.1.0-py2.py3-none-any.whl" + assert whl.exists() + + with zipfile.ZipFile(str(whl)) as z: + names = 
z.namelist() + with z.open("data_files_example-0.1.0.dist-info/RECORD") as record_file: + record = record_file.readlines() + + def validate(path): + assert path in names + assert ( + len([r for r in record if r.decode("utf-8").startswith(path)]) == 1 + ) + + validate("data_files_example-0.1.0.data/easy/a.txt") + validate("data_files_example-0.1.0.data/easy/c.csv") + validate("data_files_example-0.1.0.data/a.little.more.difficult/a.txt") + validate("data_files_example-0.1.0.data/a.little.more.difficult/b.txt") + validate("data_files_example-0.1.0.data/nested/directories/b.txt")