From a82d3d7b2335f97c6a6775b42c63edc896b9b3f3 Mon Sep 17 00:00:00 2001 From: Ignas Anikevicius <240938+aignas@users.noreply.github.com> Date: Thu, 21 Dec 2023 09:45:09 +0900 Subject: [PATCH] feat(toolchain, pip.parse): introduce a new '_host' toolchain repo This is for passing it in repository_rules and relies on the canonical label representation introduced in bazel 6.0 and symlink support (needs to be present on Windows) to work. This allows the users to not need to `load` the interpreter label from a `.bzl` file but instead specify the label in the form of `@<name>_host//:python`. Work towards #1643. --- CHANGELOG.md | 10 ++++ examples/build_file_generation/WORKSPACE | 4 +- examples/multi_python_versions/WORKSPACE | 12 ++--- examples/pip_parse/WORKSPACE | 3 +- examples/pip_parse_vendored/WORKSPACE | 3 +- examples/pip_repository_annotations/WORKSPACE | 3 +- python/private/bzlmod/pip.bzl | 6 ++- python/private/bzlmod/pythons_hub.bzl | 23 +++++++--- python/private/toolchains_repo.bzl | 46 ++++++++++++++++++- python/repositories.bzl | 8 ++++ 10 files changed, 91 insertions(+), 27 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index b524536b8c..58b33b10aa 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -46,6 +46,11 @@ A brief description of the categories of changes: specifying a local system interpreter. * (bzlmod pip.parse) Requirements files with duplicate entries for the same package (e.g. one for the package, one for an extra) now work. +* (bzlmod pip.parse) Use the same interpreter label that may make the lock file + almost the same for all platforms. It will, however, be different in cases where + the lock file is introducing platform-specific deps (e.g. `colorama` may be + present only in the `windows` specific requirements lock file because it + is not used elsewhere). 
### Added @@ -53,6 +58,11 @@ A brief description of the categories of changes: * (gazelle) `file` generation mode can now also add `__init__.py` to the srcs attribute for every target in the package. This is enabled through a separate directive `python_generation_mode_per_file_include_init`. +* (toolchains) `python_register_toolchains` now also generates a repository + whose name is suffixed with `_host` and that has a single label `:python`, a + symlink to the Python interpreter for the host platform. The intended use is + mainly in `repository_rule`s, which are always run using the `host` platform + Python. [0.XX.0]: https://github.com/bazelbuild/rules_python/releases/tag/0.XX.0 diff --git a/examples/build_file_generation/WORKSPACE b/examples/build_file_generation/WORKSPACE index e283260ea2..3f1fad8a8d 100644 --- a/examples/build_file_generation/WORKSPACE +++ b/examples/build_file_generation/WORKSPACE @@ -84,8 +84,6 @@ python_register_toolchains( python_version = "3.9", ) -# Load the interpreter and pip_parse rules. -load("@python39//:defs.bzl", "interpreter") load("@rules_python//python:pip.bzl", "pip_parse") # This macro wraps the `pip_repository` rule that invokes `pip`, with `incremental` set. @@ -114,7 +112,7 @@ pip_parse( # 3. Wrapper script, like in the autodetecting python toolchain. # # Here, we use the interpreter constant that resolves to the host interpreter from the default Python toolchain. - python_interpreter_target = interpreter, + python_interpreter_target = "@python39_host//:python", # Set the location of the lock file. 
requirements_lock = "//:requirements_lock.txt", requirements_windows = "//:requirements_windows.txt", diff --git a/examples/multi_python_versions/WORKSPACE b/examples/multi_python_versions/WORKSPACE index 35855ca1e1..f3a69ce769 100644 --- a/examples/multi_python_versions/WORKSPACE +++ b/examples/multi_python_versions/WORKSPACE @@ -28,19 +28,15 @@ python_register_multi_toolchains( ) load("@python//:pip.bzl", "multi_pip_parse") -load("@python//3.10:defs.bzl", interpreter_3_10 = "interpreter") -load("@python//3.11:defs.bzl", interpreter_3_11 = "interpreter") -load("@python//3.8:defs.bzl", interpreter_3_8 = "interpreter") -load("@python//3.9:defs.bzl", interpreter_3_9 = "interpreter") multi_pip_parse( name = "pypi", default_version = default_python_version, python_interpreter_target = { - "3.10": interpreter_3_10, - "3.11": interpreter_3_11, - "3.8": interpreter_3_8, - "3.9": interpreter_3_9, + "3.10": "@python_3_10_host//:python", + "3.11": "@python_3_11_host//:python", + "3.8": "@python_3_8_host//:python", + "3.9": "@python_3_9_host//:python", }, requirements_lock = { "3.10": "//requirements:requirements_lock_3_10.txt", diff --git a/examples/pip_parse/WORKSPACE b/examples/pip_parse/WORKSPACE index 415d064ed6..1a3a6b081f 100644 --- a/examples/pip_parse/WORKSPACE +++ b/examples/pip_parse/WORKSPACE @@ -14,7 +14,6 @@ python_register_toolchains( python_version = "3.9", ) -load("@python39//:defs.bzl", "interpreter") load("@rules_python//python:pip.bzl", "pip_parse") pip_parse( @@ -52,7 +51,7 @@ pip_parse( # 3. Wrapper script, like in the autodetecting python toolchain. # # Here, we use the interpreter constant that resolves to the host interpreter from the default Python toolchain. - python_interpreter_target = interpreter, + python_interpreter_target = "@python39_host//:python", # (Optional) You can set quiet to False if you want to see pip output. 
#quiet = False, diff --git a/examples/pip_parse_vendored/WORKSPACE b/examples/pip_parse_vendored/WORKSPACE index 157f70aeb6..e0b7c86b62 100644 --- a/examples/pip_parse_vendored/WORKSPACE +++ b/examples/pip_parse_vendored/WORKSPACE @@ -14,14 +14,13 @@ python_register_toolchains( python_version = "3.9", ) -load("@python39//:defs.bzl", "interpreter") load("@rules_python//python:pip.bzl", "pip_parse") # This repository isn't referenced, except by our test that asserts the requirements.bzl is updated. # It also wouldn't be needed by users of this ruleset. pip_parse( name = "pip", - python_interpreter_target = interpreter, + python_interpreter_target = "@python39_host//:python", requirements_lock = "//:requirements.txt", ) diff --git a/examples/pip_repository_annotations/WORKSPACE b/examples/pip_repository_annotations/WORKSPACE index 35350550ef..8540555084 100644 --- a/examples/pip_repository_annotations/WORKSPACE +++ b/examples/pip_repository_annotations/WORKSPACE @@ -14,7 +14,6 @@ python_register_toolchains( python_version = "3.9", ) -load("@python39//:defs.bzl", "interpreter") load("@rules_python//python:pip.bzl", "package_annotation", "pip_parse") # Here we can see an example of annotations being applied to an arbitrary @@ -54,7 +53,7 @@ write_file( pip_parse( name = "pip", annotations = ANNOTATIONS, - python_interpreter_target = interpreter, + python_interpreter_target = "@python39_host//:python", requirements_lock = "//:requirements.txt", ) diff --git a/python/private/bzlmod/pip.bzl b/python/private/bzlmod/pip.bzl index 6d45a26d7b..f843a8fb04 100644 --- a/python/private/bzlmod/pip.bzl +++ b/python/private/bzlmod/pip.bzl @@ -87,8 +87,10 @@ def _create_whl_repos(module_ctx, pip_attr, whl_map, whl_overrides): # we programmatically find it. 
hub_name = pip_attr.hub_name if python_interpreter_target == None and not pip_attr.python_interpreter: - python_name = "python_" + version_label(pip_attr.python_version, sep = "_") - if python_name not in INTERPRETER_LABELS.keys(): + python_name = "python_{}_host".format( + version_label(pip_attr.python_version, sep = "_"), + ) + if python_name not in INTERPRETER_LABELS: fail(( "Unable to find interpreter for pip hub '{hub_name}' for " + "python_version={version}: Make sure a corresponding " + diff --git a/python/private/bzlmod/pythons_hub.bzl b/python/private/bzlmod/pythons_hub.bzl index 5f536f3b67..3889e13409 100644 --- a/python/private/bzlmod/pythons_hub.bzl +++ b/python/private/bzlmod/pythons_hub.bzl @@ -78,7 +78,7 @@ DEFAULT_PYTHON_VERSION = "{default_python_version}" """ _line_for_hub_template = """\ - "{name}": Label("@{name}_{platform}//:{path}"), + "{key}": Label("@{name}_{platform}//:{path}"), """ def _hub_repo_impl(rctx): @@ -103,11 +103,22 @@ def _hub_repo_impl(rctx): # Create a dict that is later used to create # a symlink to a interpreter. - interpreter_labels = "".join([_line_for_hub_template.format( - name = name, - platform = platform, - path = path, - ) for name in rctx.attr.toolchain_user_repository_names]) + interpreter_labels = "".join([ + _line_for_hub_template.format( + key = name + ("" if platform_str != "host" else "_host"), + name = name, + platform = platform_str, + path = p, + ) + for name in rctx.attr.toolchain_user_repository_names + for platform_str, p in { + # NOTE @aignas 2023-12-21: maintaining the `platform` specific key + # here may be unneeded in the long term, but I am not sure if there + # are other users that depend on it. 
+ platform: path, + "host": "python", + }.items() + ]) rctx.file( "interpreters.bzl", diff --git a/python/private/toolchains_repo.bzl b/python/private/toolchains_repo.bzl index 4b6bd11460..c7b61780d8 100644 --- a/python/private/toolchains_repo.bzl +++ b/python/private/toolchains_repo.bzl @@ -240,8 +240,50 @@ def compile_pip_requirements(name, **kwargs): toolchain_aliases = repository_rule( _toolchain_aliases_impl, - doc = """Creates a repository with a shorter name meant for the host platform, which contains - a BUILD.bazel file declaring aliases to the host platform's targets. + doc = """\ +Creates a repository with a shorter name only referencing the python version, +it contains a BUILD.bazel file declaring aliases to the host platform's targets +and is a great fit for any usage related to setting up toolchains for build +actions.""", + attrs = { + "platforms": attr.string_list( + doc = "List of platforms for which aliases shall be created", + ), + "python_version": attr.string(doc = "The Python version."), + "user_repository_name": attr.string( + mandatory = True, + doc = "The base name for all created repositories, like 'python38'.", + ), + "_rules_python_workspace": attr.label(default = Label("//:WORKSPACE")), + }, +) + +def _host_toolchain_impl(rctx): + rctx.file("BUILD.bazel", """\ +# Generated by python/private/toolchains_repo.bzl + +exports_files(["python"], visibility = ["//visibility:public"]) +""") + + (os_name, arch) = get_host_os_arch(rctx) + host_platform = get_host_platform(os_name, arch) + host_python = rctx.path( + Label( + "@@{py_repository}_{host_platform}//:python".format( + py_repository = rctx.attr.name[:-len("_host")], + host_platform = host_platform, + ), + ), + ) + rctx.symlink(host_python, "python") + +host_toolchain = repository_rule( + _host_toolchain_impl, + doc = """\ +Creates a repository with a shorter name meant to be used in the repository_ctx, +which needs to have `symlinks` for the interpreter. 
This is separate from the +toolchain_aliases repo because referencing the `python` interpreter target from +this repo causes an eager fetch of the toolchain for the host platform. """, attrs = { "platforms": attr.string_list( diff --git a/python/repositories.bzl b/python/repositories.bzl index e444c49a2b..01a3b108ad 100644 --- a/python/repositories.bzl +++ b/python/repositories.bzl @@ -27,6 +27,7 @@ load("//python/private:full_version.bzl", "full_version") load("//python/private:internal_config_repo.bzl", "internal_config_repo") load( "//python/private:toolchains_repo.bzl", + "host_toolchain", "multi_toolchain_aliases", "toolchain_aliases", "toolchains_repo", @@ -585,6 +586,13 @@ def python_register_toolchains( platform = platform, )) + host_toolchain( + name = name + "_host", + python_version = python_version, + user_repository_name = name, + platforms = loaded_platforms, + ) + toolchain_aliases( name = name, python_version = python_version,