From 991d4c07722c9ee09c26a97f4dc77b8766639941 Mon Sep 17 00:00:00 2001 From: Ignas Anikevicius <240938+aignas@users.noreply.github.com> Date: Thu, 21 Dec 2023 09:45:09 +0900 Subject: [PATCH] feat(toolchain, pip.parse): introduce a new '_host' toolchain repo This is for passing it in repository_rules and relies on the canonical label representation introduced in bazel 6.0 and symlink support (needs to be present on Windows) to work. This allows the users to not need to `load` the interpreter label from a `.bzl` file but instead specify the label in the form of `@_host//:python`. Work towards #1643. --- CHANGELOG.md | 13 ++++++ examples/build_file_generation/WORKSPACE | 4 +- examples/multi_python_versions/WORKSPACE | 12 ++--- examples/pip_parse/WORKSPACE | 3 +- examples/pip_parse_vendored/BUILD.bazel | 24 ++-------- examples/pip_parse_vendored/WORKSPACE | 3 +- examples/pip_parse_vendored/requirements.bzl | 5 +- examples/pip_repository_annotations/WORKSPACE | 3 +- python/pip_install/pip_repository.bzl | 12 ++--- python/private/bzlmod/pip.bzl | 6 ++- python/private/bzlmod/pythons_hub.bzl | 23 +++++++--- python/private/toolchains_repo.bzl | 46 ++++++++++++++++++- python/repositories.bzl | 8 ++++ 13 files changed, 104 insertions(+), 58 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index af61b44cce..e2652ee71a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -25,8 +25,21 @@ A brief description of the categories of changes: ### Fixed +* (bzlmod pip.parse) Use a platform-independent reference to the interpreter + pip uses. This reduces (but doesn't eliminate) the amount of + platform-specific content in `MODULE.bazel.lock` files; Follow + [#1643](https://github.com/bazelbuild/rules_python/issues/1643) for removing + platform-specific content in `MODULE.bazel.lock` files. + ### Added +* (toolchains) `python_register_toolchains` now also generates a repository + that is suffixed with `_host`, that has a single label `:python` that is a + symlink to the python interpreter for the host platform. The intended use is + mainly in `repository_rule`, which are always run using `host` platform + Python. This means that `WORKSPACE` users can now copy the `requirements.bzl` + file for vendoring as seen in the updated `pip_parse_vendored` example. + ## [0.28.0] - 2024-01-07 [0.28.0]: https://github.com/bazelbuild/rules_python/releases/tag/0.28.0 diff --git a/examples/build_file_generation/WORKSPACE b/examples/build_file_generation/WORKSPACE index e283260ea2..3f1fad8a8d 100644 --- a/examples/build_file_generation/WORKSPACE +++ b/examples/build_file_generation/WORKSPACE @@ -84,8 +84,6 @@ python_register_toolchains( python_version = "3.9", ) -# Load the interpreter and pip_parse rules. -load("@python39//:defs.bzl", "interpreter") load("@rules_python//python:pip.bzl", "pip_parse") # This macro wraps the `pip_repository` rule that invokes `pip`, with `incremental` set. @@ -114,7 +112,7 @@ pip_parse( # 3. Wrapper script, like in the autodetecting python toolchain. # # Here, we use the interpreter constant that resolves to the host interpreter from the default Python toolchain. - python_interpreter_target = interpreter, + python_interpreter_target = "@python39_host//:python", # Set the location of the lock file. requirements_lock = "//:requirements_lock.txt", requirements_windows = "//:requirements_windows.txt", diff --git a/examples/multi_python_versions/WORKSPACE b/examples/multi_python_versions/WORKSPACE index 35855ca1e1..f3a69ce769 100644 --- a/examples/multi_python_versions/WORKSPACE +++ b/examples/multi_python_versions/WORKSPACE @@ -28,19 +28,15 @@ python_register_multi_toolchains( ) load("@python//:pip.bzl", "multi_pip_parse") -load("@python//3.10:defs.bzl", interpreter_3_10 = "interpreter") -load("@python//3.11:defs.bzl", interpreter_3_11 = "interpreter") -load("@python//3.8:defs.bzl", interpreter_3_8 = "interpreter") -load("@python//3.9:defs.bzl", interpreter_3_9 = "interpreter") multi_pip_parse( name = "pypi", default_version = default_python_version, python_interpreter_target = { - "3.10": interpreter_3_10, - "3.11": interpreter_3_11, - "3.8": interpreter_3_8, - "3.9": interpreter_3_9, + "3.10": "@python_3_10_host//:python", + "3.11": "@python_3_11_host//:python", + "3.8": "@python_3_8_host//:python", + "3.9": "@python_3_9_host//:python", }, requirements_lock = { "3.10": "//requirements:requirements_lock_3_10.txt", diff --git a/examples/pip_parse/WORKSPACE b/examples/pip_parse/WORKSPACE index 415d064ed6..1a3a6b081f 100644 --- a/examples/pip_parse/WORKSPACE +++ b/examples/pip_parse/WORKSPACE @@ -14,7 +14,6 @@ python_register_toolchains( python_version = "3.9", ) -load("@python39//:defs.bzl", "interpreter") load("@rules_python//python:pip.bzl", "pip_parse") pip_parse( @@ -52,7 +51,7 @@ pip_parse( # 3. Wrapper script, like in the autodetecting python toolchain. # # Here, we use the interpreter constant that resolves to the host interpreter from the default Python toolchain. - python_interpreter_target = interpreter, + python_interpreter_target = "@python39_host//:python", # (Optional) You can set quiet to False if you want to see pip output. #quiet = False, diff --git a/examples/pip_parse_vendored/BUILD.bazel b/examples/pip_parse_vendored/BUILD.bazel index ddf3281924..b856c94525 100644 --- a/examples/pip_parse_vendored/BUILD.bazel +++ b/examples/pip_parse_vendored/BUILD.bazel @@ -9,24 +9,6 @@ compile_pip_requirements( src = "requirements.in", ) -# The requirements.bzl file is generated with a reference to the interpreter for the host platform. -# In order to check in a platform-agnostic file, we have to replace that reference with the symbol -# loaded from our python toolchain. -genrule( - name = "make_platform_agnostic", - srcs = ["@pip//:requirements.bzl"], - outs = ["requirements.clean.bzl"], - cmd = " | ".join([ - "cat $<", - # Insert our load statement after the existing one so we don't produce a file with buildifier warnings - """sed -e '/^load.*.pip.bzl/i\\'$$'\\n''load("@python39//:defs.bzl", "interpreter")'""", - # Replace the bazel 6.0.0 specific comment with something that bazel 5.4.0 would produce. - # This enables this example to be run as a test under bazel 5.4.0. - """sed -e 's#@//#//#'""", - """sed 's#"@python39_.*//:bin/python3"#interpreter#' >$@""", - ]), -) - write_file( name = "gen_update", out = "update.sh", @@ -35,14 +17,14 @@ write_file( "#!/usr/bin/env bash", # Bazel gives us a way to access the source folder! "cd $BUILD_WORKSPACE_DIRECTORY", - "cp -fv bazel-bin/requirements.clean.bzl requirements.bzl", + "cp -fv bazel-pip_parse_vendored/external/pip/requirements.bzl requirements.bzl", ], ) sh_binary( name = "vendor_requirements", srcs = ["update.sh"], - data = [":make_platform_agnostic"], + data = ["@pip//:requirements.bzl"], ) # Similarly ensures that the requirements.bzl file is updated @@ -51,5 +33,5 @@ diff_test( name = "test_vendored", failure_message = "Please run: bazel run //:vendor_requirements", file1 = "requirements.bzl", - file2 = ":make_platform_agnostic", + file2 = "@pip//:requirements.bzl", ) diff --git a/examples/pip_parse_vendored/WORKSPACE b/examples/pip_parse_vendored/WORKSPACE index 157f70aeb6..e0b7c86b62 100644 --- a/examples/pip_parse_vendored/WORKSPACE +++ b/examples/pip_parse_vendored/WORKSPACE @@ -14,14 +14,13 @@ python_register_toolchains( python_version = "3.9", ) -load("@python39//:defs.bzl", "interpreter") load("@rules_python//python:pip.bzl", "pip_parse") # This repository isn't referenced, except by our test that asserts the requirements.bzl is updated. # It also wouldn't be needed by users of this ruleset. pip_parse( name = "pip", - python_interpreter_target = interpreter, + python_interpreter_target = "@python39_host//:python", requirements_lock = "//:requirements.txt", ) diff --git a/examples/pip_parse_vendored/requirements.bzl b/examples/pip_parse_vendored/requirements.bzl index adbee66caf..206f8bb65a 100644 --- a/examples/pip_parse_vendored/requirements.bzl +++ b/examples/pip_parse_vendored/requirements.bzl @@ -1,10 +1,9 @@ """Starlark representation of locked requirements. @generated by rules_python pip_parse repository rule -from //:requirements.txt +from @//:requirements.txt """ -load("@python39//:defs.bzl", "interpreter") load("@rules_python//python:pip.bzl", "pip_utils") load("@rules_python//python/pip_install:pip_repository.bzl", "group_library", "whl_library") @@ -17,7 +16,7 @@ all_whl_requirements = all_whl_requirements_by_package.values() all_data_requirements = ["@pip//certifi:data", "@pip//charset_normalizer:data", "@pip//idna:data", "@pip//requests:data", "@pip//urllib3:data"] _packages = [("pip_certifi", "certifi==2023.7.22 --hash=sha256:539cc1d13202e33ca466e88b2807e29f4c13049d6d87031a3c110744495cb082 --hash=sha256:92d6037539857d8206b8f6ae472e8b77db8058fec5937a1ef3f54304089edbb9"), ("pip_charset_normalizer", "charset-normalizer==2.1.1 --hash=sha256:5a3d016c7c547f69d6f81fb0db9449ce888b418b5b9952cc5e6e66843e9dd845 --hash=sha256:83e9a75d1911279afd89352c68b45348559d1fc0506b054b346651b5e7fee29f"), ("pip_idna", "idna==3.4 --hash=sha256:814f528e8dead7d329833b91c5faa87d60bf71824cd12a7530b5526063d02cb4 --hash=sha256:90b77e79eaa3eba6de819a0c442c0b4ceefc341a7a2ab77d7562bf49f425c5c2"), ("pip_requests", "requests==2.28.1 --hash=sha256:7c5599b102feddaa661c826c56ab4fee28bfd17f5abca1ebbe3e7f19d7c97983 --hash=sha256:8fefa2a1a1365bf5520aac41836fbee479da67864514bdb821f31ce07ce65349"), ("pip_urllib3", "urllib3==1.26.13 --hash=sha256:47cc05d99aaa09c9e72ed5809b60e7ba354e64b59c9c173ac3018642d8bb41fc --hash=sha256:c083dd0dce68dbfbe1129d5271cb90f9447dea7d52097c6e0126120c521ddea8")] -_config = {"download_only": False, "enable_implicit_namespace_pkgs": False, "environment": {}, "extra_pip_args": [], "isolated": True, "pip_data_exclude": [], "python_interpreter": "python3", "python_interpreter_target": interpreter, "quiet": True, "repo": "pip", "repo_prefix": "pip_", "timeout": 600} +_config = {"download_only": False, "enable_implicit_namespace_pkgs": False, "environment": {}, "extra_pip_args": [], "isolated": True, "pip_data_exclude": [], "python_interpreter": "python3", "python_interpreter_target": "@python39_host//:python", "quiet": True, "repo": "pip", "repo_prefix": "pip_", "timeout": 600} _annotations = {} def requirement(name): diff --git a/examples/pip_repository_annotations/WORKSPACE b/examples/pip_repository_annotations/WORKSPACE index 35350550ef..8540555084 100644 --- a/examples/pip_repository_annotations/WORKSPACE +++ b/examples/pip_repository_annotations/WORKSPACE @@ -14,7 +14,6 @@ python_register_toolchains( python_version = "3.9", ) -load("@python39//:defs.bzl", "interpreter") load("@rules_python//python:pip.bzl", "package_annotation", "pip_parse") # Here we can see an example of annotations being applied to an arbitrary @@ -54,7 +53,7 @@ write_file( pip_parse( name = "pip", annotations = ANNOTATIONS, - python_interpreter_target = interpreter, + python_interpreter_target = "@python39_host//:python", requirements_lock = "//:requirements.txt", ) diff --git a/python/pip_install/pip_repository.bzl b/python/pip_install/pip_repository.bzl index 3e4878bd02..a1e3de47e1 100644 --- a/python/pip_install/pip_repository.bzl +++ b/python/pip_install/pip_repository.bzl @@ -22,7 +22,6 @@ load("//python/pip_install:requirements_parser.bzl", parse_requirements = "parse load("//python/pip_install/private:generate_group_library_build_bazel.bzl", "generate_group_library_build_bazel") load("//python/pip_install/private:generate_whl_library_build_bazel.bzl", "generate_whl_library_build_bazel") load("//python/pip_install/private:srcs.bzl", "PIP_INSTALL_PY_SRCS") -load("//python/private:bzlmod_enabled.bzl", "BZLMOD_ENABLED") load("//python/private:normalize_name.bzl", "normalize_name") load("//python/private:parse_whl_name.bzl", "parse_whl_name") load("//python/private:patch_whl.bzl", "patch_whl") @@ -87,13 +86,12 @@ def _resolve_python_interpreter(rctx): if rctx.attr.python_interpreter_target != None: python_interpreter = rctx.path(rctx.attr.python_interpreter_target) - if BZLMOD_ENABLED: - (os, _) = get_host_os_arch(rctx) + (os, _) = get_host_os_arch(rctx) - # On Windows, the symlink doesn't work because Windows attempts to find - # Python DLLs where the symlink is, not where the symlink points. - if os == WINDOWS_NAME: - python_interpreter = python_interpreter.realpath + # On Windows, the symlink doesn't work because Windows attempts to find + # Python DLLs where the symlink is, not where the symlink points. + if os == WINDOWS_NAME: + python_interpreter = python_interpreter.realpath elif "/" not in python_interpreter: # It's a plain command, e.g. "python3", to look up in the environment. found_python_interpreter = rctx.which(python_interpreter) diff --git a/python/private/bzlmod/pip.bzl b/python/private/bzlmod/pip.bzl index 6d45a26d7b..f843a8fb04 100644 --- a/python/private/bzlmod/pip.bzl +++ b/python/private/bzlmod/pip.bzl @@ -87,8 +87,10 @@ def _create_whl_repos(module_ctx, pip_attr, whl_map, whl_overrides): # we programmatically find it. hub_name = pip_attr.hub_name if python_interpreter_target == None and not pip_attr.python_interpreter: - python_name = "python_" + version_label(pip_attr.python_version, sep = "_") - if python_name not in INTERPRETER_LABELS.keys(): + python_name = "python_{}_host".format( + version_label(pip_attr.python_version, sep = "_"), + ) + if python_name not in INTERPRETER_LABELS: fail(( "Unable to find interpreter for pip hub '{hub_name}' for " + "python_version={version}: Make sure a corresponding " + diff --git a/python/private/bzlmod/pythons_hub.bzl b/python/private/bzlmod/pythons_hub.bzl index 5f536f3b67..3889e13409 100644 --- a/python/private/bzlmod/pythons_hub.bzl +++ b/python/private/bzlmod/pythons_hub.bzl @@ -78,7 +78,7 @@ DEFAULT_PYTHON_VERSION = "{default_python_version}" """ _line_for_hub_template = """\ - "{name}": Label("@{name}_{platform}//:{path}"), + "{key}": Label("@{name}_{platform}//:{path}"), """ def _hub_repo_impl(rctx): @@ -103,11 +103,22 @@ def _hub_repo_impl(rctx): # Create a dict that is later used to create # a symlink to a interpreter. - interpreter_labels = "".join([_line_for_hub_template.format( - name = name, - platform = platform, - path = path, - ) for name in rctx.attr.toolchain_user_repository_names]) + interpreter_labels = "".join([ + _line_for_hub_template.format( + key = name + ("" if platform_str != "host" else "_host"), + name = name, + platform = platform_str, + path = p, + ) + for name in rctx.attr.toolchain_user_repository_names + for platform_str, p in { + # NOTE @aignas 2023-12-21: maintaining the `platform` specific key + # here may be unneeded in the long term, but I am not sure if there + # are other users that depend on it. + platform: path, + "host": "python", + }.items() + ]) rctx.file( "interpreters.bzl", diff --git a/python/private/toolchains_repo.bzl b/python/private/toolchains_repo.bzl index 4b6bd11460..c7b61780d8 100644 --- a/python/private/toolchains_repo.bzl +++ b/python/private/toolchains_repo.bzl @@ -240,8 +240,50 @@ def compile_pip_requirements(name, **kwargs): toolchain_aliases = repository_rule( _toolchain_aliases_impl, - doc = """Creates a repository with a shorter name meant for the host platform, which contains - a BUILD.bazel file declaring aliases to the host platform's targets. + doc = """\ +Creates a repository with a shorter name only referencing the python version, +it contains a BUILD.bazel file declaring aliases to the host platform's targets +and is a great fit for any usage related to setting up toolchains for build +actions.""", + attrs = { + "platforms": attr.string_list( + doc = "List of platforms for which aliases shall be created", + ), + "python_version": attr.string(doc = "The Python version."), + "user_repository_name": attr.string( + mandatory = True, + doc = "The base name for all created repositories, like 'python38'.", + ), + "_rules_python_workspace": attr.label(default = Label("//:WORKSPACE")), + }, +) + +def _host_toolchain_impl(rctx): + rctx.file("BUILD.bazel", """\ +# Generated by python/private/toolchains_repo.bzl + +exports_files(["python"], visibility = ["//visibility:public"]) +""") + + (os_name, arch) = get_host_os_arch(rctx) + host_platform = get_host_platform(os_name, arch) + host_python = rctx.path( + Label( + "@@{py_repository}_{host_platform}//:python".format( + py_repository = rctx.attr.name[:-len("_host")], + host_platform = host_platform, + ), + ), + ) + rctx.symlink(host_python, "python") + +host_toolchain = repository_rule( + _host_toolchain_impl, + doc = """\ +Creates a repository with a shorter name meant to be used in the repository_ctx, +which needs to have `symlinks` for the interpreter. This is separate from the +toolchain_aliases repo because referencing the `python` interpreter target from +this repo causes an eager fetch of the toolchain for the host platform. """, attrs = { "platforms": attr.string_list( diff --git a/python/repositories.bzl b/python/repositories.bzl index 21becb5290..7422a50bb0 100644 --- a/python/repositories.bzl +++ b/python/repositories.bzl @@ -27,6 +27,7 @@ load("//python/private:full_version.bzl", "full_version") load("//python/private:internal_config_repo.bzl", "internal_config_repo") load( "//python/private:toolchains_repo.bzl", + "host_toolchain", "multi_toolchain_aliases", "toolchain_aliases", "toolchains_repo", @@ -589,6 +590,13 @@ def python_register_toolchains( platform = platform, )) + host_toolchain( + name = name + "_host", + python_version = python_version, + user_repository_name = name, + platforms = loaded_platforms, + ) + toolchain_aliases( name = name, python_version = python_version,