From 5bdb208a78e704f84f91e38127a241e4ba8fa100 Mon Sep 17 00:00:00 2001 From: Fabian Meumertzheim Date: Thu, 5 Oct 2023 09:12:09 -0700 Subject: [PATCH] Make lockfile's `RepoSpec` attributes more readable String-valued tag and repo rule attributes in the `MODULE.bazel.lock` file have to be serialized in a way that distinguishes them from label-valued attributes. This commit uses the fact that the string representation of labels always starts with `@@` to escape strings only if they start with `@@` (or happen to be of the form of an escaped string). Since string-valued attributes rarely start with `@@`, in particular when specified by humans and not macros, this greatly reduces the need for escaping in the lockfile. This commit raises the lockfile version to 3. Along the way also disables HTML escaping for attributes as it isn't needed and results in less readable representations of certain characters. Closes #19684. PiperOrigin-RevId: 571037360 Change-Id: I04bb60d371cd09326780004122e729d78c99732a --- scripts/bootstrap/compile.sh | 5 ++- .../bazel/bzlmod/AttributeValuesAdapter.java | 44 ++++++++++++++----- .../lib/bazel/bzlmod/BazelLockFileValue.java | 2 +- .../bzlmod/AttributeValuesAdapterTest.java | 23 +++++++++- .../shell/bazel/bazel_determinism_test.sh | 3 +- src/tools/bzlmod/utils.bzl | 21 +++++---- 6 files changed, 73 insertions(+), 25 deletions(-) diff --git a/scripts/bootstrap/compile.sh b/scripts/bootstrap/compile.sh index aaefd24c3ad233..e09d67d22fbe2c 100755 --- a/scripts/bootstrap/compile.sh +++ b/scripts/bootstrap/compile.sh @@ -254,8 +254,9 @@ EOF link_file "${PWD}/src/MODULE.tools" "${BAZEL_TOOLS_REPO}/MODULE.bazel" new_hash=$(shasum -a 256 "${BAZEL_TOOLS_REPO}/MODULE.bazel" | awk '{print $1}') sed -i.bak "/\"bazel_tools\":/s/\"[a-f0-9]*\"/\"$new_hash\"/" MODULE.bazel.lock - # TODO: Temporary hack for lockfile version mismatch, remove these two lines after updating to 6.4.0 - sed -i.bak 's/"lockFileVersion": 1/"lockFileVersion": 2/' MODULE.bazel.lock + # TODO: Temporary hack for lockfile version mismatch, remove these lines after updating to 6.4.0 + sed -i.bak 's/"lockFileVersion": 1/"lockFileVersion": 3/' MODULE.bazel.lock + sed -i.bak 's/"--/"/g' MODULE.bazel.lock sed -i.bak 's/"moduleExtensions":/"moduleExtensions-old":/' MODULE.bazel.lock rm MODULE.bazel.lock.bak diff --git a/src/main/java/com/google/devtools/build/lib/bazel/bzlmod/AttributeValuesAdapter.java b/src/main/java/com/google/devtools/build/lib/bazel/bzlmod/AttributeValuesAdapter.java index f608becc00851f..248d1060975d91 100644 --- a/src/main/java/com/google/devtools/build/lib/bazel/bzlmod/AttributeValuesAdapter.java +++ b/src/main/java/com/google/devtools/build/lib/bazel/bzlmod/AttributeValuesAdapter.java @@ -15,8 +15,11 @@ package com.google.devtools.build.lib.bazel.bzlmod; +import com.google.common.annotations.VisibleForTesting; +import com.google.common.base.Preconditions; import com.google.devtools.build.lib.cmdline.Label; import com.google.gson.Gson; +import com.google.gson.GsonBuilder; import com.google.gson.JsonArray; import com.google.gson.JsonElement; import com.google.gson.JsonNull; @@ -40,7 +43,7 @@ /** Helps serialize/deserialize {@link AttributeValues}, which contains Starlark values. */ public class AttributeValuesAdapter extends TypeAdapter { - private final Gson gson = new Gson(); + private final Gson gson = new GsonBuilder().disableHtmlEscaping().create(); @Override public void write(JsonWriter out, AttributeValues attributeValues) throws IOException { @@ -129,32 +132,51 @@ private Object deserializeObject(JsonElement json) { } } + @VisibleForTesting static final String STRING_ESCAPE_SEQUENCE = "'"; + /** - * Serializes an object (Label or String) to String A label is converted to a String as it is. A - * String is being modified with a delimiter to be easily differentiated from the label when - * deserializing. + * Serializes an object (Label or String) to String. A label is converted to a String as it is. A + * String that looks like a label is escaped so that it can be differentiated from a label when + * deserializing, otherwise it is emitted as is. * * @param obj String or Label * @return serialized object */ private String serializeObjToString(Object obj) { if (obj instanceof Label) { - return ((Label) obj).getUnambiguousCanonicalForm(); + String labelString = ((Label) obj).getUnambiguousCanonicalForm(); + Preconditions.checkState(labelString.startsWith("@@")); + return labelString; + } + String string = (String) obj; + // Strings that start with "@@" need to be escaped to avoid being interpreted as a label. We + // escape by wrapping the string in the escape sequence and strip one layer of this sequence + // during deserialization, so strings that happen to already start and end with the escape + // sequence also have to be escaped. + if (string.startsWith("@@") + || (string.startsWith(STRING_ESCAPE_SEQUENCE) && string.endsWith(STRING_ESCAPE_SEQUENCE))) { + return STRING_ESCAPE_SEQUENCE + string + STRING_ESCAPE_SEQUENCE; } - return "--" + obj; + return string; } /** - * Deserializes a string to either a label or a String depending on the prefix. A string will have - * a delimiter at the start, else it is converted to a label. + * Deserializes a string to either a label or a String depending on the prefix and presence of the + * escape sequence. * * @param value String to be deserialized * @return Object of type String of Label */ private Object deserializeStringToObject(String value) { - if (value.startsWith("--")) { - return value.substring(2); + // A string represents a label if and only if it starts with "@@". + if (value.startsWith("@@")) { + return Label.parseCanonicalUnchecked(value); + } + // Strings that start and end with the escape sequence always require one layer to be stripped. + if (value.startsWith(STRING_ESCAPE_SEQUENCE) && value.endsWith(STRING_ESCAPE_SEQUENCE)) { + return value.substring( + STRING_ESCAPE_SEQUENCE.length(), value.length() - STRING_ESCAPE_SEQUENCE.length()); } - return Label.parseCanonicalUnchecked(value); + return value; } } diff --git a/src/main/java/com/google/devtools/build/lib/bazel/bzlmod/BazelLockFileValue.java b/src/main/java/com/google/devtools/build/lib/bazel/bzlmod/BazelLockFileValue.java index 727ac37525a99d..21b02c479b76e7 100644 --- a/src/main/java/com/google/devtools/build/lib/bazel/bzlmod/BazelLockFileValue.java +++ b/src/main/java/com/google/devtools/build/lib/bazel/bzlmod/BazelLockFileValue.java @@ -37,7 +37,7 @@ @GenerateTypeAdapter public abstract class BazelLockFileValue implements SkyValue, Postable { - public static final int LOCK_FILE_VERSION = 2; + public static final int LOCK_FILE_VERSION = 3; @SerializationConstant public static final SkyKey KEY = () -> SkyFunctions.BAZEL_LOCK_FILE; diff --git a/src/test/java/com/google/devtools/build/lib/bazel/bzlmod/AttributeValuesAdapterTest.java b/src/test/java/com/google/devtools/build/lib/bazel/bzlmod/AttributeValuesAdapterTest.java index 8ae80136ea764f..45fcbd7d322ea6 100644 --- a/src/test/java/com/google/devtools/build/lib/bazel/bzlmod/AttributeValuesAdapterTest.java +++ b/src/test/java/com/google/devtools/build/lib/bazel/bzlmod/AttributeValuesAdapterTest.java @@ -16,6 +16,7 @@ package com.google.devtools.build.lib.bazel.bzlmod; import static com.google.common.truth.Truth.assertThat; +import static com.google.devtools.build.lib.bazel.bzlmod.AttributeValuesAdapter.STRING_ESCAPE_SEQUENCE; import com.google.devtools.build.lib.cmdline.Label; import com.google.devtools.build.lib.testutil.FoundationTestCase; @@ -43,7 +44,23 @@ public void testAttributeValuesAdapter() throws IOException { Label l2 = Label.parseCanonicalUnchecked("@//foo:tar"); dict.put("Integer", StarlarkInt.of(56)); dict.put("Boolean", false); - dict.put("String", "Hello"); + dict.put("String", "Hello String"); + dict.put("StringWithAngleBrackets", ""); + dict.put( + "LabelLikeString", + StarlarkList.of(Mutability.IMMUTABLE, "@//foo:bar", ":baz", "@@//baz:quz")); + dict.put( + "StringsWithEscapeSequence", + StarlarkList.of( + Mutability.IMMUTABLE, + "@@//foo:bar" + STRING_ESCAPE_SEQUENCE, + STRING_ESCAPE_SEQUENCE + "@@//foo:bar", + STRING_ESCAPE_SEQUENCE + "@@//foo:bar" + STRING_ESCAPE_SEQUENCE, + STRING_ESCAPE_SEQUENCE + + STRING_ESCAPE_SEQUENCE + + "@@//foo:bar" + + STRING_ESCAPE_SEQUENCE + + STRING_ESCAPE_SEQUENCE)); dict.put("Label", l1); dict.put( "ListOfInts", StarlarkList.of(Mutability.IMMUTABLE, StarlarkInt.of(1), StarlarkInt.of(2))); @@ -66,6 +83,10 @@ public void testAttributeValuesAdapter() throws IOException { attributeValues = attrAdapter.read(new JsonReader(stringReader)); } + // Verify that the JSON string does not contain any escaped angle brackets. + assertThat(jsonString).doesNotContain("\\u003c"); + // Verify that the String "Hello String" is preserved as is, without any additional escaping. + assertThat(jsonString).contains(":\"Hello String\""); assertThat((Map) attributeValues.attributes()).containsExactlyEntriesIn(builtDict); } } diff --git a/src/test/shell/bazel/bazel_determinism_test.sh b/src/test/shell/bazel/bazel_determinism_test.sh index 2885304e37edbc..f6c7588acaff53 100755 --- a/src/test/shell/bazel/bazel_determinism_test.sh +++ b/src/test/shell/bazel/bazel_determinism_test.sh @@ -73,7 +73,8 @@ function test_determinism() { new_hash=$(shasum -a 256 "src/MODULE.tools" | awk '{print $1}') sed -i.bak "/\"bazel_tools\":/s/\"[a-f0-9]*\"/\"$new_hash\"/" MODULE.bazel.lock # TODO: Temporary hack for lockfile version mismatch, remove these two lines after updating to 6.4.0 - sed -i.bak 's/"lockFileVersion": 1/"lockFileVersion": 2/' MODULE.bazel.lock + sed -i.bak 's/"lockFileVersion": 1/"lockFileVersion": 3/' MODULE.bazel.lock + sed -i.bak 's/"--/"/g' MODULE.bazel.lock sed -i.bak 's/"moduleExtensions":/"moduleExtensions-old":/' MODULE.bazel.lock rm MODULE.bazel.lock.bak diff --git a/src/tools/bzlmod/utils.bzl b/src/tools/bzlmod/utils.bzl index d080fcea73126a..79a5dc16c90806 100644 --- a/src/tools/bzlmod/utils.bzl +++ b/src/tools/bzlmod/utils.bzl @@ -30,9 +30,9 @@ def extract_url(attributes): The url extracted from the given attributes. """ if "urls" in attributes: - return attributes["urls"][0][2:] + return attributes["urls"][0].removeprefix("--") elif "url" in attributes: - return attributes["url"][2:] + return attributes["url"].removeprefix("--") else: fail("Could not find url in attributes %s" % attributes) @@ -49,7 +49,10 @@ def parse_http_artifacts(ctx, lockfile_path, required_repos): A list of http artifacts in the form of [{"integrity": , "url": }, {"sha256": , "url": }, ...] - All lockfile string values are prefixed with `--`, hence the `[2:]` is needed to remove the prefix. + All lockfile string values in version 2, but not version 3, are prefixed with `--`, hence the + `.removeprefix("--")` is needed to remove the prefix if it exists. This is a heuristic as + version 3 strings could start with `--`, but that is unlikely. + TODO: Remove this hack after the release of Bazel 6.4.0. """ lockfile = json.decode(ctx.read(lockfile_path)) http_artifacts = [] @@ -58,21 +61,21 @@ def parse_http_artifacts(ctx, lockfile_path, required_repos): if "repoSpec" in module and module["repoSpec"]["ruleClassName"] == "http_archive": repo_spec = module["repoSpec"] attributes = repo_spec["attributes"] - repo_name = attributes["name"][2:] + repo_name = attributes["name"].removeprefix("--") if repo_name not in required_repos: continue found_repos.append(repo_name) http_artifacts.append({ - "integrity": attributes["integrity"][2:], + "integrity": attributes["integrity"].removeprefix("--"), "url": extract_url(attributes), }) if "remote_patches" in attributes: for patch, integrity in attributes["remote_patches"].items(): http_artifacts.append({ - "integrity": integrity[2:], - "url": patch[2:], + "integrity": integrity.removeprefix("--"), + "url": patch.removeprefix("--"), }) for _, extension in lockfile["moduleExtensions"].items(): @@ -81,14 +84,14 @@ def parse_http_artifacts(ctx, lockfile_path, required_repos): rule_class = repo_spec["ruleClassName"] if rule_class == "http_archive" or rule_class == "http_file" or rule_class == "http_jar": attributes = repo_spec["attributes"] - repo_name = attributes["name"][2:] + repo_name = attributes["name"].removeprefix("--") if repo_name not in required_repos: continue found_repos.append(repo_name) http_artifacts.append({ - "sha256": attributes["sha256"][2:], + "sha256": attributes["sha256"].removeprefix("--"), "url": extract_url(attributes), })