diff --git a/src/main/java/com/google/devtools/build/lib/bazel/repository/BUILD b/src/main/java/com/google/devtools/build/lib/bazel/repository/BUILD index 9236b95ccf9caf..b624864b06e4b6 100644 --- a/src/main/java/com/google/devtools/build/lib/bazel/repository/BUILD +++ b/src/main/java/com/google/devtools/build/lib/bazel/repository/BUILD @@ -51,6 +51,7 @@ java_library( "//src/main/java/com/google/devtools/common/options", "//src/main/java/net/starlark/java/eval", "//third_party:apache_commons_compress", + "//third_party:auto_service", "//third_party:auto_value", "//third_party:flogger", "//third_party:guava", diff --git a/src/main/java/com/google/devtools/build/lib/bazel/repository/CompressedTarFunction.java b/src/main/java/com/google/devtools/build/lib/bazel/repository/CompressedTarFunction.java index f38e7d56894ac2..c32a423940e0a1 100644 --- a/src/main/java/com/google/devtools/build/lib/bazel/repository/CompressedTarFunction.java +++ b/src/main/java/com/google/devtools/build/lib/bazel/repository/CompressedTarFunction.java @@ -15,8 +15,10 @@ package com.google.devtools.build.lib.bazel.repository; import static com.google.devtools.build.lib.bazel.repository.StripPrefixedPath.maybeDeprefixSymlink; +import static java.nio.charset.StandardCharsets.ISO_8859_1; +import static java.nio.charset.StandardCharsets.UTF_8; -import com.google.common.base.Optional; +import com.google.auto.service.AutoService; import com.google.common.io.ByteStreams; import com.google.devtools.build.lib.bazel.repository.DecompressorValue.Decompressor; import com.google.devtools.build.lib.vfs.FileSystemUtils; @@ -25,16 +27,33 @@ import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; +import java.nio.ByteBuffer; +import java.nio.CharBuffer; +import java.nio.charset.Charset; +import java.nio.charset.CharsetDecoder; +import java.nio.charset.CharsetEncoder; +import java.nio.charset.CoderResult; +import java.nio.charset.spi.CharsetProvider; +import java.util.Collections; 
import java.util.Date; import java.util.HashMap; import java.util.HashSet; +import java.util.Iterator; import java.util.Map; +import java.util.Optional; import java.util.Set; +import java.util.UUID; import org.apache.commons.compress.archivers.tar.TarArchiveEntry; import org.apache.commons.compress.archivers.tar.TarArchiveInputStream; /** * Common code for unarchiving a compressed TAR file. + * + *
<p>
TAR file entries commonly use one of two formats: PAX, which uses UTF-8 encoding for all + * strings, and USTAR, which does not specify an encoding. This class interprets USTAR headers as + * latin-1, thus preserving the original bytes of the header without enforcing any particular + * encoding. Internally, for file system operations, all strings are converted into Bazel's internal + * representation of raw bytes stored as latin-1 strings. */ public abstract class CompressedTarFunction implements Decompressor { protected abstract InputStream getDecompressorStream(DecompressorDescriptor descriptor) @@ -54,20 +73,23 @@ public Path decompress(DecompressorDescriptor descriptor) Map symlinks = new HashMap<>(); try (InputStream decompressorStream = getDecompressorStream(descriptor)) { - TarArchiveInputStream tarStream = new TarArchiveInputStream(decompressorStream); + // USTAR tar headers use an unspecified encoding whereas PAX tar headers always use UTF-8. + // We can specify the encoding to use for USTAR headers, but the Charset used for PAX headers + // is fixed to UTF-8. We thus specify a custom Charset for the former so that we can + // distinguish between the two. 
+ TarArchiveInputStream tarStream = + new TarArchiveInputStream(decompressorStream, MarkedIso88591Charset.NAME); TarArchiveEntry entry; while ((entry = tarStream.getNextTarEntry()) != null) { - String entryName = entry.getName(); + String entryName = toRawBytesString(entry.getName()); entryName = renameFiles.getOrDefault(entryName, entryName); - StripPrefixedPath entryPath = StripPrefixedPath.maybeDeprefix(entryName, prefix); + StripPrefixedPath entryPath = + StripPrefixedPath.maybeDeprefix(entryName.getBytes(ISO_8859_1), prefix); foundPrefix = foundPrefix || entryPath.foundPrefix(); if (prefix.isPresent() && !foundPrefix) { - Optional suggestion = - CouldNotFindPrefixException.maybeMakePrefixSuggestion(entryPath.getPathFragment()); - if (suggestion.isPresent()) { - availablePrefixes.add(suggestion.get()); - } + CouldNotFindPrefixException.maybeMakePrefixSuggestion(entryPath.getPathFragment()) + .ifPresent(availablePrefixes::add); } if (entryPath.skip()) { @@ -80,8 +102,11 @@ public Path decompress(DecompressorDescriptor descriptor) filePath.createDirectoryAndParents(); } else { if (entry.isSymbolicLink() || entry.isLink()) { - PathFragment targetName = PathFragment.create(entry.getLinkName()); - targetName = maybeDeprefixSymlink(targetName, prefix, descriptor.destinationPath()); + PathFragment targetName = + maybeDeprefixSymlink( + toRawBytesString(entry.getLinkName()).getBytes(ISO_8859_1), + prefix, + descriptor.destinationPath()); if (entry.isSymbolicLink()) { symlinks.put(filePath, targetName); } else { @@ -135,4 +160,100 @@ public Path decompress(DecompressorDescriptor descriptor) return descriptor.destinationPath(); } + + /** + * Returns a string that contains the raw bytes of the given string encoded in ISO-8859-1, + * assuming that the given string was encoded with either UTF-8 or the special {@link + * MarkedIso88591Charset}. + */ + private static String toRawBytesString(String name) { + // Marked strings are already encoded in ISO-8859-1. 
Other strings originate from PAX headers + // and are thus encoded in UTF-8, which we decode to the raw bytes and then re-encode trivially + // in ISO-8859-1. + return MarkedIso88591Charset.getRawBytesStringIfMarked(name) + .orElseGet(() -> new String(name.getBytes(UTF_8), ISO_8859_1)); + } + + /** A provider of {@link MarkedIso88591Charset}s. */ + @AutoService(CharsetProvider.class) + public static class MarkedIso88591CharsetProvider extends CharsetProvider { + private static final Charset CHARSET = new MarkedIso88591Charset(); + + @Override + public Iterator<Charset> charsets() { + // This charset is only meant for internal use within CompressedTarFunction and thus should + // not be discoverable. + return Collections.emptyIterator(); + } + + @Override + public Charset charsetForName(String charsetName) { + return MarkedIso88591Charset.NAME.equals(charsetName) ? CHARSET : null; + } + } + + /** + * A charset that decodes ISO-8859-1, i.e., produces a String that contains the raw decoded bytes, + * and appends a marker to the end of the string to indicate that it was decoded with this + * charset. + */ + private static class MarkedIso88591Charset extends Charset { + // The name + // * must not collide with the name of any other charset. + // * must not appear in archive entry names by chance. + // * is internal to CompressedTarFunction. + // This is best served by a cryptographically random UUID, generated at startup. + private static final String NAME = UUID.randomUUID().toString(); + + private MarkedIso88591Charset() { + super(NAME, new String[0]); + } + + public static Optional<String> getRawBytesStringIfMarked(String s) { + // Check for the marker in all positions as TarArchiveInputStream manipulates the raw name in + // certain cases (for example, appending a '/' to directory names). 
+ if (s.contains(NAME)) { + return Optional.of(s.replace(NAME, "")); // Plain substring removal; NAME is not a regex. + } + return Optional.empty(); + } + + @Override + public CharsetDecoder newDecoder() { + return new CharsetDecoder(this, 1, 1) { + @Override + protected CoderResult decodeLoop(ByteBuffer in, CharBuffer out) { + // A simple unoptimized ISO-8859-1 decoder. + while (in.hasRemaining()) { + if (!out.hasRemaining()) { + return CoderResult.OVERFLOW; + } + out.put((char) (in.get() & 0xFF)); + } + return CoderResult.UNDERFLOW; + } + + @Override + protected CoderResult implFlush(CharBuffer out) { + // Append the marker to the end of the buffer to indicate that it was decoded with this + // charset. + if (out.remaining() < NAME.length()) { + return CoderResult.OVERFLOW; + } + out.put(NAME); + return CoderResult.UNDERFLOW; + } + }; + } + + @Override + public CharsetEncoder newEncoder() { + throw new UnsupportedOperationException(); + } + + @Override + public boolean contains(Charset cs) { + return false; + } + } } diff --git a/src/main/java/com/google/devtools/build/lib/bazel/repository/DecompressorDescriptor.java b/src/main/java/com/google/devtools/build/lib/bazel/repository/DecompressorDescriptor.java index 2676fdba14d091..c40d5ab35f38da 100644 --- a/src/main/java/com/google/devtools/build/lib/bazel/repository/DecompressorDescriptor.java +++ b/src/main/java/com/google/devtools/build/lib/bazel/repository/DecompressorDescriptor.java @@ -15,10 +15,10 @@ package com.google.devtools.build.lib.bazel.repository; import com.google.auto.value.AutoValue; -import com.google.common.base.Optional; import com.google.common.collect.ImmutableMap; import com.google.devtools.build.lib.vfs.Path; import java.util.Map; +import java.util.Optional; /** Description of an archive to be decompressed. 
*/ @AutoValue diff --git a/src/main/java/com/google/devtools/build/lib/bazel/repository/DecompressorValue.java b/src/main/java/com/google/devtools/build/lib/bazel/repository/DecompressorValue.java index e32a2decb18741..98ca4b4fa7e0ee 100644 --- a/src/main/java/com/google/devtools/build/lib/bazel/repository/DecompressorValue.java +++ b/src/main/java/com/google/devtools/build/lib/bazel/repository/DecompressorValue.java @@ -14,14 +14,17 @@ package com.google.devtools.build.lib.bazel.repository; +import static java.nio.charset.StandardCharsets.ISO_8859_1; +import static java.nio.charset.StandardCharsets.UTF_8; + import com.google.common.annotations.VisibleForTesting; -import com.google.common.base.Optional; import com.google.devtools.build.lib.rules.repository.RepositoryFunction.RepositoryFunctionException; import com.google.devtools.build.lib.vfs.Path; import com.google.devtools.build.lib.vfs.PathFragment; import com.google.devtools.build.skyframe.SkyFunctionException.Transience; import com.google.devtools.build.skyframe.SkyValue; import java.io.IOException; +import java.util.Optional; import java.util.Set; import net.starlark.java.eval.Starlark; @@ -59,9 +62,14 @@ private static String prepareErrorMessage(String prefix, Set availablePr } public static Optional maybeMakePrefixSuggestion(PathFragment pathFragment) { - return pathFragment.isMultiSegment() - ? Optional.of(pathFragment.getSegment(0)) - : Optional.absent(); + if (!pathFragment.isMultiSegment()) { + return Optional.empty(); + } + String rawFirstSegment = pathFragment.getSegment(0); + // Users can only specify prefixes from Starlark, which is planned to use UTF-8 for all + // strings, but currently still collects the raw bytes in a latin-1 string. We thus + // optimistically decode the raw bytes with UTF-8 here for display purposes. 
+ return Optional.of(new String(rawFirstSegment.getBytes(ISO_8859_1), UTF_8)); } } diff --git a/src/main/java/com/google/devtools/build/lib/bazel/repository/StripPrefixedPath.java b/src/main/java/com/google/devtools/build/lib/bazel/repository/StripPrefixedPath.java index 77c5815e79a50d..5852b4d0ea5303 100644 --- a/src/main/java/com/google/devtools/build/lib/bazel/repository/StripPrefixedPath.java +++ b/src/main/java/com/google/devtools/build/lib/bazel/repository/StripPrefixedPath.java @@ -14,11 +14,13 @@ package com.google.devtools.build.lib.bazel.repository; -import com.google.common.base.Optional; +import static java.nio.charset.StandardCharsets.ISO_8859_1; + import com.google.common.base.Preconditions; import com.google.devtools.build.lib.concurrent.ThreadSafety; import com.google.devtools.build.lib.vfs.Path; import com.google.devtools.build.lib.vfs.PathFragment; +import java.util.Optional; /** * Utility class for removing a prefix from an archive's path. @@ -36,17 +38,19 @@ public final class StripPrefixedPath { * could cause collisions, if a zip file had one entry for bin/some-binary and another entry for * /bin/some-binary. * - * Note that the prefix is stripped to move the files up one level, so if you have an entry + *
<p>
Note that the prefix is stripped to move the files up one level, so if you have an entry * "foo/../bar" and a prefix of "foo", the result will be "bar" not "../bar". */ - public static StripPrefixedPath maybeDeprefix(String entry, Optional prefix) { + public static StripPrefixedPath maybeDeprefix(byte[] entry, Optional prefix) { Preconditions.checkNotNull(entry); PathFragment entryPath = relativize(entry); - if (!prefix.isPresent()) { + if (prefix.isEmpty()) { return new StripPrefixedPath(entryPath, false, false); } - PathFragment prefixPath = relativize(prefix.get()); + // Bazel parses Starlark files, which are the ultimate source of prefixes, as Latin-1 + // (ISO-8859-1). + PathFragment prefixPath = relativize(prefix.get().getBytes(ISO_8859_1)); boolean found = false; boolean skip = false; if (entryPath.startsWith(prefixPath)) { @@ -64,8 +68,8 @@ public static StripPrefixedPath maybeDeprefix(String entry, Optional pre /** * Normalize the path and, if it is absolute, make it relative (e.g., /foo/bar becomes foo/bar). */ - private static PathFragment relativize(String path) { - PathFragment entryPath = PathFragment.create(path); + private static PathFragment relativize(byte[] path) { + PathFragment entryPath = createPathFragment(path); if (entryPath.isAbsolute()) { entryPath = entryPath.toRelative(); } @@ -79,10 +83,10 @@ private StripPrefixedPath(PathFragment pathFragment, boolean found, boolean skip } public static PathFragment maybeDeprefixSymlink( - PathFragment linkPathFragment, Optional prefix, Path root) { - boolean wasAbsolute = linkPathFragment.isAbsolute(); + byte[] rawTarget, Optional prefix, Path root) { + boolean wasAbsolute = createPathFragment(rawTarget).isAbsolute(); // Strip the prefix from the link path if set. 
- linkPathFragment = maybeDeprefix(linkPathFragment.getPathString(), prefix).getPathFragment(); + PathFragment linkPathFragment = maybeDeprefix(rawTarget, prefix).getPathFragment(); if (wasAbsolute) { // Recover the path to an absolute path as maybeDeprefix() relativize the path // even if the prefix is not set @@ -103,4 +107,10 @@ public boolean skip() { return skip; } + static PathFragment createPathFragment(byte[] rawBytes) { + // Bazel internally represents paths as raw bytes by using the Latin-1 encoding, which has the + // property that (new String(bytes, ISO_8859_1)).getBytes(ISO_8859_1)) equals bytes for every + // byte array bytes. + return PathFragment.create(new String(rawBytes, ISO_8859_1)); + } } diff --git a/src/main/java/com/google/devtools/build/lib/bazel/repository/ZipDecompressor.java b/src/main/java/com/google/devtools/build/lib/bazel/repository/ZipDecompressor.java index 8990e7c4c08e64..4a8f7709a5798b 100644 --- a/src/main/java/com/google/devtools/build/lib/bazel/repository/ZipDecompressor.java +++ b/src/main/java/com/google/devtools/build/lib/bazel/repository/ZipDecompressor.java @@ -15,9 +15,9 @@ package com.google.devtools.build.lib.bazel.repository; import static com.google.devtools.build.lib.bazel.repository.StripPrefixedPath.maybeDeprefixSymlink; +import static java.nio.charset.StandardCharsets.UTF_8; import com.google.common.annotations.VisibleForTesting; -import com.google.common.base.Optional; import com.google.common.base.Preconditions; import com.google.common.io.ByteStreams; import com.google.devtools.build.lib.bazel.repository.DecompressorValue.Decompressor; @@ -29,11 +29,11 @@ import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; -import java.nio.charset.Charset; import java.util.Collection; import java.util.HashMap; import java.util.HashSet; import java.util.Map; +import java.util.Optional; import java.util.Set; import javax.annotation.Nullable; @@ -89,7 +89,8 @@ public Path 
decompress(DecompressorDescriptor descriptor) for (ZipFileEntry entry : entries) { String entryName = entry.getName(); entryName = renameFiles.getOrDefault(entryName, entryName); - StripPrefixedPath entryPath = StripPrefixedPath.maybeDeprefix(entryName, prefix); + StripPrefixedPath entryPath = + StripPrefixedPath.maybeDeprefix(entryName.getBytes(UTF_8), prefix); foundPrefix = foundPrefix || entryPath.foundPrefix(); if (entryPath.skip()) { continue; @@ -102,12 +103,9 @@ public Path decompress(DecompressorDescriptor descriptor) Set prefixes = new HashSet<>(); for (ZipFileEntry entry : entries) { StripPrefixedPath entryPath = - StripPrefixedPath.maybeDeprefix(entry.getName(), Optional.absent()); - Optional suggestion = - CouldNotFindPrefixException.maybeMakePrefixSuggestion(entryPath.getPathFragment()); - if (suggestion.isPresent()) { - prefixes.add(suggestion.get()); - } + StripPrefixedPath.maybeDeprefix(entry.getName().getBytes(UTF_8), Optional.empty()); + CouldNotFindPrefixException.maybeMakePrefixSuggestion(entryPath.getPathFragment()) + .ifPresent(prefixes::add); } throw new CouldNotFindPrefixException(prefix.get(), prefixes); } @@ -146,17 +144,22 @@ private static void extractZipEntry( // For symlinks, the "compressed data" is actually the target name. 
int read = reader.getInputStream(entry).read(buffer); Preconditions.checkState(read == buffer.length); - PathFragment target = PathFragment.create(new String(buffer, Charset.defaultCharset())); + + PathFragment target = StripPrefixedPath.createPathFragment(buffer); if (target.containsUplevelReferences()) { PathFragment pointsTo = strippedRelativePath.getParentDirectory().getRelative(target); if (pointsTo.containsUplevelReferences()) { - throw new IOException("Zip entries cannot refer to files outside of their directory: " - + reader.getFilename() + " has a symlink " + strippedRelativePath + " pointing to " - + target); + throw new IOException( + "Zip entries cannot refer to files outside of their directory: " + + reader.getFilename() + + " has a symlink " + + strippedRelativePath + + " pointing to " + + new String(buffer, UTF_8)); } } - target = maybeDeprefixSymlink(target, prefix, destinationDirectory); - symlinks.put(outputPath, target); + + symlinks.put(outputPath, maybeDeprefixSymlink(buffer, prefix, destinationDirectory)); } else { try (InputStream input = reader.getInputStream(entry); OutputStream output = outputPath.getOutputStream()) { diff --git a/src/test/java/com/google/devtools/build/lib/bazel/repository/StripPrefixedPathTest.java b/src/test/java/com/google/devtools/build/lib/bazel/repository/StripPrefixedPathTest.java index df1ea4db0fb19f..784b16348e0580 100644 --- a/src/test/java/com/google/devtools/build/lib/bazel/repository/StripPrefixedPathTest.java +++ b/src/test/java/com/google/devtools/build/lib/bazel/repository/StripPrefixedPathTest.java @@ -15,13 +15,14 @@ package com.google.devtools.build.lib.bazel.repository; import static com.google.common.truth.Truth.assertThat; +import static java.nio.charset.StandardCharsets.UTF_8; -import com.google.common.base.Optional; import com.google.devtools.build.lib.clock.BlazeClock; import com.google.devtools.build.lib.util.OS; import com.google.devtools.build.lib.vfs.DigestHashFunction; import 
com.google.devtools.build.lib.vfs.PathFragment; import com.google.devtools.build.lib.vfs.inmemoryfs.InMemoryFileSystem; +import java.util.Optional; import org.junit.Test; import org.junit.runner.RunWith; import org.junit.runners.JUnit4; @@ -33,31 +34,32 @@ public class StripPrefixedPathTest { @Test public void testStrip() { - StripPrefixedPath result = StripPrefixedPath.maybeDeprefix("foo/bar", Optional.of("foo")); + StripPrefixedPath result = + StripPrefixedPath.maybeDeprefix("foo/bar".getBytes(UTF_8), Optional.of("foo")); assertThat(PathFragment.create("bar")).isEqualTo(result.getPathFragment()); assertThat(result.foundPrefix()).isTrue(); assertThat(result.skip()).isFalse(); - result = StripPrefixedPath.maybeDeprefix("foo", Optional.of("foo")); + result = StripPrefixedPath.maybeDeprefix("foo".getBytes(UTF_8), Optional.of("foo")); assertThat(result.skip()).isTrue(); - result = StripPrefixedPath.maybeDeprefix("bar/baz", Optional.of("foo")); + result = StripPrefixedPath.maybeDeprefix("bar/baz".getBytes(UTF_8), Optional.of("foo")); assertThat(result.foundPrefix()).isFalse(); - result = StripPrefixedPath.maybeDeprefix("foof/bar", Optional.of("foo")); + result = StripPrefixedPath.maybeDeprefix("foof/bar".getBytes(UTF_8), Optional.of("foo")); assertThat(result.foundPrefix()).isFalse(); } @Test public void testAbsolute() { - StripPrefixedPath result = StripPrefixedPath.maybeDeprefix( - "/foo/bar", Optional.absent()); + StripPrefixedPath result = + StripPrefixedPath.maybeDeprefix("/foo/bar".getBytes(UTF_8), Optional.empty()); assertThat(result.getPathFragment()).isEqualTo(PathFragment.create("foo/bar")); - result = StripPrefixedPath.maybeDeprefix("///foo/bar/baz", Optional.absent()); + result = StripPrefixedPath.maybeDeprefix("///foo/bar/baz".getBytes(UTF_8), Optional.empty()); assertThat(result.getPathFragment()).isEqualTo(PathFragment.create("foo/bar/baz")); - result = StripPrefixedPath.maybeDeprefix("/foo/bar/baz", Optional.of("/foo")); + result = 
StripPrefixedPath.maybeDeprefix("/foo/bar/baz".getBytes(UTF_8), Optional.of("/foo")); assertThat(result.getPathFragment()).isEqualTo(PathFragment.create("bar/baz")); } @@ -66,21 +68,21 @@ public void testWindowsAbsolute() { if (OS.getCurrent() != OS.WINDOWS) { return; } - StripPrefixedPath result = StripPrefixedPath.maybeDeprefix( - "c:/foo/bar", Optional.absent()); + StripPrefixedPath result = + StripPrefixedPath.maybeDeprefix("c:/foo/bar".getBytes(UTF_8), Optional.empty()); assertThat(result.getPathFragment()).isEqualTo(PathFragment.create("foo/bar")); } @Test public void testNormalize() { - StripPrefixedPath result = StripPrefixedPath.maybeDeprefix( - "../bar", Optional.absent()); + StripPrefixedPath result = + StripPrefixedPath.maybeDeprefix("../bar".getBytes(UTF_8), Optional.empty()); assertThat(result.getPathFragment()).isEqualTo(PathFragment.create("../bar")); - result = StripPrefixedPath.maybeDeprefix("foo/../baz", Optional.absent()); + result = StripPrefixedPath.maybeDeprefix("foo/../baz".getBytes(UTF_8), Optional.empty()); assertThat(result.getPathFragment()).isEqualTo(PathFragment.create("baz")); - result = StripPrefixedPath.maybeDeprefix("foo/../baz", Optional.of("foo")); + result = StripPrefixedPath.maybeDeprefix("foo/../baz".getBytes(UTF_8), Optional.of("foo")); assertThat(result.getPathFragment()).isEqualTo(PathFragment.create("baz")); } @@ -91,23 +93,23 @@ public void testDeprefixSymlink() { PathFragment relativeNoPrefix = StripPrefixedPath.maybeDeprefixSymlink( - PathFragment.create("a/b"), Optional.absent(), fileSystem.getPath("/usr")); + "a/b".getBytes(UTF_8), Optional.empty(), fileSystem.getPath("/usr")); // there is no attempt to get absolute path for the relative symlinks target path assertThat(relativeNoPrefix).isEqualTo(PathFragment.create("a/b")); PathFragment absoluteNoPrefix = StripPrefixedPath.maybeDeprefixSymlink( - PathFragment.create("/a/b"), Optional.absent(), fileSystem.getPath("/usr")); + "/a/b".getBytes(UTF_8), Optional.empty(), 
fileSystem.getPath("/usr")); assertThat(absoluteNoPrefix).isEqualTo(PathFragment.create("/usr/a/b")); PathFragment absolutePrefix = StripPrefixedPath.maybeDeprefixSymlink( - PathFragment.create("/root/a/b"), Optional.of("root"), fileSystem.getPath("/usr")); + "/root/a/b".getBytes(UTF_8), Optional.of("root"), fileSystem.getPath("/usr")); assertThat(absolutePrefix).isEqualTo(PathFragment.create("/usr/a/b")); PathFragment relativePrefix = StripPrefixedPath.maybeDeprefixSymlink( - PathFragment.create("root/a/b"), Optional.of("root"), fileSystem.getPath("/usr")); + "root/a/b".getBytes(UTF_8), Optional.of("root"), fileSystem.getPath("/usr")); // there is no attempt to get absolute path for the relative symlinks target path assertThat(relativePrefix).isEqualTo(PathFragment.create("a/b")); } diff --git a/src/test/shell/bazel/bazel_workspaces_test.sh b/src/test/shell/bazel/bazel_workspaces_test.sh index a33b443f17c143..28079bed1dd68f 100755 --- a/src/test/shell/bazel/bazel_workspaces_test.sh +++ b/src/test/shell/bazel/bazel_workspaces_test.sh @@ -451,6 +451,102 @@ function test_extract_rename_files() { ensure_output_contains_exactly_once "external/repo/out_dir/renamed-A.txt" "Second file: A" } +# Regression test for https://github.com/bazelbuild/bazel/issues/12986 +# Verifies that tar entries with PAX headers, which are always encoded in UTF-8, are extracted +# correctly. 
+function test_extract_pax_tar_non_ascii_utf8_file_names() { + local archive_tar="${TEST_TMPDIR}/pax.tar" + + pushd "${TEST_TMPDIR}" + mkdir "Ä_pax_∅" + echo "bar" > "Ä_pax_∅/Ä_foo_∅.txt" + tar --format=pax -cvf pax.tar "Ä_pax_∅" + popd + + set_workspace_command " + repository_ctx.extract('${archive_tar}', 'out_dir', 'Ä_pax_∅/')" + + build_and_process_log --exclude_rule "repository @local_config_cc" + + ensure_contains_exactly 'location: .*repos.bzl:3:25' 1 + ensure_contains_atleast 'context: "repository @repo"' 2 + ensure_contains_exactly 'extract_event' 1 + + ensure_output_contains_exactly_once "external/repo/out_dir/Ä_foo_∅.txt" "bar" +} + +# Verifies that tar entries with USTAR headers, for which an encoding isn't specified, are extracted +# correctly if that encoding happens to be UTF-8. +function test_extract_ustar_tar_non_ascii_utf8_file_names() { + local archive_tar="${TEST_TMPDIR}/ustar.tar" + + pushd "${TEST_TMPDIR}" + mkdir "Ä_ustar_∅" + echo "bar" > "Ä_ustar_∅/Ä_foo_∅.txt" + tar --format=ustar -cvf ustar.tar "Ä_ustar_∅" + popd + + set_workspace_command " + repository_ctx.extract('${archive_tar}', 'out_dir', 'Ä_ustar_∅/')" + + build_and_process_log --exclude_rule "repository @local_config_cc" + + ensure_contains_exactly 'location: .*repos.bzl:3:25' 1 + ensure_contains_atleast 'context: "repository @repo"' 2 + ensure_contains_exactly 'extract_event' 1 + + ensure_output_contains_exactly_once "external/repo/out_dir/Ä_foo_∅.txt" "bar" +} + +# Verifies that tar entries with USTAR headers, for which an encoding isn't specified, are extracted +# correctly if that encoding is not UTF-8. 
+function test_extract_ustar_tar_non_ascii_non_utf8_file_names() { + if is_darwin; then + echo "Skipping test on macOS due to lack of support for non-UTF-8 filenames" + return + fi + + local archive_tar="${TEST_TMPDIR}/ustar.tar" + + pushd "${TEST_TMPDIR}" + mkdir "Ä_ustar_latin1_∅" + echo "bar" > "$(echo -e 'Ä_ustar_latin1_∅/\xC4_foo_latin1_\xD6.txt')" + tar --format=ustar -cvf ustar.tar "Ä_ustar_latin1_∅" + popd + + set_workspace_command " + repository_ctx.extract('${archive_tar}', 'out_dir', 'Ä_ustar_latin1_∅/')" + + build_and_process_log --exclude_rule "repository @local_config_cc" + + ensure_contains_exactly 'location: .*repos.bzl:3:25' 1 + ensure_contains_atleast 'context: "repository @repo"' 2 + ensure_contains_exactly 'extract_event' 1 + + ensure_output_contains_exactly_once "$(echo -e 'external/repo/out_dir/\xC4_foo_latin1_\xD6.txt')" "bar" +} + +function test_extract_default_zip_non_ascii_utf8_file_names() { + local archive_tar="${TEST_TMPDIR}/default.zip" + + pushd "${TEST_TMPDIR}" + mkdir "Ä_default_∅" + echo "bar" > "Ä_default_∅/Ä_foo_∅.txt" + zip default.zip -r "Ä_default_∅" + popd + + set_workspace_command " + repository_ctx.extract('${archive_tar}', 'out_dir', 'Ä_default_∅/')" + + build_and_process_log --exclude_rule "repository @local_config_cc" + + ensure_contains_exactly 'location: .*repos.bzl:3:25' 1 + ensure_contains_atleast 'context: "repository @repo"' 2 + ensure_contains_exactly 'extract_event' 1 + + ensure_output_contains_exactly_once "external/repo/out_dir/Ä_foo_∅.txt" "bar" +} + function test_file() { set_workspace_command 'repository_ctx.file("filefile.sh", "echo filefile", True)' diff --git a/src/test/shell/integration/minimal_jdk_test.sh b/src/test/shell/integration/minimal_jdk_test.sh index e66e0c83d96f90..82f40fd9e63a94 100755 --- a/src/test/shell/integration/minimal_jdk_test.sh +++ b/src/test/shell/integration/minimal_jdk_test.sh @@ -42,13 +42,13 @@ export BAZEL_SUFFIX="_jdk_minimal" source "$(rlocation 
"io_bazel/src/test/shell/integration_test_setup.sh")" \ || { echo "integration_test_setup.sh not found!" >&2; exit 1; } -# Bazel's install base is < 366MB with minimal JDK and > 366MB with an all +# Bazel's install base is < 370MB with minimal JDK and > 370MB with an all # modules JDK. -function test_size_less_than_366MB() { +function test_size_less_than_370MB() { bazel info ib=$(bazel info install_base) size=$(du -s "$ib" | cut -d\ -f1) - maxsize=$((1024*366)) + maxsize=$((1024*370)) if [ $size -gt $maxsize ]; then echo "$ib was too big:" 1>&2 du -a "$ib" 1>&2