Skip to content

Commit

Permalink
Fix various UTF-8 encoding bugs
Browse files Browse the repository at this point in the history
Fixes missing reencodings from Bazel's internal representation to UTF-8 or vice versa:
* environment variables for locally executed spawns when the JVM uses a UTF-8 locale
* output file paths for remote cache uploads
* symlink targets

Also renames the encoding/decoding methods to make it more clear which one is correct for which situation.

Closes #23421.

PiperOrigin-RevId: 668825046
Change-Id: Ifd88c5eaa217c0698bd453b0554c8bee80994d13
  • Loading branch information
fmeum authored and copybara-github committed Aug 29, 2024
1 parent c5c5db4 commit d91e05e
Show file tree
Hide file tree
Showing 11 changed files with 213 additions and 111 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -171,7 +171,7 @@ private static void writeContentUtf8(OutputStream outputStream, Iterable<String>
byte[] bytes = stringUnsafe.getByteArray(line);
if (stringUnsafe.getCoder(line) == StringUnsafe.LATIN1 && isAscii(bytes)) {
outputStream.write(bytes);
} else if (!StringUtil.decodeBytestringUtf8(line).equals(line)) {
} else if (!StringUtil.reencodeInternalToExternal(line).equals(line)) {
// We successfully decoded line from utf8 - meaning it was already encoded as utf8.
// We do not want to double-encode.
outputStream.write(bytes);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
package com.google.devtools.build.lib.query2.aquery;

import static com.google.common.collect.ImmutableList.toImmutableList;
import static com.google.devtools.build.lib.util.StringUtil.decodeBytestringUtf8;
import static com.google.devtools.build.lib.util.StringUtil.reencodeInternalToExternal;
import static java.nio.charset.StandardCharsets.UTF_8;

import com.google.common.collect.ImmutableList;
Expand Down Expand Up @@ -373,7 +373,7 @@ public static String escapeBytestringUtf8(String maybeUtf8) {
return maybeUtf8;
}

final String decoded = decodeBytestringUtf8(maybeUtf8);
final String decoded = reencodeInternalToExternal(maybeUtf8);
final StringBuilder sb = new StringBuilder(decoded.length() * 8);
decoded
.codePoints()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,8 @@
import static com.google.devtools.build.lib.remote.util.Utils.grpcAwareErrorMessage;
import static com.google.devtools.build.lib.remote.util.Utils.shouldUploadLocalResultsToRemoteCache;
import static com.google.devtools.build.lib.remote.util.Utils.waitForBulkTransfer;
import static com.google.devtools.build.lib.util.StringUtil.decodeBytestringUtf8;
import static com.google.devtools.build.lib.util.StringUtil.encodeBytestringUtf8;
import static com.google.devtools.build.lib.util.StringUtil.reencodeExternalToInternal;
import static com.google.devtools.build.lib.util.StringUtil.reencodeInternalToExternal;

import build.bazel.remote.execution.v2.Action;
import build.bazel.remote.execution.v2.ActionResult;
Expand All @@ -53,7 +53,6 @@
import com.github.benmanes.caffeine.cache.Caffeine;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Preconditions;
import com.google.common.base.Strings;
import com.google.common.base.Throwables;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
Expand Down Expand Up @@ -250,7 +249,8 @@ private Command buildCommand(
ArrayList<String> outputDirectories = new ArrayList<>();
ArrayList<String> outputPaths = new ArrayList<>();
for (ActionInput output : outputs) {
String pathString = decodeBytestringUtf8(remotePathResolver.localPathToOutputPath(output));
String pathString =
reencodeInternalToExternal(remotePathResolver.localPathToOutputPath(output));
if (output.isDirectory()) {
outputDirectories.add(pathString);
} else {
Expand All @@ -272,22 +272,18 @@ private Command buildCommand(
if (spawnScrubber != null) {
arg = spawnScrubber.transformArgument(arg);
}
command.addArguments(decodeBytestringUtf8(arg));
command.addArguments(reencodeInternalToExternal(arg));
}
// Sorting the environment pairs by variable name.
TreeSet<String> variables = new TreeSet<>(env.keySet());
for (String var : variables) {
command
.addEnvironmentVariablesBuilder()
.setName(decodeBytestringUtf8(var))
.setValue(decodeBytestringUtf8(env.get(var)));
.setName(reencodeInternalToExternal(var))
.setValue(reencodeInternalToExternal(env.get(var)));
}

String workingDirectory = remotePathResolver.getWorkingDirectory();
if (!Strings.isNullOrEmpty(workingDirectory)) {
command.setWorkingDirectory(decodeBytestringUtf8(workingDirectory));
}
return command.build();
return command.setWorkingDirectory(remotePathResolver.getWorkingDirectory()).build();
}

private static boolean useRemoteCache(RemoteOptions options) {
Expand Down Expand Up @@ -802,12 +798,12 @@ private ListenableFuture<FileMetadata> downloadFile(
ListenableFuture<Void> future =
remoteCache.downloadFile(
context,
remotePathResolver.localPathToOutputPath(file.path()),
reencodeInternalToExternal(remotePathResolver.localPathToOutputPath(file.path())),
tmpPath,
file.digest(),
new RemoteCache.DownloadProgressReporter(
progressStatusListener,
remotePathResolver.localPathToOutputPath(file.path()),
reencodeInternalToExternal(remotePathResolver.localPathToOutputPath(file.path())),
file.digest().getSizeBytes()));
return transform(future, (d) -> file, directExecutor());
} catch (IOException e) {
Expand Down Expand Up @@ -1034,9 +1030,9 @@ ActionResultMetadata parseActionResultMetadata(
Map<Path, ListenableFuture<Tree>> dirMetadataDownloads =
Maps.newHashMapWithExpectedSize(result.getOutputDirectoriesCount());
for (OutputDirectory dir : result.getOutputDirectories()) {
var outputPath = encodeBytestringUtf8(dir.getPath());
var outputPath = dir.getPath();
dirMetadataDownloads.put(
remotePathResolver.outputPathToLocalPath(outputPath),
remotePathResolver.outputPathToLocalPath(reencodeExternalToInternal(outputPath)),
Futures.transformAsync(
remoteCache.downloadBlob(context, outputPath, dir.getTreeDigest()),
(treeBytes) ->
Expand All @@ -1062,7 +1058,8 @@ ActionResultMetadata parseActionResultMetadata(
ImmutableMap.Builder<Path, FileMetadata> files = ImmutableMap.builder();
for (OutputFile outputFile : result.getOutputFiles()) {
Path localPath =
remotePathResolver.outputPathToLocalPath(encodeBytestringUtf8(outputFile.getPath()));
remotePathResolver.outputPathToLocalPath(
reencodeExternalToInternal(outputFile.getPath()));
files.put(
localPath,
new FileMetadata(localPath, outputFile.getDigest(), outputFile.getIsExecutable()));
Expand All @@ -1076,8 +1073,8 @@ ActionResultMetadata parseActionResultMetadata(
result.getOutputSymlinks());
for (var symlink : outputSymlinks) {
var localPath =
remotePathResolver.outputPathToLocalPath(encodeBytestringUtf8(symlink.getPath()));
var target = PathFragment.create(symlink.getTarget());
remotePathResolver.outputPathToLocalPath(reencodeExternalToInternal(symlink.getPath()));
var target = PathFragment.create(reencodeExternalToInternal(symlink.getTarget()));
var existingMetadata = symlinkMap.get(localPath);
if (existingMetadata != null) {
if (!target.equals(existingMetadata.target())) {
Expand Down Expand Up @@ -1463,11 +1460,9 @@ public SpawnResult waitForAndReuseOutputs(RemoteAction action, LocalExecution pr
Map<Path, Path> realToTmpPath = new HashMap<>();
try {
for (String output : action.getCommand().getOutputPathsList()) {
String reencodedOutput = reencodeExternalToInternal(output);
Path sourcePath =
previousExecution
.action
.getRemotePathResolver()
.outputPathToLocalPath(encodeBytestringUtf8(output));
previousExecution.action.getRemotePathResolver().outputPathToLocalPath(reencodedOutput);
ActionInput outputArtifact = previousOutputs.get(sourcePath);
Path tmpPath = tempPathGenerator.generateTempPath();
tmpPath.getParentDirectory().createDirectoryAndParents();
Expand All @@ -1481,8 +1476,7 @@ public SpawnResult waitForAndReuseOutputs(RemoteAction action, LocalExecution pr
FileSystemUtils.copyFile(sourcePath, tmpPath);
}

Path targetPath =
action.getRemotePathResolver().outputPathToLocalPath(encodeBytestringUtf8(output));
Path targetPath = action.getRemotePathResolver().outputPathToLocalPath(reencodedOutput);
realToTmpPath.put(targetPath, tmpPath);
} catch (FileNotFoundException e) {
// The spawn this action was deduplicated against failed to create an output file. If the
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
import static com.google.devtools.build.lib.remote.util.RxFutures.toSingle;
import static com.google.devtools.build.lib.remote.util.RxUtils.mergeBulkTransfer;
import static com.google.devtools.build.lib.remote.util.RxUtils.toTransferResult;
import static com.google.devtools.build.lib.util.StringUtil.reencodeInternalToExternal;
import static java.util.Comparator.comparing;
import static java.util.Comparator.naturalOrder;
import static java.util.Comparator.reverseOrder;
Expand Down Expand Up @@ -337,8 +338,8 @@ public Digest getStderrDigest() {
private void addFileSymbolicLink(Path file, PathFragment target) {
OutputSymlink outputSymlink =
OutputSymlink.newBuilder()
.setPath(remotePathResolver.localPathToOutputPath(file))
.setTarget(target.toString())
.setPath(reencodeInternalToExternal(remotePathResolver.localPathToOutputPath(file)))
.setTarget(reencodeInternalToExternal(target.toString()))
.build();
result.addOutputFileSymlinks(outputSymlink);
result.addOutputSymlinks(outputSymlink);
Expand All @@ -347,17 +348,17 @@ private void addFileSymbolicLink(Path file, PathFragment target) {
private void addDirectorySymbolicLink(Path file, PathFragment target) {
OutputSymlink outputSymlink =
OutputSymlink.newBuilder()
.setPath(remotePathResolver.localPathToOutputPath(file))
.setTarget(target.toString())
.setPath(reencodeInternalToExternal(remotePathResolver.localPathToOutputPath(file)))
.setTarget(reencodeInternalToExternal(target.toString()))
.build();
result.addOutputDirectorySymlinks(outputSymlink);
result.addOutputSymlinks(outputSymlink);
}

private void addFile(Digest digest, Path file) throws IOException {
private void addFile(Digest digest, Path file) {
result
.addOutputFilesBuilder()
.setPath(remotePathResolver.localPathToOutputPath(file))
.setPath(reencodeInternalToExternal(remotePathResolver.localPathToOutputPath(file)))
.setDigest(digest)
.setIsExecutable(true);

Expand Down Expand Up @@ -563,7 +564,7 @@ private void addDirectory(Path dir) throws ExecException, IOException, Interrupt

result
.addOutputDirectoriesBuilder()
.setPath(remotePathResolver.localPathToOutputPath(dir))
.setPath(reencodeInternalToExternal(remotePathResolver.localPathToOutputPath(dir)))
.setTreeDigest(treeDigest)
.setIsTopologicallySorted(true);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
// limitations under the License.
package com.google.devtools.build.lib.remote.merkletree;

import static com.google.devtools.build.lib.util.StringUtil.decodeBytestringUtf8;
import static com.google.devtools.build.lib.util.StringUtil.reencodeInternalToExternal;

import build.bazel.remote.execution.v2.Digest;
import build.bazel.remote.execution.v2.Directory;
Expand Down Expand Up @@ -398,7 +398,7 @@ private static MerkleTree buildMerkleTree(
private static FileNode buildProto(DirectoryTree.FileNode file) {
var node =
FileNode.newBuilder()
.setName(decodeBytestringUtf8(file.getPathSegment()))
.setName(reencodeInternalToExternal(file.getPathSegment()))
.setDigest(file.getDigest())
.setIsExecutable(file.isExecutable());
if (file.isToolInput()) {
Expand All @@ -409,15 +409,15 @@ private static FileNode buildProto(DirectoryTree.FileNode file) {

private static DirectoryNode buildProto(String baseName, MerkleTree dir) {
return DirectoryNode.newBuilder()
.setName(decodeBytestringUtf8(baseName))
.setName(reencodeInternalToExternal(baseName))
.setDigest(dir.getRootDigest())
.build();
}

private static SymlinkNode buildProto(DirectoryTree.SymlinkNode symlink) {
return SymlinkNode.newBuilder()
.setName(decodeBytestringUtf8(symlink.getPathSegment()))
.setTarget(decodeBytestringUtf8(symlink.getTarget()))
.setName(reencodeInternalToExternal(symlink.getPathSegment()))
.setTarget(reencodeInternalToExternal(symlink.getTarget()))
.build();
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,16 +14,15 @@

package com.google.devtools.build.lib.shell;

import static com.google.common.collect.ImmutableList.toImmutableList;

import com.google.common.collect.ImmutableList;
import com.google.common.collect.Lists;
import com.google.devtools.build.lib.shell.SubprocessBuilder.StreamAction;
import com.google.devtools.build.lib.util.StringUtil;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.lang.ProcessBuilder.Redirect;
import java.util.List;
import java.util.Objects;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicBoolean;
Expand Down Expand Up @@ -147,18 +146,33 @@ private synchronized Process start(ProcessBuilder builder) throws IOException {
@Override
public Subprocess create(SubprocessBuilder params) throws IOException {
ProcessBuilder builder = new ProcessBuilder();
ImmutableList<String> argv = params.getArgv();
if (Runtime.version().feature() >= 19
&& Objects.equals(System.getProperty("sun.jnu.encoding"), "UTF-8")) {
// On JDK 19 and newer, java.lang.ProcessImpl#start encodes argv using sun.jnu.encoding, so if
// sun.jnu.encoding is set to UTF-8, our argv needs to be UTF-8. (Note that on some platforms,
// for example on macOS, sun.jnu.encoding is hard-coded in the JVM as UTF-8.)
argv = argv.stream().map(StringUtil::decodeBytestringUtf8).collect(toImmutableList());
// On JDK 19 and newer, java.lang.ProcessImpl#start encodes argv and the environment using
// sun.jnu.encoding, so if sun.jnu.encoding is set to UTF-8, our argv needs to be UTF-8. (Note
// that on some platforms, for example on macOS, sun.jnu.encoding is hard-coded in the JVM as
// UTF-8.)
boolean reencodeToUtf8 =
Runtime.version().feature() >= 19
&& Objects.equals(System.getProperty("sun.jnu.encoding"), "UTF-8");
List<String> argv = params.getArgv();
if (reencodeToUtf8) {
argv = Lists.transform(argv, StringUtil::reencodeInternalToExternal);
}
builder.command(argv);
if (params.getEnv() != null) {
builder.environment().clear();
builder.environment().putAll(params.getEnv());
if (reencodeToUtf8) {
params
.getEnv()
.forEach(
(key, value) ->
builder
.environment()
.put(
StringUtil.reencodeInternalToExternal(key),
StringUtil.reencodeInternalToExternal(value)));
} else {
builder.environment().putAll(params.getEnv());
}
}

builder.redirectOutput(getRedirect(params.getStdout(), params.getStdoutFile()));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -138,7 +138,7 @@ public static String ordinal(int number) {
* <p>The return value is suitable for passing to Protobuf string fields or printing to the
* terminal.
*/
public static String decodeBytestringUtf8(String maybeUtf8) {
public static String reencodeInternalToExternal(String maybeUtf8) {
if (maybeUtf8.chars().allMatch(c -> c < 128)) {
return maybeUtf8;
}
Expand Down Expand Up @@ -173,9 +173,9 @@ public static String decodeBytestringUtf8(String maybeUtf8) {
*
* <p>encodeBytestringUtf8("\u2049") == "\u00E2\u0081\u0089"
*
* <p>See {@link #decodeBytestringUtf8} for motivation.
* <p>See {@link #reencodeInternalToExternal} for motivation.
*/
public static String encodeBytestringUtf8(String unicode) {
public static String reencodeExternalToInternal(String unicode) {
if (unicode.chars().allMatch(c -> c < 128)) {
return unicode;
}
Expand Down
1 change: 1 addition & 0 deletions src/test/java/com/google/devtools/build/lib/remote/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,7 @@ java_library(
"//src/main/java/com/google/devtools/build/lib/util:abrupt_exit_exception",
"//src/main/java/com/google/devtools/build/lib/util:exit_code",
"//src/main/java/com/google/devtools/build/lib/util:os",
"//src/main/java/com/google/devtools/build/lib/util:string",
"//src/main/java/com/google/devtools/build/lib/util:temp_path_generator",
"//src/main/java/com/google/devtools/build/lib/util/io",
"//src/main/java/com/google/devtools/build/lib/util/io:io-proto",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
import static com.google.devtools.build.lib.actions.ExecutionRequirements.REMOTE_EXECUTION_INLINE_OUTPUTS;
import static com.google.devtools.build.lib.remote.util.DigestUtil.toBinaryDigest;
import static com.google.devtools.build.lib.remote.util.Utils.getFromFuture;
import static com.google.devtools.build.lib.util.StringUtil.reencodeExternalToInternal;
import static com.google.devtools.build.lib.vfs.FileSystemUtils.readContent;
import static java.nio.charset.StandardCharsets.UTF_8;
import static org.junit.Assert.assertThrows;
Expand Down Expand Up @@ -1546,7 +1547,7 @@ public void downloadOutputs_missingMandatoryOutputs_reportError() throws Excepti
ImmutableSet.Builder<Artifact> outputs = ImmutableSet.builder();
ImmutableList<String> expectedOutputFiles = ImmutableList.of("outputs/foo", "outputs/bar");
for (String outputFile : expectedOutputFiles) {
Path path = remotePathResolver.outputPathToLocalPath(outputFile);
Path path = remotePathResolver.outputPathToLocalPath(reencodeExternalToInternal(outputFile));
Artifact output = ActionsTestUtil.createArtifact(artifactRoot, path);
outputs.add(output);
}
Expand Down Expand Up @@ -2406,35 +2407,42 @@ private Spawn newSpawnFromResult(
ImmutableMap<String, String> executionInfo, RemoteActionResult result) {
ImmutableSet.Builder<Artifact> outputs = ImmutableSet.builder();
for (OutputFile file : result.getOutputFiles()) {
Path path = remotePathResolver.outputPathToLocalPath(file.getPath());
Path path =
remotePathResolver.outputPathToLocalPath(reencodeExternalToInternal(file.getPath()));
Artifact output = ActionsTestUtil.createArtifact(artifactRoot, path);
outputs.add(output);
}

for (OutputDirectory directory : result.getOutputDirectories()) {
Path path = remotePathResolver.outputPathToLocalPath(directory.getPath());
Path path =
remotePathResolver.outputPathToLocalPath(reencodeExternalToInternal(directory.getPath()));
Artifact output =
ActionsTestUtil.createTreeArtifactWithGeneratingAction(
artifactRoot, path.relativeTo(execRoot));
outputs.add(output);
}

for (OutputSymlink fileSymlink : result.getOutputFileSymlinks()) {
Path path = remotePathResolver.outputPathToLocalPath(fileSymlink.getPath());
Path path =
remotePathResolver.outputPathToLocalPath(
reencodeExternalToInternal(fileSymlink.getPath()));
Artifact output = ActionsTestUtil.createArtifact(artifactRoot, path);
outputs.add(output);
}

for (OutputSymlink directorySymlink : result.getOutputDirectorySymlinks()) {
Path path = remotePathResolver.outputPathToLocalPath(directorySymlink.getPath());
Path path =
remotePathResolver.outputPathToLocalPath(
reencodeExternalToInternal(directorySymlink.getPath()));
Artifact output =
ActionsTestUtil.createTreeArtifactWithGeneratingAction(
artifactRoot, path.relativeTo(execRoot));
outputs.add(output);
}

for (OutputSymlink symlink : result.getOutputSymlinks()) {
Path path = remotePathResolver.outputPathToLocalPath(symlink.getPath());
Path path =
remotePathResolver.outputPathToLocalPath(reencodeExternalToInternal(symlink.getPath()));
Artifact output = ActionsTestUtil.createArtifact(artifactRoot, path);
outputs.add(output);
}
Expand Down
Loading

0 comments on commit d91e05e

Please sign in to comment.