diff --git a/src/BUILD b/src/BUILD
index 1830c0e07990a8..8d5bbb61ad6bd0 100644
--- a/src/BUILD
+++ b/src/BUILD
@@ -179,20 +179,43 @@ filegroup(
     visibility = ["//src/test/shell/bazel:__pkg__"],
 )
 
+# This program patches the app manifest of the java.exe launcher to force its
+# active code page to UTF-8 on Windows 1903 and later, which is required for
+# proper support of Unicode characters outside the system code page.
+# The JDK currently (as of JDK 23) doesn't support this natively:
+# https://mail.openjdk.org/pipermail/core-libs-dev/2024-November/133773.html
+cc_binary(
+    name = "patch_java_manifest_for_utf8",
+    srcs = ["patch_java_manifest_for_utf8.cc"],
+    tags = ["manual"],
+    target_compatible_with = ["@platforms//os:windows"],
+)
+
 sh_binary(
     name = "minimize_jdk",
     srcs = ["minimize_jdk.sh"],
+    data = select({
+        "@platforms//os:windows": [
+            ":patch_java_manifest_for_utf8",
+        ],
+        "//conditions:default": [],
+    }),
+    deps = [
+        "@bazel_tools//tools/bash/runfiles",
+    ],
 )
 
 genrule(
     name = "embedded_jdk_minimal",
     srcs = [
         ":embedded_jdk_vanilla",
-        ":minimize_jdk.sh",
         ":jdeps_modules.golden",
     ],
     outs = ["minimal_jdk.zip"],
-    cmd = "$(location :minimize_jdk.sh) $(location :embedded_jdk_vanilla) $(location :jdeps_modules.golden) $(OUTS)",
+    cmd = "$(location :minimize_jdk) $(location :embedded_jdk_vanilla) $(location :jdeps_modules.golden) $(OUTS)",
+    tools = [
+        ":minimize_jdk",
+    ],
     visibility = ["//src/test/shell/bazel:__pkg__"],
 )
 
@@ -200,11 +223,13 @@ genrule(
     name = "embedded_jdk_allmodules",
     srcs = [
         ":embedded_jdk_vanilla",
-        ":minimize_jdk.sh",
         ":jdeps_modules.golden",
     ],
     outs = ["allmodules_jdk.zip"],
-    cmd = "$(location :minimize_jdk.sh) --allmodules $(location :embedded_jdk_vanilla) $(location :jdeps_modules.golden) $(OUTS)",
+    cmd = "$(location :minimize_jdk) --allmodules $(location :embedded_jdk_vanilla) $(location :jdeps_modules.golden) $(OUTS)",
+    tools = [
+        ":minimize_jdk",
+    ],
     visibility = ["//src/test/shell/bazel:__pkg__"],
 )
 
diff --git a/src/minimize_jdk.sh b/src/minimize_jdk.sh
index afeeb65a1c64fb..06e0ad17438015 100755
--- a/src/minimize_jdk.sh
+++ b/src/minimize_jdk.sh
@@ -17,7 +17,17 @@
 # This script creates from the full JDK a minimized version that only contains
 # the specified JDK modules.
 
-set -euo pipefail
+# --- begin runfiles.bash initialization v3 ---
+# Copy-pasted from the Bazel Bash runfiles library v3.
+set -uo pipefail; set +e; f=bazel_tools/tools/bash/runfiles/runfiles.bash
+# shellcheck disable=SC1090
+source "${RUNFILES_DIR:-/dev/null}/$f" 2>/dev/null || \
+  source "$(grep -sm1 "^$f " "${RUNFILES_MANIFEST_FILE:-/dev/null}" | cut -f2- -d' ')" 2>/dev/null || \
+  source "$0.runfiles/$f" 2>/dev/null || \
+  source "$(grep -sm1 "^$f " "$0.runfiles_manifest" | cut -f2- -d' ')" 2>/dev/null || \
+  source "$(grep -sm1 "^$f " "$0.exe.runfiles_manifest" | cut -f2- -d' ')" 2>/dev/null || \
+  { echo>&2 "ERROR: cannot find $f"; exit 1; }; f=; set -e
+# --- end runfiles.bash initialization v3 ---
 
 if [ "$1" == "--allmodules" ]; then
   shift
@@ -54,6 +64,7 @@ if [[ "$UNAME" =~ msys_nt* ]]; then
   ./bin/jlink --module-path ./jmods/ --add-modules "$modules" \
     --vm=server --strip-debug --no-man-pages \
     --output reduced
+  "$(rlocation "io_bazel/src/patch_java_manifest_for_utf8.exe")" reduced/bin/java.exe
   cp $DOCS legal/java.base/ASSEMBLY_EXCEPTION \
     reduced/
   # These are necessary for --host_jvm_debug to work.
diff --git a/src/patch_java_manifest_for_utf8.cc b/src/patch_java_manifest_for_utf8.cc
new file mode 100644
index 00000000000000..0a3b6d9ce597e3
--- /dev/null
+++ b/src/patch_java_manifest_for_utf8.cc
@@ -0,0 +1,168 @@
+// Copyright 2024 The Bazel Authors. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <stdio.h>
+#include <wchar.h>
+
+#include <string>
+
+#define WIN32_LEAN_AND_MEAN
+#include <windows.h>
+
+// This program patches the app manifest of the java.exe launcher to force its
+// active code page to UTF-8 on Windows 1903 and later.
+// https://learn.microsoft.com/en-us/windows/apps/design/globalizing/use-utf8-code-page#set-a-process-code-page-to-utf-8
+//
+// This is necessary because the launcher sets sun.jnu.encoding to the system
+// code page, which by default is a legacy code page such as Cp1252 on Windows.
+// This causes the JVM to be unable to interact with files whose paths contain
+// Unicode characters not representable in the system code page, as well as
+// command-line arguments and environment variables containing such characters.
+//
+// Usage in the libjava.dll code:
+// https://github.com/openjdk/jdk/blob/e7f0bf11ff0e89b6b156d5e88ca3771c706aa46a/src/java.base/windows/native/libjava/java_props_md.c#L63-L65
+
+namespace {
+
+// End tag of the manifest element that the new setting is appended to.
+constexpr char kWindowsSettingsEndTag[] = "</windowsSettings>";
+
+// Name of the setting; used to detect a manifest that already declares it.
+constexpr char kActiveCodePageName[] = "activeCodePage";
+
+// The element to insert. The namespace is the one Microsoft documents for the
+// activeCodePage setting (see the link at the top of this file).
+constexpr char kActiveCodePageElement[] =
+    "<activeCodePage "
+    "xmlns=\"http://schemas.microsoft.com/SMI/2019/WindowsSettings\">"
+    "UTF-8</activeCodePage>";
+
+// Owns a module handle and frees it on scope exit, so that every return path
+// unloads the executable before it is reopened for update.
+class ScopedModule {
+ public:
+  explicit ScopedModule(HMODULE module) : module_(module) {}
+  ~ScopedModule() {
+    if (module_) {
+      FreeLibrary(module_);
+    }
+  }
+  ScopedModule(const ScopedModule &) = delete;
+  ScopedModule &operator=(const ScopedModule &) = delete;
+
+  HMODULE get() const { return module_; }
+
+ private:
+  HMODULE module_;
+};
+
+// Reads the app manifest (aka side-by-side or fusion manifest) of the
+// executable at `path` into `manifest`. On failure, prints a diagnostic to
+// stderr and returns false.
+bool ReadManifest(const wchar_t *path, std::string *manifest) {
+  // Reading a resource requires loading the executable as a "module".
+  const ScopedModule exe(
+      LoadLibraryExW(path, nullptr, LOAD_LIBRARY_AS_DATAFILE));
+  if (!exe.get()) {
+    fwprintf(stderr, L"Error loading file %ls: %lu\n", path, GetLastError());
+    return false;
+  }
+  HRSRC resource = FindResource(exe.get(), MAKEINTRESOURCE(1), RT_MANIFEST);
+  if (!resource) {
+    fwprintf(stderr, L"Resource not found: %lu\n", GetLastError());
+    return false;
+  }
+  HGLOBAL handle = LoadResource(exe.get(), resource);
+  if (!handle) {
+    fwprintf(stderr, L"Error loading resource: %lu\n", GetLastError());
+    return false;
+  }
+  const void *data = LockResource(handle);
+  if (!data) {
+    fwprintf(stderr, L"Error locking resource: %lu\n", GetLastError());
+    return false;
+  }
+  const DWORD size = SizeofResource(exe.get(), resource);
+  if (size == 0) {
+    fwprintf(stderr, L"Error sizing resource: %lu\n", GetLastError());
+    return false;
+  }
+  // Copy the bytes out while the module is still loaded.
+  manifest->assign(static_cast<const char *>(data), size);
+  return true;
+}
+
+}  // namespace
+
+// Patches the manifest of the executable named by the single command-line
+// argument in place. Returns 0 on success, including when the manifest already
+// selects UTF-8, and 1 on any error.
+int wmain(int argc, wchar_t *argv[]) {
+  if (argc != 2) {
+    fwprintf(stderr, L"Usage: %ls <path to java.exe>\n", argv[0]);
+    return 1;
+  }
+  const wchar_t *path = argv[1];
+
+  std::string manifest;
+  if (!ReadManifest(path, &manifest)) {
+    return 1;
+  }
+
+  // Never insert a second activeCodePage element: leave a manifest that
+  // already selects UTF-8 untouched and refuse to override any other value.
+  const std::size_t existing_pos = manifest.find(kActiveCodePageName);
+  if (existing_pos != std::string::npos) {
+    if (manifest.find(">UTF-8<", existing_pos) != std::string::npos) {
+      return 0;
+    }
+    fwprintf(stderr, L"Manifest sets another active code page:\n%hs\n",
+             manifest.c_str());
+    return 1;
+  }
+
+  // Insert the activeCodePage element into the manifest at the end of the
+  // windowsSettings element.
+  // https://github.com/openjdk/jdk/blob/29882bfe7b7e76446a96862cd0a5e81c7e054415/src/java.base/windows/native/launcher/java.manifest#L43
+  const std::size_t insert_pos = manifest.find(kWindowsSettingsEndTag);
+  if (insert_pos == std::string::npos) {
+    fwprintf(stderr, L"End tag not found in manifest:\n%hs", manifest.c_str());
+    return 1;
+  }
+  std::string new_manifest = manifest;
+  new_manifest.insert(insert_pos, kActiveCodePageElement);
+
+  // Write back the modified app manifest.
+  HANDLE update_handle = BeginUpdateResourceW(path, FALSE);
+  if (!update_handle) {
+    fwprintf(stderr, L"Error opening file %ls for update: %lu\n", path,
+             GetLastError());
+    return 1;
+  }
+  if (!UpdateResource(update_handle, RT_MANIFEST, MAKEINTRESOURCE(1),
+                      MAKELANGID(LANG_NEUTRAL, SUBLANG_NEUTRAL),
+                      &new_manifest[0],
+                      static_cast<DWORD>(new_manifest.size()))) {
+    fwprintf(stderr, L"Error updating resource: %lu\n", GetLastError());
+    // Discard the pending update instead of abandoning the handle.
+    EndUpdateResourceW(update_handle, TRUE);
+    return 1;
+  }
+  if (!EndUpdateResourceW(update_handle, FALSE)) {
+    fwprintf(stderr, L"Error finalizing update: %lu\n", GetLastError());
+    return 1;
+  }
+
+  return 0;
+}
diff --git a/src/test/shell/bazel/BUILD b/src/test/shell/bazel/BUILD
index 256c8f4ae4043d..4295ef2c8bf9df 100644
--- a/src/test/shell/bazel/BUILD
+++ b/src/test/shell/bazel/BUILD
@@ -726,6 +726,7 @@ sh_test(
 genquery(
     name = "embedded_tools_deps",
     expression = "kind(\"cc_(binary|library)\", deps(//src:embedded_tools_jdk_allmodules_srcs))",
+    opts = ["--notool_deps"],
     scope = ["//src:embedded_tools_jdk_allmodules_srcs"],
 )
 