diff --git a/aarch64_linux/aarch64_wheel_ci_build.py b/aarch64_linux/aarch64_wheel_ci_build.py index d24b6f2fd..a57dab545 100755 --- a/aarch64_linux/aarch64_wheel_ci_build.py +++ b/aarch64_linux/aarch64_wheel_ci_build.py @@ -106,6 +106,10 @@ def parse_arguments(): else: print("build pytorch without mkldnn backend") + # patch mkldnn to fix aarch64 mac and aws lambda crash + print("Applying mkl-dnn patch to fix crash due to /sys not accesible") + os.system("cd /pytorch/third_party/ideep/mkl-dnn && patch -p1 < /builder/mkldnn_fix/fix-xbyak-failure.patch") + os.system(f"cd /pytorch; {build_vars} python3 setup.py bdist_wheel") pytorch_wheel_name = complete_wheel("pytorch") print(f"Build Compelete. Created {pytorch_wheel_name}..") diff --git a/aarch64_linux/build_aarch64_wheel.py b/aarch64_linux/build_aarch64_wheel.py index 0ff286ad2..1615c78a6 100755 --- a/aarch64_linux/build_aarch64_wheel.py +++ b/aarch64_linux/build_aarch64_wheel.py @@ -556,6 +556,8 @@ def start_build(host: RemoteHost, *, build_ArmComputeLibrary(host, git_clone_flags) print("build pytorch with mkldnn+acl backend") build_vars += " USE_MKLDNN=ON USE_MKLDNN_ACL=ON" + host.run_cmd("cd $HOME && git clone https://github.com/pytorch/builder.git") + host.run_cmd("cd $HOME/pytorch/third_party/ideep/mkl-dnn && patch -p1 < $HOME/builder/mkldnn_fix/fix-xbyak-failure.patch") # noqa: E501 host.run_cmd(f"cd $HOME/pytorch && export ACL_ROOT_DIR=$HOME/ComputeLibrary && {build_vars} python3 setup.py bdist_wheel{build_opts}") # noqa: E501 print('Repair the wheel') pytorch_wheel_name = host.list_dir("pytorch/dist")[0] diff --git a/mkldnn_fix/fix-xbyak-failure.patch b/mkldnn_fix/fix-xbyak-failure.patch new file mode 100644 index 000000000..2ad278f0b --- /dev/null +++ b/mkldnn_fix/fix-xbyak-failure.patch @@ -0,0 +1,96 @@ +cpu: aarch64: fix xbyak functions for /sys access failures + +There are platforms with /sys not mounted. skip handling HW caps +for such platforms. + +This fixes the issue# pytorch/pytorch#115482 +--- + .../xbyak_aarch64/src/util_impl_linux.h | 24 ++++++++++++++----- + .../aarch64/xbyak_aarch64/src/util_impl_mac.h | 9 ++++--- + 2 files changed, 24 insertions(+), 9 deletions(-) + +diff --git a/src/cpu/aarch64/xbyak_aarch64/src/util_impl_linux.h b/src/cpu/aarch64/xbyak_aarch64/src/util_impl_linux.h +index 2c7b28e58b..860a05700f 100644 +--- a/src/cpu/aarch64/xbyak_aarch64/src/util_impl_linux.h ++++ b/src/cpu/aarch64/xbyak_aarch64/src/util_impl_linux.h +@@ -144,8 +144,13 @@ private: + regex_t regexBuf; + regmatch_t match[1]; + +- if (regcomp(®exBuf, regex, REG_EXTENDED) != 0) +- throw ERR_INTERNAL; ++ if (regcomp(®exBuf, regex, REG_EXTENDED) != 0) { ++ /* There are platforms with /sys not mounted. return empty buffers ++ * in these scenarios ++ */ ++ buf[0] = '\0'; ++ return 0; ++ } + + const int retVal = regexec(®exBuf, path, 1, match, 0); + regfree(®exBuf); +@@ -187,8 +192,12 @@ private: + regex_t regexBuf; + regmatch_t match[2]; + +- if (regcomp(®exBuf, "index[0-9]*$", REG_EXTENDED) != 0) +- throw ERR_INTERNAL; ++ if (regcomp(®exBuf, "index[0-9]*$", REG_EXTENDED) != 0) { ++ /* There are platforms with /sys not mounted. return gracefully ++ * in these scenarios ++ */ ++ goto init_and_return_false; ++ } + + if (regexec(®exBuf, dp->d_name, 1, match, 0) == 0) { // Found index[1-9][0-9]. directory + char *dir_name = buf0; +@@ -438,12 +447,15 @@ private: + + FILE *file = fopen(path_midr_el1, "r"); + if (file == nullptr) { +- throw Error(ERR_INTERNAL); ++ /* There are platforms with /sys not mounted. return empty buffer ++ * in these scenarios ++ */ ++ cacheInfo_.midr_el1 = 0xFE << 24; + return; + } + + if (fread(buf, sizeof(char), 64, file) == 0) { +- throw Error(ERR_INTERNAL); ++ cacheInfo_.midr_el1 = 0xFE << 24; + return; + } + +diff --git a/src/cpu/aarch64/xbyak_aarch64/src/util_impl_mac.h b/src/cpu/aarch64/xbyak_aarch64/src/util_impl_mac.h +index ebd6dba7c0..93bdae1d7a 100644 +--- a/src/cpu/aarch64/xbyak_aarch64/src/util_impl_mac.h ++++ b/src/cpu/aarch64/xbyak_aarch64/src/util_impl_mac.h +@@ -102,18 +102,21 @@ private: + size_t val = 0; + size_t len = sizeof(val); + ++ /* There are platforms with /sys not mounted. skip ++ * handling HW caps for such platforms. ++ */ + if (sysctlbyname(hw_opt_atomics, &val, &len, NULL, 0) != 0) +- throw Error(ERR_INTERNAL); ++ type_ = 0; + else + type_ |= (val == 1) ? (Type)XBYAK_AARCH64_HWCAP_ATOMIC : 0; + + if (sysctlbyname(hw_opt_fp, &val, &len, NULL, 0) != 0) +- throw Error(ERR_INTERNAL); ++ type_ = 0; + else + type_ |= (val == 1) ? (Type)XBYAK_AARCH64_HWCAP_FP : 0; + + if (sysctlbyname(hw_opt_neon, &val, &len, NULL, 0) != 0) +- throw Error(ERR_INTERNAL); ++ type_ = 0; + else + type_ |= (val == 1) ? (Type)XBYAK_AARCH64_HWCAP_ADVSIMD : 0; + } +-- +2.34.1 +