Skip to content

Commit

Permalink
Add patches back
Browse files: browse the repository at this point in the history
  • Loading branch information
qihqi committed Oct 5, 2023
1 parent 63c5a13 commit 7de99ad
Show file tree
Hide file tree
Showing 3 changed files with 35 additions and 0 deletions.
2 changes: 2 additions & 0 deletions WORKSPACE
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,8 @@ http_archive(
"//openxla_patches:cache_urls.diff",
"//openxla_patches:constexpr_return.diff",
"//openxla_patches:gpu_build_file.diff",
"//openxla_patches:gpu_race_condition.diff",
"//openxla_patches:f16_abi_clang.diff",
],
strip_prefix = "xla-7a19856d74569fd1f765cd03bdee84e3b1fdc579",
urls = [
Expand Down
19 changes: 19 additions & 0 deletions openxla_patches/f16_abi_clang.diff
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
Upstream CI will fail without this patch.
diff --git a/xla/service/cpu/runtime_fp16.h b/xla/service/cpu/runtime_fp16.h
index 3f7af5197..ce4491c5d 100644
--- a/xla/service/cpu/runtime_fp16.h
+++ b/xla/service/cpu/runtime_fp16.h
@@ -18,12 +18,7 @@ limitations under the License.

#include <stdint.h>

-// _Float16 always gets us the correct ABI type, so use that if available.
-// AArch64 GCC defines __FLT16_MANT_DIG__ even when _Float16 is not available.
-#if defined(__FLT16_MANT_DIG__) && \
- (defined(__clang__) || !(defined(__GNUC__) && defined(__aarch64__)))
-using XlaF16ABIType = _Float16;
-#elif defined(__x86_64__)
+#if defined(__x86_64__)
// Older versions of Clang don't have _Float16. Since both float and _Float16
// are passed in the same register we can use the wider type and careful casting
// to conform to x86_64 psABI. This only works with the assumption that we're
14 changes: 14 additions & 0 deletions openxla_patches/gpu_race_condition.diff
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
diff --git a/xla/service/gpu/gpu_executable.cc b/xla/service/gpu/gpu_executable.cc
index 242961dd1..787275868 100644
--- a/xla/service/gpu/gpu_executable.cc
+++ b/xla/service/gpu/gpu_executable.cc
@@ -563,8 +563,7 @@ StatusOr<ExecutionOutput> GpuExecutable::ExecuteAsyncOnStreamImpl(
}

// Force synchronous execution if the allocator requires it.
- const bool block_host_until_done =
- !memory_allocator->AllowsAsynchronousDeallocation();
+ const bool block_host_until_done = true;


// Lock the GPU with a shared lock so that we don't interfere with autotuning

0 comments on commit 7de99ad

Please sign in to comment.