Support bf16 searchsort op (#99426)

Per title, needed to unblock bf16 for an ads tformer workload Differential Revision: [D45088972](https://our.internmc.facebook.com/intern/diff/D45088972/) Pull Request resolved: #99426 Approved by: https://github.com/XilunWu
pytorch · Apr 19, 2023 · fcd2e8c · fcd2e8c
1 parent b3b0fbc
commit fcd2e8c
Show file tree

Hide file tree

Showing 2 changed files with 4 additions and 4 deletions.
diff --git a/aten/src/ATen/native/cuda/Bucketization.cu b/aten/src/ATen/native/cuda/Bucketization.cu
@@ -116,12 +116,12 @@ void dispatch(
     bool right,
     const Tensor& sorter) {
   if (!out_int32) {
-    AT_DISPATCH_ALL_TYPES_AND(at::ScalarType::Half, input.scalar_type(), "searchsorted_out_cuda", [&] {
+    AT_DISPATCH_ALL_TYPES_AND2(at::ScalarType::Half, at::ScalarType::BFloat16, input.scalar_type(), "searchsorted_out_cuda", [&] {
       searchsorted_cuda_contiguous<scalar_t, int64_t>(result, input, boundaries, right, sorter);
     });
   }
   else {
-    AT_DISPATCH_ALL_TYPES_AND(at::ScalarType::Half, input.scalar_type(), "searchsorted_out_cuda", [&] {
+    AT_DISPATCH_ALL_TYPES_AND2(at::ScalarType::Half, at::ScalarType::BFloat16, input.scalar_type(), "searchsorted_out_cuda", [&] {
       searchsorted_cuda_contiguous<scalar_t, int>(result, input, boundaries, right, sorter);
     });
   }

diff --git a/torch/testing/_internal/common_methods_invocations.py b/torch/testing/_internal/common_methods_invocations.py
@@ -16086,7 +16086,7 @@ def reference_flatten(input, start_dim=0, end_dim=-1):
            )),
     OpInfo('bucketize',
            dtypes=all_types_and(torch.float16, torch.bfloat16),
-           dtypesIfCUDA=all_types_and(torch.float16),
+           dtypesIfCUDA=all_types_and(torch.bfloat16, torch.float16),
            sample_inputs_func=sample_inputs_bucketize,
            reference_inputs_func=reference_inputs_bucketize,
            supports_autograd=False,
@@ -16096,7 +16096,7 @@ def reference_flatten(input, start_dim=0, end_dim=-1):
            )),
     OpInfo('searchsorted',
            dtypes=all_types_and(torch.bfloat16, torch.float16),
-           dtypesIfCUDA=all_types_and(torch.float16),
+           dtypesIfCUDA=all_types_and(torch.bfloat16, torch.float16),
            sample_inputs_func=sample_inputs_searchsorted,
            supports_autograd=False,
            ref=reference_searchsorted,