[XLA:GPU] Fix compilation failures under MSVC.
Two problems:
* 1L is not int64-sized on Windows (long is 32 bits under MSVC), which leads to a template argument deduction failure in std::max; pass the template argument explicitly instead, as sketched below.
* MSVC limits a single string literal to roughly 16K characters. However, adjacent string literals are concatenated into one long literal, so split the over-long literal into shorter pieces and let concatenation rejoin them (also sketched below).
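For illustration, a minimal standalone sketch of both fixes; the names ClampToAtLeastOne and kLongText are invented stand-ins for this sketch, not identifiers from the XLA sources:

#include <algorithm>
#include <cstdint>

// Problem 1: Windows is LLP64, so long is 32 bits and 1L is not the same type
// as an int64_t value; std::max(1L, x) then fails template argument deduction.
int64_t ClampToAtLeastOne(int64_t computed) {
  // return std::max(1L, computed);         // fails under MSVC: conflicting deduced types
  return std::max<int64_t>(1L, computed);   // explicit type, no deduction needed
}

// Problem 2: MSVC rejects a single string literal longer than about 16K
// characters, but adjacent (raw) string literals are concatenated at compile
// time, so a long constant can be built from several shorter literals.
constexpr char kLongText[] =
    R"(first chunk, kept under the per-literal limit)"
    R"( second chunk, appended by literal concatenation)";

The diffs below apply these same two techniques: an explicit std::max<int64_t> in triton_autotuner.cc, and literal splitting for the long PTX and op-profile string constants.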

PiperOrigin-RevId: 563641264
hawkinsp authored and copybara-github committed Sep 8, 2023
1 parent 417aad1 commit 13e9ee4
3 changed files with 147 additions and 143 deletions.
3 changes: 2 additions & 1 deletion xla/service/gpu/buffer_comparator.cc
@@ -522,7 +522,8 @@ static const char* buffer_compare_ptx = R"(
$L__BB1_9:
ret;
}
})"
R"(
// .globl __xla_fp16_comparison
.visible .entry __xla_fp16_comparison(
.param .u64 __xla_fp16_comparison_param_0,
281 changes: 142 additions & 139 deletions xla/service/gpu/hlo_op_profiles.h
@@ -589,142 +589,141 @@ constexpr char kDeviceHloOpProfiles[] = R"pb(
}
}
entries {
key: "sm_80"
value {
entries {
instruction {
opcode: "divide"
shape { element_type: S8 }
}
clock_cycles: 417
}
entries {
instruction {
opcode: "divide"
shape { element_type: S16 }
}
clock_cycles: 468
}
entries {
instruction {
opcode: "divide"
shape { element_type: S64 }
}
clock_cycles: 1094
}
entries {
instruction {
opcode: "divide"
shape { element_type: U8 }
}
clock_cycles: 420
}
entries {
instruction {
opcode: "power"
shape { element_type: U8 }
}
clock_cycles: 417
}
entries {
instruction {
opcode: "divide"
shape { element_type: U16 }
}
clock_cycles: 391
}
entries {
instruction {
opcode: "power"
shape { element_type: U16 }
}
clock_cycles: 454
}
entries {
instruction {
opcode: "divide"
shape { element_type: U64 }
}
clock_cycles: 908
}
entries {
instruction {
opcode: "power"
shape { element_type: U64 }
}
clock_cycles: 744
}
entries {
instruction {
opcode: "cbrt"
shape { element_type: F16 }
}
clock_cycles: 1195
}
entries {
instruction {
opcode: "log"
shape { element_type: F16 }
}
clock_cycles: 321
}
entries {
instruction {
opcode: "log-plus-one"
shape { element_type: F16 }
}
clock_cycles: 346
}
entries {
instruction {
opcode: "sqrt"
shape { element_type: F16 }
}
clock_cycles: 124
}
entries {
instruction {
opcode: "tanh"
shape { element_type: F16 }
}
clock_cycles: 499
}
entries {
instruction {
opcode: "log"
shape { element_type: F32 }
}
clock_cycles: 259
}
entries {
instruction {
opcode: "tanh"
shape { element_type: F32 }
}
clock_cycles: 504
}
entries {
instruction {
opcode: "power"
shape { element_type: F32 }
}
clock_cycles: 1221
}
entries {
instruction {
opcode: "cbrt"
shape { element_type: F64 }
}
clock_cycles: 1638
}
entries {
instruction {
opcode: "exponential-minus-one"
shape { element_type: F64 }
}
clock_cycles: 572
}
entries { key: "sm_80"
value { entries {
instruction {
opcode: "divide"
shape { element_type: S8 }
}
clock_cycles: 417
}
entries {
instruction {
opcode: "divide"
shape { element_type: S16 }
}
clock_cycles: 468
}
entries {
instruction {
opcode: "divide"
shape { element_type: S64 }
}
clock_cycles: 1094
}
entries {
instruction {
opcode: "divide"
shape { element_type: U8 }
}
clock_cycles: 420
}
entries {
instruction {
opcode: "power"
shape { element_type: U8 }
}
clock_cycles: 417
}
entries {
instruction {
opcode: "divide"
shape { element_type: U16 }
}
clock_cycles: 391
}
entries {
instruction {
opcode: "power"
shape { element_type: U16 }
}
clock_cycles: 454
}
entries {
instruction {
opcode: "divide"
shape { element_type: U64 }
}
clock_cycles: 908
}
entries {
instruction {
opcode: "power"
shape { element_type: U64 }
}
clock_cycles: 744
}
entries {
instruction {
opcode: "cbrt"
shape { element_type: F16 }
}
clock_cycles: 1195
}
entries {
instruction {
opcode: "log"
shape { element_type: F16 }
}
clock_cycles: 321
}
entries {
instruction {
opcode: "log-plus-one"
shape { element_type: F16 }
}
clock_cycles: 346
}
entries {
instruction {
opcode: "sqrt"
shape { element_type: F16 }
}
clock_cycles: 124
}
entries {
instruction {
opcode: "tanh"
shape { element_type: F16 }
}
clock_cycles: 499
}
entries {
instruction {
opcode: "log"
shape { element_type: F32 }
}
clock_cycles: 259
}
entries {
instruction {
opcode: "tanh"
shape { element_type: F32 }
}
clock_cycles: 504
}
entries {
instruction {
opcode: "power"
shape { element_type: F32 }
}
clock_cycles: 1221
}
entries {
instruction {
opcode: "cbrt"
shape { element_type: F64 }
}
clock_cycles: 1638
}
entries {
instruction {
opcode: "exponential-minus-one"
shape { element_type: F64 }
}
clock_cycles: 572
})pb"
R"pb(
entries {
instruction {
opcode: "log"
@@ -887,7 +886,7 @@ constexpr char kDeviceHloOpProfiles[] = R"pb(
clock_cycles: 6054
}
}
}
}
entries {
key: "sm_70"
@@ -1486,6 +1485,8 @@ constexpr char kDeviceHloOpProfiles[] = R"pb(
}
clock_cycles: 435
}
)pb"
R"pb(
entries {
instruction {
opcode: "log-plus-one"
@@ -1767,7 +1768,7 @@ constexpr char kDeviceHloOpProfiles[] = R"pb(
clock_cycles: 10458
}
}
}
}
entries {
key: "sm_75"
@@ -2038,6 +2039,8 @@ constexpr char kDeviceHloOpProfiles[] = R"pb(
}
clock_cycles: 74
}
)pb"
R"pb(
entries {
instruction {
opcode: "tanh"
@@ -2361,7 +2364,7 @@ constexpr char kDeviceHloOpProfiles[] = R"pb(
clock_cycles: 97
}
}
}
}
)pb";

} // namespace gpu
6 changes: 3 additions & 3 deletions xla/service/gpu/triton_autotuner.cc
@@ -740,9 +740,9 @@ std::vector<AutotuneResult::TritonGemmKey> GetPossibleMatmulAutotuneConfigs(
constexpr int kSufficientNumberOfTiles = 500;
const int max_split_k =
debug_options.xla_gpu_enable_split_k_autotuning()
? std::max(1L, kSufficientNumberOfTiles * kMaxTileSize *
kMaxTileSize /
ShapeUtil::ElementsIn(instr.shape()))
? std::max<int64_t>(1L, kSufficientNumberOfTiles * kMaxTileSize *
kMaxTileSize /
ShapeUtil::ElementsIn(instr.shape()))
: 1;
return exhaustive_tiling_search
? GetExhaustiveMatmulAutotuneConfigs(compute_capability,
