forked from llvm/llvm-project
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[NVPTX] extend type support for nvvm.{min,max,mulhi,sad} (llvm#78385)
Ensure intrinsics and auto-upgrades support i16, i32, and i64 for `nvvm.{min,max,mulhi,sad}`. - `nvvm.min` and `nvvm.max`: These are auto-upgraded to `select` instructions, but it is still nice to support the 16-bit variants in case any generators of IR are still trying to use these intrinsics. - `nvvm.sad`: Added both the 16-bit and 64-bit variants, and also marked this intrinsic as speculatable. These directly correspond to the PTX `sad.{u16,s16,u64,s64}` instructions. - `nvvm.mulhi`: Added the 16-bit variants. These directly correspond to the PTX `mul.hi.{s,u}16` instructions.
- Loading branch information
1 parent
c772754
commit b44fed8
Showing
6 changed files
with
312 additions
and
42 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,104 @@ | ||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 | ||
; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_50 | FileCheck %s | ||
; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 -mcpu=sm_50 | %ptxas-verify %} | ||
|
||
; Passes two i16 arguments to @llvm.nvvm.mulhi.s and returns the result.
; The assertions below pin the expected PTX: both parameters are loaded into
; 16-bit registers, a single mul.hi.s16 produces the result, and that result is
; converted with cvt.u32.u16 before being stored as a 32-bit return value.
define i16 @test_mulhi_i16(i16 %x, i16 %y) { | ||
; CHECK-LABEL: test_mulhi_i16( | ||
; CHECK: { | ||
; CHECK-NEXT: .reg .b16 %rs<4>; | ||
; CHECK-NEXT: .reg .b32 %r<2>; | ||
; CHECK-EMPTY: | ||
; CHECK-NEXT: // %bb.0: | ||
; CHECK-NEXT: ld.param.u16 %rs1, [test_mulhi_i16_param_0]; | ||
; CHECK-NEXT: ld.param.u16 %rs2, [test_mulhi_i16_param_1]; | ||
; CHECK-NEXT: mul.hi.s16 %rs3, %rs1, %rs2; | ||
; CHECK-NEXT: cvt.u32.u16 %r1, %rs3; | ||
; CHECK-NEXT: st.param.b32 [func_retval0+0], %r1; | ||
; CHECK-NEXT: ret; | ||
%1 = call i16 @llvm.nvvm.mulhi.s(i16 %x, i16 %y) | ||
ret i16 %1 | ||
} | ||
|
||
; Passes two i16 arguments to @llvm.nvvm.mulhi.us and returns the result.
; The assertions below pin the expected PTX: both parameters are loaded into
; 16-bit registers, a single mul.hi.u16 produces the result, and that result is
; converted with cvt.u32.u16 before being stored as a 32-bit return value.
define i16 @test_mulhi_u16(i16 %x, i16 %y) { | ||
; CHECK-LABEL: test_mulhi_u16( | ||
; CHECK: { | ||
; CHECK-NEXT: .reg .b16 %rs<4>; | ||
; CHECK-NEXT: .reg .b32 %r<2>; | ||
; CHECK-EMPTY: | ||
; CHECK-NEXT: // %bb.0: | ||
; CHECK-NEXT: ld.param.u16 %rs1, [test_mulhi_u16_param_0]; | ||
; CHECK-NEXT: ld.param.u16 %rs2, [test_mulhi_u16_param_1]; | ||
; CHECK-NEXT: mul.hi.u16 %rs3, %rs1, %rs2; | ||
; CHECK-NEXT: cvt.u32.u16 %r1, %rs3; | ||
; CHECK-NEXT: st.param.b32 [func_retval0+0], %r1; | ||
; CHECK-NEXT: ret; | ||
%1 = call i16 @llvm.nvvm.mulhi.us(i16 %x, i16 %y) | ||
ret i16 %1 | ||
} | ||
|
||
; Passes two i32 arguments to @llvm.nvvm.mulhi.i and returns the result.
; The assertions below pin the expected PTX: both parameters are loaded into
; 32-bit registers and a single mul.hi.s32 produces the value that is stored
; directly as the 32-bit return value (no conversion step).
define i32 @test_mulhi_i32(i32 %x, i32 %y) { | ||
; CHECK-LABEL: test_mulhi_i32( | ||
; CHECK: { | ||
; CHECK-NEXT: .reg .b32 %r<4>; | ||
; CHECK-EMPTY: | ||
; CHECK-NEXT: // %bb.0: | ||
; CHECK-NEXT: ld.param.u32 %r1, [test_mulhi_i32_param_0]; | ||
; CHECK-NEXT: ld.param.u32 %r2, [test_mulhi_i32_param_1]; | ||
; CHECK-NEXT: mul.hi.s32 %r3, %r1, %r2; | ||
; CHECK-NEXT: st.param.b32 [func_retval0+0], %r3; | ||
; CHECK-NEXT: ret; | ||
%1 = call i32 @llvm.nvvm.mulhi.i(i32 %x, i32 %y) | ||
ret i32 %1 | ||
} | ||
|
||
; Passes two i32 arguments to @llvm.nvvm.mulhi.ui and returns the result.
; The assertions below pin the expected PTX: both parameters are loaded into
; 32-bit registers and a single mul.hi.u32 produces the value that is stored
; directly as the 32-bit return value (no conversion step).
define i32 @test_mulhi_u32(i32 %x, i32 %y) { | ||
; CHECK-LABEL: test_mulhi_u32( | ||
; CHECK: { | ||
; CHECK-NEXT: .reg .b32 %r<4>; | ||
; CHECK-EMPTY: | ||
; CHECK-NEXT: // %bb.0: | ||
; CHECK-NEXT: ld.param.u32 %r1, [test_mulhi_u32_param_0]; | ||
; CHECK-NEXT: ld.param.u32 %r2, [test_mulhi_u32_param_1]; | ||
; CHECK-NEXT: mul.hi.u32 %r3, %r1, %r2; | ||
; CHECK-NEXT: st.param.b32 [func_retval0+0], %r3; | ||
; CHECK-NEXT: ret; | ||
%1 = call i32 @llvm.nvvm.mulhi.ui(i32 %x, i32 %y) | ||
ret i32 %1 | ||
} | ||
|
||
; Passes two i64 arguments to @llvm.nvvm.mulhi.ll and returns the result.
; The assertions below pin the expected PTX: both parameters are loaded into
; 64-bit registers and a single mul.hi.s64 produces the value that is stored
; directly as the 64-bit return value.
define i64 @test_mulhi_i64(i64 %x, i64 %y) { | ||
; CHECK-LABEL: test_mulhi_i64( | ||
; CHECK: { | ||
; CHECK-NEXT: .reg .b64 %rd<4>; | ||
; CHECK-EMPTY: | ||
; CHECK-NEXT: // %bb.0: | ||
; CHECK-NEXT: ld.param.u64 %rd1, [test_mulhi_i64_param_0]; | ||
; CHECK-NEXT: ld.param.u64 %rd2, [test_mulhi_i64_param_1]; | ||
; CHECK-NEXT: mul.hi.s64 %rd3, %rd1, %rd2; | ||
; CHECK-NEXT: st.param.b64 [func_retval0+0], %rd3; | ||
; CHECK-NEXT: ret; | ||
%1 = call i64 @llvm.nvvm.mulhi.ll(i64 %x, i64 %y) | ||
ret i64 %1 | ||
} | ||
|
||
; Passes two i64 arguments to @llvm.nvvm.mulhi.ull and returns the result.
; The assertions below pin the expected PTX: both parameters are loaded into
; 64-bit registers and a single mul.hi.u64 produces the value that is stored
; directly as the 64-bit return value.
define i64 @test_mulhi_u64(i64 %x, i64 %y) { | ||
; CHECK-LABEL: test_mulhi_u64( | ||
; CHECK: { | ||
; CHECK-NEXT: .reg .b64 %rd<4>; | ||
; CHECK-EMPTY: | ||
; CHECK-NEXT: // %bb.0: | ||
; CHECK-NEXT: ld.param.u64 %rd1, [test_mulhi_u64_param_0]; | ||
; CHECK-NEXT: ld.param.u64 %rd2, [test_mulhi_u64_param_1]; | ||
; CHECK-NEXT: mul.hi.u64 %rd3, %rd1, %rd2; | ||
; CHECK-NEXT: st.param.b64 [func_retval0+0], %rd3; | ||
; CHECK-NEXT: ret; | ||
%1 = call i64 @llvm.nvvm.mulhi.ull(i64 %x, i64 %y) | ||
ret i64 %1 | ||
} | ||
|
||
; Declarations of the six NVVM mulhi intrinsics called by the test functions in
; this file. Each takes two integers of one width (i16, i32 or i64) and returns
; an integer of that same width. The PTX instruction each one is expected to
; lower to is pinned by the assertions in the corresponding test function.
declare i16 @llvm.nvvm.mulhi.s(i16, i16) | ||
declare i16 @llvm.nvvm.mulhi.us(i16, i16) | ||
declare i32 @llvm.nvvm.mulhi.i(i32, i32) | ||
declare i32 @llvm.nvvm.mulhi.ui(i32, i32) | ||
declare i64 @llvm.nvvm.mulhi.ll(i64, i64) | ||
declare i64 @llvm.nvvm.mulhi.ull(i64, i64) |
Oops, something went wrong.