-
Notifications
You must be signed in to change notification settings - Fork 12k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[AArch64][SME]Update intrinsic interface for read/write #65594
[AArch64][SME]Update intrinsic interface for read/write #65594
Conversation
The new ACLE PR#225[1] now combines the slice parameters for some builtins. This patch is the llvm#2 of 3 patches to update the interface. Slice specifies the ZA slice number directly and needs to be explicity implemented by the "user" with the base register plus the immediate offset [1]https://github.com/ARM-software/acle/pull/225/files
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM with nit addressed
clang/lib/CodeGen/CGBuiltin.cpp
Outdated
@@ -9553,12 +9553,8 @@ Value *CodeGenFunction::EmitSMEReadWrite(const SVETypeFlags &TypeFlags, | |||
Function *F = CGM.getIntrinsic(IntID, VecTy); | |||
if (TypeFlags.isReadZA()) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
nit: you can remove the curly braces now.
@llvm/pr-subscribers-clang ChangesThe new ACLE PR#225[1] now combines the slice parameters for some builtins. This patch is the #2 of 3 patches to update the interface. Slice specifies the ZA slice number directly and needs to be explicity implemented by the "user" with the base register plus the immediate offset [1]https://github.com/ARM-software/acle/pull/225/filesPatch is 111.25 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/65594.diff 5 Files Affected:
diff --git a/clang/include/clang/Basic/arm_sme.td b/clang/include/clang/Basic/arm_sme.td index 0eb1e647bf03eaa..ea3cee8c5918275 100644 --- a/clang/include/clang/Basic/arm_sme.td +++ b/clang/include/clang/Basic/arm_sme.td @@ -96,42 +96,42 @@ def SVSTR_ZA : MInst<"svstr_za", "vm%", "", multiclass ZARead ch> { let TargetGuard = "sme" in { - def NAME # _H : SInst<"svread_hor_" # n_suffix # "[_{d}]", "ddPimi", t, + def NAME # _H : SInst<"svread_hor_" # n_suffix # "[_{d}]", "ddPim", t, MergeOp1, i_prefix # "_horiz", [IsReadZA, IsStreaming, IsSharedZA, IsPreservesZA], ch>; - def NAME # _V : SInst<"svread_ver_" # n_suffix # "[_{d}]", "ddPimi", t, + def NAME # _V : SInst<"svread_ver_" # n_suffix # "[_{d}]", "ddPim", t, MergeOp1, i_prefix # "_vert", [IsReadZA, IsStreaming, IsSharedZA, IsPreservesZA], ch>; } } -defm SVREAD_ZA8 : ZARead<"za8", "cUc", "aarch64_sme_read", [ImmCheck<2, ImmCheck0_0>, ImmCheck<4, ImmCheck0_15>]>; -defm SVREAD_ZA16 : ZARead<"za16", "sUshb", "aarch64_sme_read", [ImmCheck<2, ImmCheck0_1>, ImmCheck<4, ImmCheck0_7>]>; -defm SVREAD_ZA32 : ZARead<"za32", "iUif", "aarch64_sme_read", [ImmCheck<2, ImmCheck0_3>, ImmCheck<4, ImmCheck0_3>]>; -defm SVREAD_ZA64 : ZARead<"za64", "lUld", "aarch64_sme_read", [ImmCheck<2, ImmCheck0_7>, ImmCheck<4, ImmCheck0_1>]>; -defm SVREAD_ZA128 : ZARead<"za128", "csilUcUsUiUlhbfd", "aarch64_sme_readq", [ImmCheck<2, ImmCheck0_15>, ImmCheck<4, ImmCheck0_0>]>; +defm SVREAD_ZA8 : ZARead<"za8", "cUc", "aarch64_sme_read", [ImmCheck<2, ImmCheck0_0>]>; +defm SVREAD_ZA16 : ZARead<"za16", "sUshb", "aarch64_sme_read", [ImmCheck<2, ImmCheck0_1>]>; +defm SVREAD_ZA32 : ZARead<"za32", "iUif", "aarch64_sme_read", [ImmCheck<2, ImmCheck0_3>]>; +defm SVREAD_ZA64 : ZARead<"za64", "lUld", "aarch64_sme_read", [ImmCheck<2, ImmCheck0_7>]>; +defm SVREAD_ZA128 : ZARead<"za128", "csilUcUsUiUlhbfd", "aarch64_sme_readq", [ImmCheck<2, ImmCheck0_15>]>; //////////////////////////////////////////////////////////////////////////////// // Write horizontal/vertical ZA slices multiclass ZAWrite ch> { let TargetGuard = "sme" in { - def NAME # _H : SInst<"svwrite_hor_" # n_suffix # "[_{d}]", "vimiPd", t, + def NAME # _H : SInst<"svwrite_hor_" # n_suffix # "[_{d}]", "vimPd", t, MergeOp1, i_prefix # "_horiz", [IsWriteZA, IsStreaming, IsSharedZA], ch>; - def NAME # _V : SInst<"svwrite_ver_" # n_suffix # "[_{d}]", "vimiPd", t, + def NAME # _V : SInst<"svwrite_ver_" # n_suffix # "[_{d}]", "vimPd", t, MergeOp1, i_prefix # "_vert", [IsWriteZA, IsStreaming, IsSharedZA], ch>; } } -defm SVWRITE_ZA8 : ZAWrite<"za8", "cUc", "aarch64_sme_write", [ImmCheck<0, ImmCheck0_0>, ImmCheck<2, ImmCheck0_15>]>; -defm SVWRITE_ZA16 : ZAWrite<"za16", "sUshb", "aarch64_sme_write", [ImmCheck<0, ImmCheck0_1>, ImmCheck<2, ImmCheck0_7>]>; -defm SVWRITE_ZA32 : ZAWrite<"za32", "iUif", "aarch64_sme_write", [ImmCheck<0, ImmCheck0_3>, ImmCheck<2, ImmCheck0_3>]>; -defm SVWRITE_ZA64 : ZAWrite<"za64", "lUld", "aarch64_sme_write", [ImmCheck<0, ImmCheck0_7>, ImmCheck<2, ImmCheck0_1>]>; -defm SVWRITE_ZA128 : ZAWrite<"za128", "csilUcUsUiUlhbfd", "aarch64_sme_writeq", [ImmCheck<0, ImmCheck0_15>, ImmCheck<2, ImmCheck0_0>]>; +defm SVWRITE_ZA8 : ZAWrite<"za8", "cUc", "aarch64_sme_write", [ImmCheck<0, ImmCheck0_0>]>; +defm SVWRITE_ZA16 : ZAWrite<"za16", "sUshb", "aarch64_sme_write", [ImmCheck<0, ImmCheck0_1>]>; +defm SVWRITE_ZA32 : ZAWrite<"za32", "iUif", "aarch64_sme_write", [ImmCheck<0, ImmCheck0_3>]>; +defm SVWRITE_ZA64 : ZAWrite<"za64", "lUld", "aarch64_sme_write", [ImmCheck<0, ImmCheck0_7>]>; +defm SVWRITE_ZA128 : ZAWrite<"za128", "csilUcUsUiUlhbfd", "aarch64_sme_writeq", [ImmCheck<0, ImmCheck0_15>]>; //////////////////////////////////////////////////////////////////////////////// // SME - Zero diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 1ee0c469af9ee8b..0e7ad19a72be215 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -9619,12 +9619,8 @@ Value *CodeGenFunction::EmitSMEReadWrite(const SVETypeFlags &TypeFlags, Function *F = CGM.getIntrinsic(IntID, VecTy); if (TypeFlags.isReadZA()) { Ops[1] = EmitSVEPredicateCast(Ops[1], VecTy); - Ops[3] = EmitTileslice(Ops[4], Ops[3]); - Ops.erase(&Ops[4]); } else if (TypeFlags.isWriteZA()) { - Ops[1] = EmitTileslice(Ops[2], Ops[1]); - Ops[2] = EmitSVEPredicateCast(Ops[3], VecTy); - Ops.erase(&Ops[3]); + Ops[2] = EmitSVEPredicateCast(Ops[2], VecTy); } return Builder.CreateCall(F, Ops); } diff --git a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_read.c b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_read.c index 19e2b42e13f2d64..f7a0852387e8951 100644 --- a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_read.c +++ b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_read.c @@ -20,7 +20,7 @@ // CHECK-NEXT: ret [[TMP0]] // svint8_t test_svread_hor_za8_s8(svint8_t zd, svbool_t pg, uint32_t slice_base) { - return SME_ACLE_FUNC(svread_hor_za8, _s8, _m)(zd, pg, 0, slice_base, 0); + return SME_ACLE_FUNC(svread_hor_za8, _s8, _m)(zd, pg, 0, slice_base); } // CHECK-C-LABEL: @test_svread_hor_za8_s8_1( @@ -31,7 +31,8 @@ svint8_t test_svread_hor_za8_s8(svint8_t zd, svbool_t pg, uint32_t slice_base) { // CHECK-NEXT: ret [[TMP0]] // svint8_t test_svread_hor_za8_s8_1(svint8_t zd, svbool_t pg, uint32_t slice_base) { - return SME_ACLE_FUNC(svread_hor_za8, _s8, _m)(zd, pg, 0, slice_base, 15); + uint32_t slice = slice_base + 15; + return SME_ACLE_FUNC(svread_hor_za8, _s8, _m)(zd, pg, 0, slice); } // CHECK-C-LABEL: @test_svread_hor_za16_s16( @@ -42,19 +43,20 @@ svint8_t test_svread_hor_za8_s8_1(svint8_t zd, svbool_t pg, uint32_t slice_base) // CHECK-NEXT: ret [[TMP1]] // svint16_t test_svread_hor_za16_s16(svint16_t zd, svbool_t pg, uint32_t slice_base) { - return SME_ACLE_FUNC(svread_hor_za16, _s16, _m)(zd, pg, 0, slice_base, 0); + return SME_ACLE_FUNC(svread_hor_za16, _s16, _m)(zd, pg, 0, slice_base); } // CHECK-C-LABEL: @test_svread_hor_za16_s16_1( // CHECK-CXX-LABEL: @_Z26test_svread_hor_za16_s16_1u11__SVInt16_tu10__SVBool_tj( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) // CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 7 +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) // CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.horiz.nxv8i16( [[ZD:%.*]], [[TMP0]], i32 1, i32 [[TILESLICE]]) // CHECK-NEXT: ret [[TMP1]] // svint16_t test_svread_hor_za16_s16_1(svint16_t zd, svbool_t pg, uint32_t slice_base) { - return SME_ACLE_FUNC(svread_hor_za16, _s16, _m)(zd, pg, 1, slice_base, 7); + uint32_t slice = slice_base + 7; + return SME_ACLE_FUNC(svread_hor_za16, _s16, _m)(zd, pg, 1, slice); } // CHECK-C-LABEL: @test_svread_hor_za32_s32( @@ -65,19 +67,20 @@ svint16_t test_svread_hor_za16_s16_1(svint16_t zd, svbool_t pg, uint32_t slice_b // CHECK-NEXT: ret [[TMP1]] // svint32_t test_svread_hor_za32_s32(svint32_t zd, svbool_t pg, uint32_t slice_base) { - return SME_ACLE_FUNC(svread_hor_za32, _s32, _m)(zd, pg, 0, slice_base, 0); + return SME_ACLE_FUNC(svread_hor_za32, _s32, _m)(zd, pg, 0, slice_base); } // CHECK-C-LABEL: @test_svread_hor_za32_s32_1( // CHECK-CXX-LABEL: @_Z26test_svread_hor_za32_s32_1u11__SVInt32_tu10__SVBool_tj( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) // CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 3 +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) // CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.horiz.nxv4i32( [[ZD:%.*]], [[TMP0]], i32 3, i32 [[TILESLICE]]) // CHECK-NEXT: ret [[TMP1]] // svint32_t test_svread_hor_za32_s32_1(svint32_t zd, svbool_t pg, uint32_t slice_base) { - return SME_ACLE_FUNC(svread_hor_za32, _s32, _m)(zd, pg, 3, slice_base, 3); + uint32_t slice = slice_base + 3; + return SME_ACLE_FUNC(svread_hor_za32, _s32, _m)(zd, pg, 3, slice); } // CHECK-C-LABEL: @test_svread_hor_za64_s64( @@ -88,19 +91,20 @@ svint32_t test_svread_hor_za32_s32_1(svint32_t zd, svbool_t pg, uint32_t slice_b // CHECK-NEXT: ret [[TMP1]] // svint64_t test_svread_hor_za64_s64(svint64_t zd, svbool_t pg, uint32_t slice_base) { - return SME_ACLE_FUNC(svread_hor_za64, _s64, _m)(zd, pg, 0, slice_base, 0); + return SME_ACLE_FUNC(svread_hor_za64, _s64, _m)(zd, pg, 0, slice_base); } // CHECK-C-LABEL: @test_svread_hor_za64_s64_1( // CHECK-CXX-LABEL: @_Z26test_svread_hor_za64_s64_1u11__SVInt64_tu10__SVBool_tj( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) // CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 1 +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) // CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.horiz.nxv2i64( [[ZD:%.*]], [[TMP0]], i32 7, i32 [[TILESLICE]]) // CHECK-NEXT: ret [[TMP1]] // svint64_t test_svread_hor_za64_s64_1(svint64_t zd, svbool_t pg, uint32_t slice_base) { - return SME_ACLE_FUNC(svread_hor_za64, _s64, _m)(zd, pg, 7, slice_base, 1); + uint32_t slice = slice_base + 1; + return SME_ACLE_FUNC(svread_hor_za64, _s64, _m)(zd, pg, 7, slice); } // CHECK-C-LABEL: @test_svread_hor_za8_u8( @@ -110,7 +114,7 @@ svint64_t test_svread_hor_za64_s64_1(svint64_t zd, svbool_t pg, uint32_t slice_b // CHECK-NEXT: ret [[TMP0]] // svuint8_t test_svread_hor_za8_u8(svuint8_t zd, svbool_t pg, uint32_t slice_base) { - return SME_ACLE_FUNC(svread_hor_za8, _u8, _m)(zd, pg, 0, slice_base, 0); + return SME_ACLE_FUNC(svread_hor_za8, _u8, _m)(zd, pg, 0, slice_base); } // CHECK-C-LABEL: @test_svread_hor_za8_u8_1( @@ -121,7 +125,8 @@ svuint8_t test_svread_hor_za8_u8(svuint8_t zd, svbool_t pg, uint32_t slice_base) // CHECK-NEXT: ret [[TMP0]] // svuint8_t test_svread_hor_za8_u8_1(svuint8_t zd, svbool_t pg, uint32_t slice_base) { - return SME_ACLE_FUNC(svread_hor_za8, _u8, _m)(zd, pg, 0, slice_base, 15); + uint32_t slice = slice_base + 15; + return SME_ACLE_FUNC(svread_hor_za8, _u8, _m)(zd, pg, 0, slice); } // CHECK-C-LABEL: @test_svread_hor_za16_u16( @@ -132,19 +137,20 @@ svuint8_t test_svread_hor_za8_u8_1(svuint8_t zd, svbool_t pg, uint32_t slice_bas // CHECK-NEXT: ret [[TMP1]] // svuint16_t test_svread_hor_za16_u16(svuint16_t zd, svbool_t pg, uint32_t slice_base) { - return SME_ACLE_FUNC(svread_hor_za16, _u16, _m)(zd, pg, 0, slice_base, 0); + return SME_ACLE_FUNC(svread_hor_za16, _u16, _m)(zd, pg, 0, slice_base); } // CHECK-C-LABEL: @test_svread_hor_za16_u16_1( // CHECK-CXX-LABEL: @_Z26test_svread_hor_za16_u16_1u12__SVUint16_tu10__SVBool_tj( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) // CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 7 +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) // CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.horiz.nxv8i16( [[ZD:%.*]], [[TMP0]], i32 1, i32 [[TILESLICE]]) // CHECK-NEXT: ret [[TMP1]] // svuint16_t test_svread_hor_za16_u16_1(svuint16_t zd, svbool_t pg, uint32_t slice_base) { - return SME_ACLE_FUNC(svread_hor_za16, _u16, _m)(zd, pg, 1, slice_base, 7); + uint32_t slice = slice_base + 7; + return SME_ACLE_FUNC(svread_hor_za16, _u16, _m)(zd, pg, 1, slice); } // CHECK-C-LABEL: @test_svread_hor_za32_u32( @@ -155,19 +161,20 @@ svuint16_t test_svread_hor_za16_u16_1(svuint16_t zd, svbool_t pg, uint32_t slice // CHECK-NEXT: ret [[TMP1]] // svuint32_t test_svread_hor_za32_u32(svuint32_t zd, svbool_t pg, uint32_t slice_base) { - return SME_ACLE_FUNC(svread_hor_za32, _u32, _m)(zd, pg, 0, slice_base, 0); + return SME_ACLE_FUNC(svread_hor_za32, _u32, _m)(zd, pg, 0, slice_base); } // CHECK-C-LABEL: @test_svread_hor_za32_u32_1( // CHECK-CXX-LABEL: @_Z26test_svread_hor_za32_u32_1u12__SVUint32_tu10__SVBool_tj( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) // CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 3 +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) // CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.horiz.nxv4i32( [[ZD:%.*]], [[TMP0]], i32 3, i32 [[TILESLICE]]) // CHECK-NEXT: ret [[TMP1]] // svuint32_t test_svread_hor_za32_u32_1(svuint32_t zd, svbool_t pg, uint32_t slice_base) { - return SME_ACLE_FUNC(svread_hor_za32, _u32, _m)(zd, pg, 3, slice_base, 3); + uint32_t slice = slice_base + 3; + return SME_ACLE_FUNC(svread_hor_za32, _u32, _m)(zd, pg, 3, slice); } // CHECK-C-LABEL: @test_svread_hor_za64_u64( @@ -178,19 +185,20 @@ svuint32_t test_svread_hor_za32_u32_1(svuint32_t zd, svbool_t pg, uint32_t slice // CHECK-NEXT: ret [[TMP1]] // svuint64_t test_svread_hor_za64_u64(svuint64_t zd, svbool_t pg, uint32_t slice_base) { - return SME_ACLE_FUNC(svread_hor_za64, _u64, _m)(zd, pg, 0, slice_base, 0); + return SME_ACLE_FUNC(svread_hor_za64, _u64, _m)(zd, pg, 0, slice_base); } // CHECK-C-LABEL: @test_svread_hor_za64_u64_1( // CHECK-CXX-LABEL: @_Z26test_svread_hor_za64_u64_1u12__SVUint64_tu10__SVBool_tj( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) // CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 1 +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) // CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.horiz.nxv2i64( [[ZD:%.*]], [[TMP0]], i32 7, i32 [[TILESLICE]]) // CHECK-NEXT: ret [[TMP1]] // svuint64_t test_svread_hor_za64_u64_1(svuint64_t zd, svbool_t pg, uint32_t slice_base) { - return SME_ACLE_FUNC(svread_hor_za64, _u64, _m)(zd, pg, 7, slice_base, 1); + uint32_t slice = slice_base + 1; + return SME_ACLE_FUNC(svread_hor_za64, _u64, _m)(zd, pg, 7, slice); } // CHECK-C-LABEL: @test_svread_hor_za16_f16( @@ -201,19 +209,20 @@ svuint64_t test_svread_hor_za64_u64_1(svuint64_t zd, svbool_t pg, uint32_t slice // CHECK-NEXT: ret [[TMP1]] // svfloat16_t test_svread_hor_za16_f16(svfloat16_t zd, svbool_t pg, uint32_t slice_base) { - return SME_ACLE_FUNC(svread_hor_za16, _f16, _m)(zd, pg, 0, slice_base, 0); + return SME_ACLE_FUNC(svread_hor_za16, _f16, _m)(zd, pg, 0, slice_base); } // CHECK-C-LABEL: @test_svread_hor_za16_f16_1( // CHECK-CXX-LABEL: @_Z26test_svread_hor_za16_f16_1u13__SVFloat16_tu10__SVBool_tj( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) // CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 7 +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) // CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.horiz.nxv8f16( [[ZD:%.*]], [[TMP0]], i32 1, i32 [[TILESLICE]]) // CHECK-NEXT: ret [[TMP1]] // svfloat16_t test_svread_hor_za16_f16_1(svfloat16_t zd, svbool_t pg, uint32_t slice_base) { - return SME_ACLE_FUNC(svread_hor_za16, _f16, _m)(zd, pg, 1, slice_base, 7); + uint32_t slice = slice_base + 7; + return SME_ACLE_FUNC(svread_hor_za16, _f16, _m)(zd, pg, 1, slice); } // CHECK-C-LABEL: @test_svread_hor_za16_bf16( @@ -224,19 +233,20 @@ svfloat16_t test_svread_hor_za16_f16_1(svfloat16_t zd, svbool_t pg, uint32_t sli // CHECK-NEXT: ret [[TMP1]] // svbfloat16_t test_svread_hor_za16_bf16(svbfloat16_t zd, svbool_t pg, uint32_t slice_base) { - return SME_ACLE_FUNC(svread_hor_za16, _bf16, _m)(zd, pg, 0, slice_base, 0); + return SME_ACLE_FUNC(svread_hor_za16, _bf16, _m)(zd, pg, 0, slice_base); } // CHECK-C-LABEL: @test_svread_hor_za16_bf16_1( // CHECK-CXX-LABEL: @_Z27test_svread_hor_za16_bf16_1u14__SVBFloat16_tu10__SVBool_tj( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) // CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 7 +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) // CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.horiz.nxv8bf16( [[ZD:%.*]], [[TMP0]], i32 1, i32 [[TILESLICE]]) // CHECK-NEXT: ret [[TMP1]] // svbfloat16_t test_svread_hor_za16_bf16_1(svbfloat16_t zd, svbool_t pg, uint32_t slice_base) { - return SME_ACLE_FUNC(svread_hor_za16, _bf16, _m)(zd, pg, 1, slice_base, 7); + uint32_t slice = slice_base + 7; + return SME_ACLE_FUNC(svread_hor_za16, _bf16, _m)(zd, pg, 1, slice); } // CHECK-C-LABEL: @test_svread_hor_za32_f32( @@ -247,19 +257,20 @@ svbfloat16_t test_svread_hor_za16_bf16_1(svbfloat16_t zd, svbool_t pg, uint32_t // CHECK-NEXT: ret [[TMP1]] // svfloat32_t test_svread_hor_za32_f32(svfloat32_t zd, svbool_t pg, uint32_t slice_base) { - return SME_ACLE_FUNC(svread_hor_za32, _f32, _m)(zd, pg, 0, slice_base, 0); + return SME_ACLE_FUNC(svread_hor_za32, _f32, _m)(zd, pg, 0, slice_base); } // CHECK-C-LABEL: @test_svread_hor_za32_f32_1( // CHECK-CXX-LABEL: @_Z26test_svread_hor_za32_f32_1u13__SVFloat32_tu10__SVBool_tj( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) // CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 3 +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) // CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.horiz.nxv4f32( [[ZD:%.*]], [[TMP0]], i32 3, i32 [[TILESLICE]]) // CHECK-NEXT: ret [[TMP1]] // svfloat32_t test_svread_hor_za32_f32_1(svfloat32_t zd, svbool_t pg, uint32_t slice_base) { - return SME_ACLE_FUNC(svread_hor_za32,... |
The new ACLE PR#225[1] now combines the slice parameters for some builtins. This patch is the llvm#2 of 3 patches to update the interface. Slice specifies the ZA slice number directly and needs to be explicity implemented by the "user" with the base register plus the immediate offset [1]https://github.com/ARM-software/acle/pull/225/files
The new ACLE PR#225[1] now combines the slice parameters for some builtins. This patch is the #2 of 3 patches to update the interface.
Slice specifies the ZA slice number directly and needs to be explicity implemented by the "user" with the base register plus the immediate offset
[1]https://github.com/ARM-software/acle/pull/225/files