From e98987ebb48839ea652d63dfaa62ed841b426e46 Mon Sep 17 00:00:00 2001 From: Matt Devereau Date: Thu, 18 Jan 2024 15:41:25 +0000 Subject: [PATCH 1/4] [AArch64][SME] Implement inline-asm clobbers for za/zt0 This enables specifying "za" or "zt0" in the clobber list for inline asm. This complies with the ACLE SME addition to the asm extension here: https://github.com/ARM-software/acle/pull/276 --- clang/lib/Basic/Targets/AArch64.cpp | 5 ++++- clang/test/CodeGen/aarch64-inline-asm.c | 8 ++++++++ llvm/lib/Target/AArch64/AArch64ISelLowering.cpp | 8 ++++++++ llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp | 4 ++++ llvm/test/CodeGen/AArch64/aarch64-za-clobber.ll | 16 ++++++++++++++++ 5 files changed, 40 insertions(+), 1 deletion(-) create mode 100644 llvm/test/CodeGen/AArch64/aarch64-za-clobber.ll diff --git a/clang/lib/Basic/Targets/AArch64.cpp b/clang/lib/Basic/Targets/AArch64.cpp index d47181bfca4fc8..781118c9358987 100644 --- a/clang/lib/Basic/Targets/AArch64.cpp +++ b/clang/lib/Basic/Targets/AArch64.cpp @@ -1200,7 +1200,10 @@ const char *const AArch64TargetInfo::GCCRegNames[] = { // SVE predicate-as-counter registers "pn0", "pn1", "pn2", "pn3", "pn4", "pn5", "pn6", "pn7", "pn8", - "pn9", "pn10", "pn11", "pn12", "pn13", "pn14", "pn15" + "pn9", "pn10", "pn11", "pn12", "pn13", "pn14", "pn15", + + // SME registers + "za", "zt0", }; ArrayRef AArch64TargetInfo::getGCCRegNames() const { diff --git a/clang/test/CodeGen/aarch64-inline-asm.c b/clang/test/CodeGen/aarch64-inline-asm.c index 75e9a8c46b8769..8ddee560b11da4 100644 --- a/clang/test/CodeGen/aarch64-inline-asm.c +++ b/clang/test/CodeGen/aarch64-inline-asm.c @@ -95,3 +95,11 @@ void test_reduced_gpr_constraints(int var32, long var64) { // CHECK: [[ARG2:%.+]] = load i64, ptr // CHECK: call void asm sideeffect "add x0, x0, $0", "@3Ucj,~{x0}"(i64 [[ARG2]]) } + +void test_sme_constraints(){ + asm("movt zt0[3, mul vl], z0" : : : "za"); +// CHECK: call void asm sideeffect "movt zt0[3, mul vl], z0", "~{za}"() + + asm("movt 
zt0[3, mul vl], z0" : : : "zt0"); +// CHECK: call void asm sideeffect "movt zt0[3, mul vl], z0", "~{zt0}"() +} \ No newline at end of file diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 332fb37655288c..6a210846cf4dff 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -10702,6 +10702,14 @@ AArch64TargetLowering::getRegForInlineAsmConstraint( parseConstraintCode(Constraint) != AArch64CC::Invalid) return std::make_pair(unsigned(AArch64::NZCV), &AArch64::CCRRegClass); + if (StringRef("{za}").equals_insensitive(Constraint)){ + return std::make_pair(unsigned(AArch64::ZA), &AArch64::MPRRegClass); + } + + if (StringRef("{zt0}").equals_insensitive(Constraint)){ + return std::make_pair(unsigned(AArch64::ZT0), &AArch64::ZTRRegClass); + } + // Use the default implementation in TargetLowering to convert the register // constraint into a member of a register class. std::pair Res; diff --git a/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp b/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp index ea9882160d6fb2..7d6b86ab8a3e95 100644 --- a/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp @@ -507,6 +507,10 @@ bool AArch64RegisterInfo::isAsmClobberable(const MachineFunction &MF, MCRegisterInfo::regsOverlap(PhysReg, AArch64::X16)) return true; + // ZA/ZT0 registers are reserved but may be permitted in the clobber list. 
+ if (PhysReg.id() == AArch64::ZA || PhysReg.id() == AArch64::ZT0) + return true; + return !isReservedReg(MF, PhysReg); } diff --git a/llvm/test/CodeGen/AArch64/aarch64-za-clobber.ll b/llvm/test/CodeGen/AArch64/aarch64-za-clobber.ll new file mode 100644 index 00000000000000..a8cba7dc9a91e9 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/aarch64-za-clobber.ll @@ -0,0 +1,16 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=aarch64-none-linux-gnu -stop-after=aarch64-isel < %s -o - | FileCheck %s + +define void @alpha( %x) local_unnamed_addr { +entry: +; CHECK: INLINEASM &"movt zt0[3, mul vl], z0", 1 /* sideeffect attdialect */, 12 /* clobber */, implicit-def early-clobber $za + tail call void asm sideeffect "movt zt0[3, mul vl], z0", "~{za}"() + ret void +} + +define void @beta( %x) local_unnamed_addr { +entry: +; CHECK: INLINEASM &"movt zt0[3, mul vl], z0", 1 /* sideeffect attdialect */, 12 /* clobber */, implicit-def early-clobber $zt0 + tail call void asm sideeffect "movt zt0[3, mul vl], z0", "~{zt0}"() + ret void +} From 6391def8b7cfd88b12544766c94b75cb2a5bd385 Mon Sep 17 00:00:00 2001 From: Matt Devereau Date: Mon, 29 Jan 2024 09:59:47 +0000 Subject: [PATCH 2/4] run clang-format --- clang/lib/Basic/Targets/AArch64.cpp | 249 ++++++++++++++++-- .../Target/AArch64/AArch64ISelLowering.cpp | 4 +- 2 files changed, 228 insertions(+), 25 deletions(-) diff --git a/clang/lib/Basic/Targets/AArch64.cpp b/clang/lib/Basic/Targets/AArch64.cpp index 781118c9358987..e15242e79e26d9 100644 --- a/clang/lib/Basic/Targets/AArch64.cpp +++ b/clang/lib/Basic/Targets/AArch64.cpp @@ -1165,45 +1165,248 @@ TargetInfo::BuiltinVaListKind AArch64TargetInfo::getBuiltinVaListKind() const { const char *const AArch64TargetInfo::GCCRegNames[] = { // 32-bit Integer registers - "w0", "w1", "w2", "w3", "w4", "w5", "w6", "w7", "w8", "w9", "w10", "w11", - "w12", "w13", "w14", "w15", "w16", "w17", "w18", "w19", "w20", "w21", "w22", - "w23", "w24", "w25", 
"w26", "w27", "w28", "w29", "w30", "wsp", + "w0", + "w1", + "w2", + "w3", + "w4", + "w5", + "w6", + "w7", + "w8", + "w9", + "w10", + "w11", + "w12", + "w13", + "w14", + "w15", + "w16", + "w17", + "w18", + "w19", + "w20", + "w21", + "w22", + "w23", + "w24", + "w25", + "w26", + "w27", + "w28", + "w29", + "w30", + "wsp", // 64-bit Integer registers - "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", - "x12", "x13", "x14", "x15", "x16", "x17", "x18", "x19", "x20", "x21", "x22", - "x23", "x24", "x25", "x26", "x27", "x28", "fp", "lr", "sp", + "x0", + "x1", + "x2", + "x3", + "x4", + "x5", + "x6", + "x7", + "x8", + "x9", + "x10", + "x11", + "x12", + "x13", + "x14", + "x15", + "x16", + "x17", + "x18", + "x19", + "x20", + "x21", + "x22", + "x23", + "x24", + "x25", + "x26", + "x27", + "x28", + "fp", + "lr", + "sp", // 32-bit floating point regsisters - "s0", "s1", "s2", "s3", "s4", "s5", "s6", "s7", "s8", "s9", "s10", "s11", - "s12", "s13", "s14", "s15", "s16", "s17", "s18", "s19", "s20", "s21", "s22", - "s23", "s24", "s25", "s26", "s27", "s28", "s29", "s30", "s31", + "s0", + "s1", + "s2", + "s3", + "s4", + "s5", + "s6", + "s7", + "s8", + "s9", + "s10", + "s11", + "s12", + "s13", + "s14", + "s15", + "s16", + "s17", + "s18", + "s19", + "s20", + "s21", + "s22", + "s23", + "s24", + "s25", + "s26", + "s27", + "s28", + "s29", + "s30", + "s31", // 64-bit floating point regsisters - "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "d8", "d9", "d10", "d11", - "d12", "d13", "d14", "d15", "d16", "d17", "d18", "d19", "d20", "d21", "d22", - "d23", "d24", "d25", "d26", "d27", "d28", "d29", "d30", "d31", + "d0", + "d1", + "d2", + "d3", + "d4", + "d5", + "d6", + "d7", + "d8", + "d9", + "d10", + "d11", + "d12", + "d13", + "d14", + "d15", + "d16", + "d17", + "d18", + "d19", + "d20", + "d21", + "d22", + "d23", + "d24", + "d25", + "d26", + "d27", + "d28", + "d29", + "d30", + "d31", // Neon vector registers - "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", 
"v10", "v11", - "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19", "v20", "v21", "v22", - "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31", + "v0", + "v1", + "v2", + "v3", + "v4", + "v5", + "v6", + "v7", + "v8", + "v9", + "v10", + "v11", + "v12", + "v13", + "v14", + "v15", + "v16", + "v17", + "v18", + "v19", + "v20", + "v21", + "v22", + "v23", + "v24", + "v25", + "v26", + "v27", + "v28", + "v29", + "v30", + "v31", // SVE vector registers - "z0", "z1", "z2", "z3", "z4", "z5", "z6", "z7", "z8", "z9", "z10", - "z11", "z12", "z13", "z14", "z15", "z16", "z17", "z18", "z19", "z20", "z21", - "z22", "z23", "z24", "z25", "z26", "z27", "z28", "z29", "z30", "z31", + "z0", + "z1", + "z2", + "z3", + "z4", + "z5", + "z6", + "z7", + "z8", + "z9", + "z10", + "z11", + "z12", + "z13", + "z14", + "z15", + "z16", + "z17", + "z18", + "z19", + "z20", + "z21", + "z22", + "z23", + "z24", + "z25", + "z26", + "z27", + "z28", + "z29", + "z30", + "z31", // SVE predicate registers - "p0", "p1", "p2", "p3", "p4", "p5", "p6", "p7", "p8", "p9", "p10", - "p11", "p12", "p13", "p14", "p15", + "p0", + "p1", + "p2", + "p3", + "p4", + "p5", + "p6", + "p7", + "p8", + "p9", + "p10", + "p11", + "p12", + "p13", + "p14", + "p15", // SVE predicate-as-counter registers - "pn0", "pn1", "pn2", "pn3", "pn4", "pn5", "pn6", "pn7", "pn8", - "pn9", "pn10", "pn11", "pn12", "pn13", "pn14", "pn15", + "pn0", + "pn1", + "pn2", + "pn3", + "pn4", + "pn5", + "pn6", + "pn7", + "pn8", + "pn9", + "pn10", + "pn11", + "pn12", + "pn13", + "pn14", + "pn15", // SME registers - "za", "zt0", + "za", + "zt0", }; ArrayRef AArch64TargetInfo::getGCCRegNames() const { diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 6a210846cf4dff..e86d57968a46fd 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -10702,11 +10702,11 @@ AArch64TargetLowering::getRegForInlineAsmConstraint( 
parseConstraintCode(Constraint) != AArch64CC::Invalid) return std::make_pair(unsigned(AArch64::NZCV), &AArch64::CCRRegClass); - if (StringRef("{za}").equals_insensitive(Constraint)){ + if (StringRef("{za}").equals_insensitive(Constraint)) { return std::make_pair(unsigned(AArch64::ZA), &AArch64::MPRRegClass); } - if (StringRef("{zt0}").equals_insensitive(Constraint)){ + if (StringRef("{zt0}").equals_insensitive(Constraint)) { return std::make_pair(unsigned(AArch64::ZT0), &AArch64::ZTRRegClass); } From 4beffbf7dd9d52bd7b73f589a94ee92517a24e63 Mon Sep 17 00:00:00 2001 From: Matt Devereau Date: Wed, 31 Jan 2024 23:09:05 +0000 Subject: [PATCH 3/4] Disable clang-format for GCCRegNames contents --- clang/lib/Basic/Targets/AArch64.cpp | 253 +++------------------------- 1 file changed, 27 insertions(+), 226 deletions(-) diff --git a/clang/lib/Basic/Targets/AArch64.cpp b/clang/lib/Basic/Targets/AArch64.cpp index e15242e79e26d9..dbf0fdad15dc88 100644 --- a/clang/lib/Basic/Targets/AArch64.cpp +++ b/clang/lib/Basic/Targets/AArch64.cpp @@ -1164,249 +1164,50 @@ TargetInfo::BuiltinVaListKind AArch64TargetInfo::getBuiltinVaListKind() const { } const char *const AArch64TargetInfo::GCCRegNames[] = { + // clang-format off + // 32-bit Integer registers - "w0", - "w1", - "w2", - "w3", - "w4", - "w5", - "w6", - "w7", - "w8", - "w9", - "w10", - "w11", - "w12", - "w13", - "w14", - "w15", - "w16", - "w17", - "w18", - "w19", - "w20", - "w21", - "w22", - "w23", - "w24", - "w25", - "w26", - "w27", - "w28", - "w29", - "w30", - "wsp", + "w0", "w1", "w2", "w3", "w4", "w5", "w6", "w7", "w8", "w9", "w10", "w11", + "w12", "w13", "w14", "w15", "w16", "w17", "w18", "w19", "w20", "w21", "w22", + "w23", "w24", "w25", "w26", "w27", "w28", "w29", "w30", "wsp", // 64-bit Integer registers - "x0", - "x1", - "x2", - "x3", - "x4", - "x5", - "x6", - "x7", - "x8", - "x9", - "x10", - "x11", - "x12", - "x13", - "x14", - "x15", - "x16", - "x17", - "x18", - "x19", - "x20", - "x21", - "x22", - "x23", - "x24", - 
"x25", - "x26", - "x27", - "x28", - "fp", - "lr", - "sp", + "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", + "x12", "x13", "x14", "x15", "x16", "x17", "x18", "x19", "x20", "x21", "x22", + "x23", "x24", "x25", "x26", "x27", "x28", "fp", "lr", "sp", // 32-bit floating point regsisters - "s0", - "s1", - "s2", - "s3", - "s4", - "s5", - "s6", - "s7", - "s8", - "s9", - "s10", - "s11", - "s12", - "s13", - "s14", - "s15", - "s16", - "s17", - "s18", - "s19", - "s20", - "s21", - "s22", - "s23", - "s24", - "s25", - "s26", - "s27", - "s28", - "s29", - "s30", - "s31", + "s0", "s1", "s2", "s3", "s4", "s5", "s6", "s7", "s8", "s9", "s10", "s11", + "s12", "s13", "s14", "s15", "s16", "s17", "s18", "s19", "s20", "s21", "s22", + "s23", "s24", "s25", "s26", "s27", "s28", "s29", "s30", "s31", // 64-bit floating point regsisters - "d0", - "d1", - "d2", - "d3", - "d4", - "d5", - "d6", - "d7", - "d8", - "d9", - "d10", - "d11", - "d12", - "d13", - "d14", - "d15", - "d16", - "d17", - "d18", - "d19", - "d20", - "d21", - "d22", - "d23", - "d24", - "d25", - "d26", - "d27", - "d28", - "d29", - "d30", - "d31", + "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "d8", "d9", "d10", "d11", + "d12", "d13", "d14", "d15", "d16", "d17", "d18", "d19", "d20", "d21", "d22", + "d23", "d24", "d25", "d26", "d27", "d28", "d29", "d30", "d31", // Neon vector registers - "v0", - "v1", - "v2", - "v3", - "v4", - "v5", - "v6", - "v7", - "v8", - "v9", - "v10", - "v11", - "v12", - "v13", - "v14", - "v15", - "v16", - "v17", - "v18", - "v19", - "v20", - "v21", - "v22", - "v23", - "v24", - "v25", - "v26", - "v27", - "v28", - "v29", - "v30", - "v31", + "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", "v10", "v11", + "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19", "v20", "v21", "v22", + "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31", // SVE vector registers - "z0", - "z1", - "z2", - "z3", - "z4", - "z5", - "z6", - "z7", - "z8", - "z9", - "z10", - "z11", - "z12", - 
"z13", - "z14", - "z15", - "z16", - "z17", - "z18", - "z19", - "z20", - "z21", - "z22", - "z23", - "z24", - "z25", - "z26", - "z27", - "z28", - "z29", - "z30", - "z31", + "z0", "z1", "z2", "z3", "z4", "z5", "z6", "z7", "z8", "z9", "z10", + "z11", "z12", "z13", "z14", "z15", "z16", "z17", "z18", "z19", "z20", "z21", + "z22", "z23", "z24", "z25", "z26", "z27", "z28", "z29", "z30", "z31", // SVE predicate registers - "p0", - "p1", - "p2", - "p3", - "p4", - "p5", - "p6", - "p7", - "p8", - "p9", - "p10", - "p11", - "p12", - "p13", - "p14", - "p15", + "p0", "p1", "p2", "p3", "p4", "p5", "p6", "p7", "p8", "p9", "p10", + "p11", "p12", "p13", "p14", "p15", // SVE predicate-as-counter registers - "pn0", - "pn1", - "pn2", - "pn3", - "pn4", - "pn5", - "pn6", - "pn7", - "pn8", - "pn9", - "pn10", - "pn11", - "pn12", - "pn13", - "pn14", - "pn15", + "pn0", "pn1", "pn2", "pn3", "pn4", "pn5", "pn6", "pn7", "pn8", + "pn9", "pn10", "pn11", "pn12", "pn13", "pn14", "pn15", // SME registers - "za", - "zt0", + "za", "zt0", + + // clang-format on }; ArrayRef AArch64TargetInfo::getGCCRegNames() const { From 977599343ca97d9f7e224ca28af436ed760539c4 Mon Sep 17 00:00:00 2001 From: Matt Devereau Date: Thu, 1 Feb 2024 09:45:39 +0000 Subject: [PATCH 4/4] Add syntax improvements --- llvm/lib/Target/AArch64/AArch64ISelLowering.cpp | 4 ++-- llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index e86d57968a46fd..bd757343e10072 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -10702,11 +10702,11 @@ AArch64TargetLowering::getRegForInlineAsmConstraint( parseConstraintCode(Constraint) != AArch64CC::Invalid) return std::make_pair(unsigned(AArch64::NZCV), &AArch64::CCRRegClass); - if (StringRef("{za}").equals_insensitive(Constraint)) { + if (Constraint == "{za}") { return 
std::make_pair(unsigned(AArch64::ZA), &AArch64::MPRRegClass); } - if (StringRef("{zt0}").equals_insensitive(Constraint)) { + if (Constraint == "{zt0}") { return std::make_pair(unsigned(AArch64::ZT0), &AArch64::ZTRRegClass); } diff --git a/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp b/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp index 7d6b86ab8a3e95..cbb21cd60335e8 100644 --- a/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp @@ -508,7 +508,7 @@ bool AArch64RegisterInfo::isAsmClobberable(const MachineFunction &MF, return true; // ZA/ZT0 registers are reserved but may be permitted in the clobber list. - if (PhysReg.id() == AArch64::ZA || PhysReg.id() == AArch64::ZT0) + if (PhysReg == AArch64::ZA || PhysReg == AArch64::ZT0) return true; return !isReservedReg(MF, PhysReg);