-
Notifications
You must be signed in to change notification settings - Fork 12.1k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[LoongArch] Override LoongArchTargetLowering::getExtendForAtomicCmpSwapArg #83656
Conversation
…apArg This patch aims to solve Firefox issue: https://bugzilla.mozilla.org/show_bug.cgi?id=1882301 Similar to 616289e. Currently LoongArch uses an ll.[wd]/sc.[wd] loop for ATOMIC_CMP_XCHG. Because the comparison in the loop is full-width (i.e. the `bne` instruction), we must sign extend the input comparison argument. Note that LoongArch ISA manual V1.1 has introduced compare-and-swap instructions. We would change the implementation (return `ANY_EXTEND`) when we support them.
@llvm/pr-subscribers-backend-loongarch Author: Lu Weining (SixWeining) ChangesThis patch aims to solve Firefox issue: Similar to 616289e. Currently LoongArch uses an ll.[wd]/sc.[wd] loop for ATOMIC_CMP_XCHG. Because the comparison in the loop is full-width (i.e. the `bne` instruction), we must sign extend the input comparison argument. Note that LoongArch ISA manual V1.1 has introduced compare-and-swap instructions. We would change the implementation (return `ANY_EXTEND`) when we support them. Patch is 47.64 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/83656.diff 5 Files Affected:
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
index 3324dd2e8fc217..c8e955a23336d7 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -4963,3 +4963,8 @@ bool LoongArchTargetLowering::hasAndNotCompare(SDValue Y) const {
return !isa<ConstantSDNode>(Y);
}
+
+ISD::NodeType LoongArchTargetLowering::getExtendForAtomicCmpSwapArg() const {
+ // TODO: LAMCAS will use amcas{_DB,}.[bhwd] which does not require extension.
+ return ISD::SIGN_EXTEND;
+}
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
index 72182623b2c3dd..9e9ac0b8269291 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
@@ -206,6 +206,8 @@ class LoongArchTargetLowering : public TargetLowering {
return ISD::SIGN_EXTEND;
}
+ ISD::NodeType getExtendForAtomicCmpSwapArg() const override;
+
Register getRegisterByName(const char *RegName, LLT VT,
const MachineFunction &MF) const override;
bool mayBeEmittedAsTailCall(const CallInst *CI) const override;
diff --git a/llvm/test/CodeGen/LoongArch/atomicrmw-uinc-udec-wrap.ll b/llvm/test/CodeGen/LoongArch/atomicrmw-uinc-udec-wrap.ll
index b0f29ee790885d..b84c1093eb75f2 100644
--- a/llvm/test/CodeGen/LoongArch/atomicrmw-uinc-udec-wrap.ll
+++ b/llvm/test/CodeGen/LoongArch/atomicrmw-uinc-udec-wrap.ll
@@ -25,15 +25,16 @@ define i8 @atomicrmw_uinc_wrap_i8(ptr %ptr, i8 %val) {
; LA64-NEXT: andi $a5, $a5, 255
; LA64-NEXT: sll.w $a5, $a5, $a3
; LA64-NEXT: and $a6, $a2, $a4
-; LA64-NEXT: or $a6, $a6, $a5
+; LA64-NEXT: or $a5, $a6, $a5
+; LA64-NEXT: addi.w $a6, $a2, 0
; LA64-NEXT: .LBB0_3: # %atomicrmw.start
; LA64-NEXT: # Parent Loop BB0_1 Depth=1
; LA64-NEXT: # => This Inner Loop Header: Depth=2
-; LA64-NEXT: ll.w $a5, $a0, 0
-; LA64-NEXT: bne $a5, $a2, .LBB0_5
+; LA64-NEXT: ll.w $a2, $a0, 0
+; LA64-NEXT: bne $a2, $a6, .LBB0_5
; LA64-NEXT: # %bb.4: # %atomicrmw.start
; LA64-NEXT: # in Loop: Header=BB0_3 Depth=2
-; LA64-NEXT: move $a7, $a6
+; LA64-NEXT: move $a7, $a5
; LA64-NEXT: sc.w $a7, $a0, 0
; LA64-NEXT: beqz $a7, .LBB0_3
; LA64-NEXT: b .LBB0_6
@@ -42,11 +43,9 @@ define i8 @atomicrmw_uinc_wrap_i8(ptr %ptr, i8 %val) {
; LA64-NEXT: dbar 20
; LA64-NEXT: .LBB0_6: # %atomicrmw.start
; LA64-NEXT: # in Loop: Header=BB0_1 Depth=1
-; LA64-NEXT: addi.w $a6, $a2, 0
-; LA64-NEXT: move $a2, $a5
-; LA64-NEXT: bne $a5, $a6, .LBB0_1
+; LA64-NEXT: bne $a2, $a6, .LBB0_1
; LA64-NEXT: # %bb.2: # %atomicrmw.end
-; LA64-NEXT: srl.w $a0, $a5, $a3
+; LA64-NEXT: srl.w $a0, $a2, $a3
; LA64-NEXT: ret
%result = atomicrmw uinc_wrap ptr %ptr, i8 %val seq_cst
ret i8 %result
@@ -77,15 +76,16 @@ define i16 @atomicrmw_uinc_wrap_i16(ptr %ptr, i16 %val) {
; LA64-NEXT: bstrpick.d $a5, $a5, 15, 0
; LA64-NEXT: sll.w $a5, $a5, $a3
; LA64-NEXT: and $a6, $a2, $a4
-; LA64-NEXT: or $a6, $a6, $a5
+; LA64-NEXT: or $a5, $a6, $a5
+; LA64-NEXT: addi.w $a6, $a2, 0
; LA64-NEXT: .LBB1_3: # %atomicrmw.start
; LA64-NEXT: # Parent Loop BB1_1 Depth=1
; LA64-NEXT: # => This Inner Loop Header: Depth=2
-; LA64-NEXT: ll.w $a5, $a0, 0
-; LA64-NEXT: bne $a5, $a2, .LBB1_5
+; LA64-NEXT: ll.w $a2, $a0, 0
+; LA64-NEXT: bne $a2, $a6, .LBB1_5
; LA64-NEXT: # %bb.4: # %atomicrmw.start
; LA64-NEXT: # in Loop: Header=BB1_3 Depth=2
-; LA64-NEXT: move $a7, $a6
+; LA64-NEXT: move $a7, $a5
; LA64-NEXT: sc.w $a7, $a0, 0
; LA64-NEXT: beqz $a7, .LBB1_3
; LA64-NEXT: b .LBB1_6
@@ -94,11 +94,9 @@ define i16 @atomicrmw_uinc_wrap_i16(ptr %ptr, i16 %val) {
; LA64-NEXT: dbar 20
; LA64-NEXT: .LBB1_6: # %atomicrmw.start
; LA64-NEXT: # in Loop: Header=BB1_1 Depth=1
-; LA64-NEXT: addi.w $a6, $a2, 0
-; LA64-NEXT: move $a2, $a5
-; LA64-NEXT: bne $a5, $a6, .LBB1_1
+; LA64-NEXT: bne $a2, $a6, .LBB1_1
; LA64-NEXT: # %bb.2: # %atomicrmw.end
-; LA64-NEXT: srl.w $a0, $a5, $a3
+; LA64-NEXT: srl.w $a0, $a2, $a3
; LA64-NEXT: ret
%result = atomicrmw uinc_wrap ptr %ptr, i16 %val seq_cst
ret i16 %result
@@ -107,37 +105,36 @@ define i16 @atomicrmw_uinc_wrap_i16(ptr %ptr, i16 %val) {
define i32 @atomicrmw_uinc_wrap_i32(ptr %ptr, i32 %val) {
; LA64-LABEL: atomicrmw_uinc_wrap_i32:
; LA64: # %bb.0:
-; LA64-NEXT: ld.w $a3, $a0, 0
-; LA64-NEXT: addi.w $a2, $a1, 0
+; LA64-NEXT: ld.w $a2, $a0, 0
+; LA64-NEXT: addi.w $a1, $a1, 0
; LA64-NEXT: .p2align 4, , 16
; LA64-NEXT: .LBB2_1: # %atomicrmw.start
; LA64-NEXT: # =>This Loop Header: Depth=1
; LA64-NEXT: # Child Loop BB2_3 Depth 2
-; LA64-NEXT: addi.w $a4, $a3, 0
-; LA64-NEXT: sltu $a1, $a4, $a2
-; LA64-NEXT: xori $a1, $a1, 1
-; LA64-NEXT: addi.d $a5, $a3, 1
-; LA64-NEXT: masknez $a5, $a5, $a1
+; LA64-NEXT: addi.w $a3, $a2, 0
+; LA64-NEXT: sltu $a4, $a3, $a1
+; LA64-NEXT: xori $a4, $a4, 1
+; LA64-NEXT: addi.d $a2, $a2, 1
+; LA64-NEXT: masknez $a4, $a2, $a4
; LA64-NEXT: .LBB2_3: # %atomicrmw.start
; LA64-NEXT: # Parent Loop BB2_1 Depth=1
; LA64-NEXT: # => This Inner Loop Header: Depth=2
-; LA64-NEXT: ll.w $a1, $a0, 0
-; LA64-NEXT: bne $a1, $a3, .LBB2_5
+; LA64-NEXT: ll.w $a2, $a0, 0
+; LA64-NEXT: bne $a2, $a3, .LBB2_5
; LA64-NEXT: # %bb.4: # %atomicrmw.start
; LA64-NEXT: # in Loop: Header=BB2_3 Depth=2
-; LA64-NEXT: move $a6, $a5
-; LA64-NEXT: sc.w $a6, $a0, 0
-; LA64-NEXT: beqz $a6, .LBB2_3
+; LA64-NEXT: move $a5, $a4
+; LA64-NEXT: sc.w $a5, $a0, 0
+; LA64-NEXT: beqz $a5, .LBB2_3
; LA64-NEXT: b .LBB2_6
; LA64-NEXT: .LBB2_5: # %atomicrmw.start
; LA64-NEXT: # in Loop: Header=BB2_1 Depth=1
; LA64-NEXT: dbar 20
; LA64-NEXT: .LBB2_6: # %atomicrmw.start
; LA64-NEXT: # in Loop: Header=BB2_1 Depth=1
-; LA64-NEXT: move $a3, $a1
-; LA64-NEXT: bne $a1, $a4, .LBB2_1
+; LA64-NEXT: bne $a2, $a3, .LBB2_1
; LA64-NEXT: # %bb.2: # %atomicrmw.end
-; LA64-NEXT: move $a0, $a1
+; LA64-NEXT: move $a0, $a2
; LA64-NEXT: ret
%result = atomicrmw uinc_wrap ptr %ptr, i32 %val seq_cst
ret i32 %result
@@ -209,15 +206,16 @@ define i8 @atomicrmw_udec_wrap_i8(ptr %ptr, i8 %val) {
; LA64-NEXT: andi $a6, $a6, 255
; LA64-NEXT: sll.w $a6, $a6, $a3
; LA64-NEXT: and $a7, $a2, $a4
-; LA64-NEXT: or $a7, $a7, $a6
+; LA64-NEXT: or $a6, $a7, $a6
+; LA64-NEXT: addi.w $a7, $a2, 0
; LA64-NEXT: .LBB4_3: # %atomicrmw.start
; LA64-NEXT: # Parent Loop BB4_1 Depth=1
; LA64-NEXT: # => This Inner Loop Header: Depth=2
-; LA64-NEXT: ll.w $a6, $a0, 0
-; LA64-NEXT: bne $a6, $a2, .LBB4_5
+; LA64-NEXT: ll.w $a2, $a0, 0
+; LA64-NEXT: bne $a2, $a7, .LBB4_5
; LA64-NEXT: # %bb.4: # %atomicrmw.start
; LA64-NEXT: # in Loop: Header=BB4_3 Depth=2
-; LA64-NEXT: move $t0, $a7
+; LA64-NEXT: move $t0, $a6
; LA64-NEXT: sc.w $t0, $a0, 0
; LA64-NEXT: beqz $t0, .LBB4_3
; LA64-NEXT: b .LBB4_6
@@ -226,11 +224,9 @@ define i8 @atomicrmw_udec_wrap_i8(ptr %ptr, i8 %val) {
; LA64-NEXT: dbar 20
; LA64-NEXT: .LBB4_6: # %atomicrmw.start
; LA64-NEXT: # in Loop: Header=BB4_1 Depth=1
-; LA64-NEXT: addi.w $a7, $a2, 0
-; LA64-NEXT: move $a2, $a6
-; LA64-NEXT: bne $a6, $a7, .LBB4_1
+; LA64-NEXT: bne $a2, $a7, .LBB4_1
; LA64-NEXT: # %bb.2: # %atomicrmw.end
-; LA64-NEXT: srl.w $a0, $a6, $a3
+; LA64-NEXT: srl.w $a0, $a2, $a3
; LA64-NEXT: ret
%result = atomicrmw udec_wrap ptr %ptr, i8 %val seq_cst
ret i8 %result
@@ -266,15 +262,16 @@ define i16 @atomicrmw_udec_wrap_i16(ptr %ptr, i16 %val) {
; LA64-NEXT: bstrpick.d $a6, $a6, 15, 0
; LA64-NEXT: sll.w $a6, $a6, $a3
; LA64-NEXT: and $a7, $a2, $a4
-; LA64-NEXT: or $a7, $a7, $a6
+; LA64-NEXT: or $a6, $a7, $a6
+; LA64-NEXT: addi.w $a7, $a2, 0
; LA64-NEXT: .LBB5_3: # %atomicrmw.start
; LA64-NEXT: # Parent Loop BB5_1 Depth=1
; LA64-NEXT: # => This Inner Loop Header: Depth=2
-; LA64-NEXT: ll.w $a6, $a0, 0
-; LA64-NEXT: bne $a6, $a2, .LBB5_5
+; LA64-NEXT: ll.w $a2, $a0, 0
+; LA64-NEXT: bne $a2, $a7, .LBB5_5
; LA64-NEXT: # %bb.4: # %atomicrmw.start
; LA64-NEXT: # in Loop: Header=BB5_3 Depth=2
-; LA64-NEXT: move $t0, $a7
+; LA64-NEXT: move $t0, $a6
; LA64-NEXT: sc.w $t0, $a0, 0
; LA64-NEXT: beqz $t0, .LBB5_3
; LA64-NEXT: b .LBB5_6
@@ -283,11 +280,9 @@ define i16 @atomicrmw_udec_wrap_i16(ptr %ptr, i16 %val) {
; LA64-NEXT: dbar 20
; LA64-NEXT: .LBB5_6: # %atomicrmw.start
; LA64-NEXT: # in Loop: Header=BB5_1 Depth=1
-; LA64-NEXT: addi.w $a7, $a2, 0
-; LA64-NEXT: move $a2, $a6
-; LA64-NEXT: bne $a6, $a7, .LBB5_1
+; LA64-NEXT: bne $a2, $a7, .LBB5_1
; LA64-NEXT: # %bb.2: # %atomicrmw.end
-; LA64-NEXT: srl.w $a0, $a6, $a3
+; LA64-NEXT: srl.w $a0, $a2, $a3
; LA64-NEXT: ret
%result = atomicrmw udec_wrap ptr %ptr, i16 %val seq_cst
ret i16 %result
@@ -296,22 +291,22 @@ define i16 @atomicrmw_udec_wrap_i16(ptr %ptr, i16 %val) {
define i32 @atomicrmw_udec_wrap_i32(ptr %ptr, i32 %val) {
; LA64-LABEL: atomicrmw_udec_wrap_i32:
; LA64: # %bb.0:
-; LA64-NEXT: ld.w $a4, $a0, 0
+; LA64-NEXT: ld.w $a2, $a0, 0
; LA64-NEXT: addi.w $a3, $a1, 0
; LA64-NEXT: .p2align 4, , 16
; LA64-NEXT: .LBB6_1: # %atomicrmw.start
; LA64-NEXT: # =>This Loop Header: Depth=1
; LA64-NEXT: # Child Loop BB6_3 Depth 2
-; LA64-NEXT: addi.w $a5, $a4, 0
-; LA64-NEXT: sltu $a2, $a3, $a5
-; LA64-NEXT: addi.d $a6, $a4, -1
-; LA64-NEXT: masknez $a6, $a6, $a2
-; LA64-NEXT: maskeqz $a2, $a1, $a2
-; LA64-NEXT: or $a2, $a2, $a6
-; LA64-NEXT: sltui $a6, $a5, 1
-; LA64-NEXT: masknez $a2, $a2, $a6
-; LA64-NEXT: maskeqz $a6, $a1, $a6
-; LA64-NEXT: or $a6, $a6, $a2
+; LA64-NEXT: addi.w $a4, $a2, 0
+; LA64-NEXT: sltu $a5, $a3, $a4
+; LA64-NEXT: addi.d $a2, $a2, -1
+; LA64-NEXT: masknez $a2, $a2, $a5
+; LA64-NEXT: maskeqz $a5, $a1, $a5
+; LA64-NEXT: or $a2, $a5, $a2
+; LA64-NEXT: sltui $a5, $a4, 1
+; LA64-NEXT: masknez $a2, $a2, $a5
+; LA64-NEXT: maskeqz $a5, $a1, $a5
+; LA64-NEXT: or $a5, $a5, $a2
; LA64-NEXT: .LBB6_3: # %atomicrmw.start
; LA64-NEXT: # Parent Loop BB6_1 Depth=1
; LA64-NEXT: # => This Inner Loop Header: Depth=2
@@ -319,17 +314,16 @@ define i32 @atomicrmw_udec_wrap_i32(ptr %ptr, i32 %val) {
; LA64-NEXT: bne $a2, $a4, .LBB6_5
; LA64-NEXT: # %bb.4: # %atomicrmw.start
; LA64-NEXT: # in Loop: Header=BB6_3 Depth=2
-; LA64-NEXT: move $a7, $a6
-; LA64-NEXT: sc.w $a7, $a0, 0
-; LA64-NEXT: beqz $a7, .LBB6_3
+; LA64-NEXT: move $a6, $a5
+; LA64-NEXT: sc.w $a6, $a0, 0
+; LA64-NEXT: beqz $a6, .LBB6_3
; LA64-NEXT: b .LBB6_6
; LA64-NEXT: .LBB6_5: # %atomicrmw.start
; LA64-NEXT: # in Loop: Header=BB6_1 Depth=1
; LA64-NEXT: dbar 20
; LA64-NEXT: .LBB6_6: # %atomicrmw.start
; LA64-NEXT: # in Loop: Header=BB6_1 Depth=1
-; LA64-NEXT: move $a4, $a2
-; LA64-NEXT: bne $a2, $a5, .LBB6_1
+; LA64-NEXT: bne $a2, $a4, .LBB6_1
; LA64-NEXT: # %bb.2: # %atomicrmw.end
; LA64-NEXT: move $a0, $a2
; LA64-NEXT: ret
diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/atomic-cmpxchg.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/atomic-cmpxchg.ll
index 417c865f6383ff..31ecec6ea8051b 100644
--- a/llvm/test/CodeGen/LoongArch/ir-instruction/atomic-cmpxchg.ll
+++ b/llvm/test/CodeGen/LoongArch/ir-instruction/atomic-cmpxchg.ll
@@ -69,6 +69,7 @@ define void @cmpxchg_i16_acquire_acquire(ptr %ptr, i16 %cmp, i16 %val) nounwind
define void @cmpxchg_i32_acquire_acquire(ptr %ptr, i32 %cmp, i32 %val) nounwind {
; LA64-LABEL: cmpxchg_i32_acquire_acquire:
; LA64: # %bb.0:
+; LA64-NEXT: addi.w $a1, $a1, 0
; LA64-NEXT: .LBB2_1: # =>This Inner Loop Header: Depth=1
; LA64-NEXT: ll.w $a3, $a0, 0
; LA64-NEXT: bne $a3, $a1, .LBB2_3
@@ -172,6 +173,7 @@ define void @cmpxchg_i16_acquire_monotonic(ptr %ptr, i16 %cmp, i16 %val) nounwin
define void @cmpxchg_i32_acquire_monotonic(ptr %ptr, i32 %cmp, i32 %val) nounwind {
; LA64-LABEL: cmpxchg_i32_acquire_monotonic:
; LA64: # %bb.0:
+; LA64-NEXT: addi.w $a1, $a1, 0
; LA64-NEXT: .LBB6_1: # =>This Inner Loop Header: Depth=1
; LA64-NEXT: ll.w $a3, $a0, 0
; LA64-NEXT: bne $a3, $a1, .LBB6_3
@@ -279,9 +281,10 @@ define i16 @cmpxchg_i16_acquire_acquire_reti16(ptr %ptr, i16 %cmp, i16 %val) nou
define i32 @cmpxchg_i32_acquire_acquire_reti32(ptr %ptr, i32 %cmp, i32 %val) nounwind {
; LA64-LABEL: cmpxchg_i32_acquire_acquire_reti32:
; LA64: # %bb.0:
+; LA64-NEXT: addi.w $a3, $a1, 0
; LA64-NEXT: .LBB10_1: # =>This Inner Loop Header: Depth=1
-; LA64-NEXT: ll.w $a3, $a0, 0
-; LA64-NEXT: bne $a3, $a1, .LBB10_3
+; LA64-NEXT: ll.w $a1, $a0, 0
+; LA64-NEXT: bne $a1, $a3, .LBB10_3
; LA64-NEXT: # %bb.2: # in Loop: Header=BB10_1 Depth=1
; LA64-NEXT: move $a4, $a2
; LA64-NEXT: sc.w $a4, $a0, 0
@@ -290,7 +293,7 @@ define i32 @cmpxchg_i32_acquire_acquire_reti32(ptr %ptr, i32 %cmp, i32 %val) nou
; LA64-NEXT: .LBB10_3:
; LA64-NEXT: dbar 20
; LA64-NEXT: .LBB10_4:
-; LA64-NEXT: move $a0, $a3
+; LA64-NEXT: move $a0, $a1
; LA64-NEXT: ret
%tmp = cmpxchg ptr %ptr, i32 %cmp, i32 %val acquire acquire
%res = extractvalue { i32, i1 } %tmp, 0
@@ -396,6 +399,7 @@ define i1 @cmpxchg_i16_acquire_acquire_reti1(ptr %ptr, i16 %cmp, i16 %val) nounw
define i1 @cmpxchg_i32_acquire_acquire_reti1(ptr %ptr, i32 %cmp, i32 %val) nounwind {
; LA64-LABEL: cmpxchg_i32_acquire_acquire_reti1:
; LA64: # %bb.0:
+; LA64-NEXT: addi.w $a1, $a1, 0
; LA64-NEXT: .LBB14_1: # =>This Inner Loop Header: Depth=1
; LA64-NEXT: ll.w $a3, $a0, 0
; LA64-NEXT: bne $a3, $a1, .LBB14_3
@@ -407,8 +411,7 @@ define i1 @cmpxchg_i32_acquire_acquire_reti1(ptr %ptr, i32 %cmp, i32 %val) nounw
; LA64-NEXT: .LBB14_3:
; LA64-NEXT: dbar 20
; LA64-NEXT: .LBB14_4:
-; LA64-NEXT: addi.w $a0, $a1, 0
-; LA64-NEXT: xor $a0, $a3, $a0
+; LA64-NEXT: xor $a0, $a3, $a1
; LA64-NEXT: sltui $a0, $a0, 1
; LA64-NEXT: ret
%tmp = cmpxchg ptr %ptr, i32 %cmp, i32 %val acquire acquire
@@ -506,6 +509,7 @@ define void @cmpxchg_i16_monotonic_monotonic(ptr %ptr, i16 %cmp, i16 %val) nounw
define void @cmpxchg_i32_monotonic_monotonic(ptr %ptr, i32 %cmp, i32 %val) nounwind {
; LA64-LABEL: cmpxchg_i32_monotonic_monotonic:
; LA64: # %bb.0:
+; LA64-NEXT: addi.w $a1, $a1, 0
; LA64-NEXT: .LBB18_1: # =>This Inner Loop Header: Depth=1
; LA64-NEXT: ll.w $a3, $a0, 0
; LA64-NEXT: bne $a3, $a1, .LBB18_3
@@ -613,9 +617,10 @@ define i16 @cmpxchg_i16_monotonic_monotonic_reti16(ptr %ptr, i16 %cmp, i16 %val)
define i32 @cmpxchg_i32_monotonic_monotonic_reti32(ptr %ptr, i32 %cmp, i32 %val) nounwind {
; LA64-LABEL: cmpxchg_i32_monotonic_monotonic_reti32:
; LA64: # %bb.0:
+; LA64-NEXT: addi.w $a3, $a1, 0
; LA64-NEXT: .LBB22_1: # =>This Inner Loop Header: Depth=1
-; LA64-NEXT: ll.w $a3, $a0, 0
-; LA64-NEXT: bne $a3, $a1, .LBB22_3
+; LA64-NEXT: ll.w $a1, $a0, 0
+; LA64-NEXT: bne $a1, $a3, .LBB22_3
; LA64-NEXT: # %bb.2: # in Loop: Header=BB22_1 Depth=1
; LA64-NEXT: move $a4, $a2
; LA64-NEXT: sc.w $a4, $a0, 0
@@ -624,7 +629,7 @@ define i32 @cmpxchg_i32_monotonic_monotonic_reti32(ptr %ptr, i32 %cmp, i32 %val)
; LA64-NEXT: .LBB22_3:
; LA64-NEXT: dbar 1792
; LA64-NEXT: .LBB22_4:
-; LA64-NEXT: move $a0, $a3
+; LA64-NEXT: move $a0, $a1
; LA64-NEXT: ret
%tmp = cmpxchg ptr %ptr, i32 %cmp, i32 %val monotonic monotonic
%res = extractvalue { i32, i1 } %tmp, 0
@@ -730,6 +735,7 @@ define i1 @cmpxchg_i16_monotonic_monotonic_reti1(ptr %ptr, i16 %cmp, i16 %val) n
define i1 @cmpxchg_i32_monotonic_monotonic_reti1(ptr %ptr, i32 %cmp, i32 %val) nounwind {
; LA64-LABEL: cmpxchg_i32_monotonic_monotonic_reti1:
; LA64: # %bb.0:
+; LA64-NEXT: addi.w $a1, $a1, 0
; LA64-NEXT: .LBB26_1: # =>This Inner Loop Header: Depth=1
; LA64-NEXT: ll.w $a3, $a0, 0
; LA64-NEXT: bne $a3, $a1, .LBB26_3
@@ -741,8 +747,7 @@ define i1 @cmpxchg_i32_monotonic_monotonic_reti1(ptr %ptr, i32 %cmp, i32 %val) n
; LA64-NEXT: .LBB26_3:
; LA64-NEXT: dbar 1792
; LA64-NEXT: .LBB26_4:
-; LA64-NEXT: addi.w $a0, $a1, 0
-; LA64-NEXT: xor $a0, $a3, $a0
+; LA64-NEXT: xor $a0, $a3, $a1
; LA64-NEXT: sltui $a0, $a0, 1
; LA64-NEXT: ret
%tmp = cmpxchg ptr %ptr, i32 %cmp, i32 %val monotonic monotonic
diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-fp.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-fp.ll
index 589360823b1488..4d8160d7080340 100644
--- a/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-fp.ll
+++ b/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-fp.ll
@@ -16,6 +16,7 @@ define float @float_fadd_acquire(ptr %p) nounwind {
; LA64F-NEXT: fadd.s $fa2, $fa0, $fa1
; LA64F-NEXT: movfr2gr.s $a1, $fa2
; LA64F-NEXT: movfr2gr.s $a2, $fa0
+; LA64F-NEXT: addi.w $a2, $a2, 0
; LA64F-NEXT: .LBB0_3: # %atomicrmw.start
; LA64F-NEXT: # Parent Loop BB0_1 Depth=1
; LA64F-NEXT: # => This Inner Loop Header: Depth=2
@@ -33,8 +34,7 @@ define float @float_fadd_acquire(ptr %p) nounwind {
; LA64F-NEXT: .LBB0_6: # %atomicrmw.start
; LA64F-NEXT: # in Loop: Header=BB0_1 Depth=1
; LA64F-NEXT: movgr2fr.w $fa0, $a3
-; LA64F-NEXT: addi.w $a1, $a2, 0
-; LA64F-NEXT: bne $a3, $a1, .LBB0_1
+; LA64F-NEXT: bne $a3, $a2, .LBB0_1
; LA64F-NEXT: # %bb.2: # %atomicrmw.end
; LA64F-NEXT: ret
;
@@ -51,6 +51,7 @@ define float @float_fadd_acquire(ptr %p) nounwind {
; LA64D-NEXT: fadd.s $fa2, $fa0, $fa1
; LA64D-NEXT: movfr2gr.s $a1, $fa2
; LA64D-NEXT: movfr2gr.s $a2, $fa0
+; LA64D-NEXT: addi.w $a2, $a2, 0
; LA64D-NEXT: .LBB0_3: # %atomicrmw.start
; LA64D-NEXT: # Parent Loop BB0_1 Depth=1
; LA64D-NEXT: # => This Inner Loop Header: Depth=2
@@ -68,8 +69,7 @@ define float @float_fadd_acquire(ptr %p) nounwind {
; LA64D-NEXT: .LBB0_6: # %atomicrmw.start
; LA64D-NEXT: # in Loop: Header=BB0_1 Depth=1
; LA64D-NEXT: movgr2fr.w $fa0, $a3
-; LA64D-NEXT: addi.w $a1, $a2, 0
-; LA64D-NEXT: bne $a3, $a1, .LBB0_1
+; LA64D-NEXT: bne $a3, $a2, .LBB0_1
; LA64D-NEXT: # %bb.2: # %atomicrmw.end
; LA64D-NEXT: ret
%v = atomicrmw fadd ptr %p, float 1.0 acquire, align 4
@@ -90,6 +90,7 @@ define float @float_fsub_acquire(ptr %p) nounwind {
; LA64F-NEXT: fadd.s $fa2, $fa0, $fa1
; LA64F-NEXT: movfr2gr.s $a1, $fa2
; LA64F-NEXT: movfr2gr.s $a2, $fa0
+; LA64F-NEXT: addi.w $a2, $a2, 0
; LA64F-NEXT: .LBB1_3: # %atomicrmw.start
; LA64F-NEXT: # Parent Loop BB1_1 Depth=1
; LA64F-NEXT: # => This Inner Loop Header: Depth=2
@@ -107,8 +108,7 @@ define float @float_fsub_acquire(ptr %p) nounwind {
; LA64F-NEXT: .LBB1_6: # %atomicrmw.start
; LA64F-NEXT: # in Loop: Header=BB1_1 Depth=1
; LA64F-NEXT: movgr2fr.w $fa0, $a3
-; LA64F-NEXT: addi.w $a1, $a2, 0
-; LA64F-NEXT: bne $a3, $a1, .LBB1_1
+; LA64F-NEXT: bne $a3, $a2, .LBB1_1
; LA64F-NEXT: # %bb.2: # %atomicrmw.end
; LA64F-NEXT: ret
;
@@ -125,6 +125,7 @@ define float @float_fsub_acquire(ptr %p) nounwind {
; LA64D-NEXT: fadd.s $fa2, $fa0, $fa1
; LA64D-NEXT: movfr2gr.s $a1, $fa2
; LA64D-NEXT: movfr2gr.s $a2, $fa0
+; LA64D-NEXT: addi.w $a2, $a2, 0
; LA64D-NEXT: .LBB1_3: # %atomicrmw.start
; LA64D-NEXT: # Parent Loop BB1_1 Depth=1
; LA64D-NEXT: # => This Inner Loop Header: Depth=2
@@ -142,8 +143,7 @@ define float @float_fsub_acquire(ptr %p) nounwind {
; LA64D-NEXT: .LBB1_6: # %atomicrmw.start
; LA64D-NEXT: # in Loop: Header=BB1_1 Depth=1
; LA64D-NEXT: movgr2fr.w $fa0, $a3
-; LA64D-NEXT: addi.w $a1, $a2, 0
-; LA64D-NEXT: bne $a3, $a1, .LBB1_1
+; LA64D-NEXT: bne $a3, $a2, .LBB1_1
; LA64D-NEXT: # %bb.2: # %atomicrmw.end
; LA64D-NEXT: ret
%v = atomicrmw fsub ptr %p, float 1.0 acquire, align 4
@@ -165,6 +165,7 @@ define float @float_fmin_acquire(ptr %p) nounwind {
; LA64F-NEXT: fmin.s $fa2, $fa2, $fa1
; LA64F-NEXT: movfr2gr.s $a1, $fa2
; LA64F-NEXT: movfr2gr.s $a2, $fa0
+; LA64F-NEXT: addi.w $a2, $a2, 0
; LA64F-NEXT: .LBB2_3: # %atomicrmw.start
; LA64F-NEXT: # Parent Loop BB2_1 Depth=1
; LA64F-NEXT: # => This Inner Loop Header: Depth=2
@@ -182,8 +183,7 @@ define float @float_fmin_acquire(ptr %p) nounwind {
; LA64F-NE...
[truncated]
|
A test case: https://godbolt.org/z/Gvz7ea8K6 |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Should be good. Is the LA64 v1.10 work already underway? If not I'll probably handle some of these.
Not yet. Feel free to handle them. Many thanks! |
…apArg (llvm#83656) This patch aims to solve Firefox issue: https://bugzilla.mozilla.org/show_bug.cgi?id=1882301 Similar to 616289e. Currently LoongArch uses an ll.[wd]/sc.[wd] loop for ATOMIC_CMP_XCHG. Because the comparison in the loop is full-width (i.e. the `bne` instruction), we must sign extend the input comparison argument. Note that LoongArch ISA manual V1.1 has introduced compare-and-swap instructions. We would change the implementation (return `ANY_EXTEND`) when we support them. (cherry picked from commit 5f058aa)
…apArg (llvm#83656) This patch aims to solve Firefox issue: https://bugzilla.mozilla.org/show_bug.cgi?id=1882301 Similar to 616289e. Currently LoongArch uses an ll.[wd]/sc.[wd] loop for ATOMIC_CMP_XCHG. Because the comparison in the loop is full-width (i.e. the `bne` instruction), we must sign extend the input comparison argument. Note that LoongArch ISA manual V1.1 has introduced compare-and-swap instructions. We would change the implementation (return `ANY_EXTEND`) when we support them. (cherry picked from commit 5f058aa)
…apArg (llvm#83656) This patch aims to solve Firefox issue: https://bugzilla.mozilla.org/show_bug.cgi?id=1882301 Similar to 616289e. Currently LoongArch uses an ll.[wd]/sc.[wd] loop for ATOMIC_CMP_XCHG. Because the comparison in the loop is full-width (i.e. the `bne` instruction), we must sign extend the input comparison argument. Note that LoongArch ISA manual V1.1 has introduced compare-and-swap instructions. We would change the implementation (return `ANY_EXTEND`) when we support them. (cherry picked from commit 5f058aa)
This patch aims to solve Firefox issue:
https://bugzilla.mozilla.org/show_bug.cgi?id=1882301
Similar to 616289e. Currently LoongArch uses an ll.[wd]/sc.[wd] loop for ATOMIC_CMP_XCHG. Because the comparison in the loop is full-width (i.e. the
bne
instruction), we must sign extend the input comparison argument. Note that LoongArch ISA manual V1.1 has introduced compare-and-swap instructions. We would change the implementation (return
ANY_EXTEND
) when we support them.