-
Notifications
You must be signed in to change notification settings - Fork 12k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[AArch64] Don't replace dst of SWP instructions with (X|W)ZR #102139
Conversation
This change updates the AArch64DeadRegisterDefinition pass to ensure it does not replace the destination register of a SWP instruction with the zero register when its value is unused. This is necessary to ensure that the ordering of such instructions in relation to DMB.LD barriers adheres to the definitions of the AArch64 Memory Model. The memory model states the following (ARMARM version DDI 0487K.a §B2.3.7): ``` Barrier-ordered-before An effect E1 is Barrier-ordered-before an effect E2 if one of the following applies: [...] * All of the following apply: - E1 is a Memory Read effect. - E1 is generated by an instruction whose destination register is not WZR or XZR. - E1 appears in program order before E3. - E3 is either a DMB LD effect or a DSB LD effect. - E3 appears in program order before E2. ``` Prior to this change, by replacing the destination register of such SWP instructions with WZR/XZR, the ordering relation described above was incorrectly removed from the generated code. The new behaviour is ensured in this patch by adding the relevant `SWP[L](B|H|W|X)` instructions to the list in the `atomicReadDroppedOnZero` predicate, which already covered the `LD<Op>` instructions that are subject to the same effect. Fixes llvm#68428.
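@llvm/pr-subscribers-backend-aarch64 Author: Lucas Duarte Prates (pratlucas) Changes: This change updates the AArch64DeadRegisterDefinition pass to ensure it does not replace the destination register of a SWP instruction with the zero register when its value is unused. This is necessary to ensure that the ordering of such instructions in relation to DMB.LD barriers adheres to the definitions of the AArch64 Memory Model. The memory model states the following (ARMARM version DDI 0487K.a §B2.3.7): ``` Barrier-ordered-before An effect E1 is Barrier-ordered-before an effect E2 if one of the following applies: [...] * All of the following apply: - E1 is a Memory Read effect. - E1 is generated by an instruction whose destination register is not WZR or XZR. - E1 appears in program order before E3. - E3 is either a DMB LD effect or a DSB LD effect. - E3 appears in program order before E2. ```
Prior to this change, by replacing the destination register of such SWP instructions with WZR/XZR, the ordering relation described above was incorrectly removed from the generated code. The new behaviour is ensured in this patch by adding the relevant `SWP[L](B|H|W|X)` instructions to the list in the `atomicReadDroppedOnZero` predicate, which already covered the `LD<Op>` instructions that are subject to the same effect. Fixes #68428. Full diff: https://github.com/llvm/llvm-project/pull/102139.diff 2 Files Affected: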
diff --git a/llvm/lib/Target/AArch64/AArch64DeadRegisterDefinitionsPass.cpp b/llvm/lib/Target/AArch64/AArch64DeadRegisterDefinitionsPass.cpp
index 2bc14f9821e63..161cf24dd4037 100644
--- a/llvm/lib/Target/AArch64/AArch64DeadRegisterDefinitionsPass.cpp
+++ b/llvm/lib/Target/AArch64/AArch64DeadRegisterDefinitionsPass.cpp
@@ -108,6 +108,10 @@ static bool atomicReadDroppedOnZero(unsigned Opcode) {
case AArch64::LDUMINW: case AArch64::LDUMINX:
case AArch64::LDUMINLB: case AArch64::LDUMINLH:
case AArch64::LDUMINLW: case AArch64::LDUMINLX:
+ case AArch64::SWPB: case AArch64::SWPH:
+ case AArch64::SWPW: case AArch64::SWPX:
+ case AArch64::SWPLB: case AArch64::SWPLH:
+ case AArch64::SWPLW: case AArch64::SWPLX:
return true;
}
return false;
diff --git a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomic-exchange-fence.ll b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomic-exchange-fence.ll
new file mode 100644
index 0000000000000..2adbc709d238d
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomic-exchange-fence.ll
@@ -0,0 +1,64 @@
+; RUN: llc %s -o - -verify-machineinstrs -mtriple=aarch64 -mattr=+lse -O0 | FileCheck %s
+; RUN: llc %s -o - -verify-machineinstrs -mtriple=aarch64 -mattr=+lse -O1 | FileCheck %s
+
+; When their destination register is WZR/XZR, SWP operations are not regarded as
+; a read for the purpose of a DMB.LD in the AArch64 memory model.
+; This test ensures that the AArch64DeadRegisterDefinitions pass does not
+; replace the destination register of SWP instructions with the zero register
+; when the read value is unused.
+
+define dso_local i32 @atomic_exchange_monotonic(ptr %ptr, ptr %ptr2, i32 %value) {
+; CHECK-LABEL: atomic_exchange_monotonic:
+; CHECK: // %bb.0:
+; CHECK-NEXT: swp
+; CHECK-NOT: wzr
+; CHECK-NEXT: dmb ishld
+; CHECK-NEXT: ldr w0, [x1]
+; CHECK-NEXT: ret
+ %r0 = atomicrmw xchg ptr %ptr, i32 %value monotonic
+ fence acquire
+ %r1 = load atomic i32, ptr %ptr2 monotonic, align 4
+ ret i32 %r1
+}
+
+define dso_local i32 @atomic_exchange_acquire(ptr %ptr, ptr %ptr2, i32 %value) {
+; CHECK-LABEL: atomic_exchange_acquire:
+; CHECK: // %bb.0:
+; CHECK-NEXT: swpa
+; CHECK-NOT: wzr
+; CHECK-NEXT: dmb ishld
+; CHECK-NEXT: ldr w0, [x1]
+; CHECK-NEXT: ret
+ %r0 = atomicrmw xchg ptr %ptr, i32 %value acquire
+ fence acquire
+ %r1 = load atomic i32, ptr %ptr2 monotonic, align 4
+ ret i32 %r1
+}
+
+define dso_local i32 @atomic_exchange_release(ptr %ptr, ptr %ptr2, i32 %value) {
+; CHECK-LABEL: atomic_exchange_release:
+; CHECK: // %bb.0:
+; CHECK-NEXT: swpl
+; CHECK-NOT: wzr
+; CHECK-NEXT: dmb ishld
+; CHECK-NEXT: ldr w0, [x1]
+; CHECK-NEXT: ret
+ %r0 = atomicrmw xchg ptr %ptr, i32 %value release
+ fence acquire
+ %r1 = load atomic i32, ptr %ptr2 monotonic, align 4
+ ret i32 %r1
+}
+
+define dso_local i32 @atomic_exchange_acquire_release(ptr %ptr, ptr %ptr2, i32 %value) {
+; CHECK-LABEL: atomic_exchange_acquire_release:
+; CHECK: // %bb.0:
+; CHECK-NEXT: swpal
+; CHECK-NOT: wzr
+; CHECK-NEXT: dmb ishld
+; CHECK-NEXT: ldr w0, [x1]
+; CHECK-NEXT: ret
+ %r0 = atomicrmw xchg ptr %ptr, i32 %value acq_rel
+ fence acquire
+ %r1 = load atomic i32, ptr %ptr2 monotonic, align 4
+ ret i32 %r1
+}
|
You can test this locally with the following command: git-clang-format --diff 4c6a89710a2580f9784408aae81f73d607d9942d c17af81392df8b5cb9d042b770f44785ad594e4e --extensions cpp -- llvm/lib/Target/AArch64/AArch64DeadRegisterDefinitionsPass.cpp View the diff from clang-format here. diff --git a/llvm/lib/Target/AArch64/AArch64DeadRegisterDefinitionsPass.cpp b/llvm/lib/Target/AArch64/AArch64DeadRegisterDefinitionsPass.cpp
index 161cf24dd4..cb48e38a6a 100644
--- a/llvm/lib/Target/AArch64/AArch64DeadRegisterDefinitionsPass.cpp
+++ b/llvm/lib/Target/AArch64/AArch64DeadRegisterDefinitionsPass.cpp
@@ -108,11 +108,15 @@ static bool atomicReadDroppedOnZero(unsigned Opcode) {
case AArch64::LDUMINW: case AArch64::LDUMINX:
case AArch64::LDUMINLB: case AArch64::LDUMINLH:
case AArch64::LDUMINLW: case AArch64::LDUMINLX:
- case AArch64::SWPB: case AArch64::SWPH:
- case AArch64::SWPW: case AArch64::SWPX:
- case AArch64::SWPLB: case AArch64::SWPLH:
- case AArch64::SWPLW: case AArch64::SWPLX:
- return true;
+ case AArch64::SWPB:
+ case AArch64::SWPH:
+ case AArch64::SWPW:
+ case AArch64::SWPX:
+ case AArch64::SWPLB:
+ case AArch64::SWPLH:
+ case AArch64::SWPLW:
+ case AArch64::SWPLX:
+ return true;
}
return false;
}
|
Should this be backported to 19.x? |
Thanks for fixing this, @pratlucas! The patch looks good to me. I note that these semantics apply to the CAS/SWP/LD and ST variants, so if CAS is used as the target of an atomic in the future in a similar fashion, the behaviour will arise again. Currently CAS is used in other ops that require the dst, so it’s fine for now.
Yes, I think so. It’s a relatively harmless fix, but it ensures correct translation of atomic RMW intrinsics; it shouldn’t affect other backends. |
/cherry-pick beb37e2 |
…2139) This change updates the AArch64DeadRegisterDefinition pass to ensure it does not replace the destination register of a SWP instruction with the zero register when its value is unused. This is necessary to ensure that the ordering of such instructions in relation to DMB.LD barries adheres to the definitions of the AArch64 Memory Model. The memory model states the following (ARMARM version DDI 0487K.a §B2.3.7): ``` Barrier-ordered-before An effect E1 is Barrier-ordered-before an effect E2 if one of the following applies: [...] * All of the following apply: - E1 is a Memory Read effect. - E1 is generated by an instruction whose destination register is not WZR or XZR. - E1 appears in program order before E3. - E3 is either a DMB LD effect or a DSB LD effect. - E3 appears in program order before E2. ``` Prior to this change, by replacing the destination register of such SWP instruction with WZR/XZR, the ordering relation described above was incorrectly removed from the generated code. The new behaviour is ensured in this patch by adding the relevant `SWP[L](B|H|W|X)` instructions to list in the `atomicReadDroppedOnZero` predicate, which already covered the `LD<Op>` instructions that are subject to the same effect. Fixes llvm#68428. (cherry picked from commit beb37e2)
/pull-request #102316 |
This change updates the AArch64DeadRegisterDefinition pass to ensure it does not replace the destination register of a SWP instruction with the zero register when its value is unused. This is necessary to ensure that the ordering of such instructions in relation to DMB.LD barries adheres to the definitions of the AArch64 Memory Model. The memory model states the following (ARMARM version DDI 0487K.a §B2.3.7): ``` Barrier-ordered-before An effect E1 is Barrier-ordered-before an effect E2 if one of the following applies: [...] * All of the following apply: - E1 is a Memory Read effect. - E1 is generated by an instruction whose destination register is not WZR or XZR. - E1 appears in program order before E3. - E3 is either a DMB LD effect or a DSB LD effect. - E3 appears in program order before E2. ``` Prior to this change, by replacing the destination register of such SWP instruction with WZR/XZR, the ordering relation described above was incorrectly removed from the generated code. The new behaviour is ensured in this patch by adding the relevant `SWP[L](B|H|W|X)` instructions to list in the `atomicReadDroppedOnZero` predicate, which already covered the `LD<Op>` instructions that are subject to the same effect. Fixes #68428.
…2139) This change updates the AArch64DeadRegisterDefinition pass to ensure it does not replace the destination register of a SWP instruction with the zero register when its value is unused. This is necessary to ensure that the ordering of such instructions in relation to DMB.LD barries adheres to the definitions of the AArch64 Memory Model. The memory model states the following (ARMARM version DDI 0487K.a §B2.3.7): ``` Barrier-ordered-before An effect E1 is Barrier-ordered-before an effect E2 if one of the following applies: [...] * All of the following apply: - E1 is a Memory Read effect. - E1 is generated by an instruction whose destination register is not WZR or XZR. - E1 appears in program order before E3. - E3 is either a DMB LD effect or a DSB LD effect. - E3 appears in program order before E2. ``` Prior to this change, by replacing the destination register of such SWP instruction with WZR/XZR, the ordering relation described above was incorrectly removed from the generated code. The new behaviour is ensured in this patch by adding the relevant `SWP[L](B|H|W|X)` instructions to list in the `atomicReadDroppedOnZero` predicate, which already covered the `LD<Op>` instructions that are subject to the same effect. Fixes llvm#68428. (cherry picked from commit beb37e2)
…2139) This change updates the AArch64DeadRegisterDefinition pass to ensure it does not replace the destination register of a SWP instruction with the zero register when its value is unused. This is necessary to ensure that the ordering of such instructions in relation to DMB.LD barries adheres to the definitions of the AArch64 Memory Model. The memory model states the following (ARMARM version DDI 0487K.a §B2.3.7): ``` Barrier-ordered-before An effect E1 is Barrier-ordered-before an effect E2 if one of the following applies: [...] * All of the following apply: - E1 is a Memory Read effect. - E1 is generated by an instruction whose destination register is not WZR or XZR. - E1 appears in program order before E3. - E3 is either a DMB LD effect or a DSB LD effect. - E3 appears in program order before E2. ``` Prior to this change, by replacing the destination register of such SWP instruction with WZR/XZR, the ordering relation described above was incorrectly removed from the generated code. The new behaviour is ensured in this patch by adding the relevant `SWP[L](B|H|W|X)` instructions to list in the `atomicReadDroppedOnZero` predicate, which already covered the `LD<Op>` instructions that are subject to the same effect. Fixes llvm#68428.
This change updates the AArch64DeadRegisterDefinition pass to ensure it
does not replace the destination register of a SWP instruction with the
zero register when its value is unused. This is necessary to ensure that
the ordering of such instructions in relation to DMB.LD barriers adheres
to the definitions of the AArch64 Memory Model.
The memory model states the following (ARMARM version DDI 0487K.a §B2.3.7):
Prior to this change, by replacing the destination register of such SWP
instruction with WZR/XZR, the ordering relation described above was
incorrectly removed from the generated code.
The new behaviour is ensured in this patch by adding the relevant
SWP[L](B|H|W|X)
instructions to the list in the atomicReadDroppedOnZero
predicate, which already covered the
LD<Op>
instructions that are subject to the same effect.
Fixes #68428.