[AArch64] Increase inline memmove limit to 16 stored registers (llvm#111848)

The memcpy inline limit has been 16 for a long time; this patch makes
the memmove inline limit the same, allowing small constant-sized
memmoves to be emitted inline. The 16 is the number of registers stored,
which, with 16-byte vector registers, equates to a limit of 256 bytes.
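
As an illustration (not part of the commit), this is the kind of constant-size
memmove that now sits exactly at the inline threshold; the function name is
hypothetical, and the new memmove-inline.ll test below checks the same boundary
with its @t256 and @t257 cases:

; Sketch only: a 256-byte constant memmove that the raised limit allows to be
; expanded as sixteen 16-byte (q-register) load/store pairs instead of a
; libcall, provided unaligned accesses are permitted.
define void @copy256(ptr %dst, ptr %src) {
entry:
  call void @llvm.memmove.p0.p0.i64(ptr %dst, ptr %src, i64 256, i1 false)
  ret void
}

declare void @llvm.memmove.p0.p0.i64(ptr nocapture, ptr nocapture readonly, i64, i1)
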
davemgreen authored and DanielCChen committed Oct 16, 2024
1 parent 3302f06 commit 8480176
Showing 3 changed files with 226 additions and 68 deletions.
3 changes: 2 additions & 1 deletion llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -1146,7 +1146,8 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
       Subtarget->requiresStrictAlign() ? MaxStoresPerMemcpyOptSize : 16;
 
   MaxStoresPerMemmoveOptSize = 4;
-  MaxStoresPerMemmove = 4;
+  MaxStoresPerMemmove =
+      Subtarget->requiresStrictAlign() ? MaxStoresPerMemmoveOptSize : 16;
 
   MaxLoadsPerMemcmpOptSize = 4;
   MaxLoadsPerMemcmp =
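For context (paraphrased, not part of this diff): MaxStoresPerMemmove and
MaxStoresPerMemmoveOptSize are the TargetLowering caps the generic memmove
expansion consults before inlining; the accessor in
llvm/include/llvm/CodeGen/TargetLowering.h is roughly of this shape, and a copy
that would need more stores than the cap falls back to a memmove libcall.

  // Returns the per-target cap on stores emitted for an inline memmove.
  unsigned getMaxStoresPerMemmove(bool OptSize) const {
    return OptSize ? MaxStoresPerMemmoveOptSize : MaxStoresPerMemmove;
  }

With the cap raised to 16 and 16-byte vector stores available, constant
memmoves of up to 256 bytes can now be expanded inline; when the subtarget
requires strict alignment, the limit stays at the OptSize value of 4, matching
the memcpy handling above.
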
169 changes: 102 additions & 67 deletions llvm/test/CodeGen/AArch64/GlobalISel/inline-memmove.mir
@@ -61,11 +61,12 @@ body: |
; CHECK-LABEL: name: test_memmove1
; CHECK: liveins: $x0, $x1, $x2
; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
; CHECK: [[COPY1:%[0-9]+]]:_(p0) = COPY $x1
; CHECK: [[COPY2:%[0-9]+]]:_(s64) = COPY $x2
; CHECK: G_MEMMOVE [[COPY]](p0), [[COPY1]](p0), [[COPY2]](s64), 1 :: (store (s8) into %ir.0, align 4), (load (s8) from %ir.1, align 4)
; CHECK: RET_ReallyLR
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(p0) = COPY $x1
; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY $x2
; CHECK-NEXT: G_MEMMOVE [[COPY]](p0), [[COPY1]](p0), [[COPY2]](s64), 1 :: (store (s8) into %ir.0, align 4), (load (s8) from %ir.1, align 4)
; CHECK-NEXT: RET_ReallyLR
%0:_(p0) = COPY $x0
%1:_(p0) = COPY $x1
%2:_(s64) = COPY $x2
@@ -83,23 +84,24 @@
; CHECK-LABEL: name: test_memmove2_const
; CHECK: liveins: $x0, $x1
; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
; CHECK: [[COPY1:%[0-9]+]]:_(p0) = COPY $x1
; CHECK: [[LOAD:%[0-9]+]]:_(s128) = G_LOAD [[COPY1]](p0) :: (load (s128) from %ir.1, align 4)
; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C]](s64)
; CHECK: [[LOAD1:%[0-9]+]]:_(s128) = G_LOAD [[PTR_ADD]](p0) :: (load (s128) from %ir.1 + 16, align 4)
; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32
; CHECK: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C1]](s64)
; CHECK: [[LOAD2:%[0-9]+]]:_(s128) = G_LOAD [[PTR_ADD1]](p0) :: (load (s128) from %ir.1 + 32, align 4)
; CHECK: G_STORE [[LOAD]](s128), [[COPY]](p0) :: (store (s128) into %ir.0, align 4)
; CHECK: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
; CHECK: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64)
; CHECK: G_STORE [[LOAD1]](s128), [[PTR_ADD2]](p0) :: (store (s128) into %ir.0 + 16, align 4)
; CHECK: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 32
; CHECK: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](s64)
; CHECK: G_STORE [[LOAD2]](s128), [[PTR_ADD3]](p0) :: (store (s128) into %ir.0 + 32, align 4)
; CHECK: RET_ReallyLR
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(p0) = COPY $x1
; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s128) = G_LOAD [[COPY1]](p0) :: (load (s128) from %ir.1, align 4)
; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C]](s64)
; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s128) = G_LOAD [[PTR_ADD]](p0) :: (load (s128) from %ir.1 + 16, align 4)
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32
; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C1]](s64)
; CHECK-NEXT: [[LOAD2:%[0-9]+]]:_(s128) = G_LOAD [[PTR_ADD1]](p0) :: (load (s128) from %ir.1 + 32, align 4)
; CHECK-NEXT: G_STORE [[LOAD]](s128), [[COPY]](p0) :: (store (s128) into %ir.0, align 4)
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64)
; CHECK-NEXT: G_STORE [[LOAD1]](s128), [[PTR_ADD2]](p0) :: (store (s128) into %ir.0 + 16, align 4)
; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 32
; CHECK-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](s64)
; CHECK-NEXT: G_STORE [[LOAD2]](s128), [[PTR_ADD3]](p0) :: (store (s128) into %ir.0 + 32, align 4)
; CHECK-NEXT: RET_ReallyLR
%0:_(p0) = COPY $x0
%1:_(p0) = COPY $x1
%2:_(s64) = G_CONSTANT i64 48
@@ -117,11 +119,42 @@
; CHECK-LABEL: name: test_memmove3_const_toolarge
; CHECK: liveins: $x0, $x1
; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
; CHECK: [[COPY1:%[0-9]+]]:_(p0) = COPY $x1
; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 96
; CHECK: G_MEMMOVE [[COPY]](p0), [[COPY1]](p0), [[C]](s64), 1 :: (store (s8) into %ir.0, align 4), (load (s8) from %ir.1, align 4)
; CHECK: RET_ReallyLR
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(p0) = COPY $x1
; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s128) = G_LOAD [[COPY1]](p0) :: (load (s128) from %ir.1, align 4)
; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C]](s64)
; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s128) = G_LOAD [[PTR_ADD]](p0) :: (load (s128) from %ir.1 + 16, align 4)
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32
; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C1]](s64)
; CHECK-NEXT: [[LOAD2:%[0-9]+]]:_(s128) = G_LOAD [[PTR_ADD1]](p0) :: (load (s128) from %ir.1 + 32, align 4)
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 48
; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C2]](s64)
; CHECK-NEXT: [[LOAD3:%[0-9]+]]:_(s128) = G_LOAD [[PTR_ADD2]](p0) :: (load (s128) from %ir.1 + 48, align 4)
; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 64
; CHECK-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C3]](s64)
; CHECK-NEXT: [[LOAD4:%[0-9]+]]:_(s128) = G_LOAD [[PTR_ADD3]](p0) :: (load (s128) from %ir.1 + 64, align 4)
; CHECK-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 80
; CHECK-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C4]](s64)
; CHECK-NEXT: [[LOAD5:%[0-9]+]]:_(s128) = G_LOAD [[PTR_ADD4]](p0) :: (load (s128) from %ir.1 + 80, align 4)
; CHECK-NEXT: G_STORE [[LOAD]](s128), [[COPY]](p0) :: (store (s128) into %ir.0, align 4)
; CHECK-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
; CHECK-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C5]](s64)
; CHECK-NEXT: G_STORE [[LOAD1]](s128), [[PTR_ADD5]](p0) :: (store (s128) into %ir.0 + 16, align 4)
; CHECK-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 32
; CHECK-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C6]](s64)
; CHECK-NEXT: G_STORE [[LOAD2]](s128), [[PTR_ADD6]](p0) :: (store (s128) into %ir.0 + 32, align 4)
; CHECK-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 48
; CHECK-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C7]](s64)
; CHECK-NEXT: G_STORE [[LOAD3]](s128), [[PTR_ADD7]](p0) :: (store (s128) into %ir.0 + 48, align 4)
; CHECK-NEXT: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 64
; CHECK-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C8]](s64)
; CHECK-NEXT: G_STORE [[LOAD4]](s128), [[PTR_ADD8]](p0) :: (store (s128) into %ir.0 + 64, align 4)
; CHECK-NEXT: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 80
; CHECK-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C9]](s64)
; CHECK-NEXT: G_STORE [[LOAD5]](s128), [[PTR_ADD9]](p0) :: (store (s128) into %ir.0 + 80, align 4)
; CHECK-NEXT: RET_ReallyLR
%0:_(p0) = COPY $x0
%1:_(p0) = COPY $x1
%2:_(s64) = G_CONSTANT i64 96
@@ -139,29 +172,30 @@
; CHECK-LABEL: name: test_memmove4_const_unaligned
; CHECK: liveins: $x0, $x1
; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
; CHECK: [[COPY1:%[0-9]+]]:_(p0) = COPY $x1
; CHECK: [[LOAD:%[0-9]+]]:_(s128) = G_LOAD [[COPY1]](p0) :: (load (s128) from %ir.1, align 4)
; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C]](s64)
; CHECK: [[LOAD1:%[0-9]+]]:_(s128) = G_LOAD [[PTR_ADD]](p0) :: (load (s128) from %ir.1 + 16, align 4)
; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32
; CHECK: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C1]](s64)
; CHECK: [[LOAD2:%[0-9]+]]:_(s128) = G_LOAD [[PTR_ADD1]](p0) :: (load (s128) from %ir.1 + 32, align 4)
; CHECK: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 48
; CHECK: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C2]](s64)
; CHECK: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s32) from %ir.1 + 48)
; CHECK: G_STORE [[LOAD]](s128), [[COPY]](p0) :: (store (s128) into %ir.0, align 4)
; CHECK: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
; CHECK: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](s64)
; CHECK: G_STORE [[LOAD1]](s128), [[PTR_ADD3]](p0) :: (store (s128) into %ir.0 + 16, align 4)
; CHECK: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 32
; CHECK: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64)
; CHECK: G_STORE [[LOAD2]](s128), [[PTR_ADD4]](p0) :: (store (s128) into %ir.0 + 32, align 4)
; CHECK: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 48
; CHECK: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C5]](s64)
; CHECK: G_STORE [[LOAD3]](s32), [[PTR_ADD5]](p0) :: (store (s32) into %ir.0 + 48)
; CHECK: RET_ReallyLR
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(p0) = COPY $x1
; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s128) = G_LOAD [[COPY1]](p0) :: (load (s128) from %ir.1, align 4)
; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C]](s64)
; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s128) = G_LOAD [[PTR_ADD]](p0) :: (load (s128) from %ir.1 + 16, align 4)
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32
; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C1]](s64)
; CHECK-NEXT: [[LOAD2:%[0-9]+]]:_(s128) = G_LOAD [[PTR_ADD1]](p0) :: (load (s128) from %ir.1 + 32, align 4)
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 48
; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C2]](s64)
; CHECK-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s32) from %ir.1 + 48)
; CHECK-NEXT: G_STORE [[LOAD]](s128), [[COPY]](p0) :: (store (s128) into %ir.0, align 4)
; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
; CHECK-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](s64)
; CHECK-NEXT: G_STORE [[LOAD1]](s128), [[PTR_ADD3]](p0) :: (store (s128) into %ir.0 + 16, align 4)
; CHECK-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 32
; CHECK-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64)
; CHECK-NEXT: G_STORE [[LOAD2]](s128), [[PTR_ADD4]](p0) :: (store (s128) into %ir.0 + 32, align 4)
; CHECK-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 48
; CHECK-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C5]](s64)
; CHECK-NEXT: G_STORE [[LOAD3]](s32), [[PTR_ADD5]](p0) :: (store (s32) into %ir.0 + 48)
; CHECK-NEXT: RET_ReallyLR
%0:_(p0) = COPY $x0
%1:_(p0) = COPY $x1
%2:_(s64) = G_CONSTANT i64 52
@@ -179,23 +213,24 @@
; CHECK-LABEL: name: test_memmove_addrspace
; CHECK: liveins: $x0, $x1
; CHECK: [[COPY:%[0-9]+]]:_(p1) = COPY $x0
; CHECK: [[COPY1:%[0-9]+]]:_(p2) = COPY $x1
; CHECK: [[LOAD:%[0-9]+]]:_(s128) = G_LOAD [[COPY1]](p2) :: (load (s128) from %ir.1, align 4, addrspace 2)
; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
; CHECK: [[PTR_ADD:%[0-9]+]]:_(p2) = G_PTR_ADD [[COPY1]], [[C]](s64)
; CHECK: [[LOAD1:%[0-9]+]]:_(s128) = G_LOAD [[PTR_ADD]](p2) :: (load (s128) from %ir.1 + 16, align 4, addrspace 2)
; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32
; CHECK: [[PTR_ADD1:%[0-9]+]]:_(p2) = G_PTR_ADD [[COPY1]], [[C1]](s64)
; CHECK: [[LOAD2:%[0-9]+]]:_(s128) = G_LOAD [[PTR_ADD1]](p2) :: (load (s128) from %ir.1 + 32, align 4, addrspace 2)
; CHECK: G_STORE [[LOAD]](s128), [[COPY]](p1) :: (store (s128) into %ir.0, align 4, addrspace 1)
; CHECK: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
; CHECK: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
; CHECK: G_STORE [[LOAD1]](s128), [[PTR_ADD2]](p1) :: (store (s128) into %ir.0 + 16, align 4, addrspace 1)
; CHECK: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 32
; CHECK: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
; CHECK: G_STORE [[LOAD2]](s128), [[PTR_ADD3]](p1) :: (store (s128) into %ir.0 + 32, align 4, addrspace 1)
; CHECK: RET_ReallyLR
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $x0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(p2) = COPY $x1
; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s128) = G_LOAD [[COPY1]](p2) :: (load (s128) from %ir.1, align 4, addrspace 2)
; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p2) = G_PTR_ADD [[COPY1]], [[C]](s64)
; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s128) = G_LOAD [[PTR_ADD]](p2) :: (load (s128) from %ir.1 + 16, align 4, addrspace 2)
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32
; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p2) = G_PTR_ADD [[COPY1]], [[C1]](s64)
; CHECK-NEXT: [[LOAD2:%[0-9]+]]:_(s128) = G_LOAD [[PTR_ADD1]](p2) :: (load (s128) from %ir.1 + 32, align 4, addrspace 2)
; CHECK-NEXT: G_STORE [[LOAD]](s128), [[COPY]](p1) :: (store (s128) into %ir.0, align 4, addrspace 1)
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
; CHECK-NEXT: G_STORE [[LOAD1]](s128), [[PTR_ADD2]](p1) :: (store (s128) into %ir.0 + 16, align 4, addrspace 1)
; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 32
; CHECK-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
; CHECK-NEXT: G_STORE [[LOAD2]](s128), [[PTR_ADD3]](p1) :: (store (s128) into %ir.0 + 32, align 4, addrspace 1)
; CHECK-NEXT: RET_ReallyLR
%0:_(p1) = COPY $x0
%1:_(p2) = COPY $x1
%2:_(s64) = G_CONSTANT i64 48
122 changes: 122 additions & 0 deletions llvm/test/CodeGen/AArch64/memmove-inline.ll
@@ -0,0 +1,122 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64 < %s | FileCheck %s --check-prefixes=CHECK,CHECK-ALIGNED
; RUN: llc -mtriple=aarch64 -mattr=+strict-align < %s | FileCheck %s --check-prefixes=CHECK,CHECK-UNALIGNED

; Small (16 bytes here) unaligned memmove() should be a function call if
; strict-alignment is turned on.
define void @t16(ptr %out, ptr %in) {
; CHECK-ALIGNED-LABEL: t16:
; CHECK-ALIGNED: // %bb.0: // %entry
; CHECK-ALIGNED-NEXT: ldr q0, [x1]
; CHECK-ALIGNED-NEXT: str q0, [x0]
; CHECK-ALIGNED-NEXT: ret
;
; CHECK-UNALIGNED-LABEL: t16:
; CHECK-UNALIGNED: // %bb.0: // %entry
; CHECK-UNALIGNED-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
; CHECK-UNALIGNED-NEXT: .cfi_def_cfa_offset 16
; CHECK-UNALIGNED-NEXT: .cfi_offset w30, -16
; CHECK-UNALIGNED-NEXT: mov w2, #16 // =0x10
; CHECK-UNALIGNED-NEXT: bl memmove
; CHECK-UNALIGNED-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
; CHECK-UNALIGNED-NEXT: ret
entry:
call void @llvm.memmove.p0.p0.i64(ptr %out, ptr %in, i64 16, i1 false)
ret void
}

; Small (16 bytes here) aligned memmove() should be inlined even if
; strict-alignment is turned on.
define void @t16_aligned(ptr align 8 %out, ptr align 8 %in) {
; CHECK-ALIGNED-LABEL: t16_aligned:
; CHECK-ALIGNED: // %bb.0: // %entry
; CHECK-ALIGNED-NEXT: ldr q0, [x1]
; CHECK-ALIGNED-NEXT: str q0, [x0]
; CHECK-ALIGNED-NEXT: ret
;
; CHECK-UNALIGNED-LABEL: t16_aligned:
; CHECK-UNALIGNED: // %bb.0: // %entry
; CHECK-UNALIGNED-NEXT: ldp x9, x8, [x1]
; CHECK-UNALIGNED-NEXT: stp x9, x8, [x0]
; CHECK-UNALIGNED-NEXT: ret
entry:
call void @llvm.memmove.p0.p0.i64(ptr align 8 %out, ptr align 8 %in, i64 16, i1 false)
ret void
}

; Tiny (4 bytes here) unaligned memmove() should be inlined with byte sized
; loads and stores if strict-alignment is turned on.
define void @t4(ptr %out, ptr %in) {
; CHECK-ALIGNED-LABEL: t4:
; CHECK-ALIGNED: // %bb.0: // %entry
; CHECK-ALIGNED-NEXT: ldr w8, [x1]
; CHECK-ALIGNED-NEXT: str w8, [x0]
; CHECK-ALIGNED-NEXT: ret
;
; CHECK-UNALIGNED-LABEL: t4:
; CHECK-UNALIGNED: // %bb.0: // %entry
; CHECK-UNALIGNED-NEXT: ldrb w8, [x1, #3]
; CHECK-UNALIGNED-NEXT: ldrb w9, [x1, #2]
; CHECK-UNALIGNED-NEXT: ldrb w10, [x1]
; CHECK-UNALIGNED-NEXT: ldrb w11, [x1, #1]
; CHECK-UNALIGNED-NEXT: strb w8, [x0, #3]
; CHECK-UNALIGNED-NEXT: strb w9, [x0, #2]
; CHECK-UNALIGNED-NEXT: strb w11, [x0, #1]
; CHECK-UNALIGNED-NEXT: strb w10, [x0]
; CHECK-UNALIGNED-NEXT: ret
entry:
call void @llvm.memmove.p0.p0.i64(ptr %out, ptr %in, i64 4, i1 false)
ret void
}

define void @t256(ptr %out, ptr %in) {
; CHECK-ALIGNED-LABEL: t256:
; CHECK-ALIGNED: // %bb.0: // %entry
; CHECK-ALIGNED-NEXT: ldp q0, q1, [x1]
; CHECK-ALIGNED-NEXT: ldp q2, q3, [x1, #32]
; CHECK-ALIGNED-NEXT: ldp q4, q5, [x1, #64]
; CHECK-ALIGNED-NEXT: ldp q6, q7, [x1, #96]
; CHECK-ALIGNED-NEXT: ldp q16, q17, [x1, #224]
; CHECK-ALIGNED-NEXT: ldp q18, q19, [x1, #128]
; CHECK-ALIGNED-NEXT: ldp q20, q21, [x1, #160]
; CHECK-ALIGNED-NEXT: ldp q22, q23, [x1, #192]
; CHECK-ALIGNED-NEXT: stp q0, q1, [x0]
; CHECK-ALIGNED-NEXT: stp q2, q3, [x0, #32]
; CHECK-ALIGNED-NEXT: stp q4, q5, [x0, #64]
; CHECK-ALIGNED-NEXT: stp q6, q7, [x0, #96]
; CHECK-ALIGNED-NEXT: stp q18, q19, [x0, #128]
; CHECK-ALIGNED-NEXT: stp q20, q21, [x0, #160]
; CHECK-ALIGNED-NEXT: stp q22, q23, [x0, #192]
; CHECK-ALIGNED-NEXT: stp q16, q17, [x0, #224]
; CHECK-ALIGNED-NEXT: ret
;
; CHECK-UNALIGNED-LABEL: t256:
; CHECK-UNALIGNED: // %bb.0: // %entry
; CHECK-UNALIGNED-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
; CHECK-UNALIGNED-NEXT: .cfi_def_cfa_offset 16
; CHECK-UNALIGNED-NEXT: .cfi_offset w30, -16
; CHECK-UNALIGNED-NEXT: mov w2, #256 // =0x100
; CHECK-UNALIGNED-NEXT: bl memmove
; CHECK-UNALIGNED-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
; CHECK-UNALIGNED-NEXT: ret
entry:
call void @llvm.memmove.p0.p0.i64(ptr %out, ptr %in, i64 256, i1 false)
ret void
}

define void @t257(ptr %out, ptr %in) {
; CHECK-LABEL: t257:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: .cfi_offset w30, -16
; CHECK-NEXT: mov w2, #257 // =0x101
; CHECK-NEXT: bl memmove
; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
entry:
call void @llvm.memmove.p0.p0.i64(ptr %out, ptr %in, i64 257, i1 false)
ret void
}

declare void @llvm.memmove.p0.p0.i64(ptr nocapture, ptr nocapture readonly, i64, i1)
