forked from GPUOpen-Drivers/llvm-project
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[ARM] Additional test for Min loop. NFC
- Loading branch information
1 parent
b0ce615
commit 5abbf20
Showing
1 changed file
with
193 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,193 @@ | ||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py | ||
; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s | ||
|
||
define void @arm_min_q31(i32* nocapture readonly %pSrc, i32 %blockSize, i32* nocapture %pResult, i32* nocapture %pIndex) { | ||
; CHECK-LABEL: arm_min_q31: | ||
; CHECK: @ %bb.0: @ %entry | ||
; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} | ||
; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11, lr} | ||
; CHECK-NEXT: .pad #4 | ||
; CHECK-NEXT: sub sp, #4 | ||
; CHECK-NEXT: ldr.w r12, [r0] | ||
; CHECK-NEXT: subs.w r9, r1, #1 | ||
; CHECK-NEXT: beq .LBB0_3 | ||
; CHECK-NEXT: @ %bb.1: @ %while.body.preheader | ||
; CHECK-NEXT: subs r6, r1, #2 | ||
; CHECK-NEXT: and r7, r9, #3 | ||
; CHECK-NEXT: cmp r6, #3 | ||
; CHECK-NEXT: str r7, [sp] @ 4-byte Spill | ||
; CHECK-NEXT: bhs .LBB0_4 | ||
; CHECK-NEXT: @ %bb.2: | ||
; CHECK-NEXT: mov.w r8, #0 | ||
; CHECK-NEXT: b .LBB0_6 | ||
; CHECK-NEXT: .LBB0_3: | ||
; CHECK-NEXT: mov.w r8, #0 | ||
; CHECK-NEXT: b .LBB0_10 | ||
; CHECK-NEXT: .LBB0_4: @ %while.body.preheader.new | ||
; CHECK-NEXT: bic r6, r9, #3 | ||
; CHECK-NEXT: movs r4, #1 | ||
; CHECK-NEXT: subs r6, #4 | ||
; CHECK-NEXT: mov.w r8, #0 | ||
; CHECK-NEXT: add.w lr, r4, r6, lsr #2 | ||
; CHECK-NEXT: movs r6, #4 | ||
; CHECK-NEXT: mov lr, lr | ||
; CHECK-NEXT: mov r11, lr | ||
; CHECK-NEXT: .LBB0_5: @ %while.body | ||
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 | ||
; CHECK-NEXT: ldr r10, [r0, #16]! | ||
; CHECK-NEXT: mov lr, r11 | ||
; CHECK-NEXT: sub.w lr, lr, #1 | ||
; CHECK-NEXT: sub.w r9, r9, #4 | ||
; CHECK-NEXT: ldrd r7, r5, [r0, #-12] | ||
; CHECK-NEXT: mov r11, lr | ||
; CHECK-NEXT: ldr r4, [r0, #-4] | ||
; CHECK-NEXT: cmp r12, r7 | ||
; CHECK-NEXT: it gt | ||
; CHECK-NEXT: subgt.w r8, r6, #3 | ||
; CHECK-NEXT: csel r7, r7, r12, gt | ||
; CHECK-NEXT: cmp r7, r5 | ||
; CHECK-NEXT: it gt | ||
; CHECK-NEXT: subgt.w r8, r6, #2 | ||
; CHECK-NEXT: csel r7, r5, r7, gt | ||
; CHECK-NEXT: cmp r7, r4 | ||
; CHECK-NEXT: it gt | ||
; CHECK-NEXT: subgt.w r8, r6, #1 | ||
; CHECK-NEXT: csel r7, r4, r7, gt | ||
; CHECK-NEXT: cmp r7, r10 | ||
; CHECK-NEXT: csel r8, r6, r8, gt | ||
; CHECK-NEXT: add.w r6, r6, #4 | ||
; CHECK-NEXT: csel r12, r10, r7, gt | ||
; CHECK-NEXT: cmp.w lr, #0 | ||
; CHECK-NEXT: bne .LBB0_5 | ||
; CHECK-NEXT: b .LBB0_6 | ||
; CHECK-NEXT: .LBB0_6: @ %while.end.loopexit.unr-lcssa | ||
; CHECK-NEXT: ldr r7, [sp] @ 4-byte Reload | ||
; CHECK-NEXT: cbz r7, .LBB0_10 | ||
; CHECK-NEXT: @ %bb.7: @ %while.body.epil | ||
; CHECK-NEXT: ldr r4, [r0, #4] | ||
; CHECK-NEXT: sub.w r1, r1, r9 | ||
; CHECK-NEXT: cmp r12, r4 | ||
; CHECK-NEXT: csel r8, r1, r8, gt | ||
; CHECK-NEXT: csel r12, r4, r12, gt | ||
; CHECK-NEXT: cmp r7, #1 | ||
; CHECK-NEXT: beq .LBB0_10 | ||
; CHECK-NEXT: @ %bb.8: @ %while.body.epil.1 | ||
; CHECK-NEXT: ldr r4, [r0, #8] | ||
; CHECK-NEXT: cmp r12, r4 | ||
; CHECK-NEXT: csinc r8, r8, r1, le | ||
; CHECK-NEXT: csel r12, r4, r12, gt | ||
; CHECK-NEXT: cmp r7, #2 | ||
; CHECK-NEXT: beq .LBB0_10 | ||
; CHECK-NEXT: @ %bb.9: @ %while.body.epil.2 | ||
; CHECK-NEXT: ldr r0, [r0, #12] | ||
; CHECK-NEXT: cmp r12, r0 | ||
; CHECK-NEXT: it gt | ||
; CHECK-NEXT: addgt.w r8, r1, #2 | ||
; CHECK-NEXT: csel r12, r0, r12, gt | ||
; CHECK-NEXT: .LBB0_10: @ %while.end | ||
; CHECK-NEXT: str.w r12, [r2] | ||
; CHECK-NEXT: str.w r8, [r3] | ||
; CHECK-NEXT: add sp, #4 | ||
; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc} | ||
entry: | ||
%0 = load i32, i32* %pSrc, align 4 | ||
%blkCnt.015 = add i32 %blockSize, -1 | ||
%cmp.not17 = icmp eq i32 %blkCnt.015, 0 | ||
br i1 %cmp.not17, label %while.end, label %while.body.preheader | ||
|
||
while.body.preheader: ; preds = %entry | ||
%1 = add i32 %blockSize, -2 | ||
%xtraiter = and i32 %blkCnt.015, 3 | ||
%2 = icmp ult i32 %1, 3 | ||
br i1 %2, label %while.end.loopexit.unr-lcssa, label %while.body.preheader.new | ||
|
||
while.body.preheader.new: ; preds = %while.body.preheader | ||
%unroll_iter = and i32 %blkCnt.015, -4 | ||
br label %while.body | ||
|
||
while.body: ; preds = %while.body, %while.body.preheader.new | ||
%pSrc.addr.021.pn = phi i32* [ %pSrc, %while.body.preheader.new ], [ %pSrc.addr.021.3, %while.body ] | ||
%blkCnt.020 = phi i32 [ %blkCnt.015, %while.body.preheader.new ], [ %blkCnt.0.3, %while.body ] | ||
%outIndex.019 = phi i32 [ 0, %while.body.preheader.new ], [ %spec.select14.3, %while.body ] | ||
%out.018 = phi i32 [ %0, %while.body.preheader.new ], [ %spec.select.3, %while.body ] | ||
%niter = phi i32 [ %unroll_iter, %while.body.preheader.new ], [ %niter.nsub.3, %while.body ] | ||
%pSrc.addr.021 = getelementptr inbounds i32, i32* %pSrc.addr.021.pn, i32 1 | ||
%3 = load i32, i32* %pSrc.addr.021, align 4 | ||
%cmp2 = icmp sgt i32 %out.018, %3 | ||
%sub3 = sub i32 %blockSize, %blkCnt.020 | ||
%spec.select = select i1 %cmp2, i32 %3, i32 %out.018 | ||
%spec.select14 = select i1 %cmp2, i32 %sub3, i32 %outIndex.019 | ||
%blkCnt.0 = add i32 %blkCnt.020, -1 | ||
%pSrc.addr.021.1 = getelementptr inbounds i32, i32* %pSrc.addr.021.pn, i32 2 | ||
%4 = load i32, i32* %pSrc.addr.021.1, align 4 | ||
%cmp2.1 = icmp sgt i32 %spec.select, %4 | ||
%sub3.1 = sub i32 %blockSize, %blkCnt.0 | ||
%spec.select.1 = select i1 %cmp2.1, i32 %4, i32 %spec.select | ||
%spec.select14.1 = select i1 %cmp2.1, i32 %sub3.1, i32 %spec.select14 | ||
%blkCnt.0.1 = add i32 %blkCnt.020, -2 | ||
%pSrc.addr.021.2 = getelementptr inbounds i32, i32* %pSrc.addr.021.pn, i32 3 | ||
%5 = load i32, i32* %pSrc.addr.021.2, align 4 | ||
%cmp2.2 = icmp sgt i32 %spec.select.1, %5 | ||
%sub3.2 = sub i32 %blockSize, %blkCnt.0.1 | ||
%spec.select.2 = select i1 %cmp2.2, i32 %5, i32 %spec.select.1 | ||
%spec.select14.2 = select i1 %cmp2.2, i32 %sub3.2, i32 %spec.select14.1 | ||
%blkCnt.0.2 = add i32 %blkCnt.020, -3 | ||
%pSrc.addr.021.3 = getelementptr inbounds i32, i32* %pSrc.addr.021.pn, i32 4 | ||
%6 = load i32, i32* %pSrc.addr.021.3, align 4 | ||
%cmp2.3 = icmp sgt i32 %spec.select.2, %6 | ||
%sub3.3 = sub i32 %blockSize, %blkCnt.0.2 | ||
%spec.select.3 = select i1 %cmp2.3, i32 %6, i32 %spec.select.2 | ||
%spec.select14.3 = select i1 %cmp2.3, i32 %sub3.3, i32 %spec.select14.2 | ||
%blkCnt.0.3 = add i32 %blkCnt.020, -4 | ||
%niter.nsub.3 = add i32 %niter, -4 | ||
%niter.ncmp.3 = icmp eq i32 %niter.nsub.3, 0 | ||
br i1 %niter.ncmp.3, label %while.end.loopexit.unr-lcssa, label %while.body | ||
|
||
while.end.loopexit.unr-lcssa: ; preds = %while.body, %while.body.preheader | ||
%spec.select.lcssa.ph = phi i32 [ undef, %while.body.preheader ], [ %spec.select.3, %while.body ] | ||
%spec.select14.lcssa.ph = phi i32 [ undef, %while.body.preheader ], [ %spec.select14.3, %while.body ] | ||
%pSrc.addr.021.pn.unr = phi i32* [ %pSrc, %while.body.preheader ], [ %pSrc.addr.021.3, %while.body ] | ||
%blkCnt.020.unr = phi i32 [ %blkCnt.015, %while.body.preheader ], [ %blkCnt.0.3, %while.body ] | ||
%outIndex.019.unr = phi i32 [ 0, %while.body.preheader ], [ %spec.select14.3, %while.body ] | ||
%out.018.unr = phi i32 [ %0, %while.body.preheader ], [ %spec.select.3, %while.body ] | ||
%lcmp.mod.not = icmp eq i32 %xtraiter, 0 | ||
br i1 %lcmp.mod.not, label %while.end, label %while.body.epil | ||
|
||
while.body.epil: ; preds = %while.end.loopexit.unr-lcssa | ||
%pSrc.addr.021.epil = getelementptr inbounds i32, i32* %pSrc.addr.021.pn.unr, i32 1 | ||
%7 = load i32, i32* %pSrc.addr.021.epil, align 4 | ||
%cmp2.epil = icmp sgt i32 %out.018.unr, %7 | ||
%sub3.epil = sub i32 %blockSize, %blkCnt.020.unr | ||
%spec.select.epil = select i1 %cmp2.epil, i32 %7, i32 %out.018.unr | ||
%spec.select14.epil = select i1 %cmp2.epil, i32 %sub3.epil, i32 %outIndex.019.unr | ||
%epil.iter.cmp.not = icmp eq i32 %xtraiter, 1 | ||
br i1 %epil.iter.cmp.not, label %while.end, label %while.body.epil.1 | ||
|
||
while.end: ; preds = %while.end.loopexit.unr-lcssa, %while.body.epil.2, %while.body.epil.1, %while.body.epil, %entry | ||
%out.0.lcssa = phi i32 [ %0, %entry ], [ %spec.select.lcssa.ph, %while.end.loopexit.unr-lcssa ], [ %spec.select.epil, %while.body.epil ], [ %spec.select.epil.1, %while.body.epil.1 ], [ %spec.select.epil.2, %while.body.epil.2 ] | ||
%outIndex.0.lcssa = phi i32 [ 0, %entry ], [ %spec.select14.lcssa.ph, %while.end.loopexit.unr-lcssa ], [ %spec.select14.epil, %while.body.epil ], [ %spec.select14.epil.1, %while.body.epil.1 ], [ %spec.select14.epil.2, %while.body.epil.2 ] | ||
store i32 %out.0.lcssa, i32* %pResult, align 4 | ||
store i32 %outIndex.0.lcssa, i32* %pIndex, align 4 | ||
ret void | ||
|
||
while.body.epil.1: ; preds = %while.body.epil | ||
%blkCnt.0.epil = add i32 %blkCnt.020.unr, -1 | ||
%pSrc.addr.021.epil.1 = getelementptr inbounds i32, i32* %pSrc.addr.021.pn.unr, i32 2 | ||
%8 = load i32, i32* %pSrc.addr.021.epil.1, align 4 | ||
%cmp2.epil.1 = icmp sgt i32 %spec.select.epil, %8 | ||
%sub3.epil.1 = sub i32 %blockSize, %blkCnt.0.epil | ||
%spec.select.epil.1 = select i1 %cmp2.epil.1, i32 %8, i32 %spec.select.epil | ||
%spec.select14.epil.1 = select i1 %cmp2.epil.1, i32 %sub3.epil.1, i32 %spec.select14.epil | ||
%epil.iter.cmp.1.not = icmp eq i32 %xtraiter, 2 | ||
br i1 %epil.iter.cmp.1.not, label %while.end, label %while.body.epil.2 | ||
|
||
while.body.epil.2: ; preds = %while.body.epil.1 | ||
%blkCnt.0.epil.1 = add i32 %blkCnt.020.unr, -2 | ||
%pSrc.addr.021.epil.2 = getelementptr inbounds i32, i32* %pSrc.addr.021.pn.unr, i32 3 | ||
%9 = load i32, i32* %pSrc.addr.021.epil.2, align 4 | ||
%cmp2.epil.2 = icmp sgt i32 %spec.select.epil.1, %9 | ||
%sub3.epil.2 = sub i32 %blockSize, %blkCnt.0.epil.1 | ||
%spec.select.epil.2 = select i1 %cmp2.epil.2, i32 %9, i32 %spec.select.epil.1 | ||
%spec.select14.epil.2 = select i1 %cmp2.epil.2, i32 %sub3.epil.2, i32 %spec.select14.epil.1 | ||
br label %while.end | ||
} |