From 834ad102c377a4d1cdc6c601d9899b5dc0a1858b Mon Sep 17 00:00:00 2001 From: Alexey Bataev Date: Mon, 5 Aug 2024 10:13:21 -0700 Subject: [PATCH] [SLP][NFC]Add a test version with threshold=-15, NFC. --- .../SLPVectorizer/RISCV/complex-loads.ll | 335 ++++++++++++++++++ 1 file changed, 335 insertions(+) diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/complex-loads.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/complex-loads.ll index aa9a070a794509..3595f7772defda 100644 --- a/llvm/test/Transforms/SLPVectorizer/RISCV/complex-loads.ll +++ b/llvm/test/Transforms/SLPVectorizer/RISCV/complex-loads.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3 ; RUN: opt -S -mtriple riscv64-unknown-linux-gnu < %s --passes=slp-vectorizer -mattr=+v -slp-threshold=-20 | FileCheck %s +; RUN: opt -S -mtriple riscv64-unknown-linux-gnu < %s --passes=slp-vectorizer -mattr=+v -slp-threshold=-15 | FileCheck %s --check-prefix=THR15 define i32 @test(ptr %pix1, ptr %pix2, i64 %idx.ext, i64 %idx.ext63, ptr %add.ptr, ptr %add.ptr64) { ; CHECK-LABEL: define i32 @test( @@ -359,6 +360,340 @@ define i32 @test(ptr %pix1, ptr %pix2, i64 %idx.ext, i64 %idx.ext63, ptr %add.pt ; CHECK-NEXT: [[ADD113_3:%.*]] = add i32 [[ADD112_3]], [[XOR_I63_3]] ; CHECK-NEXT: ret i32 [[ADD113_3]] ; +; THR15-LABEL: define i32 @test( +; THR15-SAME: ptr [[PIX1:%.*]], ptr [[PIX2:%.*]], i64 [[IDX_EXT:%.*]], i64 [[IDX_EXT63:%.*]], ptr [[ADD_PTR:%.*]], ptr [[ADD_PTR64:%.*]]) #[[ATTR0:[0-9]+]] { +; THR15-NEXT: entry: +; THR15-NEXT: [[TMP0:%.*]] = load i8, ptr [[PIX1]], align 1 +; THR15-NEXT: [[CONV:%.*]] = zext i8 [[TMP0]] to i32 +; THR15-NEXT: [[ARRAYIDX3:%.*]] = getelementptr i8, ptr [[PIX1]], i64 4 +; THR15-NEXT: [[ARRAYIDX5:%.*]] = getelementptr i8, ptr [[PIX2]], i64 4 +; THR15-NEXT: [[ARRAYIDX8:%.*]] = getelementptr i8, ptr [[PIX1]], i64 1 +; THR15-NEXT: [[ARRAYIDX22:%.*]] = getelementptr i8, ptr [[PIX2]], i64 2 +; THR15-NEXT: [[ARRAYIDX25:%.*]] = getelementptr
i8, ptr [[PIX1]], i64 6 +; THR15-NEXT: [[ARRAYIDX27:%.*]] = getelementptr i8, ptr [[PIX2]], i64 6 +; THR15-NEXT: [[ARRAYIDX32:%.*]] = getelementptr i8, ptr [[PIX1]], i64 3 +; THR15-NEXT: [[TMP1:%.*]] = load i8, ptr [[ARRAYIDX32]], align 1 +; THR15-NEXT: [[CONV33:%.*]] = zext i8 [[TMP1]] to i32 +; THR15-NEXT: [[ADD_PTR3:%.*]] = getelementptr i8, ptr [[PIX1]], i64 [[IDX_EXT]] +; THR15-NEXT: [[ADD_PTR644:%.*]] = getelementptr i8, ptr [[PIX2]], i64 [[IDX_EXT63]] +; THR15-NEXT: [[TMP2:%.*]] = load i8, ptr [[ADD_PTR3]], align 1 +; THR15-NEXT: [[CONV_1:%.*]] = zext i8 [[TMP2]] to i32 +; THR15-NEXT: [[ARRAYIDX3_1:%.*]] = getelementptr i8, ptr [[ADD_PTR3]], i64 4 +; THR15-NEXT: [[ARRAYIDX5_1:%.*]] = getelementptr i8, ptr [[ADD_PTR644]], i64 4 +; THR15-NEXT: [[ARRAYIDX8_1:%.*]] = getelementptr i8, ptr [[ADD_PTR3]], i64 1 +; THR15-NEXT: [[ARRAYIDX22_1:%.*]] = getelementptr i8, ptr [[ADD_PTR644]], i64 2 +; THR15-NEXT: [[ARRAYIDX25_1:%.*]] = getelementptr i8, ptr [[ADD_PTR3]], i64 6 +; THR15-NEXT: [[ARRAYIDX27_1:%.*]] = getelementptr i8, ptr [[ADD_PTR644]], i64 6 +; THR15-NEXT: [[ARRAYIDX32_1:%.*]] = getelementptr i8, ptr [[ADD_PTR3]], i64 3 +; THR15-NEXT: [[TMP3:%.*]] = load i8, ptr [[ARRAYIDX32_1]], align 1 +; THR15-NEXT: [[CONV33_1:%.*]] = zext i8 [[TMP3]] to i32 +; THR15-NEXT: [[ADD_PTR_1:%.*]] = getelementptr i8, ptr [[ADD_PTR]], i64 [[IDX_EXT]] +; THR15-NEXT: [[ADD_PTR64_1:%.*]] = getelementptr i8, ptr [[ADD_PTR64]], i64 [[IDX_EXT63]] +; THR15-NEXT: [[ARRAYIDX3_2:%.*]] = getelementptr i8, ptr [[ADD_PTR_1]], i64 4 +; THR15-NEXT: [[ARRAYIDX5_2:%.*]] = getelementptr i8, ptr [[ADD_PTR64_1]], i64 4 +; THR15-NEXT: [[TMP4:%.*]] = load <2 x i8>, ptr [[ADD_PTR_1]], align 1 +; THR15-NEXT: [[TMP5:%.*]] = zext <2 x i8> [[TMP4]] to <2 x i32> +; THR15-NEXT: [[TMP6:%.*]] = load <2 x i8>, ptr [[ADD_PTR64_1]], align 1 +; THR15-NEXT: [[TMP7:%.*]] = zext <2 x i8> [[TMP6]] to <2 x i32> +; THR15-NEXT: [[TMP8:%.*]] = sub <2 x i32> [[TMP5]], [[TMP7]] +; THR15-NEXT: [[TMP9:%.*]] = load <2 x i8>, 
ptr [[ARRAYIDX3_2]], align 1 +; THR15-NEXT: [[TMP10:%.*]] = zext <2 x i8> [[TMP9]] to <2 x i32> +; THR15-NEXT: [[TMP11:%.*]] = load <2 x i8>, ptr [[ARRAYIDX5_2]], align 1 +; THR15-NEXT: [[TMP12:%.*]] = zext <2 x i8> [[TMP11]] to <2 x i32> +; THR15-NEXT: [[TMP13:%.*]] = sub <2 x i32> [[TMP10]], [[TMP12]] +; THR15-NEXT: [[TMP14:%.*]] = shl <2 x i32> [[TMP13]], +; THR15-NEXT: [[TMP15:%.*]] = add <2 x i32> [[TMP14]], [[TMP8]] +; THR15-NEXT: [[ARRAYIDX20_2:%.*]] = getelementptr i8, ptr [[ADD_PTR_1]], i64 2 +; THR15-NEXT: [[ARRAYIDX22_2:%.*]] = getelementptr i8, ptr [[ADD_PTR64_1]], i64 2 +; THR15-NEXT: [[ARRAYIDX25_2:%.*]] = getelementptr i8, ptr [[ADD_PTR_1]], i64 6 +; THR15-NEXT: [[ARRAYIDX27_2:%.*]] = getelementptr i8, ptr [[ADD_PTR64_1]], i64 6 +; THR15-NEXT: [[TMP16:%.*]] = load <2 x i8>, ptr [[ARRAYIDX20_2]], align 1 +; THR15-NEXT: [[TMP17:%.*]] = zext <2 x i8> [[TMP16]] to <2 x i32> +; THR15-NEXT: [[TMP18:%.*]] = load <2 x i8>, ptr [[ARRAYIDX22_2]], align 1 +; THR15-NEXT: [[TMP19:%.*]] = zext <2 x i8> [[TMP18]] to <2 x i32> +; THR15-NEXT: [[TMP20:%.*]] = sub <2 x i32> [[TMP17]], [[TMP19]] +; THR15-NEXT: [[TMP21:%.*]] = load <2 x i8>, ptr [[ARRAYIDX25_2]], align 1 +; THR15-NEXT: [[TMP22:%.*]] = zext <2 x i8> [[TMP21]] to <2 x i32> +; THR15-NEXT: [[TMP23:%.*]] = load <2 x i8>, ptr [[ARRAYIDX27_2]], align 1 +; THR15-NEXT: [[TMP24:%.*]] = zext <2 x i8> [[TMP23]] to <2 x i32> +; THR15-NEXT: [[TMP25:%.*]] = sub <2 x i32> [[TMP22]], [[TMP24]] +; THR15-NEXT: [[TMP26:%.*]] = shl <2 x i32> [[TMP25]], +; THR15-NEXT: [[TMP27:%.*]] = add <2 x i32> [[TMP26]], [[TMP20]] +; THR15-NEXT: [[TMP28:%.*]] = extractelement <2 x i32> [[TMP15]], i32 0 +; THR15-NEXT: [[TMP29:%.*]] = extractelement <2 x i32> [[TMP15]], i32 1 +; THR15-NEXT: [[ADD44_2:%.*]] = add i32 [[TMP29]], [[TMP28]] +; THR15-NEXT: [[SUB45_2:%.*]] = sub i32 [[TMP28]], [[TMP29]] +; THR15-NEXT: [[TMP30:%.*]] = extractelement <2 x i32> [[TMP27]], i32 0 +; THR15-NEXT: [[TMP31:%.*]] = extractelement <2 x i32> [[TMP27]], i32 1 
+; THR15-NEXT: [[ADD46_2:%.*]] = add i32 [[TMP31]], [[TMP30]] +; THR15-NEXT: [[SUB47_2:%.*]] = sub i32 [[TMP30]], [[TMP31]] +; THR15-NEXT: [[ADD48_2:%.*]] = add i32 [[ADD46_2]], [[ADD44_2]] +; THR15-NEXT: [[SUB51_2:%.*]] = sub i32 [[ADD44_2]], [[ADD46_2]] +; THR15-NEXT: [[ADD55_2:%.*]] = add i32 [[SUB47_2]], [[SUB45_2]] +; THR15-NEXT: [[SUB59_2:%.*]] = sub i32 [[SUB45_2]], [[SUB47_2]] +; THR15-NEXT: [[ARRAYIDX3_3:%.*]] = getelementptr i8, ptr null, i64 4 +; THR15-NEXT: [[ARRAYIDX5_3:%.*]] = getelementptr i8, ptr null, i64 4 +; THR15-NEXT: [[TMP32:%.*]] = load <2 x i8>, ptr null, align 1 +; THR15-NEXT: [[TMP33:%.*]] = zext <2 x i8> [[TMP32]] to <2 x i32> +; THR15-NEXT: [[TMP34:%.*]] = load <2 x i8>, ptr null, align 1 +; THR15-NEXT: [[TMP35:%.*]] = zext <2 x i8> [[TMP34]] to <2 x i32> +; THR15-NEXT: [[TMP36:%.*]] = sub <2 x i32> [[TMP33]], [[TMP35]] +; THR15-NEXT: [[TMP37:%.*]] = call <2 x i8> @llvm.experimental.vp.strided.load.v2i8.p0.i64(ptr align 1 [[ARRAYIDX3_3]], i64 -4, <2 x i1> , i32 2) +; THR15-NEXT: [[TMP38:%.*]] = zext <2 x i8> [[TMP37]] to <2 x i32> +; THR15-NEXT: [[TMP39:%.*]] = load <2 x i8>, ptr [[ARRAYIDX5_3]], align 1 +; THR15-NEXT: [[TMP40:%.*]] = zext <2 x i8> [[TMP39]] to <2 x i32> +; THR15-NEXT: [[TMP41:%.*]] = sub <2 x i32> [[TMP38]], [[TMP40]] +; THR15-NEXT: [[TMP42:%.*]] = shl <2 x i32> [[TMP41]], +; THR15-NEXT: [[TMP43:%.*]] = add <2 x i32> [[TMP42]], [[TMP36]] +; THR15-NEXT: [[ARRAYIDX20_3:%.*]] = getelementptr i8, ptr null, i64 2 +; THR15-NEXT: [[ARRAYIDX22_3:%.*]] = getelementptr i8, ptr null, i64 2 +; THR15-NEXT: [[TMP44:%.*]] = load i8, ptr null, align 1 +; THR15-NEXT: [[ARRAYIDX27_3:%.*]] = getelementptr i8, ptr null, i64 6 +; THR15-NEXT: [[TMP45:%.*]] = load i8, ptr null, align 1 +; THR15-NEXT: [[TMP46:%.*]] = load <2 x i8>, ptr [[ARRAYIDX20_3]], align 1 +; THR15-NEXT: [[TMP47:%.*]] = zext <2 x i8> [[TMP46]] to <2 x i32> +; THR15-NEXT: [[TMP48:%.*]] = load <2 x i8>, ptr [[ARRAYIDX22_3]], align 1 +; THR15-NEXT: [[TMP49:%.*]] = zext <2 x 
i8> [[TMP48]] to <2 x i32> +; THR15-NEXT: [[TMP50:%.*]] = sub <2 x i32> [[TMP47]], [[TMP49]] +; THR15-NEXT: [[TMP51:%.*]] = insertelement <2 x i8> poison, i8 [[TMP44]], i32 0 +; THR15-NEXT: [[TMP52:%.*]] = insertelement <2 x i8> [[TMP51]], i8 [[TMP45]], i32 1 +; THR15-NEXT: [[TMP53:%.*]] = zext <2 x i8> [[TMP52]] to <2 x i32> +; THR15-NEXT: [[TMP54:%.*]] = load <2 x i8>, ptr [[ARRAYIDX27_3]], align 1 +; THR15-NEXT: [[TMP55:%.*]] = zext <2 x i8> [[TMP54]] to <2 x i32> +; THR15-NEXT: [[TMP56:%.*]] = sub <2 x i32> [[TMP53]], [[TMP55]] +; THR15-NEXT: [[TMP57:%.*]] = shl <2 x i32> [[TMP56]], +; THR15-NEXT: [[TMP58:%.*]] = add <2 x i32> [[TMP57]], [[TMP50]] +; THR15-NEXT: [[TMP59:%.*]] = extractelement <2 x i32> [[TMP43]], i32 0 +; THR15-NEXT: [[TMP60:%.*]] = extractelement <2 x i32> [[TMP43]], i32 1 +; THR15-NEXT: [[ADD44_3:%.*]] = add i32 [[TMP60]], [[TMP59]] +; THR15-NEXT: [[SUB45_3:%.*]] = sub i32 [[TMP59]], [[TMP60]] +; THR15-NEXT: [[TMP61:%.*]] = extractelement <2 x i32> [[TMP58]], i32 0 +; THR15-NEXT: [[TMP62:%.*]] = extractelement <2 x i32> [[TMP58]], i32 1 +; THR15-NEXT: [[ADD46_3:%.*]] = add i32 [[TMP62]], [[TMP61]] +; THR15-NEXT: [[SUB47_3:%.*]] = sub i32 [[TMP61]], [[TMP62]] +; THR15-NEXT: [[ADD48_3:%.*]] = add i32 [[ADD46_3]], [[ADD44_3]] +; THR15-NEXT: [[SUB51_3:%.*]] = sub i32 [[ADD44_3]], [[ADD46_3]] +; THR15-NEXT: [[ADD55_3:%.*]] = add i32 [[SUB47_3]], [[SUB45_3]] +; THR15-NEXT: [[SUB59_3:%.*]] = sub i32 [[SUB45_3]], [[SUB47_3]] +; THR15-NEXT: [[ADD94:%.*]] = add i32 [[ADD48_3]], [[ADD48_2]] +; THR15-NEXT: [[SUB102:%.*]] = sub i32 [[ADD48_2]], [[ADD48_3]] +; THR15-NEXT: [[TMP63:%.*]] = extractelement <2 x i32> [[TMP33]], i32 0 +; THR15-NEXT: [[SHR_I:%.*]] = lshr i32 [[TMP63]], 15 +; THR15-NEXT: [[AND_I:%.*]] = and i32 [[SHR_I]], 65537 +; THR15-NEXT: [[MUL_I:%.*]] = mul i32 [[AND_I]], 65535 +; THR15-NEXT: [[SHR_I49:%.*]] = lshr i32 [[ADD46_2]], 15 +; THR15-NEXT: [[AND_I50:%.*]] = and i32 [[SHR_I49]], 65537 +; THR15-NEXT: [[MUL_I51:%.*]] = mul i32 
[[AND_I50]], 65535 +; THR15-NEXT: [[ADD94_2:%.*]] = add i32 [[SUB51_3]], [[SUB51_2]] +; THR15-NEXT: [[SUB102_2:%.*]] = sub i32 [[SUB51_2]], [[SUB51_3]] +; THR15-NEXT: [[SHR_I49_2:%.*]] = lshr i32 [[CONV_1]], 15 +; THR15-NEXT: [[AND_I50_2:%.*]] = and i32 [[SHR_I49_2]], 65537 +; THR15-NEXT: [[MUL_I51_2:%.*]] = mul i32 [[AND_I50_2]], 65535 +; THR15-NEXT: [[ADD94_3:%.*]] = add i32 [[SUB59_3]], [[SUB59_2]] +; THR15-NEXT: [[SUB102_3:%.*]] = sub i32 [[SUB59_2]], [[SUB59_3]] +; THR15-NEXT: [[SHR_I49_3:%.*]] = lshr i32 [[CONV]], 15 +; THR15-NEXT: [[AND_I50_3:%.*]] = and i32 [[SHR_I49_3]], 65537 +; THR15-NEXT: [[MUL_I51_3:%.*]] = mul i32 [[AND_I50_3]], 65535 +; THR15-NEXT: [[TMP64:%.*]] = load <2 x i8>, ptr [[ARRAYIDX8]], align 1 +; THR15-NEXT: [[TMP65:%.*]] = zext <2 x i8> [[TMP64]] to <2 x i32> +; THR15-NEXT: [[TMP66:%.*]] = load <2 x i8>, ptr [[PIX2]], align 1 +; THR15-NEXT: [[TMP67:%.*]] = zext <2 x i8> [[TMP66]] to <2 x i32> +; THR15-NEXT: [[TMP68:%.*]] = load <2 x i8>, ptr [[ARRAYIDX3]], align 1 +; THR15-NEXT: [[TMP69:%.*]] = zext <2 x i8> [[TMP68]] to <2 x i32> +; THR15-NEXT: [[TMP70:%.*]] = load <2 x i8>, ptr [[ARRAYIDX5]], align 1 +; THR15-NEXT: [[TMP71:%.*]] = zext <2 x i8> [[TMP70]] to <2 x i32> +; THR15-NEXT: [[TMP72:%.*]] = sub <2 x i32> [[TMP69]], [[TMP71]] +; THR15-NEXT: [[TMP73:%.*]] = shl <2 x i32> [[TMP72]], +; THR15-NEXT: [[TMP74:%.*]] = shufflevector <2 x i32> [[TMP65]], <2 x i32> poison, <2 x i32> +; THR15-NEXT: [[TMP75:%.*]] = insertelement <2 x i32> [[TMP74]], i32 [[CONV]], i32 0 +; THR15-NEXT: [[TMP76:%.*]] = sub <2 x i32> [[TMP75]], [[TMP67]] +; THR15-NEXT: [[TMP77:%.*]] = add <2 x i32> [[TMP73]], [[TMP76]] +; THR15-NEXT: [[TMP78:%.*]] = load <2 x i8>, ptr [[ARRAYIDX22]], align 1 +; THR15-NEXT: [[TMP79:%.*]] = zext <2 x i8> [[TMP78]] to <2 x i32> +; THR15-NEXT: [[TMP80:%.*]] = load <2 x i8>, ptr [[ARRAYIDX25]], align 1 +; THR15-NEXT: [[TMP81:%.*]] = zext <2 x i8> [[TMP80]] to <2 x i32> +; THR15-NEXT: [[TMP82:%.*]] = load <2 x i8>, ptr [[ARRAYIDX27]], 
align 1 +; THR15-NEXT: [[TMP83:%.*]] = zext <2 x i8> [[TMP82]] to <2 x i32> +; THR15-NEXT: [[TMP84:%.*]] = sub <2 x i32> [[TMP81]], [[TMP83]] +; THR15-NEXT: [[TMP85:%.*]] = shl <2 x i32> [[TMP84]], +; THR15-NEXT: [[TMP86:%.*]] = insertelement <2 x i32> [[TMP74]], i32 [[CONV33]], i32 1 +; THR15-NEXT: [[TMP87:%.*]] = sub <2 x i32> [[TMP86]], [[TMP79]] +; THR15-NEXT: [[TMP88:%.*]] = add <2 x i32> [[TMP85]], [[TMP87]] +; THR15-NEXT: [[TMP89:%.*]] = extractelement <2 x i32> [[TMP77]], i32 0 +; THR15-NEXT: [[TMP90:%.*]] = extractelement <2 x i32> [[TMP77]], i32 1 +; THR15-NEXT: [[ADD44:%.*]] = add i32 [[TMP90]], [[TMP89]] +; THR15-NEXT: [[SUB45:%.*]] = sub i32 [[TMP89]], [[TMP90]] +; THR15-NEXT: [[TMP91:%.*]] = extractelement <2 x i32> [[TMP88]], i32 0 +; THR15-NEXT: [[TMP92:%.*]] = extractelement <2 x i32> [[TMP88]], i32 1 +; THR15-NEXT: [[ADD46:%.*]] = add i32 [[TMP92]], [[TMP91]] +; THR15-NEXT: [[SUB47:%.*]] = sub i32 [[TMP91]], [[TMP92]] +; THR15-NEXT: [[ADD48:%.*]] = add i32 [[ADD46]], [[ADD44]] +; THR15-NEXT: [[SUB51:%.*]] = sub i32 [[ADD44]], [[ADD46]] +; THR15-NEXT: [[ADD55:%.*]] = add i32 [[SUB47]], [[SUB45]] +; THR15-NEXT: [[SUB59:%.*]] = sub i32 [[SUB45]], [[SUB47]] +; THR15-NEXT: [[SHR_I59:%.*]] = lshr i32 [[ADD46]], 15 +; THR15-NEXT: [[AND_I60:%.*]] = and i32 [[SHR_I59]], 65537 +; THR15-NEXT: [[MUL_I61:%.*]] = mul i32 [[AND_I60]], 65535 +; THR15-NEXT: [[SHR_I59_1:%.*]] = lshr i32 [[SUB47]], 15 +; THR15-NEXT: [[AND_I60_1:%.*]] = and i32 [[SHR_I59_1]], 65537 +; THR15-NEXT: [[MUL_I61_1:%.*]] = mul i32 [[AND_I60_1]], 65535 +; THR15-NEXT: [[TMP93:%.*]] = load <2 x i8>, ptr [[ARRAYIDX8_1]], align 1 +; THR15-NEXT: [[TMP94:%.*]] = zext <2 x i8> [[TMP93]] to <2 x i32> +; THR15-NEXT: [[TMP95:%.*]] = load <2 x i8>, ptr [[ADD_PTR644]], align 1 +; THR15-NEXT: [[TMP96:%.*]] = zext <2 x i8> [[TMP95]] to <2 x i32> +; THR15-NEXT: [[TMP97:%.*]] = load <2 x i8>, ptr [[ARRAYIDX3_1]], align 1 +; THR15-NEXT: [[TMP98:%.*]] = zext <2 x i8> [[TMP97]] to <2 x i32> +; THR15-NEXT: 
[[TMP99:%.*]] = load <2 x i8>, ptr [[ARRAYIDX5_1]], align 1 +; THR15-NEXT: [[TMP100:%.*]] = zext <2 x i8> [[TMP99]] to <2 x i32> +; THR15-NEXT: [[TMP101:%.*]] = sub <2 x i32> [[TMP98]], [[TMP100]] +; THR15-NEXT: [[TMP102:%.*]] = shl <2 x i32> [[TMP101]], +; THR15-NEXT: [[TMP103:%.*]] = shufflevector <2 x i32> [[TMP94]], <2 x i32> poison, <2 x i32> +; THR15-NEXT: [[TMP104:%.*]] = insertelement <2 x i32> [[TMP103]], i32 [[CONV_1]], i32 0 +; THR15-NEXT: [[TMP105:%.*]] = sub <2 x i32> [[TMP104]], [[TMP96]] +; THR15-NEXT: [[TMP106:%.*]] = add <2 x i32> [[TMP102]], [[TMP105]] +; THR15-NEXT: [[TMP107:%.*]] = load <2 x i8>, ptr [[ARRAYIDX22_1]], align 1 +; THR15-NEXT: [[TMP108:%.*]] = zext <2 x i8> [[TMP107]] to <2 x i32> +; THR15-NEXT: [[TMP109:%.*]] = load <2 x i8>, ptr [[ARRAYIDX25_1]], align 1 +; THR15-NEXT: [[TMP110:%.*]] = zext <2 x i8> [[TMP109]] to <2 x i32> +; THR15-NEXT: [[TMP111:%.*]] = load <2 x i8>, ptr [[ARRAYIDX27_1]], align 1 +; THR15-NEXT: [[TMP112:%.*]] = zext <2 x i8> [[TMP111]] to <2 x i32> +; THR15-NEXT: [[TMP113:%.*]] = sub <2 x i32> [[TMP110]], [[TMP112]] +; THR15-NEXT: [[TMP114:%.*]] = shl <2 x i32> [[TMP113]], +; THR15-NEXT: [[TMP115:%.*]] = insertelement <2 x i32> [[TMP103]], i32 [[CONV33_1]], i32 1 +; THR15-NEXT: [[TMP116:%.*]] = sub <2 x i32> [[TMP115]], [[TMP108]] +; THR15-NEXT: [[TMP117:%.*]] = add <2 x i32> [[TMP114]], [[TMP116]] +; THR15-NEXT: [[TMP118:%.*]] = extractelement <2 x i32> [[TMP106]], i32 0 +; THR15-NEXT: [[TMP119:%.*]] = extractelement <2 x i32> [[TMP106]], i32 1 +; THR15-NEXT: [[ADD44_1:%.*]] = add i32 [[TMP119]], [[TMP118]] +; THR15-NEXT: [[SUB45_1:%.*]] = sub i32 [[TMP118]], [[TMP119]] +; THR15-NEXT: [[TMP120:%.*]] = extractelement <2 x i32> [[TMP117]], i32 0 +; THR15-NEXT: [[TMP121:%.*]] = extractelement <2 x i32> [[TMP117]], i32 1 +; THR15-NEXT: [[ADD46_1:%.*]] = add i32 [[TMP121]], [[TMP120]] +; THR15-NEXT: [[SUB47_1:%.*]] = sub i32 [[TMP120]], [[TMP121]] +; THR15-NEXT: [[ADD48_1:%.*]] = add i32 [[ADD46_1]], [[ADD44_1]] +; 
THR15-NEXT: [[SUB51_1:%.*]] = sub i32 [[ADD44_1]], [[ADD46_1]] +; THR15-NEXT: [[ADD55_1:%.*]] = add i32 [[SUB47_1]], [[SUB45_1]] +; THR15-NEXT: [[SUB59_1:%.*]] = sub i32 [[SUB45_1]], [[SUB47_1]] +; THR15-NEXT: [[SHR_I54:%.*]] = lshr i32 [[ADD46_1]], 15 +; THR15-NEXT: [[AND_I55:%.*]] = and i32 [[SHR_I54]], 65537 +; THR15-NEXT: [[MUL_I56:%.*]] = mul i32 [[AND_I55]], 65535 +; THR15-NEXT: [[SHR_I54_1:%.*]] = lshr i32 [[SUB47_1]], 15 +; THR15-NEXT: [[AND_I55_1:%.*]] = and i32 [[SHR_I54_1]], 65537 +; THR15-NEXT: [[MUL_I56_1:%.*]] = mul i32 [[AND_I55_1]], 65535 +; THR15-NEXT: [[TMP122:%.*]] = lshr <2 x i32> [[TMP94]], +; THR15-NEXT: [[TMP123:%.*]] = and <2 x i32> [[TMP122]], +; THR15-NEXT: [[TMP124:%.*]] = mul <2 x i32> [[TMP123]], +; THR15-NEXT: [[ADD78:%.*]] = add i32 [[ADD48_1]], [[ADD48]] +; THR15-NEXT: [[SUB86:%.*]] = sub i32 [[ADD48]], [[ADD48_1]] +; THR15-NEXT: [[ADD103:%.*]] = add i32 [[ADD94]], [[ADD78]] +; THR15-NEXT: [[SUB104:%.*]] = sub i32 [[ADD78]], [[ADD94]] +; THR15-NEXT: [[ADD105:%.*]] = add i32 [[SUB102]], [[SUB86]] +; THR15-NEXT: [[SUB106:%.*]] = sub i32 [[SUB86]], [[SUB102]] +; THR15-NEXT: [[ADD_I:%.*]] = add i32 [[MUL_I]], [[ADD103]] +; THR15-NEXT: [[XOR_I:%.*]] = xor i32 [[ADD_I]], [[TMP63]] +; THR15-NEXT: [[ADD_I52:%.*]] = add i32 [[MUL_I51]], [[ADD105]] +; THR15-NEXT: [[XOR_I53:%.*]] = xor i32 [[ADD_I52]], [[ADD46_2]] +; THR15-NEXT: [[ADD_I57:%.*]] = add i32 [[MUL_I56]], [[SUB104]] +; THR15-NEXT: [[XOR_I58:%.*]] = xor i32 [[ADD_I57]], [[ADD46_1]] +; THR15-NEXT: [[ADD_I62:%.*]] = add i32 [[MUL_I61]], [[SUB106]] +; THR15-NEXT: [[XOR_I63:%.*]] = xor i32 [[ADD_I62]], [[ADD46]] +; THR15-NEXT: [[ADD110:%.*]] = add i32 [[XOR_I53]], [[XOR_I]] +; THR15-NEXT: [[ADD112:%.*]] = add i32 [[ADD110]], [[XOR_I58]] +; THR15-NEXT: [[ADD113:%.*]] = add i32 [[ADD112]], [[XOR_I63]] +; THR15-NEXT: [[TMP125:%.*]] = insertelement <2 x i32> poison, i32 [[ADD55_2]], i32 0 +; THR15-NEXT: [[TMP126:%.*]] = shufflevector <2 x i32> [[TMP125]], <2 x i32> poison, <2 x i32> 
zeroinitializer +; THR15-NEXT: [[TMP127:%.*]] = insertelement <2 x i32> poison, i32 [[ADD55_3]], i32 0 +; THR15-NEXT: [[TMP128:%.*]] = shufflevector <2 x i32> [[TMP127]], <2 x i32> poison, <2 x i32> zeroinitializer +; THR15-NEXT: [[TMP129:%.*]] = sub <2 x i32> [[TMP126]], [[TMP128]] +; THR15-NEXT: [[TMP130:%.*]] = add <2 x i32> [[TMP126]], [[TMP128]] +; THR15-NEXT: [[TMP131:%.*]] = shufflevector <2 x i32> [[TMP129]], <2 x i32> [[TMP130]], <2 x i32> +; THR15-NEXT: [[TMP132:%.*]] = lshr <2 x i32> [[TMP5]], +; THR15-NEXT: [[TMP133:%.*]] = and <2 x i32> [[TMP132]], +; THR15-NEXT: [[TMP134:%.*]] = mul <2 x i32> [[TMP133]], +; THR15-NEXT: [[TMP135:%.*]] = insertelement <2 x i32> poison, i32 [[ADD55]], i32 0 +; THR15-NEXT: [[TMP136:%.*]] = shufflevector <2 x i32> [[TMP135]], <2 x i32> poison, <2 x i32> zeroinitializer +; THR15-NEXT: [[TMP137:%.*]] = insertelement <2 x i32> poison, i32 [[ADD55_1]], i32 0 +; THR15-NEXT: [[TMP138:%.*]] = shufflevector <2 x i32> [[TMP137]], <2 x i32> poison, <2 x i32> zeroinitializer +; THR15-NEXT: [[TMP139:%.*]] = sub <2 x i32> [[TMP136]], [[TMP138]] +; THR15-NEXT: [[TMP140:%.*]] = add <2 x i32> [[TMP136]], [[TMP138]] +; THR15-NEXT: [[TMP141:%.*]] = shufflevector <2 x i32> [[TMP139]], <2 x i32> [[TMP140]], <2 x i32> +; THR15-NEXT: [[TMP142:%.*]] = extractelement <2 x i32> [[TMP131]], i32 1 +; THR15-NEXT: [[TMP143:%.*]] = extractelement <2 x i32> [[TMP141]], i32 1 +; THR15-NEXT: [[SUB104_1:%.*]] = sub i32 [[TMP143]], [[TMP142]] +; THR15-NEXT: [[TMP144:%.*]] = add <2 x i32> [[TMP131]], [[TMP141]] +; THR15-NEXT: [[TMP145:%.*]] = extractelement <2 x i32> [[TMP131]], i32 0 +; THR15-NEXT: [[TMP146:%.*]] = extractelement <2 x i32> [[TMP141]], i32 0 +; THR15-NEXT: [[TMP147:%.*]] = shufflevector <2 x i32> [[TMP141]], <2 x i32> [[TMP131]], <2 x i32> +; THR15-NEXT: [[SUB106_1:%.*]] = sub i32 [[TMP146]], [[TMP145]] +; THR15-NEXT: [[TMP148:%.*]] = add <2 x i32> [[TMP134]], [[TMP144]] +; THR15-NEXT: [[TMP149:%.*]] = xor <2 x i32> [[TMP148]], [[TMP5]] +; 
THR15-NEXT: [[ADD_I57_1:%.*]] = add i32 [[MUL_I56_1]], [[SUB104_1]] +; THR15-NEXT: [[XOR_I58_1:%.*]] = xor i32 [[ADD_I57_1]], [[SUB47_1]] +; THR15-NEXT: [[ADD_I62_1:%.*]] = add i32 [[MUL_I61_1]], [[SUB106_1]] +; THR15-NEXT: [[XOR_I63_1:%.*]] = xor i32 [[ADD_I62_1]], [[SUB47]] +; THR15-NEXT: [[TMP150:%.*]] = extractelement <2 x i32> [[TMP149]], i32 0 +; THR15-NEXT: [[ADD108_1:%.*]] = add i32 [[TMP150]], [[ADD113]] +; THR15-NEXT: [[TMP151:%.*]] = extractelement <2 x i32> [[TMP149]], i32 1 +; THR15-NEXT: [[ADD110_1:%.*]] = add i32 [[ADD108_1]], [[TMP151]] +; THR15-NEXT: [[ADD112_1:%.*]] = add i32 [[ADD110_1]], [[XOR_I58_1]] +; THR15-NEXT: [[ADD113_1:%.*]] = add i32 [[ADD112_1]], [[XOR_I63_1]] +; THR15-NEXT: [[ADD78_2:%.*]] = add i32 [[SUB51_1]], [[SUB51]] +; THR15-NEXT: [[SUB86_2:%.*]] = sub i32 [[SUB51]], [[SUB51_1]] +; THR15-NEXT: [[TMP152:%.*]] = insertelement <2 x i32> poison, i32 [[ADD78_2]], i32 0 +; THR15-NEXT: [[TMP153:%.*]] = shufflevector <2 x i32> [[TMP152]], <2 x i32> poison, <2 x i32> zeroinitializer +; THR15-NEXT: [[TMP154:%.*]] = insertelement <2 x i32> poison, i32 [[ADD94_2]], i32 0 +; THR15-NEXT: [[TMP155:%.*]] = shufflevector <2 x i32> [[TMP154]], <2 x i32> poison, <2 x i32> zeroinitializer +; THR15-NEXT: [[TMP156:%.*]] = add <2 x i32> [[TMP153]], [[TMP155]] +; THR15-NEXT: [[TMP157:%.*]] = sub <2 x i32> [[TMP153]], [[TMP155]] +; THR15-NEXT: [[TMP158:%.*]] = shufflevector <2 x i32> [[TMP156]], <2 x i32> [[TMP157]], <2 x i32> +; THR15-NEXT: [[ADD105_2:%.*]] = add i32 [[SUB102_2]], [[SUB86_2]] +; THR15-NEXT: [[SUB106_2:%.*]] = sub i32 [[SUB86_2]], [[SUB102_2]] +; THR15-NEXT: [[ADD_I52_2:%.*]] = add i32 [[MUL_I51_2]], [[ADD105_2]] +; THR15-NEXT: [[XOR_I53_2:%.*]] = xor i32 [[ADD_I52_2]], [[CONV_1]] +; THR15-NEXT: [[TMP159:%.*]] = add <2 x i32> [[TMP124]], [[TMP158]] +; THR15-NEXT: [[TMP160:%.*]] = xor <2 x i32> [[TMP159]], [[TMP94]] +; THR15-NEXT: [[SHR_I59_2:%.*]] = lshr i32 [[ADD44]], 15 +; THR15-NEXT: [[AND_I60_2:%.*]] = and i32 [[SHR_I59_2]], 65537 
+; THR15-NEXT: [[MUL_I61_2:%.*]] = mul i32 [[AND_I60_2]], 65535 +; THR15-NEXT: [[ADD_I62_2:%.*]] = add i32 [[MUL_I61_2]], [[SUB106_2]] +; THR15-NEXT: [[XOR_I63_2:%.*]] = xor i32 [[ADD_I62_2]], [[ADD44]] +; THR15-NEXT: [[ADD108_2:%.*]] = add i32 [[XOR_I53_2]], [[ADD113_1]] +; THR15-NEXT: [[TMP161:%.*]] = extractelement <2 x i32> [[TMP160]], i32 0 +; THR15-NEXT: [[ADD110_2:%.*]] = add i32 [[ADD108_2]], [[TMP161]] +; THR15-NEXT: [[TMP162:%.*]] = extractelement <2 x i32> [[TMP160]], i32 1 +; THR15-NEXT: [[ADD112_2:%.*]] = add i32 [[ADD110_2]], [[TMP162]] +; THR15-NEXT: [[ADD113_2:%.*]] = add i32 [[ADD112_2]], [[XOR_I63_2]] +; THR15-NEXT: [[ADD78_3:%.*]] = add i32 [[SUB59_1]], [[SUB59]] +; THR15-NEXT: [[SUB86_3:%.*]] = sub i32 [[SUB59]], [[SUB59_1]] +; THR15-NEXT: [[TMP163:%.*]] = insertelement <2 x i32> poison, i32 [[ADD78_3]], i32 0 +; THR15-NEXT: [[TMP164:%.*]] = shufflevector <2 x i32> [[TMP163]], <2 x i32> poison, <2 x i32> zeroinitializer +; THR15-NEXT: [[TMP165:%.*]] = insertelement <2 x i32> poison, i32 [[ADD94_3]], i32 0 +; THR15-NEXT: [[TMP166:%.*]] = shufflevector <2 x i32> [[TMP165]], <2 x i32> poison, <2 x i32> zeroinitializer +; THR15-NEXT: [[TMP167:%.*]] = add <2 x i32> [[TMP164]], [[TMP166]] +; THR15-NEXT: [[TMP168:%.*]] = sub <2 x i32> [[TMP164]], [[TMP166]] +; THR15-NEXT: [[TMP169:%.*]] = shufflevector <2 x i32> [[TMP167]], <2 x i32> [[TMP168]], <2 x i32> +; THR15-NEXT: [[ADD105_3:%.*]] = add i32 [[SUB102_3]], [[SUB86_3]] +; THR15-NEXT: [[SUB106_3:%.*]] = sub i32 [[SUB86_3]], [[SUB102_3]] +; THR15-NEXT: [[ADD_I52_3:%.*]] = add i32 [[MUL_I51_3]], [[ADD105_3]] +; THR15-NEXT: [[XOR_I53_3:%.*]] = xor i32 [[ADD_I52_3]], [[CONV]] +; THR15-NEXT: [[TMP170:%.*]] = lshr <2 x i32> [[TMP65]], +; THR15-NEXT: [[TMP171:%.*]] = and <2 x i32> [[TMP170]], +; THR15-NEXT: [[TMP172:%.*]] = mul <2 x i32> [[TMP171]], +; THR15-NEXT: [[TMP173:%.*]] = add <2 x i32> [[TMP172]], [[TMP169]] +; THR15-NEXT: [[TMP174:%.*]] = xor <2 x i32> [[TMP173]], [[TMP65]] +; THR15-NEXT: 
[[SHR_I59_3:%.*]] = lshr i32 [[CONV33]], 15 +; THR15-NEXT: [[AND_I60_3:%.*]] = and i32 [[SHR_I59_3]], 65537 +; THR15-NEXT: [[MUL_I61_3:%.*]] = mul i32 [[AND_I60_3]], 65535 +; THR15-NEXT: [[ADD_I62_3:%.*]] = add i32 [[MUL_I61_3]], [[SUB106_3]] +; THR15-NEXT: [[XOR_I63_3:%.*]] = xor i32 [[ADD_I62_3]], [[CONV33]] +; THR15-NEXT: [[ADD108_3:%.*]] = add i32 [[XOR_I53_3]], [[ADD113_2]] +; THR15-NEXT: [[TMP175:%.*]] = extractelement <2 x i32> [[TMP174]], i32 0 +; THR15-NEXT: [[ADD110_3:%.*]] = add i32 [[ADD108_3]], [[TMP175]] +; THR15-NEXT: [[TMP176:%.*]] = extractelement <2 x i32> [[TMP174]], i32 1 +; THR15-NEXT: [[ADD112_3:%.*]] = add i32 [[ADD110_3]], [[TMP176]] +; THR15-NEXT: [[ADD113_3:%.*]] = add i32 [[ADD112_3]], [[XOR_I63_3]] +; THR15-NEXT: ret i32 [[ADD113_3]] +; entry: %0 = load i8, ptr %pix1, align 1 %conv = zext i8 %0 to i32