From 902b0d3817ff37e5d1aeabc340a287f5671b23b5 Mon Sep 17 00:00:00 2001
From: chengzhiwei
Date: Thu, 8 Aug 2024 15:02:45 +0800
Subject: [PATCH] [InstCombine][bugfix] Fix crash caused by the use of cast in
 instCombineSVECmpNE.

Func instCombineSVECmpNE identifies a specific pattern of the 'svecmpne'
instruction, predicts its result, and uses that result to replace the
'svecmpne' instruction. The pattern is:

1. The svecmpne must compare all elements of the vector.
2. The svecmpne instruction compares its vector with zero.
3. The vector in the svecmpne instruction is produced by a dupqlane
   instruction, and the value copied by that dupqlane must be zero.

For point 3, instCombineSVECmpNE used 'cast' to convert op1 of the
dupqlane without checking whether the cast succeeds, which crashes when
that operand is not a constant (e.g. when the lane index is a runtime
value, as in the new dupq_b_idx test).
---
 .../AArch64/AArch64TargetTransformInfo.cpp    |   3 +-
 .../AArch64/sve-inst-combine-cmpne.ll         | 411 ++++++++++++++++++
 2 files changed, 413 insertions(+), 1 deletion(-)
 create mode 100644 llvm/test/Transforms/InstCombine/AArch64/sve-inst-combine-cmpne.ll

diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index 79c0e45e3aa5b50..3b4bb3dcb1b191e 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -1174,7 +1174,8 @@ static std::optional<Instruction *> instCombineSVECmpNE(InstCombiner &IC,
     return std::nullopt;
 
   // Where the dupq is a lane 0 replicate of a vector insert
-  if (!cast<ConstantInt>(DupQLane->getArgOperand(1))->isZero())
+  auto *DupQLaneIdx = dyn_cast<ConstantInt>(DupQLane->getArgOperand(1));
+  if (!DupQLaneIdx || !DupQLaneIdx->isZero())
     return std::nullopt;
 
   auto *VecIns = dyn_cast<IntrinsicInst>(DupQLane->getArgOperand(0));
diff --git a/llvm/test/Transforms/InstCombine/AArch64/sve-inst-combine-cmpne.ll b/llvm/test/Transforms/InstCombine/AArch64/sve-inst-combine-cmpne.ll
new file mode 100644
index 000000000000000..1e202b631758bc2
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/AArch64/sve-inst-combine-cmpne.ll
@@ -0,0 +1,411 @@
+; RUN: opt -S -passes=instcombine < %s | FileCheck %s
+
+target triple = "aarch64-unknown-linux-gnu"
+
+; DUPQ b8
+
+define @dupq_b_idx(i64 %idx) #0 {
+  %1 = tail call @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
+  %2 = tail call @llvm.vector.insert.nxv16i8.v16i8( undef,
+    <16 x i8> , i64 0)
+  %3 = tail call @llvm.aarch64.sve.dupq.lane.nxv16i8( %2 , i64 %idx)
+  %4 = tail call @llvm.aarch64.sve.dup.x.nxv2i64(i64 0)
+  %5 = tail call @llvm.aarch64.sve.cmpne.wide.nxv16i8( %1, %3, %4)
+  ret %5
+  ; CHECK: %4 = tail call @llvm.aarch64.sve.cmpne.wide.nxv16i8( %1, %3, zeroinitializer)
+  ; CHECK-NEXT: ret %4
+}
+
+define @dupq_b_0() #0 {
+; CHECK-LABEL: @dupq_b_0(
+; CHECK: ret zeroinitializer
+  %1 = tail call @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
+  %2 = tail call @llvm.vector.insert.nxv16i8.v16i8( undef,
+    <16 x i8> , i64 0)
+  %3 = tail call @llvm.aarch64.sve.dupq.lane.nxv16i8( %2 , i64 0)
+  %4 = tail call @llvm.aarch64.sve.dup.x.nxv2i64(i64 0)
+  %5 = tail call @llvm.aarch64.sve.cmpne.wide.nxv16i8( %1, %3, %4)
+  ret %5
+}
+
+define @dupq_b_d() #0 {
+; CHECK-LABEL: @dupq_b_d(
+; CHECK: %1 = call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+; CHECK-NEXT: %2 = call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( %1)
+; CHECK-NEXT: ret %2
+  %1 = tail call @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
+  %2 = tail call @llvm.vector.insert.nxv16i8.v16i8( undef,
+    <16 x i8> , i64 0)
+  %3 = tail call @llvm.aarch64.sve.dupq.lane.nxv16i8( %2 , i64 0)
+  %4 = tail call @llvm.aarch64.sve.dup.x.nxv2i64(i64 0)
+  %5 = tail call @llvm.aarch64.sve.cmpne.wide.nxv16i8( %1, %3, %4)
+  ret %5
+}
+
+define @dupq_b_w() #0 {
+; CHECK-LABEL: @dupq_b_w(
+; CHECK: %1 = call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+; CHECK-NEXT: %2 = call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( %1)
+; CHECK-NEXT: ret %2
+  %1 = tail call @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
+  %2 = tail call @llvm.vector.insert.nxv16i8.v16i8( undef,
+    <16 x i8> , i64 0)
+  %3 = tail call @llvm.aarch64.sve.dupq.lane.nxv16i8( %2 , i64 0)
+  %4 = tail call @llvm.aarch64.sve.dup.x.nxv2i64(i64 0)
+  %5 = tail call @llvm.aarch64.sve.cmpne.wide.nxv16i8( %1, %3, %4)
+  ret %5
+}
+
+define @dupq_b_h() #0 {
+; CHECK-LABEL: @dupq_b_h(
+; CHECK: %1 = call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
+; CHECK-NEXT: %2 = call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( %1)
+; CHECK-NEXT: ret %2
+  %1 = tail call @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
+  %2 = tail call @llvm.vector.insert.nxv16i8.v16i8( undef,
+    <16 x i8> , i64 0)
+  %3 = tail call @llvm.aarch64.sve.dupq.lane.nxv16i8( %2 , i64 0)
+  %4 = tail call @llvm.aarch64.sve.dup.x.nxv2i64(i64 0)
+  %5 = tail call @llvm.aarch64.sve.cmpne.wide.nxv16i8( %1, %3, %4)
+  ret %5
+}
+
+define @dupq_b_b() #0 {
+; CHECK-LABEL: @dupq_b_b(
+; CHECK: %1 = call @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
+; CHECK-NEXT: ret %1
+  %1 = tail call @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
+  %2 = tail call @llvm.vector.insert.nxv16i8.v16i8( undef,
+    <16 x i8> , i64 0)
+  %3 = tail call @llvm.aarch64.sve.dupq.lane.nxv16i8( %2 , i64 0)
+  %4 = tail call @llvm.aarch64.sve.dup.x.nxv2i64(i64 0)
+  %5 = tail call @llvm.aarch64.sve.cmpne.wide.nxv16i8( %1, %3, %4)
+  ret %5
+}
+
+; DUPQ b16
+
+define @dupq_h_0() #0 {
+; CHECK-LABEL: @dupq_h_0(
+; CHECK: ret zeroinitializer
+  %1 = tail call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
+  %2 = tail call @llvm.vector.insert.nxv8i16.v8i16( undef,
+    <8 x i16> , i64 0)
+  %3 = tail call @llvm.aarch64.sve.dupq.lane.nxv8i16( %2 , i64 0)
+  %4 = tail call @llvm.aarch64.sve.dup.x.nxv2i64(i64 0)
+  %5 = tail call @llvm.aarch64.sve.cmpne.wide.nxv8i16( %1, %3, %4)
+  ret %5
+}
+
+define @dupq_h_d() #0 {
+; CHECK-LABEL: @dupq_h_d(
+; CHECK: %1 = call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+; CHECK-NEXT: %2 = call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( %1)
+; CHECK-NEXT: %3 = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %2)
+; CHECK-NEXT: ret %3
+  %1 = tail call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
+  %2 = tail call @llvm.vector.insert.nxv8i16.v8i16( undef,
+    <8 x i16> , i64 0)
+  %3 = tail call @llvm.aarch64.sve.dupq.lane.nxv8i16( %2 , i64 0)
+  %4 = tail call @llvm.aarch64.sve.dup.x.nxv2i64(i64 0)
+  %5 = tail call @llvm.aarch64.sve.cmpne.wide.nxv8i16( %1, %3, %4)
+  ret %5
+}
+
+define @dupq_h_w() #0 {
+; CHECK-LABEL: @dupq_h_w(
+; CHECK: %1 = call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+; CHECK-NEXT: %2 = call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( %1)
+; CHECK-NEXT: %3 = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %2)
+; CHECK-NEXT: ret %3
+  %1 = tail call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
+  %2 = tail call @llvm.vector.insert.nxv8i16.v8i16( undef,
+    <8 x i16> , i64 0)
+  %3 = tail call @llvm.aarch64.sve.dupq.lane.nxv8i16( %2 , i64 0)
+  %4 = tail call @llvm.aarch64.sve.dup.x.nxv2i64(i64 0)
+  %5 = tail call @llvm.aarch64.sve.cmpne.wide.nxv8i16( %1, %3, %4)
+  ret %5
+}
+
+define @dupq_h_h() #0 {
+; CHECK-LABEL: @dupq_h_h(
+; CHECK: %1 = call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
+; CHECK-NEXT: ret %1
+  %1 = tail call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
+  %2 = tail call @llvm.vector.insert.nxv8i16.v8i16( undef,
+    <8 x i16> , i64 0)
+  %3 = tail call @llvm.aarch64.sve.dupq.lane.nxv8i16( %2 , i64 0)
+  %4 = tail call @llvm.aarch64.sve.dup.x.nxv2i64(i64 0)
+  %5 = tail call @llvm.aarch64.sve.cmpne.wide.nxv8i16( %1, %3, %4)
+  ret %5
+}
+
+; DUPQ b32
+
+define @dupq_w_0() #0 {
+; CHECK-LABEL: @dupq_w_0(
+; CHECK: ret zeroinitializer
+  %1 = tail call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+  %2 = tail call @llvm.vector.insert.nxv4i32.v4i32( undef,
+    <4 x i32> , i64 0)
+  %3 = tail call @llvm.aarch64.sve.dupq.lane.nxv4i32( %2 , i64 0)
+  %4 = tail call @llvm.aarch64.sve.dup.x.nxv2i64(i64 0)
+  %5 = tail call @llvm.aarch64.sve.cmpne.wide.nxv4i32( %1, %3, %4)
+  ret %5
+}
+
+define @dupq_w_d() #0 {
+; CHECK-LABEL: @dupq_w_d(
+; CHECK: %1 = call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+; CHECK-NEXT: %2 = call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( %1)
+; CHECK-NEXT: %3 = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %2)
+; CHECK-NEXT: ret %3
+  %1 = tail call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+  %2 = tail call @llvm.vector.insert.nxv4i32.v4i32( undef,
+    <4 x i32> , i64 0)
+  %3 = tail call @llvm.aarch64.sve.dupq.lane.nxv4i32( %2 , i64 0)
+  %4 = tail call @llvm.aarch64.sve.dup.x.nxv2i64(i64 0)
+  %5 = tail call @llvm.aarch64.sve.cmpne.wide.nxv4i32( %1, %3, %4)
+  ret %5
+}
+
+define @dupq_w_w() #0 {
+; CHECK-LABEL: @dupq_w_w(
+; CHECK: %1 = call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+; CHECK-NEXT: ret %1
+  %1 = tail call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+  %2 = tail call @llvm.vector.insert.nxv4i32.v4i32( undef,
+    <4 x i32> , i64 0)
+  %3 = tail call @llvm.aarch64.sve.dupq.lane.nxv4i32( %2 , i64 0)
+  %4 = tail call @llvm.aarch64.sve.dup.x.nxv2i64(i64 0)
+  %5 = tail call @llvm.aarch64.sve.cmpne.wide.nxv4i32( %1, %3, %4)
+  ret %5
+}
+
+; DUPQ b64
+
+define @dupq_d_0() #0 {
+; CHECK-LABEL: @dupq_d_0(
+; CHECK: ret zeroinitializer
+  %1 = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+  %2 = tail call @llvm.vector.insert.nxv2i64.v2i64( undef,
+    <2 x i64> , i64 0)
+  %3 = tail call @llvm.aarch64.sve.dupq.lane.nxv2i64( %2 , i64 0)
+  %4 = tail call @llvm.aarch64.sve.dup.x.nxv2i64(i64 0)
+  %5 = tail call @llvm.aarch64.sve.cmpne.nxv2i64( %1, %3, %4)
+  ret %5
+}
+
+define @dupq_d_d() #0 {
+; CHECK-LABEL: @dupq_d_d(
+; CHECK: %1 = call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+; CHECK-NEXT: ret %1
+  %1 = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+  %2 = tail call @llvm.vector.insert.nxv2i64.v2i64( undef,
+    <2 x i64> , i64 0)
+  %3 = tail call @llvm.aarch64.sve.dupq.lane.nxv2i64( %2 , i64 0)
+  %4 = tail call @llvm.aarch64.sve.dup.x.nxv2i64(i64 0)
+  %5 = tail call @llvm.aarch64.sve.cmpne.nxv2i64( %1, %3, %4)
+  ret %5
+}
+
+; Cases that cannot be converted
+
+define @dupq_neg1() #0 {
+; CHECK-LABEL: @dupq_neg1(
+; CHECK: cmpne
+; CHECK-NEXT: ret
+  %1 = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+  %2 = tail call @llvm.vector.insert.nxv2i64.v2i64( undef,
+    <2 x i64> , i64 0)
+  %3 = tail call @llvm.aarch64.sve.dupq.lane.nxv2i64( %2 , i64 0)
+  %4 = tail call @llvm.aarch64.sve.dup.x.nxv2i64(i64 0)
+  %5 = tail call @llvm.aarch64.sve.cmpne.nxv2i64( %1, %3, %4)
+  ret %5
+}
+
+define @dupq_neg2() #0 {
+; CHECK-LABEL: @dupq_neg2(
+; CHECK: cmpne
+; CHECK-NEXT: ret
+  %1 = tail call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+  %2 = tail call @llvm.vector.insert.nxv4i32.v4i32( undef,
+    <4 x i32> , i64 0)
+  %3 = tail call @llvm.aarch64.sve.dupq.lane.nxv4i32( %2 , i64 0)
+  %4 = tail call @llvm.aarch64.sve.dup.x.nxv2i64(i64 0)
+  %5 = tail call @llvm.aarch64.sve.cmpne.wide.nxv4i32( %1, %3, %4)
+  ret %5
+}
+
+define @dupq_neg3() #0 {
+; CHECK-LABEL: @dupq_neg3(
+; CHECK: cmpne
+; CHECK-NEXT: ret
+  %1 = tail call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+  %2 = tail call @llvm.vector.insert.nxv4i32.v4i32( undef,
+    <4 x i32> , i64 0)
+  %3 = tail call @llvm.aarch64.sve.dupq.lane.nxv4i32( %2 , i64 0)
+  %4 = tail call @llvm.aarch64.sve.dup.x.nxv2i64(i64 0)
+  %5 = tail call @llvm.aarch64.sve.cmpne.wide.nxv4i32( %1, %3, %4)
+  ret %5
+}
+
+define @dupq_neg4() #0 {
+; CHECK-LABEL: @dupq_neg4(
+; CHECK: cmpne
+; CHECK-NEXT: ret
+  %1 = tail call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+  %2 = tail call @llvm.vector.insert.nxv4i32.v4i32( undef,
+    <4 x i32> , i64 0)
+  %3 = tail call @llvm.aarch64.sve.dupq.lane.nxv4i32( %2 , i64 0)
+  %4 = tail call @llvm.aarch64.sve.dup.x.nxv2i64(i64 0)
+  %5 = tail call @llvm.aarch64.sve.cmpne.wide.nxv4i32( %1, %3, %4)
+  ret %5
+}
+
+define @dupq_neg5() #0 {
+; CHECK-LABEL: @dupq_neg5(
+; CHECK: cmpne
+; CHECK-NEXT: ret
+  %1 = tail call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+  %2 = tail call @llvm.vector.insert.nxv4i32.v4i32( undef,
+    <4 x i32> , i64 0)
+  %3 = tail call @llvm.aarch64.sve.dupq.lane.nxv4i32( %2 , i64 0)
+  %4 = tail call @llvm.aarch64.sve.dup.x.nxv2i64(i64 0)
+  %5 = tail call @llvm.aarch64.sve.cmpne.wide.nxv4i32( %1, %3, %4)
+  ret %5
+}
+
+define @dupq_neg6(i1 %a) #0 {
+; CHECK-LABEL: @dupq_neg6(
+; CHECK: cmpne
+; CHECK-NEXT: ret
+  %1 = tail call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+  %2 = zext i1 %a to i32
+  %3 = insertelement <4 x i32> , i32 %2, i32 3
+  %4 = tail call @llvm.vector.insert.nxv4i32.v4i32( undef, <4 x i32> %3, i64 0)
+  %5 = tail call @llvm.aarch64.sve.dupq.lane.nxv4i32( %4 , i64 0)
+  %6 = tail call @llvm.aarch64.sve.dup.x.nxv2i64(i64 0)
+  %7 = tail call @llvm.aarch64.sve.cmpne.wide.nxv4i32( %1, %5, %6)
+  ret %7
+}
+
+define @dupq_neg7() #0 {
+; CHECK-LABEL: @dupq_neg7(
+; CHECK: cmpne
+; CHECK-NEXT: ret
+  %1 = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+  %2 = tail call @llvm.vector.insert.nxv2i64.v2i64( undef,
+    <2 x i64> , i64 2)
+  %3 = tail call @llvm.aarch64.sve.dupq.lane.nxv2i64( %2 , i64 0)
+  %4 = tail call @llvm.aarch64.sve.dup.x.nxv2i64(i64 0)
+  %5 = tail call @llvm.aarch64.sve.cmpne.nxv2i64( %1, %3, %4)
+  ret %5
+}
+
+define @dupq_neg8() #0 {
+; CHECK-LABEL: @dupq_neg8(
+; CHECK: cmpne
+; CHECK-NEXT: ret
+  %1 = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+  %2 = tail call @llvm.vector.insert.nxv2i64.v2i64( undef,
+    <2 x i64> , i64 0)
+  %3 = tail call @llvm.aarch64.sve.dupq.lane.nxv2i64( %2 , i64 1)
+  %4 = tail call @llvm.aarch64.sve.dup.x.nxv2i64(i64 0)
+  %5 = tail call @llvm.aarch64.sve.cmpne.nxv2i64( %1, %3, %4)
+  ret %5
+}
+
+define @dupq_neg9( %x) #0 {
+; CHECK-LABEL: @dupq_neg9(
+; CHECK: cmpne
+; CHECK-NEXT: ret
+  %1 = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+  %2 = tail call @llvm.vector.insert.nxv2i64.v2i64( %x,
+    <2 x i64> , i64 0)
+  %3 = tail call @llvm.aarch64.sve.dupq.lane.nxv2i64( %2 , i64 0)
+  %4 = tail call @llvm.aarch64.sve.dup.x.nxv2i64(i64 0)
+  %5 = tail call @llvm.aarch64.sve.cmpne.nxv2i64( %1, %3, %4)
+  ret %5
+}
+
+define @dupq_neg10() #0 {
+; CHECK-LABEL: @dupq_neg10(
+; CHECK: cmpne
+; CHECK-NEXT: ret
+  %1 = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+  %2 = tail call @llvm.vector.insert.nxv2i64.v2i64( undef,
+    <2 x i64> , i64 0)
+  %3 = tail call @llvm.aarch64.sve.dupq.lane.nxv2i64( %2 , i64 0)
+  %4 = tail call @llvm.aarch64.sve.dup.x.nxv2i64(i64 1)
+  %5 = tail call @llvm.aarch64.sve.cmpne.nxv2i64( %1, %3, %4)
+  ret %5
+}
+
+define @dupq_neg11( %pg) #0 {
+; CHECK-LABEL: @dupq_neg11(
+; CHECK: cmpne
+; CHECK-NEXT: ret
+  %1 = tail call @llvm.vector.insert.nxv2i64.v2i64( undef,
+    <2 x i64> , i64 0)
+  %2 = tail call @llvm.aarch64.sve.dupq.lane.nxv2i64( %1 , i64 0)
+  %3 = tail call @llvm.aarch64.sve.dup.x.nxv2i64(i64 0)
+  %4 = tail call @llvm.aarch64.sve.cmpne.nxv2i64( %pg, %2, %3)
+  ret %4
+}
+
+define @dupq_neg12() #0 {
+; CHECK-LABEL: @dupq_neg12(
+; CHECK: cmpne
+; CHECK-NEXT: ret
+  %1 = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 15)
+  %2 = tail call @llvm.vector.insert.nxv2i64.v2i64( undef,
+    <2 x i64> , i64 0)
+  %3 = tail call @llvm.aarch64.sve.dupq.lane.nxv2i64( %2 , i64 0)
+  %4 = tail call @llvm.aarch64.sve.dup.x.nxv2i64(i64 0)
+  %5 = tail call @llvm.aarch64.sve.cmpne.nxv2i64( %1, %3, %4)
+  ret %5
+}
+
+define @dupq_neg13( %x) #0 {
+; CHECK-LABEL: @dupq_neg13(
+; CHECK: cmpne
+; CHECK-NEXT: ret
+  %1 = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+  %2 = tail call @llvm.vector.insert.nxv2i64.v2i64( undef,
+    <2 x i64> , i64 0)
+  %3 = tail call @llvm.aarch64.sve.dupq.lane.nxv2i64( %2 , i64 0)
+  %4 = tail call @llvm.aarch64.sve.cmpne.nxv2i64( %1, %3, %x)
+  ret %4
+}
+
+declare @llvm.aarch64.sve.ptrue.nxv16i1(i32)
+declare @llvm.aarch64.sve.ptrue.nxv8i1(i32)
+declare @llvm.aarch64.sve.ptrue.nxv4i1(i32)
+declare @llvm.aarch64.sve.ptrue.nxv2i1(i32)
+
+declare @llvm.vector.insert.nxv16i8.v16i8(, <16 x i8>, i64)
+declare @llvm.vector.insert.nxv8i16.v8i16(, <8 x i16>, i64)
+declare @llvm.vector.insert.nxv4i32.v4i32(, <4 x i32>, i64)
+declare @llvm.vector.insert.nxv2i64.v2i64(, <2 x i64>, i64)
+
+declare @llvm.aarch64.sve.dupq.lane.nxv16i8(, i64)
+declare @llvm.aarch64.sve.dupq.lane.nxv8i16(, i64)
+declare @llvm.aarch64.sve.dupq.lane.nxv4i32(, i64)
+declare @llvm.aarch64.sve.dupq.lane.nxv2i64(, i64)
+
+declare @llvm.aarch64.sve.cmpne.wide.nxv16i8(, , )
+declare @llvm.aarch64.sve.cmpne.wide.nxv8i16(, , )
+declare @llvm.aarch64.sve.cmpne.wide.nxv4i32(, , )
+declare @llvm.aarch64.sve.cmpne.nxv2i64(, , )
+
+declare @llvm.aarch64.sve.dup.x.nxv2i64(i64)
+
+attributes #0 = { "target-features"="+sve" }
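
Note (illustration only, not part of the patch): the sketch below shows the guarded-cast idiom the fix relies on, assuming the usual LLVM headers; isLaneZeroDupQ is a hypothetical helper name, not code from this patch. Operand 1 of dupq.lane is the lane index; when it is a runtime value (as in the dupq_b_idx test above) it is not a ConstantInt, so the old cast<ConstantInt> asserts, whereas dyn_cast<ConstantInt> returns nullptr and the combine can bail out with std::nullopt instead of crashing.

// Minimal sketch (assumes LLVM headers are available).
#include "llvm/IR/Constants.h"
#include "llvm/IR/IntrinsicInst.h"
#include <optional>

using namespace llvm;

// Returns whether the dupq.lane index is the constant zero, or std::nullopt
// when the index is not a compile-time constant (e.g. a function argument).
static std::optional<bool> isLaneZeroDupQ(const IntrinsicInst *DupQLane) {
  // cast<ConstantInt>(...) would assert here for a non-constant index;
  // dyn_cast<ConstantInt>(...) returns nullptr so the caller can give up
  // gracefully instead of crashing.
  auto *Idx = dyn_cast<ConstantInt>(DupQLane->getArgOperand(1));
  if (!Idx)
    return std::nullopt;
  return Idx->isZero();
}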