Skip to content

Commit

Permalink
Fix codegen of consteval functions returning an empty class, and related issues (#93115)
Browse files Browse the repository at this point in the history

Fix codegen of consteval functions returning an empty class, and related
issues

If a class is empty, don't store it to memory: the store might overwrite
useful data. Similarly, if a class has tail padding that might overlap
other fields, don't store the tail padding to memory.

The problem here turned out to be a bit more general than I initially thought:
basically all uses of EmitAggregateStore were broken. Call lowering had
a method that did mostly the right thing, though: CreateCoercedStore.
Adapt CreateCoercedStore so it always does the conservatively right
thing, and use it for both calls and ConstantExpr.

Also, along the way, fix the "overlap" bit in AggValueSlot: the bit was
set incorrectly for empty classes in some cases.

Fixes #93040.

(cherry picked from commit 1762e01)
  • Loading branch information
efriedma-quic authored and tru committed Sep 10, 2024
1 parent 0c64156 commit 8664666
Show file tree
Hide file tree
Showing 15 changed files with 320 additions and 287 deletions.
146 changes: 61 additions & 85 deletions clang/lib/CodeGen/CGCall.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1336,75 +1336,50 @@ static llvm::Value *CreateCoercedLoad(Address Src, llvm::Type *Ty,
return CGF.Builder.CreateLoad(Tmp);
}

// Store the first-class aggregate value Val into memory at Dest.
//
// When Val has an LLVM struct type it is written one top-level element at a
// time (extractvalue + struct GEP + store) instead of as a single aggregate
// store; we prefer storing the elements to be more friendly to fast-isel.
// A value of any other type is written with one plain store.
//
// DestIsVolatile is forwarded unchanged to every store that is emitted.
//
// Every element of the struct is stored unconditionally: this function takes
// no size argument that would limit how much of Dest is written.
//
// FIXME: Do we need to recurse here? Only the top level is split; an element
// that is itself an aggregate is stored as a whole.
void CodeGenFunction::EmitAggregateStore(llvm::Value *Val, Address Dest,
bool DestIsVolatile) {
// Prefer scalar stores to first-class aggregate stores.
if (llvm::StructType *STy = dyn_cast<llvm::StructType>(Val->getType())) {
// Walk the struct's top-level elements in index order.
// NOTE(review): Dest is indexed with Val's element numbers, so this assumes
// Dest's element type is the same struct type — confirm at call sites.
for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
// Address of element i inside the destination.
Address EltPtr = Builder.CreateStructGEP(Dest, i);
// Element i pulled out of the SSA aggregate.
llvm::Value *Elt = Builder.CreateExtractValue(Val, i);
Builder.CreateStore(Elt, EltPtr, DestIsVolatile);
}
} else {
// Not a struct: emit a single store of the whole value.
Builder.CreateStore(Val, Dest, DestIsVolatile);
}
}

/// CreateCoercedStore - Create a store to \arg DstPtr from \arg Src,
/// where the source and destination may have different types. The
/// destination is known to be aligned to \arg DstAlign bytes.
///
/// This safely handles the case when the src type is larger than the
/// destination type; the upper bits of the src will be lost.
static void CreateCoercedStore(llvm::Value *Src,
Address Dst,
bool DstIsVolatile,
CodeGenFunction &CGF) {
llvm::Type *SrcTy = Src->getType();
llvm::Type *DstTy = Dst.getElementType();
if (SrcTy == DstTy) {
CGF.Builder.CreateStore(Src, Dst, DstIsVolatile);
return;
}

llvm::TypeSize SrcSize = CGF.CGM.getDataLayout().getTypeAllocSize(SrcTy);

if (llvm::StructType *DstSTy = dyn_cast<llvm::StructType>(DstTy)) {
Dst = EnterStructPointerForCoercedAccess(Dst, DstSTy,
SrcSize.getFixedValue(), CGF);
DstTy = Dst.getElementType();
}

llvm::PointerType *SrcPtrTy = llvm::dyn_cast<llvm::PointerType>(SrcTy);
llvm::PointerType *DstPtrTy = llvm::dyn_cast<llvm::PointerType>(DstTy);
if (SrcPtrTy && DstPtrTy &&
SrcPtrTy->getAddressSpace() != DstPtrTy->getAddressSpace()) {
Src = CGF.Builder.CreateAddrSpaceCast(Src, DstTy);
CGF.Builder.CreateStore(Src, Dst, DstIsVolatile);
void CodeGenFunction::CreateCoercedStore(llvm::Value *Src, Address Dst,
llvm::TypeSize DstSize,
bool DstIsVolatile) {
if (!DstSize)
return;
}

// If the source and destination are integer or pointer types, just do an
// extension or truncation to the desired type.
if ((isa<llvm::IntegerType>(SrcTy) || isa<llvm::PointerType>(SrcTy)) &&
(isa<llvm::IntegerType>(DstTy) || isa<llvm::PointerType>(DstTy))) {
Src = CoerceIntOrPtrToIntOrPtr(Src, DstTy, CGF);
CGF.Builder.CreateStore(Src, Dst, DstIsVolatile);
return;
llvm::Type *SrcTy = Src->getType();
llvm::TypeSize SrcSize = CGM.getDataLayout().getTypeAllocSize(SrcTy);

// GEP into structs to try to make types match.
// FIXME: This isn't really that useful with opaque types, but it impacts a
// lot of regression tests.
if (SrcTy != Dst.getElementType()) {
if (llvm::StructType *DstSTy =
dyn_cast<llvm::StructType>(Dst.getElementType())) {
assert(!SrcSize.isScalable());
Dst = EnterStructPointerForCoercedAccess(Dst, DstSTy,
SrcSize.getFixedValue(), *this);
}
}

llvm::TypeSize DstSize = CGF.CGM.getDataLayout().getTypeAllocSize(DstTy);

// If store is legal, just bitcast the src pointer.
if (isa<llvm::ScalableVectorType>(SrcTy) ||
isa<llvm::ScalableVectorType>(DstTy) ||
SrcSize.getFixedValue() <= DstSize.getFixedValue()) {
Dst = Dst.withElementType(SrcTy);
CGF.EmitAggregateStore(Src, Dst, DstIsVolatile);
if (SrcSize.isScalable() || SrcSize <= DstSize) {
if (SrcTy->isIntegerTy() && Dst.getElementType()->isPointerTy() &&
SrcSize == CGM.getDataLayout().getTypeAllocSize(Dst.getElementType())) {
// If the value is supposed to be a pointer, convert it before storing it.
Src = CoerceIntOrPtrToIntOrPtr(Src, Dst.getElementType(), *this);
Builder.CreateStore(Src, Dst, DstIsVolatile);
} else if (llvm::StructType *STy =
dyn_cast<llvm::StructType>(Src->getType())) {
// Prefer scalar stores to first-class aggregate stores.
Dst = Dst.withElementType(SrcTy);
for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
Address EltPtr = Builder.CreateStructGEP(Dst, i);
llvm::Value *Elt = Builder.CreateExtractValue(Src, i);
Builder.CreateStore(Elt, EltPtr, DstIsVolatile);
}
} else {
Builder.CreateStore(Src, Dst.withElementType(SrcTy), DstIsVolatile);
}
} else if (SrcTy->isIntegerTy()) {
// If the source is a simple integer, coerce it directly.
llvm::Type *DstIntTy = Builder.getIntNTy(DstSize.getFixedValue() * 8);
Src = CoerceIntOrPtrToIntOrPtr(Src, DstIntTy, *this);
Builder.CreateStore(Src, Dst.withElementType(DstIntTy), DstIsVolatile);
} else {
// Otherwise do coercion through memory. This is stupid, but
// simple.
Expand All @@ -1416,12 +1391,12 @@ static void CreateCoercedStore(llvm::Value *Src,
// FIXME: Assert that we aren't truncating non-padding bits when we have
// access to that information.
RawAddress Tmp =
CreateTempAllocaForCoercion(CGF, SrcTy, Dst.getAlignment());
CGF.Builder.CreateStore(Src, Tmp);
CGF.Builder.CreateMemCpy(
Dst.emitRawPointer(CGF), Dst.getAlignment().getAsAlign(),
Tmp.getPointer(), Tmp.getAlignment().getAsAlign(),
llvm::ConstantInt::get(CGF.IntPtrTy, DstSize.getFixedValue()));
CreateTempAllocaForCoercion(*this, SrcTy, Dst.getAlignment());
Builder.CreateStore(Src, Tmp);
Builder.CreateMemCpy(Dst.emitRawPointer(*this),
Dst.getAlignment().getAsAlign(), Tmp.getPointer(),
Tmp.getAlignment().getAsAlign(),
Builder.CreateTypeSize(IntPtrTy, DstSize));
}
}

Expand Down Expand Up @@ -3309,7 +3284,12 @@ void CodeGenFunction::EmitFunctionProlog(const CGFunctionInfo &FI,
assert(NumIRArgs == 1);
auto AI = Fn->getArg(FirstIRArg);
AI->setName(Arg->getName() + ".coerce");
CreateCoercedStore(AI, Ptr, /*DstIsVolatile=*/false, *this);
CreateCoercedStore(
AI, Ptr,
llvm::TypeSize::getFixed(
getContext().getTypeSizeInChars(Ty).getQuantity() -
ArgI.getDirectOffset()),
/*DstIsVolatile=*/false);
}

// Match to what EmitParmDecl is expecting for this type.
Expand Down Expand Up @@ -5939,17 +5919,8 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
llvm::Value *Imag = Builder.CreateExtractValue(CI, 1);
return RValue::getComplex(std::make_pair(Real, Imag));
}
case TEK_Aggregate: {
Address DestPtr = ReturnValue.getAddress();
bool DestIsVolatile = ReturnValue.isVolatile();

if (!DestPtr.isValid()) {
DestPtr = CreateMemTemp(RetTy, "agg.tmp");
DestIsVolatile = false;
}
EmitAggregateStore(CI, DestPtr, DestIsVolatile);
return RValue::getAggregate(DestPtr);
}
case TEK_Aggregate:
break;
case TEK_Scalar: {
// If the argument doesn't match, perform a bitcast to coerce it.
// This can happen due to trivial type mismatches.
Expand All @@ -5959,7 +5930,6 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
return RValue::get(V);
}
}
llvm_unreachable("bad evaluation kind");
}

// If coercing a fixed vector from a scalable vector for ABI
Expand All @@ -5981,10 +5951,13 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,

Address DestPtr = ReturnValue.getValue();
bool DestIsVolatile = ReturnValue.isVolatile();
uint64_t DestSize =
getContext().getTypeInfoDataSizeInChars(RetTy).Width.getQuantity();

if (!DestPtr.isValid()) {
DestPtr = CreateMemTemp(RetTy, "coerce");
DestIsVolatile = false;
DestSize = getContext().getTypeSizeInChars(RetTy).getQuantity();
}

// An empty record can overlap other data (if declared with
Expand All @@ -5993,7 +5966,10 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
if (!isEmptyRecord(getContext(), RetTy, true)) {
// If the value is offset in memory, apply the offset now.
Address StorePtr = emitAddressAtOffset(*this, DestPtr, RetAI);
CreateCoercedStore(CI, StorePtr, DestIsVolatile, *this);
CreateCoercedStore(
CI, StorePtr,
llvm::TypeSize::getFixed(DestSize - RetAI.getDirectOffset()),
DestIsVolatile);
}

return convertTempToRValue(DestPtr, RetTy, SourceLocation());
Expand Down
23 changes: 14 additions & 9 deletions clang/lib/CodeGen/CGExprAgg.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -131,15 +131,12 @@ class AggExprEmitter : public StmtVisitor<AggExprEmitter> {
EnsureDest(E->getType());

if (llvm::Value *Result = ConstantEmitter(CGF).tryEmitConstantExpr(E)) {
Address StoreDest = Dest.getAddress();
// The emitted value is guaranteed to have the same size as the
// destination but can have a different type. Just do a bitcast in this
// case to avoid incorrect GEPs.
if (Result->getType() != StoreDest.getType())
StoreDest = StoreDest.withElementType(Result->getType());

CGF.EmitAggregateStore(Result, StoreDest,
E->getType().isVolatileQualified());
CGF.CreateCoercedStore(
Result, Dest.getAddress(),
llvm::TypeSize::getFixed(
Dest.getPreferredSize(CGF.getContext(), E->getType())
.getQuantity()),
E->getType().isVolatileQualified());
return;
}
return Visit(E->getSubExpr());
Expand Down Expand Up @@ -2050,6 +2047,10 @@ CodeGenFunction::getOverlapForFieldInit(const FieldDecl *FD) {
if (!FD->hasAttr<NoUniqueAddressAttr>() || !FD->getType()->isRecordType())
return AggValueSlot::DoesNotOverlap;

// Empty fields can overlap earlier fields.
if (FD->getType()->getAsCXXRecordDecl()->isEmpty())
return AggValueSlot::MayOverlap;

// If the field lies entirely within the enclosing class's nvsize, its tail
// padding cannot overlap any already-initialized object. (The only subobjects
// with greater addresses that might already be initialized are vbases.)
Expand All @@ -2072,6 +2073,10 @@ AggValueSlot::Overlap_t CodeGenFunction::getOverlapForBaseInit(
if (IsVirtual)
return AggValueSlot::MayOverlap;

// Empty bases can overlap earlier bases.
if (BaseRD->isEmpty())
return AggValueSlot::MayOverlap;

// If the base class is laid out entirely within the nvsize of the derived
// class, its tail padding cannot yet be initialized, so we can issue
// stores at the full width of the base class.
Expand Down
7 changes: 4 additions & 3 deletions clang/lib/CodeGen/CodeGenFunction.h
Original file line number Diff line number Diff line change
Expand Up @@ -4838,9 +4838,10 @@ class CodeGenFunction : public CodeGenTypeCache {
void EmitAggFinalDestCopy(QualType Type, AggValueSlot Dest, const LValue &Src,
ExprValueKind SrcKind);

/// Build all the stores needed to initialize an aggregate at Dest with the
/// value Val.
void EmitAggregateStore(llvm::Value *Val, Address Dest, bool DestIsVolatile);
/// Create a store to \arg Dst from \arg Src, truncating the stored value
/// to at most \arg DstSize bytes.
void CreateCoercedStore(llvm::Value *Src, Address Dst, llvm::TypeSize DstSize,
bool DstIsVolatile);

/// EmitExtendGCLifetime - Given a pointer to an Objective-C object,
/// make sure it survives garbage collection until this point.
Expand Down
43 changes: 27 additions & 16 deletions clang/test/CodeGen/arm-mve-intrinsics/vld24.c
Original file line number Diff line number Diff line change
Expand Up @@ -48,10 +48,13 @@ uint8x16x4_t test_vld4q_u8(const uint8_t *addr)

// CHECK-LABEL: @test_vst2q_u32(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[VALUE_COERCE_FCA_0_0_EXTRACT:%.*]] = extractvalue [[STRUCT_UINT32X4X2_T:%.*]] [[VALUE_COERCE:%.*]], 0, 0
// CHECK-NEXT: [[VALUE_COERCE_FCA_0_1_EXTRACT:%.*]] = extractvalue [[STRUCT_UINT32X4X2_T]] [[VALUE_COERCE]], 0, 1
// CHECK-NEXT: call void @llvm.arm.mve.vst2q.p0.v4i32(ptr [[ADDR:%.*]], <4 x i32> [[VALUE_COERCE_FCA_0_0_EXTRACT]], <4 x i32> [[VALUE_COERCE_FCA_0_1_EXTRACT]], i32 0)
// CHECK-NEXT: call void @llvm.arm.mve.vst2q.p0.v4i32(ptr [[ADDR]], <4 x i32> [[VALUE_COERCE_FCA_0_0_EXTRACT]], <4 x i32> [[VALUE_COERCE_FCA_0_1_EXTRACT]], i32 1)
// CHECK-NEXT: [[TMP0:%.*]] = extractvalue [[STRUCT_UINT32X4X2_T:%.*]] [[VALUE_COERCE:%.*]], 0
// CHECK-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [2 x <4 x i32>] [[TMP0]], 0
// CHECK-NEXT: [[DOTFCA_1_EXTRACT:%.*]] = extractvalue [2 x <4 x i32>] [[TMP0]], 1
// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_UINT32X4X2_T]] poison, <4 x i32> [[DOTFCA_0_EXTRACT]], 0, 0
// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_UINT32X4X2_T]] [[DOTFCA_0_0_INSERT]], <4 x i32> [[DOTFCA_1_EXTRACT]], 0, 1
// CHECK-NEXT: call void @llvm.arm.mve.vst2q.p0.v4i32(ptr [[ADDR:%.*]], <4 x i32> [[DOTFCA_0_EXTRACT]], <4 x i32> [[DOTFCA_1_EXTRACT]], i32 0)
// CHECK-NEXT: call void @llvm.arm.mve.vst2q.p0.v4i32(ptr [[ADDR]], <4 x i32> [[DOTFCA_0_EXTRACT]], <4 x i32> [[DOTFCA_1_EXTRACT]], i32 1)
// CHECK-NEXT: ret void
//
void test_vst2q_u32(uint32_t *addr, uint32x4x2_t value)
Expand All @@ -65,14 +68,19 @@ void test_vst2q_u32(uint32_t *addr, uint32x4x2_t value)

// CHECK-LABEL: @test_vst4q_s8(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[VALUE_COERCE_FCA_0_0_EXTRACT:%.*]] = extractvalue [[STRUCT_INT8X16X4_T:%.*]] [[VALUE_COERCE:%.*]], 0, 0
// CHECK-NEXT: [[VALUE_COERCE_FCA_0_1_EXTRACT:%.*]] = extractvalue [[STRUCT_INT8X16X4_T]] [[VALUE_COERCE]], 0, 1
// CHECK-NEXT: [[VALUE_COERCE_FCA_0_2_EXTRACT:%.*]] = extractvalue [[STRUCT_INT8X16X4_T]] [[VALUE_COERCE]], 0, 2
// CHECK-NEXT: [[VALUE_COERCE_FCA_0_3_EXTRACT:%.*]] = extractvalue [[STRUCT_INT8X16X4_T]] [[VALUE_COERCE]], 0, 3
// CHECK-NEXT: call void @llvm.arm.mve.vst4q.p0.v16i8(ptr [[ADDR:%.*]], <16 x i8> [[VALUE_COERCE_FCA_0_0_EXTRACT]], <16 x i8> [[VALUE_COERCE_FCA_0_1_EXTRACT]], <16 x i8> [[VALUE_COERCE_FCA_0_2_EXTRACT]], <16 x i8> [[VALUE_COERCE_FCA_0_3_EXTRACT]], i32 0)
// CHECK-NEXT: call void @llvm.arm.mve.vst4q.p0.v16i8(ptr [[ADDR]], <16 x i8> [[VALUE_COERCE_FCA_0_0_EXTRACT]], <16 x i8> [[VALUE_COERCE_FCA_0_1_EXTRACT]], <16 x i8> [[VALUE_COERCE_FCA_0_2_EXTRACT]], <16 x i8> [[VALUE_COERCE_FCA_0_3_EXTRACT]], i32 1)
// CHECK-NEXT: call void @llvm.arm.mve.vst4q.p0.v16i8(ptr [[ADDR]], <16 x i8> [[VALUE_COERCE_FCA_0_0_EXTRACT]], <16 x i8> [[VALUE_COERCE_FCA_0_1_EXTRACT]], <16 x i8> [[VALUE_COERCE_FCA_0_2_EXTRACT]], <16 x i8> [[VALUE_COERCE_FCA_0_3_EXTRACT]], i32 2)
// CHECK-NEXT: call void @llvm.arm.mve.vst4q.p0.v16i8(ptr [[ADDR]], <16 x i8> [[VALUE_COERCE_FCA_0_0_EXTRACT]], <16 x i8> [[VALUE_COERCE_FCA_0_1_EXTRACT]], <16 x i8> [[VALUE_COERCE_FCA_0_2_EXTRACT]], <16 x i8> [[VALUE_COERCE_FCA_0_3_EXTRACT]], i32 3)
// CHECK-NEXT: [[TMP0:%.*]] = extractvalue [[STRUCT_INT8X16X4_T:%.*]] [[VALUE_COERCE:%.*]], 0
// CHECK-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [4 x <16 x i8>] [[TMP0]], 0
// CHECK-NEXT: [[DOTFCA_1_EXTRACT:%.*]] = extractvalue [4 x <16 x i8>] [[TMP0]], 1
// CHECK-NEXT: [[DOTFCA_2_EXTRACT:%.*]] = extractvalue [4 x <16 x i8>] [[TMP0]], 2
// CHECK-NEXT: [[DOTFCA_3_EXTRACT:%.*]] = extractvalue [4 x <16 x i8>] [[TMP0]], 3
// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_INT8X16X4_T]] poison, <16 x i8> [[DOTFCA_0_EXTRACT]], 0, 0
// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_INT8X16X4_T]] [[DOTFCA_0_0_INSERT]], <16 x i8> [[DOTFCA_1_EXTRACT]], 0, 1
// CHECK-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_INT8X16X4_T]] [[DOTFCA_0_1_INSERT]], <16 x i8> [[DOTFCA_2_EXTRACT]], 0, 2
// CHECK-NEXT: [[DOTFCA_0_3_INSERT:%.*]] = insertvalue [[STRUCT_INT8X16X4_T]] [[DOTFCA_0_2_INSERT]], <16 x i8> [[DOTFCA_3_EXTRACT]], 0, 3
// CHECK-NEXT: call void @llvm.arm.mve.vst4q.p0.v16i8(ptr [[ADDR:%.*]], <16 x i8> [[DOTFCA_0_EXTRACT]], <16 x i8> [[DOTFCA_1_EXTRACT]], <16 x i8> [[DOTFCA_2_EXTRACT]], <16 x i8> [[DOTFCA_3_EXTRACT]], i32 0)
// CHECK-NEXT: call void @llvm.arm.mve.vst4q.p0.v16i8(ptr [[ADDR]], <16 x i8> [[DOTFCA_0_EXTRACT]], <16 x i8> [[DOTFCA_1_EXTRACT]], <16 x i8> [[DOTFCA_2_EXTRACT]], <16 x i8> [[DOTFCA_3_EXTRACT]], i32 1)
// CHECK-NEXT: call void @llvm.arm.mve.vst4q.p0.v16i8(ptr [[ADDR]], <16 x i8> [[DOTFCA_0_EXTRACT]], <16 x i8> [[DOTFCA_1_EXTRACT]], <16 x i8> [[DOTFCA_2_EXTRACT]], <16 x i8> [[DOTFCA_3_EXTRACT]], i32 2)
// CHECK-NEXT: call void @llvm.arm.mve.vst4q.p0.v16i8(ptr [[ADDR]], <16 x i8> [[DOTFCA_0_EXTRACT]], <16 x i8> [[DOTFCA_1_EXTRACT]], <16 x i8> [[DOTFCA_2_EXTRACT]], <16 x i8> [[DOTFCA_3_EXTRACT]], i32 3)
// CHECK-NEXT: ret void
//
void test_vst4q_s8(int8_t *addr, int8x16x4_t value)
Expand All @@ -86,10 +94,13 @@ void test_vst4q_s8(int8_t *addr, int8x16x4_t value)

// CHECK-LABEL: @test_vst2q_f16(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[VALUE_COERCE_FCA_0_0_EXTRACT:%.*]] = extractvalue [[STRUCT_FLOAT16X8X2_T:%.*]] [[VALUE_COERCE:%.*]], 0, 0
// CHECK-NEXT: [[VALUE_COERCE_FCA_0_1_EXTRACT:%.*]] = extractvalue [[STRUCT_FLOAT16X8X2_T]] [[VALUE_COERCE]], 0, 1
// CHECK-NEXT: call void @llvm.arm.mve.vst2q.p0.v8f16(ptr [[ADDR:%.*]], <8 x half> [[VALUE_COERCE_FCA_0_0_EXTRACT]], <8 x half> [[VALUE_COERCE_FCA_0_1_EXTRACT]], i32 0)
// CHECK-NEXT: call void @llvm.arm.mve.vst2q.p0.v8f16(ptr [[ADDR]], <8 x half> [[VALUE_COERCE_FCA_0_0_EXTRACT]], <8 x half> [[VALUE_COERCE_FCA_0_1_EXTRACT]], i32 1)
// CHECK-NEXT: [[TMP0:%.*]] = extractvalue [[STRUCT_FLOAT16X8X2_T:%.*]] [[VALUE_COERCE:%.*]], 0
// CHECK-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [2 x <8 x half>] [[TMP0]], 0
// CHECK-NEXT: [[DOTFCA_1_EXTRACT:%.*]] = extractvalue [2 x <8 x half>] [[TMP0]], 1
// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_FLOAT16X8X2_T]] poison, <8 x half> [[DOTFCA_0_EXTRACT]], 0, 0
// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_FLOAT16X8X2_T]] [[DOTFCA_0_0_INSERT]], <8 x half> [[DOTFCA_1_EXTRACT]], 0, 1
// CHECK-NEXT: call void @llvm.arm.mve.vst2q.p0.v8f16(ptr [[ADDR:%.*]], <8 x half> [[DOTFCA_0_EXTRACT]], <8 x half> [[DOTFCA_1_EXTRACT]], i32 0)
// CHECK-NEXT: call void @llvm.arm.mve.vst2q.p0.v8f16(ptr [[ADDR]], <8 x half> [[DOTFCA_0_EXTRACT]], <8 x half> [[DOTFCA_1_EXTRACT]], i32 1)
// CHECK-NEXT: ret void
//
void test_vst2q_f16(float16_t *addr, float16x8x2_t value)
Expand Down
Loading

0 comments on commit 8664666

Please sign in to comment.