diff --git a/clang/include/clang/Frontend/CompilerInstance.h b/clang/include/clang/Frontend/CompilerInstance.h index ba9bd439c4a39f..cb935becaef190 100644 --- a/clang/include/clang/Frontend/CompilerInstance.h +++ b/clang/include/clang/Frontend/CompilerInstance.h @@ -764,10 +764,7 @@ class CompilerInstance : public ModuleLoader { static bool InitializeSourceManager(const FrontendInputFile &Input, DiagnosticsEngine &Diags, FileManager &FileMgr, - SourceManager &SourceMgr, - HeaderSearch *HS, - DependencyOutputOptions &DepOpts, - const FrontendOptions &Opts); + SourceManager &SourceMgr); /// } diff --git a/clang/lib/Basic/Targets/AMDGPU.h b/clang/lib/Basic/Targets/AMDGPU.h index 387b91abb537d0..d0394492cad6dd 100644 --- a/clang/lib/Basic/Targets/AMDGPU.h +++ b/clang/lib/Basic/Targets/AMDGPU.h @@ -130,8 +130,26 @@ class LLVM_LIBRARY_VISIBILITY AMDGPUTargetInfo final : public TargetInfo { "exec_hi", "tma_lo", "tma_hi", "tba_lo", "tba_hi", }); + switch (*Name) { + case 'I': + Info.setRequiresImmediate(-16, 64); + return true; + case 'J': + Info.setRequiresImmediate(-32768, 32767); + return true; + case 'A': + case 'B': + case 'C': + Info.setRequiresImmediate(); + return true; + default: + break; + } + StringRef S(Name); - if (S == "A") { + + if (S == "DA" || S == "DB") { + Name++; Info.setRequiresImmediate(); return true; } @@ -203,6 +221,12 @@ class LLVM_LIBRARY_VISIBILITY AMDGPUTargetInfo final : public TargetInfo { // the constraint. In practice, it won't be changed unless the // constraint is longer than one character. std::string convertConstraint(const char *&Constraint) const override { + + StringRef S(Constraint); + if (S == "DA" || S == "DB") { + return std::string("^") + std::string(Constraint++, 2); + } + const char *Begin = Constraint; TargetInfo::ConstraintInfo Info("", ""); if (validateAsmConstraint(Constraint, Info)) diff --git a/clang/lib/Frontend/CompilerInstance.cpp b/clang/lib/Frontend/CompilerInstance.cpp index 0140a756b7dde8..9dc9c42297eda1 100644 --- a/clang/lib/Frontend/CompilerInstance.cpp +++ b/clang/lib/Frontend/CompilerInstance.cpp @@ -815,17 +815,15 @@ std::unique_ptr CompilerInstance::createOutputFile( // Initialization Utilities bool CompilerInstance::InitializeSourceManager(const FrontendInputFile &Input){ - return InitializeSourceManager( - Input, getDiagnostics(), getFileManager(), getSourceManager(), - hasPreprocessor() ? &getPreprocessor().getHeaderSearchInfo() : nullptr, - getDependencyOutputOpts(), getFrontendOpts()); + return InitializeSourceManager(Input, getDiagnostics(), getFileManager(), + getSourceManager()); } // static -bool CompilerInstance::InitializeSourceManager( - const FrontendInputFile &Input, DiagnosticsEngine &Diags, - FileManager &FileMgr, SourceManager &SourceMgr, HeaderSearch *HS, - DependencyOutputOptions &DepOpts, const FrontendOptions &Opts) { +bool CompilerInstance::InitializeSourceManager(const FrontendInputFile &Input, + DiagnosticsEngine &Diags, + FileManager &FileMgr, + SourceManager &SourceMgr) { SrcMgr::CharacteristicKind Kind = Input.getKind().getFormat() == InputKind::ModuleMap ? Input.isSystem() ? SrcMgr::C_System_ModuleMap diff --git a/clang/test/CodeGenOpenCL/inline-asm-amdgcn.cl b/clang/test/CodeGenOpenCL/inline-asm-amdgcn.cl index 37090772f66461..259c12384f2c8d 100644 --- a/clang/test/CodeGenOpenCL/inline-asm-amdgcn.cl +++ b/clang/test/CodeGenOpenCL/inline-asm-amdgcn.cl @@ -33,3 +33,17 @@ kernel void test_agpr() { : "={a1}"(reg_a) : "{a1}"(reg_b)); } + +kernel void test_constraint_DA() { + const long x = 0x200000001; + int res; + // CHECK: call i32 asm sideeffect "v_mov_b32 $0, $1 & 0xFFFFFFFF", "=v,^DA"(i64 8589934593) + __asm volatile("v_mov_b32 %0, %1 & 0xFFFFFFFF" : "=v"(res) : "DA"(x)); +} + +kernel void test_constraint_DB() { + const long x = 0x200000001; + int res; + // CHECK: call i32 asm sideeffect "v_mov_b32 $0, $1 & 0xFFFFFFFF", "=v,^DB"(i64 8589934593) + __asm volatile("v_mov_b32 %0, %1 & 0xFFFFFFFF" : "=v"(res) : "DB"(x)); +} diff --git a/clang/test/Sema/inline-asm-validate-amdgpu.cl b/clang/test/Sema/inline-asm-validate-amdgpu.cl index 3d6488227ef299..418952c0e7272b 100644 --- a/clang/test/Sema/inline-asm-validate-amdgpu.cl +++ b/clang/test/Sema/inline-asm-validate-amdgpu.cl @@ -18,9 +18,35 @@ kernel void test () { // vgpr constraints __asm__ ("v_mov_b32 %0, %1" : "=v" (vgpr) : "v" (imm) : ); - // 'A' constraint + // 'I' constraint (an immediate integer in the range -16 to 64) + __asm__ ("s_mov_b32 %0, %1" : "=s" (sgpr) : "I" (imm) : ); + __asm__ ("s_mov_b32 %0, %1" : "=s" (sgpr) : "I" (-16) : ); + __asm__ ("s_mov_b32 %0, %1" : "=s" (sgpr) : "I" (64) : ); + __asm__ ("s_mov_b32 %0, %1" : "=s" (sgpr) : "I" (-17) : ); // expected-error {{value '-17' out of range for constraint 'I'}} + __asm__ ("s_mov_b32 %0, %1" : "=s" (sgpr) : "I" (65) : ); // expected-error {{value '65' out of range for constraint 'I'}} + + // 'J' constraint (an immediate 16-bit signed integer) + __asm__ ("s_mov_b32 %0, %1" : "=s" (sgpr) : "J" (imm) : ); + __asm__ ("s_mov_b32 %0, %1" : "=s" (sgpr) : "J" (-32768) : ); + __asm__ ("s_mov_b32 %0, %1" : "=s" (sgpr) : "J" (32767) : ); + __asm__ ("s_mov_b32 %0, %1" : "=s" (sgpr) : "J" (-32769) : ); // expected-error {{value '-32769' out of range for constraint 'J'}} + __asm__ ("s_mov_b32 %0, %1" : "=s" (sgpr) : "J" (32768) : ); // expected-error {{value '32768' out of range for constraint 'J'}} + + // 'A' constraint (an immediate constant that can be inlined) __asm__ ("s_mov_b32 %0, %1" : "=s" (sgpr) : "A" (imm) : ); + // 'B' constraint (an immediate 32-bit signed integer) + __asm__ ("s_mov_b32 %0, %1" : "=s" (sgpr) : "B" (imm) : ); + + // 'C' constraint (an immediate 32-bit unsigned integer or 'A' constraint) + __asm__ ("s_mov_b32 %0, %1" : "=s" (sgpr) : "C" (imm) : ); + + // 'DA' constraint (an immediate 64-bit constant that can be split into two 'A' constants) + __asm__ ("s_mov_b32 %0, %1" : "=s" (sgpr) : "DA" (imm) : ); + + // 'DB' constraint (an immediate 64-bit constant that can be split into two 'B' constants) + __asm__ ("s_mov_b32 %0, %1" : "=s" (sgpr) : "DB" (imm) : ); + } __kernel void diff --git a/flang/runtime/file.cpp b/flang/runtime/file.cpp index 677b9675f47aa7..fdd7ea64c20185 100644 --- a/flang/runtime/file.cpp +++ b/flang/runtime/file.cpp @@ -352,7 +352,7 @@ bool OpenFile::RawSeekToEnd() { int OpenFile::PendingResult(const Terminator &terminator, int iostat) { int id{nextId_++}; - pending_.reset(&New{}(terminator, id, iostat, std::move(pending_))); + pending_ = New{terminator}(id, iostat, std::move(pending_)); return id; } } // namespace Fortran::runtime::io diff --git a/flang/runtime/io-api.cpp b/flang/runtime/io-api.cpp index db228e5bbf21c7..bbb7eb2863c613 100644 --- a/flang/runtime/io-api.cpp +++ b/flang/runtime/io-api.cpp @@ -28,9 +28,10 @@ Cookie BeginInternalArrayListIO(const Descriptor &descriptor, void ** /*scratchArea*/, std::size_t /*scratchBytes*/, const char *sourceFile, int sourceLine) { Terminator oom{sourceFile, sourceLine}; - return &New>{}( - oom, descriptor, sourceFile, sourceLine) - .ioStatementState(); + return &New>{oom}( + descriptor, sourceFile, sourceLine) + .release() + ->ioStatementState(); } Cookie IONAME(BeginInternalArrayListOutput)(const Descriptor &descriptor, @@ -52,9 +53,10 @@ Cookie BeginInternalArrayFormattedIO(const Descriptor &descriptor, const char *format, std::size_t formatLength, void ** /*scratchArea*/, std::size_t /*scratchBytes*/, const char *sourceFile, int sourceLine) { Terminator oom{sourceFile, sourceLine}; - return &New>{}( - oom, descriptor, format, formatLength, sourceFile, sourceLine) - .ioStatementState(); + return &New>{oom}( + descriptor, format, formatLength, sourceFile, sourceLine) + .release() + ->ioStatementState(); } Cookie IONAME(BeginInternalArrayFormattedOutput)(const Descriptor &descriptor, @@ -78,9 +80,10 @@ Cookie BeginInternalFormattedIO( void ** /*scratchArea*/, std::size_t /*scratchBytes*/, const char *sourceFile, int sourceLine) { Terminator oom{sourceFile, sourceLine}; - return &New>{}(oom, internal, - internalLength, format, formatLength, sourceFile, sourceLine) - .ioStatementState(); + return &New>{oom}( + internal, internalLength, format, formatLength, sourceFile, sourceLine) + .release() + ->ioStatementState(); } Cookie IONAME(BeginInternalFormattedOutput)(char *internal, @@ -234,8 +237,9 @@ Cookie IONAME(BeginClose)( } else { // CLOSE(UNIT=bad unit) is just a no-op Terminator oom{sourceFile, sourceLine}; - return &New{}(oom, sourceFile, sourceLine) - .ioStatementState(); + return &New{oom}(sourceFile, sourceLine) + .release() + ->ioStatementState(); } } diff --git a/flang/runtime/memory.h b/flang/runtime/memory.h index b8e84952a99f44..f21b237f3905b7 100644 --- a/flang/runtime/memory.h +++ b/flang/runtime/memory.h @@ -32,20 +32,32 @@ template void FreeMemoryAndNullify(A *&p) { p = nullptr; } -template struct New { - template - [[nodiscard]] A &operator()(const Terminator &terminator, X &&... x) { - return *new (AllocateMemoryOrCrash(terminator, sizeof(A))) - A{std::forward(x)...}; - } -}; - template struct OwningPtrDeleter { void operator()(A *p) { FreeMemory(p); } }; template using OwningPtr = std::unique_ptr>; +template class SizedNew { +public: + explicit SizedNew(const Terminator &terminator) : terminator_{terminator} {} + template + [[nodiscard]] OwningPtr operator()(std::size_t bytes, X &&... x) { + return OwningPtr{new (AllocateMemoryOrCrash(terminator_, bytes)) + A{std::forward(x)...}}; + } + +private: + const Terminator &terminator_; +}; + +template struct New : public SizedNew { + using SizedNew::SizedNew; + template [[nodiscard]] OwningPtr operator()(X &&... x) { + return SizedNew::operator()(sizeof(A), std::forward(x)...); + } +}; + template struct Allocator { using value_type = A; explicit Allocator(const Terminator &t) : terminator{t} {} diff --git a/flang/runtime/unit-map.cpp b/flang/runtime/unit-map.cpp index 4e58cf590c2aca..5cbbf059d5f882 100644 --- a/flang/runtime/unit-map.cpp +++ b/flang/runtime/unit-map.cpp @@ -64,7 +64,7 @@ void UnitMap::CloseAll(IoErrorHandler &handler) { } ExternalFileUnit &UnitMap::Create(int n, const Terminator &terminator) { - Chain &chain{New{}(terminator, n)}; + Chain &chain{*New{terminator}(n).release()}; chain.next.reset(&chain); bucket_[Hash(n)].swap(chain.next); // pushes new node as list head return chain.unit; diff --git a/flang/runtime/unit.cpp b/flang/runtime/unit.cpp index 81035ab6fcde83..2eee142081a526 100644 --- a/flang/runtime/unit.cpp +++ b/flang/runtime/unit.cpp @@ -95,7 +95,7 @@ UnitMap &ExternalFileUnit::GetUnitMap() { return *unitMap; } Terminator terminator{__FILE__, __LINE__}; - unitMap = &New{}(terminator); + unitMap = New{terminator}().release(); ExternalFileUnit &out{ExternalFileUnit::LookUpOrCreate(6, terminator)}; out.Predefine(1); out.set_mayRead(false); diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp index 4bf5796ed59d68..0bb69eb91362b4 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp @@ -2741,31 +2741,21 @@ void DWARFASTParserClang::ParseSingleMember( if (TypeSystemClang::IsCXXClassType(member_clang_type) && !member_clang_type.GetCompleteType()) { - if (die.GetCU()->GetProducer() == eProducerClang) - module_sp->ReportError( - "DWARF DIE at 0x%8.8x (class %s) has a member variable " - "0x%8.8x (%s) whose type is a forward declaration, not a " - "complete definition.\nTry compiling the source file " - "with -fstandalone-debug", - parent_die.GetOffset(), parent_die.GetName(), die.GetOffset(), - name); - else - module_sp->ReportError( - "DWARF DIE at 0x%8.8x (class %s) has a member variable " - "0x%8.8x (%s) whose type is a forward declaration, not a " - "complete definition.\nPlease file a bug against the " - "compiler and include the preprocessed output for %s", - parent_die.GetOffset(), parent_die.GetName(), die.GetOffset(), - name, GetUnitName(parent_die).c_str()); - // We have no choice other than to pretend that the member - // class is complete. If we don't do this, clang will crash - // when trying to layout the class. Since we provide layout - // assistance, all ivars in this class and other classes will - // be fine, this is the best we can do short of crashing. + // Mark the class as complete, ut we make a note of the fact that + // this class is not _really_ complete so we can later search for a + // definition in a different module. + // Since we provide layout assistance, all ivars in this class and + // other classes will be fine even if we are not able to find the + // definition elsewhere. if (TypeSystemClang::StartTagDeclarationDefinition( member_clang_type)) { TypeSystemClang::CompleteTagDeclarationDefinition( member_clang_type); + const auto *td = TypeSystemClang::GetQualType( + member_clang_type.GetOpaqueQualType()) + .getTypePtr() + ->getAsTagDecl(); + m_ast.GetMetadata(td)->SetIsForcefullyCompleted(); } else { module_sp->ReportError( "DWARF DIE at 0x%8.8x (class %s) has a member variable " diff --git a/lldb/source/Utility/Scalar.cpp b/lldb/source/Utility/Scalar.cpp index 7bd42f3b953550..c36ccab21a39fb 100644 --- a/lldb/source/Utility/Scalar.cpp +++ b/lldb/source/Utility/Scalar.cpp @@ -23,6 +23,60 @@ using namespace lldb; using namespace lldb_private; +namespace { +enum class Category { Void, Integral, Float }; +} + +static Category GetCategory(Scalar::Type type) { + switch (type) { + case Scalar::e_void: + return Category::Void; + case Scalar::e_float: + case Scalar::e_double: + case Scalar::e_long_double: + return Category::Float; + case Scalar::e_sint: + case Scalar::e_slong: + case Scalar::e_slonglong: + case Scalar::e_sint128: + case Scalar::e_sint256: + case Scalar::e_sint512: + case Scalar::e_uint: + case Scalar::e_ulong: + case Scalar::e_ulonglong: + case Scalar::e_uint128: + case Scalar::e_uint256: + case Scalar::e_uint512: + return Category::Integral; + } + llvm_unreachable("Unhandled type!"); +} + +static bool IsSigned(Scalar::Type type) { + switch (type) { + case Scalar::e_void: + case Scalar::e_uint: + case Scalar::e_ulong: + case Scalar::e_ulonglong: + case Scalar::e_uint128: + case Scalar::e_uint256: + case Scalar::e_uint512: + return false; + case Scalar::e_sint: + case Scalar::e_slong: + case Scalar::e_slonglong: + case Scalar::e_sint128: + case Scalar::e_sint256: + case Scalar::e_sint512: + case Scalar::e_float: + case Scalar::e_double: + case Scalar::e_long_double: + return true; + } + llvm_unreachable("Unhandled type!"); +} + + // Promote to max type currently follows the ANSI C rule for type promotion in // expressions. static Scalar::Type PromoteToMaxType( @@ -103,40 +157,19 @@ bool Scalar::GetData(DataExtractor &data, size_t limit_byte_size) const { void Scalar::GetBytes(llvm::MutableArrayRef storage) const { assert(storage.size() >= GetByteSize()); - switch (m_type) { - case e_void: - break; - case e_sint: - case e_uint: - case e_slong: - case e_ulong: - case e_slonglong: - case e_ulonglong: - case e_sint128: - case e_uint128: - case e_sint256: - case e_uint256: - case e_sint512: - case e_uint512: - StoreIntToMemory(m_integer, storage.data(), - (m_integer.getBitWidth() + 7) / 8); - break; - case e_float: { - float val = m_float.convertToFloat(); - memcpy(storage.data(), &val, sizeof(val)); + const auto &store = [&](const llvm::APInt val) { + StoreIntToMemory(val, storage.data(), (val.getBitWidth() + 7) / 8); + }; + switch (GetCategory(m_type)) { + case Category::Void: break; - } - case e_double: { - double val = m_float.convertToDouble(); - memcpy(storage.data(), &val, sizeof(double)); + case Category::Integral: + store(m_integer); break; - } - case e_long_double: { - llvm::APInt val = m_float.bitcastToAPInt(); - StoreIntToMemory(val, storage.data(), storage.size()); + case Category::Float: + store(m_float.bitcastToAPInt()); break; } - } } size_t Scalar::GetByteSize() const { @@ -167,26 +200,12 @@ size_t Scalar::GetByteSize() const { } bool Scalar::IsZero() const { - llvm::APInt zero_int = llvm::APInt::getNullValue(m_integer.getBitWidth() / 8); - switch (m_type) { - case e_void: + switch (GetCategory(m_type)) { + case Category::Void: break; - case e_sint: - case e_uint: - case e_slong: - case e_ulong: - case e_slonglong: - case e_ulonglong: - case e_sint128: - case e_uint128: - case e_sint256: - case e_uint256: - case e_uint512: - case e_sint512: - return llvm::APInt::isSameValue(zero_int, m_integer); - case e_float: - case e_double: - case e_long_double: + case Category::Integral: + return m_integer.isNullValue(); + case Category::Float: return m_float.isZero(); } return false; @@ -196,31 +215,16 @@ void Scalar::GetValue(Stream *s, bool show_type) const { if (show_type) s->Printf("(%s) ", GetTypeAsCString()); - switch (m_type) { - case e_void: - break; - case e_sint: - case e_slong: - case e_slonglong: - case e_sint128: - case e_sint256: - case e_sint512: - s->PutCString(m_integer.toString(10, true)); + switch (GetCategory(m_type)) { + case Category::Void: break; - case e_uint: - case e_ulong: - case e_ulonglong: - case e_uint128: - case e_uint256: - case e_uint512: - s->PutCString(m_integer.toString(10, false)); + case Category::Integral: + s->PutCString(m_integer.toString(10, IsSigned(m_type))); break; - case e_float: - case e_double: - case e_long_double: + case Category::Float: llvm::SmallString<24> string; m_float.toString(string); - s->Printf("%s", string.c_str()); + s->PutCString(string); break; } } @@ -325,59 +329,6 @@ static size_t GetBitSize(Scalar::Type type) { llvm_unreachable("Unhandled type!"); } -static bool IsSigned(Scalar::Type type) { - switch (type) { - case Scalar::e_void: - case Scalar::e_uint: - case Scalar::e_ulong: - case Scalar::e_ulonglong: - case Scalar::e_uint128: - case Scalar::e_uint256: - case Scalar::e_uint512: - return false; - case Scalar::e_sint: - case Scalar::e_slong: - case Scalar::e_slonglong: - case Scalar::e_sint128: - case Scalar::e_sint256: - case Scalar::e_sint512: - case Scalar::e_float: - case Scalar::e_double: - case Scalar::e_long_double: - return true; - } - llvm_unreachable("Unhandled type!"); -} - -namespace { -enum class Category { Void, Integral, Float }; -} - -static Category GetCategory(Scalar::Type type) { - switch (type) { - case Scalar::e_void: - return Category::Void; - case Scalar::e_float: - case Scalar::e_double: - case Scalar::e_long_double: - return Category::Float; - case Scalar::e_sint: - case Scalar::e_slong: - case Scalar::e_slonglong: - case Scalar::e_sint128: - case Scalar::e_sint256: - case Scalar::e_sint512: - case Scalar::e_uint: - case Scalar::e_ulong: - case Scalar::e_ulonglong: - case Scalar::e_uint128: - case Scalar::e_uint256: - case Scalar::e_uint512: - return Category::Integral; - } - llvm_unreachable("Unhandled type!"); -} - static const llvm::fltSemantics &GetFltSemantics(Scalar::Type type) { switch (type) { case Scalar::e_void: @@ -849,27 +800,14 @@ Scalar &Scalar::operator+=(const Scalar &rhs) { const Scalar *b; if ((m_type = PromoteToMaxType(*this, rhs, temp_value, a, b)) != Scalar::e_void) { - switch (m_type) { - case e_void: + switch (GetCategory(m_type)) { + case Category::Void: break; - case e_sint: - case e_uint: - case e_slong: - case e_ulong: - case e_slonglong: - case e_ulonglong: - case e_sint128: - case e_uint128: - case e_sint256: - case e_uint256: - case e_sint512: - case e_uint512: + case Category::Integral: m_integer = a->m_integer + b->m_integer; break; - case e_float: - case e_double: - case e_long_double: + case Category::Float: m_float = a->m_float + b->m_float; break; } @@ -878,54 +816,25 @@ Scalar &Scalar::operator+=(const Scalar &rhs) { } Scalar &Scalar::operator<<=(const Scalar &rhs) { - switch (m_type) { - case e_void: - case e_float: - case e_double: - case e_long_double: + if (GetCategory(m_type) == Category::Integral && + GetCategory(rhs.m_type) == Category::Integral) + m_integer <<= rhs.m_integer; + else m_type = e_void; - break; - - case e_sint: - case e_uint: - case e_slong: - case e_ulong: - case e_slonglong: - case e_ulonglong: - case e_sint128: - case e_uint128: - case e_sint256: - case e_uint256: - case e_sint512: - case e_uint512: - switch (rhs.m_type) { - case e_void: - case e_float: - case e_double: - case e_long_double: - m_type = e_void; - break; - case e_sint: - case e_uint: - case e_slong: - case e_ulong: - case e_slonglong: - case e_ulonglong: - case e_sint128: - case e_uint128: - case e_sint256: - case e_uint256: - case e_sint512: - case e_uint512: - m_integer = m_integer << rhs.m_integer; - break; - } - break; - } return *this; } bool Scalar::ShiftRightLogical(const Scalar &rhs) { + if (GetCategory(m_type) == Category::Integral && + GetCategory(rhs.m_type) == Category::Integral) { + m_integer = m_integer.lshr(rhs.m_integer); + return true; + } + m_type = e_void; + return false; +} + +Scalar &Scalar::operator>>=(const Scalar &rhs) { switch (m_type) { case e_void: case e_float: @@ -965,111 +874,24 @@ bool Scalar::ShiftRightLogical(const Scalar &rhs) { case e_uint256: case e_sint512: case e_uint512: - m_integer = m_integer.lshr(rhs.m_integer); + m_integer = m_integer.ashr(rhs.m_integer); break; } break; } - return m_type != e_void; + return *this; } -Scalar &Scalar::operator>>=(const Scalar &rhs) { - switch (m_type) { - case e_void: - case e_float: - case e_double: - case e_long_double: - m_type = e_void; - break; - - case e_sint: - case e_uint: - case e_slong: - case e_ulong: - case e_slonglong: - case e_ulonglong: - case e_sint128: - case e_uint128: - case e_sint256: - case e_uint256: - case e_sint512: - case e_uint512: - switch (rhs.m_type) { - case e_void: - case e_float: - case e_double: - case e_long_double: - m_type = e_void; - break; - case e_sint: - case e_uint: - case e_slong: - case e_ulong: - case e_slonglong: - case e_ulonglong: - case e_sint128: - case e_uint128: - case e_sint256: - case e_uint256: - case e_sint512: - case e_uint512: - m_integer = m_integer.ashr(rhs.m_integer); - break; - } - break; - } - return *this; -} - -Scalar &Scalar::operator&=(const Scalar &rhs) { - switch (m_type) { - case e_void: - case e_float: - case e_double: - case e_long_double: - m_type = e_void; - break; - - case e_sint: - case e_uint: - case e_slong: - case e_ulong: - case e_slonglong: - case e_ulonglong: - case e_sint128: - case e_uint128: - case e_sint256: - case e_uint256: - case e_sint512: - case e_uint512: - switch (rhs.m_type) { - case e_void: - case e_float: - case e_double: - case e_long_double: - m_type = e_void; - break; - case e_sint: - case e_uint: - case e_slong: - case e_ulong: - case e_slonglong: - case e_ulonglong: - case e_sint128: - case e_uint128: - case e_sint256: - case e_uint256: - case e_sint512: - case e_uint512: - m_integer &= rhs.m_integer; - break; - } - break; - } - return *this; -} - -bool Scalar::AbsoluteValue() { +Scalar &Scalar::operator&=(const Scalar &rhs) { + if (GetCategory(m_type) == Category::Integral && + GetCategory(rhs.m_type) == Category::Integral) + m_integer &= rhs.m_integer; + else + m_type = e_void; + return *this; +} + +bool Scalar::AbsoluteValue() { switch (m_type) { case e_void: break; @@ -1101,26 +923,13 @@ bool Scalar::AbsoluteValue() { } bool Scalar::UnaryNegate() { - switch (m_type) { - case e_void: + switch (GetCategory(m_type)) { + case Category::Void: break; - case e_sint: - case e_uint: - case e_slong: - case e_ulong: - case e_slonglong: - case e_ulonglong: - case e_sint128: - case e_uint128: - case e_sint256: - case e_uint256: - case e_sint512: - case e_uint512: + case Category::Integral: m_integer = -m_integer; return true; - case e_float: - case e_double: - case e_long_double: + case Category::Float: m_float.changeSign(); return true; } @@ -1128,62 +937,17 @@ bool Scalar::UnaryNegate() { } bool Scalar::OnesComplement() { - switch (m_type) { - case e_sint: - case e_uint: - case e_slong: - case e_ulong: - case e_slonglong: - case e_ulonglong: - case e_sint128: - case e_uint128: - case e_sint256: - case e_uint256: - case e_sint512: - case e_uint512: + if (GetCategory(m_type) == Category::Integral) { m_integer = ~m_integer; return true; - - case e_void: - case e_float: - case e_double: - case e_long_double: - break; } + return false; } const Scalar lldb_private::operator+(const Scalar &lhs, const Scalar &rhs) { - Scalar result; - Scalar temp_value; - const Scalar *a; - const Scalar *b; - if ((result.m_type = PromoteToMaxType(lhs, rhs, temp_value, a, b)) != - Scalar::e_void) { - switch (result.m_type) { - case Scalar::e_void: - break; - case Scalar::e_sint: - case Scalar::e_uint: - case Scalar::e_slong: - case Scalar::e_ulong: - case Scalar::e_slonglong: - case Scalar::e_ulonglong: - case Scalar::e_sint128: - case Scalar::e_uint128: - case Scalar::e_sint256: - case Scalar::e_uint256: - case Scalar::e_sint512: - case Scalar::e_uint512: - result.m_integer = a->m_integer + b->m_integer; - break; - case Scalar::e_float: - case Scalar::e_double: - case Scalar::e_long_double: - result.m_float = a->m_float + b->m_float; - break; - } - } + Scalar result = lhs; + result += rhs; return result; } @@ -1194,26 +958,13 @@ const Scalar lldb_private::operator-(const Scalar &lhs, const Scalar &rhs) { const Scalar *b; if ((result.m_type = PromoteToMaxType(lhs, rhs, temp_value, a, b)) != Scalar::e_void) { - switch (result.m_type) { - case Scalar::e_void: + switch (GetCategory(result.m_type)) { + case Category::Void: break; - case Scalar::e_sint: - case Scalar::e_uint: - case Scalar::e_slong: - case Scalar::e_ulong: - case Scalar::e_slonglong: - case Scalar::e_ulonglong: - case Scalar::e_sint128: - case Scalar::e_uint128: - case Scalar::e_sint256: - case Scalar::e_uint256: - case Scalar::e_sint512: - case Scalar::e_uint512: + case Category::Integral: result.m_integer = a->m_integer - b->m_integer; break; - case Scalar::e_float: - case Scalar::e_double: - case Scalar::e_long_double: + case Category::Float: result.m_float = a->m_float - b->m_float; break; } @@ -1227,40 +978,20 @@ const Scalar lldb_private::operator/(const Scalar &lhs, const Scalar &rhs) { const Scalar *a; const Scalar *b; if ((result.m_type = PromoteToMaxType(lhs, rhs, temp_value, a, b)) != - Scalar::e_void) { - switch (result.m_type) { - case Scalar::e_void: + Scalar::e_void && + !b->IsZero()) { + switch (GetCategory(result.m_type)) { + case Category::Void: break; - case Scalar::e_sint: - case Scalar::e_slong: - case Scalar::e_slonglong: - case Scalar::e_sint128: - case Scalar::e_sint256: - case Scalar::e_sint512: - if (b->m_integer != 0) { + case Category::Integral: + if (IsSigned(result.m_type)) result.m_integer = a->m_integer.sdiv(b->m_integer); - return result; - } - break; - case Scalar::e_uint: - case Scalar::e_ulong: - case Scalar::e_ulonglong: - case Scalar::e_uint128: - case Scalar::e_uint256: - case Scalar::e_uint512: - if (b->m_integer != 0) { + else result.m_integer = a->m_integer.udiv(b->m_integer); - return result; - } - break; - case Scalar::e_float: - case Scalar::e_double: - case Scalar::e_long_double: - if (!b->m_float.isZero()) { - result.m_float = a->m_float / b->m_float; - return result; - } - break; + return result; + case Category::Float: + result.m_float = a->m_float / b->m_float; + return result; } } // For division only, the only way it should make it here is if a promotion @@ -1276,26 +1007,13 @@ const Scalar lldb_private::operator*(const Scalar &lhs, const Scalar &rhs) { const Scalar *b; if ((result.m_type = PromoteToMaxType(lhs, rhs, temp_value, a, b)) != Scalar::e_void) { - switch (result.m_type) { - case Scalar::e_void: + switch (GetCategory(result.m_type)) { + case Category::Void: break; - case Scalar::e_sint: - case Scalar::e_uint: - case Scalar::e_slong: - case Scalar::e_ulong: - case Scalar::e_slonglong: - case Scalar::e_ulonglong: - case Scalar::e_sint128: - case Scalar::e_uint128: - case Scalar::e_sint256: - case Scalar::e_uint256: - case Scalar::e_sint512: - case Scalar::e_uint512: + case Category::Integral: result.m_integer = a->m_integer * b->m_integer; break; - case Scalar::e_float: - case Scalar::e_double: - case Scalar::e_long_double: + case Category::Float: result.m_float = a->m_float * b->m_float; break; } @@ -1310,29 +1028,10 @@ const Scalar lldb_private::operator&(const Scalar &lhs, const Scalar &rhs) { const Scalar *b; if ((result.m_type = PromoteToMaxType(lhs, rhs, temp_value, a, b)) != Scalar::e_void) { - switch (result.m_type) { - case Scalar::e_sint: - case Scalar::e_uint: - case Scalar::e_slong: - case Scalar::e_ulong: - case Scalar::e_slonglong: - case Scalar::e_ulonglong: - case Scalar::e_sint128: - case Scalar::e_uint128: - case Scalar::e_sint256: - case Scalar::e_uint256: - case Scalar::e_sint512: - case Scalar::e_uint512: + if (GetCategory(result.m_type) == Category::Integral) result.m_integer = a->m_integer & b->m_integer; - break; - case Scalar::e_void: - case Scalar::e_float: - case Scalar::e_double: - case Scalar::e_long_double: - // No bitwise AND on floats, doubles of long doubles + else result.m_type = Scalar::e_void; - break; - } } return result; } @@ -1344,30 +1043,10 @@ const Scalar lldb_private::operator|(const Scalar &lhs, const Scalar &rhs) { const Scalar *b; if ((result.m_type = PromoteToMaxType(lhs, rhs, temp_value, a, b)) != Scalar::e_void) { - switch (result.m_type) { - case Scalar::e_sint: - case Scalar::e_uint: - case Scalar::e_slong: - case Scalar::e_ulong: - case Scalar::e_slonglong: - case Scalar::e_ulonglong: - case Scalar::e_sint128: - case Scalar::e_uint128: - case Scalar::e_sint256: - case Scalar::e_uint256: - case Scalar::e_sint512: - case Scalar::e_uint512: + if (GetCategory(result.m_type) == Category::Integral) result.m_integer = a->m_integer | b->m_integer; - break; - - case Scalar::e_void: - case Scalar::e_float: - case Scalar::e_double: - case Scalar::e_long_double: - // No bitwise AND on floats, doubles of long doubles + else result.m_type = Scalar::e_void; - break; - } } return result; } @@ -1379,33 +1058,12 @@ const Scalar lldb_private::operator%(const Scalar &lhs, const Scalar &rhs) { const Scalar *b; if ((result.m_type = PromoteToMaxType(lhs, rhs, temp_value, a, b)) != Scalar::e_void) { - switch (result.m_type) { - default: - break; - case Scalar::e_void: - break; - case Scalar::e_sint: - case Scalar::e_slong: - case Scalar::e_slonglong: - case Scalar::e_sint128: - case Scalar::e_sint256: - case Scalar::e_sint512: - if (b->m_integer != 0) { + if (!b->IsZero() && GetCategory(result.m_type) == Category::Integral) { + if (IsSigned(result.m_type)) result.m_integer = a->m_integer.srem(b->m_integer); - return result; - } - break; - case Scalar::e_uint: - case Scalar::e_ulong: - case Scalar::e_ulonglong: - case Scalar::e_uint128: - case Scalar::e_uint256: - case Scalar::e_uint512: - if (b->m_integer != 0) { + else result.m_integer = a->m_integer.urem(b->m_integer); - return result; - } - break; + return result; } } result.m_type = Scalar::e_void; @@ -1419,30 +1077,10 @@ const Scalar lldb_private::operator^(const Scalar &lhs, const Scalar &rhs) { const Scalar *b; if ((result.m_type = PromoteToMaxType(lhs, rhs, temp_value, a, b)) != Scalar::e_void) { - switch (result.m_type) { - case Scalar::e_sint: - case Scalar::e_uint: - case Scalar::e_slong: - case Scalar::e_ulong: - case Scalar::e_slonglong: - case Scalar::e_ulonglong: - case Scalar::e_sint128: - case Scalar::e_uint128: - case Scalar::e_sint256: - case Scalar::e_uint256: - case Scalar::e_sint512: - case Scalar::e_uint512: + if (GetCategory(result.m_type) == Category::Integral) result.m_integer = a->m_integer ^ b->m_integer; - break; - - case Scalar::e_void: - case Scalar::e_float: - case Scalar::e_double: - case Scalar::e_long_double: - // No bitwise AND on floats, doubles of long doubles + else result.m_type = Scalar::e_void; - break; - } } return result; } diff --git a/lldb/test/API/functionalities/limit-debug-info/TestLimitDebugInfo.py b/lldb/test/API/functionalities/limit-debug-info/TestLimitDebugInfo.py index d22aeaace7bf08..396861f5eb762a 100644 --- a/lldb/test/API/functionalities/limit-debug-info/TestLimitDebugInfo.py +++ b/lldb/test/API/functionalities/limit-debug-info/TestLimitDebugInfo.py @@ -44,11 +44,16 @@ def test_one_and_two_debug(self): # all members. self.expect_expr("inherits_from_one.member", result_value="47") self.expect_expr("inherits_from_one.one", result_value="142") - self.expect_expr("inherits_from_two.member", result_value="47") self.expect_expr("inherits_from_two.one", result_value="142") self.expect_expr("inherits_from_two.two", result_value="242") + self.expect_expr("one_as_member.member", result_value="47") + self.expect_expr("one_as_member.one.member", result_value="147") + self.expect_expr("two_as_member.member", result_value="47") + self.expect_expr("two_as_member.two.one.member", result_value="147") + self.expect_expr("two_as_member.two.member", result_value="247") + @skipIf(bugnumber="pr46284", debug_info="gmodules") @skipIfWindows # Clang emits type info even with -flimit-debug-info def test_two_debug(self): @@ -63,12 +68,19 @@ def test_two_debug(self): self.expect_expr("inherits_from_one.member", result_value="47") self.expect("expr inherits_from_one.one", error=True, substrs=["no member named 'one' in 'InheritsFromOne'"]) - self.expect_expr("inherits_from_two.member", result_value="47") self.expect("expr inherits_from_two.one", error=True, substrs=["no member named 'one' in 'InheritsFromTwo'"]) self.expect_expr("inherits_from_two.two", result_value="242") + self.expect_expr("one_as_member.member", result_value="47") + self.expect("expr one_as_member.one.member", error=True, + substrs=["no member named 'member' in 'member::One'"]) + self.expect_expr("two_as_member.member", result_value="47") + self.expect("expr two_as_member.two.one.member", error=True, + substrs=["no member named 'member' in 'member::One'"]) + self.expect_expr("two_as_member.two.member", result_value="247") + @skipIf(bugnumber="pr46284", debug_info="gmodules") @skipIfWindows # Clang emits type info even with -flimit-debug-info def test_one_debug(self): @@ -85,9 +97,16 @@ def test_one_debug(self): # "One". self.expect_expr("inherits_from_one.member", result_value="47") self.expect_expr("inherits_from_one.one", result_value="142") - self.expect_expr("inherits_from_two.member", result_value="47") self.expect("expr inherits_from_two.one", error=True, substrs=["no member named 'one' in 'InheritsFromTwo'"]) self.expect("expr inherits_from_two.two", error=True, substrs=["no member named 'two' in 'InheritsFromTwo'"]) + + self.expect_expr("one_as_member.member", result_value="47") + self.expect_expr("one_as_member.one.member", result_value="147") + self.expect_expr("two_as_member.member", result_value="47") + self.expect("expr two_as_member.two.one.member", error=True, + substrs=["no member named 'one' in 'member::Two'"]) + self.expect("expr two_as_member.two.member", error=True, + substrs=["no member named 'member' in 'member::Two'"]) diff --git a/lldb/test/API/functionalities/limit-debug-info/main.cpp b/lldb/test/API/functionalities/limit-debug-info/main.cpp index e3049ed74489c1..886b3feec434dd 100644 --- a/lldb/test/API/functionalities/limit-debug-info/main.cpp +++ b/lldb/test/API/functionalities/limit-debug-info/main.cpp @@ -10,4 +10,16 @@ struct InheritsFromTwo : Two { int member = 47; } inherits_from_two; +struct OneAsMember { + constexpr OneAsMember() = default; + member::One one; + int member = 47; +} one_as_member; + +struct TwoAsMember { + constexpr TwoAsMember() = default; + member::Two two; + int member = 47; +} two_as_member; + int main() { return 0; } diff --git a/lldb/test/API/functionalities/limit-debug-info/one.cpp b/lldb/test/API/functionalities/limit-debug-info/one.cpp index 728875dd9e5572..ee275e3321e419 100644 --- a/lldb/test/API/functionalities/limit-debug-info/one.cpp +++ b/lldb/test/API/functionalities/limit-debug-info/one.cpp @@ -1,3 +1,4 @@ #include "onetwo.h" One::~One() = default; +member::One::~One() = default; diff --git a/lldb/test/API/functionalities/limit-debug-info/onetwo.h b/lldb/test/API/functionalities/limit-debug-info/onetwo.h index 82df76c64b584d..6822d84803fe43 100644 --- a/lldb/test/API/functionalities/limit-debug-info/onetwo.h +++ b/lldb/test/API/functionalities/limit-debug-info/onetwo.h @@ -9,3 +9,18 @@ struct Two : One { constexpr Two() = default; ~Two() override; }; + +namespace member { +struct One { + int member = 147; + constexpr One() = default; + virtual ~One(); +}; + +struct Two { + One one; + int member = 247; + constexpr Two() = default; + virtual ~Two(); +}; +} // namespace member diff --git a/lldb/test/API/functionalities/limit-debug-info/two.cpp b/lldb/test/API/functionalities/limit-debug-info/two.cpp index 928b091728c38f..db98c5e8d3dccc 100644 --- a/lldb/test/API/functionalities/limit-debug-info/two.cpp +++ b/lldb/test/API/functionalities/limit-debug-info/two.cpp @@ -1,3 +1,4 @@ #include "onetwo.h" Two::~Two() = default; +member::Two::~Two() = default; diff --git a/lldb/test/Shell/SymbolFile/DWARF/forward-declarations.s b/lldb/test/Shell/SymbolFile/DWARF/forward-declarations.s deleted file mode 100644 index 952c4ee7f87ed7..00000000000000 --- a/lldb/test/Shell/SymbolFile/DWARF/forward-declarations.s +++ /dev/null @@ -1,111 +0,0 @@ -# Test handling of the situation (including the error message) where a structure -# has a incomplete member. - -# REQUIRES: x86 - -# RUN: llvm-mc -triple x86_64-pc-linux -filetype=obj %s -o %t -# RUN: %lldb %t -o "target var b" -b 2>&1 | FileCheck %s - -# CHECK: error: {{.*}} DWARF DIE at 0x0000002b (class B) has a member variable 0x00000030 (a) whose type is a forward declaration, not a complete definition. -# CHECK-NEXT: Please file a bug against the compiler and include the preprocessed output for /tmp/a.cc - -# CHECK: b = (a = A @ 0x0000000000000001) - - .type b,@object # @b - .comm b,1,1 - .section .debug_str,"MS",@progbits,1 -.Linfo_string0: - .asciz "Hand-written DWARF" -.Lcu_name: - .asciz "/tmp/a.cc" -.Lcu_compdir: - .asciz "/foo/bar" -.Lb: - .asciz "b" -.La: - .asciz "a" -.LA: - .asciz "A" -.LB: - .asciz "B" - - .section .debug_abbrev,"",@progbits - .byte 1 # Abbreviation Code - .byte 17 # DW_TAG_compile_unit - .byte 1 # DW_CHILDREN_yes - .byte 37 # DW_AT_producer - .byte 14 # DW_FORM_strp - .byte 3 # DW_AT_name - .byte 14 # DW_FORM_strp - .byte 27 # DW_AT_comp_dir - .byte 14 # DW_FORM_strp - .byte 0 # EOM(1) - .byte 0 # EOM(2) - .byte 2 # Abbreviation Code - .byte 52 # DW_TAG_variable - .byte 0 # DW_CHILDREN_no - .byte 3 # DW_AT_name - .byte 14 # DW_FORM_strp - .byte 73 # DW_AT_type - .byte 19 # DW_FORM_ref4 - .byte 2 # DW_AT_location - .byte 24 # DW_FORM_exprloc - .byte 0 # EOM(1) - .byte 0 # EOM(2) - .byte 3 # Abbreviation Code - .byte 19 # DW_TAG_structure_type - .byte 1 # DW_CHILDREN_yes - .byte 3 # DW_AT_name - .byte 14 # DW_FORM_strp - .byte 0 # EOM(1) - .byte 0 # EOM(2) - .byte 4 # Abbreviation Code - .byte 13 # DW_TAG_member - .byte 0 # DW_CHILDREN_no - .byte 3 # DW_AT_name - .byte 14 # DW_FORM_strp - .byte 73 # DW_AT_type - .byte 19 # DW_FORM_ref4 - .byte 0 # EOM(1) - .byte 0 # EOM(2) - .byte 5 # Abbreviation Code - .byte 19 # DW_TAG_structure_type - .byte 0 # DW_CHILDREN_no - .byte 60 # DW_AT_declaration - .byte 25 # DW_FORM_flag_present - .byte 3 # DW_AT_name - .byte 14 # DW_FORM_strp - .byte 0 # EOM(1) - .byte 0 # EOM(2) - .byte 0 # EOM(3) - - .section .debug_info,"",@progbits -.Lcu_begin0: - .long .Ldebug_info_end0-.Ldebug_info_start0 # Length of Unit -.Ldebug_info_start0: - .short 4 # DWARF version number - .long .debug_abbrev # Offset Into Abbrev. Section - .byte 8 # Address Size (in bytes) - .byte 1 # Abbrev [1] 0xb:0x46 DW_TAG_compile_unit - .long .Linfo_string0 # DW_AT_producer - .long .Lcu_name # DW_AT_name - .long .Lcu_compdir # DW_AT_comp_dir - .byte 2 # Abbrev [2] 0x1e:0x15 DW_TAG_variable - .long .Lb # DW_AT_name - .long .LB_die-.Lcu_begin0 # DW_AT_type - .byte 9 # DW_AT_location - .byte 3 - .quad b -.LB_die: - .byte 3 # Abbrev [3] 0x33:0x15 DW_TAG_structure_type - .long .LB # DW_AT_name - .byte 4 # Abbrev [4] 0x3b:0xc DW_TAG_member - .long .La # DW_AT_name - .long .LA_die-.Lcu_begin0 # DW_AT_type - .byte 0 # End Of Children Mark -.LA_die: - .byte 5 # Abbrev [5] 0x48:0x8 DW_TAG_structure_type - # DW_AT_declaration - .long .LA # DW_AT_name - .byte 0 # End Of Children Mark -.Ldebug_info_end0: diff --git a/llvm/lib/ObjectYAML/DWARFYAML.cpp b/llvm/lib/ObjectYAML/DWARFYAML.cpp index 892f9b6e8de00a..bedf31dc8179fa 100644 --- a/llvm/lib/ObjectYAML/DWARFYAML.cpp +++ b/llvm/lib/ObjectYAML/DWARFYAML.cpp @@ -42,6 +42,10 @@ SetVector DWARFYAML::Data::getUsedSectionNames() const { SecNames.insert("debug_pubnames"); if (PubTypes) SecNames.insert("debug_pubtypes"); + if (GNUPubNames) + SecNames.insert("debug_gnu_pubnames"); + if (GNUPubTypes) + SecNames.insert("debug_gnu_pubtypes"); return SecNames; } diff --git a/llvm/lib/ObjectYAML/ELFEmitter.cpp b/llvm/lib/ObjectYAML/ELFEmitter.cpp index c985bd24a4de62..218e7df8e39a51 100644 --- a/llvm/lib/ObjectYAML/ELFEmitter.cpp +++ b/llvm/lib/ObjectYAML/ELFEmitter.cpp @@ -947,6 +947,12 @@ Expected emitDWARF(typename ELFT::Shdr &SHeader, StringRef Name, Err = DWARFYAML::emitPubSection(*OS, *DWARF.PubNames, DWARF.IsLittleEndian); else if (Name == ".debug_pubtypes") Err = DWARFYAML::emitPubSection(*OS, *DWARF.PubTypes, DWARF.IsLittleEndian); + else if (Name == ".debug_gnu_pubnames") + Err = DWARFYAML::emitPubSection(*OS, *DWARF.GNUPubNames, + DWARF.IsLittleEndian, /*IsGNUStyle=*/true); + else if (Name == ".debug_gnu_pubtypes") + Err = DWARFYAML::emitPubSection(*OS, *DWARF.GNUPubTypes, + DWARF.IsLittleEndian, /*IsGNUStyle=*/true); else llvm_unreachable("unexpected emitDWARF() call"); diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 0c3c7c74458f34..aaeb6b45991545 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -838,6 +838,8 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM, setOperationAction(ISD::UADDSAT, VT, Legal); setOperationAction(ISD::SSUBSAT, VT, Legal); setOperationAction(ISD::USUBSAT, VT, Legal); + + setOperationAction(ISD::TRUNCATE, VT, Custom); } for (MVT VT : { MVT::v4f16, MVT::v2f32, MVT::v8f16, MVT::v4f32, MVT::v2f64 }) { @@ -1432,6 +1434,8 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const { MAKE_CASE(AArch64ISD::FCMLTz) MAKE_CASE(AArch64ISD::SADDV) MAKE_CASE(AArch64ISD::UADDV) + MAKE_CASE(AArch64ISD::SRHADD) + MAKE_CASE(AArch64ISD::URHADD) MAKE_CASE(AArch64ISD::SMINV) MAKE_CASE(AArch64ISD::UMINV) MAKE_CASE(AArch64ISD::SMAXV) @@ -3260,6 +3264,14 @@ SDValue AArch64TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, return DAG.getNode(Opcode, dl, Ty, Op.getOperand(1), Op.getOperand(2), Op.getOperand(3)); } + + case Intrinsic::aarch64_neon_srhadd: + case Intrinsic::aarch64_neon_urhadd: { + bool IsSignedAdd = IntNo == Intrinsic::aarch64_neon_srhadd; + unsigned Opcode = IsSignedAdd ? AArch64ISD::SRHADD : AArch64ISD::URHADD; + return DAG.getNode(Opcode, dl, Op.getValueType(), Op.getOperand(1), + Op.getOperand(2)); + } } } @@ -3524,6 +3536,8 @@ SDValue AArch64TargetLowering::LowerOperation(SDValue Op, return LowerDYNAMIC_STACKALLOC(Op, DAG); case ISD::VSCALE: return LowerVSCALE(Op, DAG); + case ISD::TRUNCATE: + return LowerTRUNCATE(Op, DAG); case ISD::LOAD: if (useSVEForFixedLengthVectorVT(Op.getValueType())) return LowerFixedLengthVectorLoadToSVE(Op, DAG); @@ -8773,6 +8787,78 @@ static bool isVShiftRImm(SDValue Op, EVT VT, bool isNarrow, int64_t &Cnt) { return (Cnt >= 1 && Cnt <= (isNarrow ? ElementBits / 2 : ElementBits)); } +// Attempt to form urhadd(OpA, OpB) from +// truncate(vlshr(sub(zext(OpB), xor(zext(OpA), Ones(ElemSizeInBits))), 1)). +// The original form of this expression is +// truncate(srl(add(zext(OpB), add(zext(OpA), 1)), 1)) and before this function +// is called the srl will have been lowered to AArch64ISD::VLSHR and the +// ((OpA + OpB + 1) >> 1) expression will have been changed to (OpB - (~OpA)). +// This pass can also recognize a variant of this pattern that uses sign +// extension instead of zero extension and form a srhadd(OpA, OpB) from it. +SDValue AArch64TargetLowering::LowerTRUNCATE(SDValue Op, + SelectionDAG &DAG) const { + EVT VT = Op.getValueType(); + + if (!VT.isVector() || VT.isScalableVector()) + return Op; + + // Since we are looking for a right shift by a constant value of 1 and we are + // operating on types at least 16 bits in length (sign/zero extended OpA and + // OpB, which are at least 8 bits), it follows that the truncate will always + // discard the shifted-in bit and therefore the right shift will be logical + // regardless of the signedness of OpA and OpB. + SDValue Shift = Op.getOperand(0); + if (Shift.getOpcode() != AArch64ISD::VLSHR) + return Op; + + // Is the right shift using an immediate value of 1? + uint64_t ShiftAmount = Shift.getConstantOperandVal(1); + if (ShiftAmount != 1) + return Op; + + SDValue Sub = Shift->getOperand(0); + if (Sub.getOpcode() != ISD::SUB) + return Op; + + SDValue Xor = Sub.getOperand(1); + if (Xor.getOpcode() != ISD::XOR) + return Op; + + SDValue ExtendOpA = Xor.getOperand(0); + SDValue ExtendOpB = Sub.getOperand(0); + unsigned ExtendOpAOpc = ExtendOpA.getOpcode(); + unsigned ExtendOpBOpc = ExtendOpB.getOpcode(); + if (!(ExtendOpAOpc == ExtendOpBOpc && + (ExtendOpAOpc == ISD::ZERO_EXTEND || ExtendOpAOpc == ISD::SIGN_EXTEND))) + return Op; + + // Is the result of the right shift being truncated to the same value type as + // the original operands, OpA and OpB? + SDValue OpA = ExtendOpA.getOperand(0); + SDValue OpB = ExtendOpB.getOperand(0); + EVT OpAVT = OpA.getValueType(); + assert(ExtendOpA.getValueType() == ExtendOpB.getValueType()); + if (!(VT == OpAVT && OpAVT == OpB.getValueType())) + return Op; + + // Is the XOR using a constant amount of all ones in the right hand side? + uint64_t C; + if (!isAllConstantBuildVector(Xor.getOperand(1), C)) + return Op; + + unsigned ElemSizeInBits = VT.getScalarSizeInBits(); + APInt CAsAPInt(ElemSizeInBits, C); + if (CAsAPInt != APInt::getAllOnesValue(ElemSizeInBits)) + return Op; + + SDLoc DL(Op); + bool IsSignExtend = ExtendOpAOpc == ISD::SIGN_EXTEND; + unsigned RHADDOpc = IsSignExtend ? AArch64ISD::SRHADD : AArch64ISD::URHADD; + SDValue ResultURHADD = DAG.getNode(RHADDOpc, DL, VT, OpA, OpB); + + return ResultURHADD; +} + SDValue AArch64TargetLowering::LowerVectorSRA_SRL_SHL(SDValue Op, SelectionDAG &DAG) const { EVT VT = Op.getValueType(); @@ -10982,6 +11068,7 @@ static SDValue performConcatVectorsCombine(SDNode *N, SDLoc dl(N); EVT VT = N->getValueType(0); SDValue N0 = N->getOperand(0), N1 = N->getOperand(1); + unsigned N0Opc = N0->getOpcode(), N1Opc = N1->getOpcode(); // Optimize concat_vectors of truncated vectors, where the intermediate // type is illegal, to avoid said illegality, e.g., @@ -10994,9 +11081,8 @@ static SDValue performConcatVectorsCombine(SDNode *N, // This isn't really target-specific, but ISD::TRUNCATE legality isn't keyed // on both input and result type, so we might generate worse code. // On AArch64 we know it's fine for v2i64->v4i16 and v4i32->v8i8. - if (N->getNumOperands() == 2 && - N0->getOpcode() == ISD::TRUNCATE && - N1->getOpcode() == ISD::TRUNCATE) { + if (N->getNumOperands() == 2 && N0Opc == ISD::TRUNCATE && + N1Opc == ISD::TRUNCATE) { SDValue N00 = N0->getOperand(0); SDValue N10 = N1->getOperand(0); EVT N00VT = N00.getValueType(); @@ -11021,6 +11107,52 @@ static SDValue performConcatVectorsCombine(SDNode *N, if (DCI.isBeforeLegalizeOps()) return SDValue(); + // Optimise concat_vectors of two [us]rhadds that use extracted subvectors + // from the same original vectors. Combine these into a single [us]rhadd that + // operates on the two original vectors. Example: + // (v16i8 (concat_vectors (v8i8 (urhadd (extract_subvector (v16i8 OpA, <0>), + // extract_subvector (v16i8 OpB, + // <0>))), + // (v8i8 (urhadd (extract_subvector (v16i8 OpA, <8>), + // extract_subvector (v16i8 OpB, + // <8>))))) + // -> + // (v16i8(urhadd(v16i8 OpA, v16i8 OpB))) + if (N->getNumOperands() == 2 && N0Opc == N1Opc && + (N0Opc == AArch64ISD::URHADD || N0Opc == AArch64ISD::SRHADD)) { + SDValue N00 = N0->getOperand(0); + SDValue N01 = N0->getOperand(1); + SDValue N10 = N1->getOperand(0); + SDValue N11 = N1->getOperand(1); + + EVT N00VT = N00.getValueType(); + EVT N10VT = N10.getValueType(); + + if (N00->getOpcode() == ISD::EXTRACT_SUBVECTOR && + N01->getOpcode() == ISD::EXTRACT_SUBVECTOR && + N10->getOpcode() == ISD::EXTRACT_SUBVECTOR && + N11->getOpcode() == ISD::EXTRACT_SUBVECTOR && N00VT == N10VT) { + SDValue N00Source = N00->getOperand(0); + SDValue N01Source = N01->getOperand(0); + SDValue N10Source = N10->getOperand(0); + SDValue N11Source = N11->getOperand(0); + + if (N00Source == N10Source && N01Source == N11Source && + N00Source.getValueType() == VT && N01Source.getValueType() == VT) { + assert(N0.getValueType() == N1.getValueType()); + + uint64_t N00Index = N00.getConstantOperandVal(1); + uint64_t N01Index = N01.getConstantOperandVal(1); + uint64_t N10Index = N10.getConstantOperandVal(1); + uint64_t N11Index = N11.getConstantOperandVal(1); + + if (N00Index == N01Index && N10Index == N11Index && N00Index == 0 && + N10Index == N00VT.getVectorNumElements()) + return DAG.getNode(N0Opc, dl, VT, N00Source, N01Source); + } + } + } + // If we see a (concat_vectors (v1x64 A), (v1x64 A)) it's really a vector // splat. The indexed instructions are going to be expecting a DUPLANE64, so // canonicalise to that. @@ -11039,7 +11171,7 @@ static SDValue performConcatVectorsCombine(SDNode *N, // becomes // (bitconvert (concat_vectors (v4i16 (bitconvert LHS)), RHS)) - if (N1->getOpcode() != ISD::BITCAST) + if (N1Opc != ISD::BITCAST) return SDValue(); SDValue RHS = N1->getOperand(0); MVT RHSTy = RHS.getValueType().getSimpleVT(); diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h index bc59f0aedc4bf8..210b8c84270193 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h @@ -187,6 +187,10 @@ enum NodeType : unsigned { SADDV, UADDV, + // Vector rounding halving addition + SRHADD, + URHADD, + // Vector across-lanes min/max // Only the lower result lane is defined. SMINV, @@ -863,6 +867,7 @@ class AArch64TargetLowering : public TargetLowering { SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const; SDValue LowerVSCALE(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const; SDValue LowerVECREDUCE(SDValue Op, SelectionDAG &DAG) const; SDValue LowerATOMIC_LOAD_SUB(SDValue Op, SelectionDAG &DAG) const; SDValue LowerATOMIC_LOAD_AND(SDValue Op, SelectionDAG &DAG) const; diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td index 276573f82d660c..f4a5f639e4973d 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td @@ -554,6 +554,9 @@ def AArch64uminv : SDNode<"AArch64ISD::UMINV", SDT_AArch64UnaryVec>; def AArch64smaxv : SDNode<"AArch64ISD::SMAXV", SDT_AArch64UnaryVec>; def AArch64umaxv : SDNode<"AArch64ISD::UMAXV", SDT_AArch64UnaryVec>; +def AArch64srhadd : SDNode<"AArch64ISD::SRHADD", SDT_AArch64binvec>; +def AArch64urhadd : SDNode<"AArch64ISD::URHADD", SDT_AArch64binvec>; + def SDT_AArch64SETTAG : SDTypeProfile<0, 2, [SDTCisPtrTy<0>, SDTCisPtrTy<1>]>; def AArch64stg : SDNode<"AArch64ISD::STG", SDT_AArch64SETTAG, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>; def AArch64stzg : SDNode<"AArch64ISD::STZG", SDT_AArch64SETTAG, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>; @@ -4073,7 +4076,7 @@ defm SQRDMULH : SIMDThreeSameVectorHS<1,0b10110,"sqrdmulh",int_aarch64_neon_sqrd defm SQRSHL : SIMDThreeSameVector<0,0b01011,"sqrshl", int_aarch64_neon_sqrshl>; defm SQSHL : SIMDThreeSameVector<0,0b01001,"sqshl", int_aarch64_neon_sqshl>; defm SQSUB : SIMDThreeSameVector<0,0b00101,"sqsub", int_aarch64_neon_sqsub>; -defm SRHADD : SIMDThreeSameVectorBHS<0,0b00010,"srhadd",int_aarch64_neon_srhadd>; +defm SRHADD : SIMDThreeSameVectorBHS<0,0b00010,"srhadd", AArch64srhadd>; defm SRSHL : SIMDThreeSameVector<0,0b01010,"srshl", int_aarch64_neon_srshl>; defm SSHL : SIMDThreeSameVector<0,0b01000,"sshl", int_aarch64_neon_sshl>; defm SUB : SIMDThreeSameVector<1,0b10000,"sub", sub>; @@ -4090,7 +4093,7 @@ defm UQADD : SIMDThreeSameVector<1,0b00001,"uqadd", int_aarch64_neon_uqadd>; defm UQRSHL : SIMDThreeSameVector<1,0b01011,"uqrshl", int_aarch64_neon_uqrshl>; defm UQSHL : SIMDThreeSameVector<1,0b01001,"uqshl", int_aarch64_neon_uqshl>; defm UQSUB : SIMDThreeSameVector<1,0b00101,"uqsub", int_aarch64_neon_uqsub>; -defm URHADD : SIMDThreeSameVectorBHS<1,0b00010,"urhadd", int_aarch64_neon_urhadd>; +defm URHADD : SIMDThreeSameVectorBHS<1,0b00010,"urhadd", AArch64urhadd>; defm URSHL : SIMDThreeSameVector<1,0b01010,"urshl", int_aarch64_neon_urshl>; defm USHL : SIMDThreeSameVector<1,0b01000,"ushl", int_aarch64_neon_ushl>; defm SQRDMLAH : SIMDThreeSameVectorSQRDMLxHTiedHS<1,0b10000,"sqrdmlah", diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp index 8d9ebe4572315b..7b3c503facf18e 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp @@ -377,29 +377,31 @@ static bool canEvaluateTruncated(Value *V, Type *Ty, InstCombiner &IC, break; } case Instruction::Shl: { - // If we are truncating the result of this SHL, and if it's a shift of a - // constant amount, we can always perform a SHL in a smaller type. - const APInt *Amt; - if (match(I->getOperand(1), m_APInt(Amt))) { - uint32_t BitWidth = Ty->getScalarSizeInBits(); - if (Amt->getLimitedValue(BitWidth) < BitWidth) - return canEvaluateTruncated(I->getOperand(0), Ty, IC, CxtI); - } + // If we are truncating the result of this SHL, and if it's a shift of an + // inrange amount, we can always perform a SHL in a smaller type. + uint32_t BitWidth = Ty->getScalarSizeInBits(); + KnownBits AmtKnownBits = + llvm::computeKnownBits(I->getOperand(1), IC.getDataLayout()); + if (AmtKnownBits.getMaxValue().ult(BitWidth)) + return canEvaluateTruncated(I->getOperand(0), Ty, IC, CxtI) && + canEvaluateTruncated(I->getOperand(1), Ty, IC, CxtI); break; } case Instruction::LShr: { // If this is a truncate of a logical shr, we can truncate it to a smaller // lshr iff we know that the bits we would otherwise be shifting in are // already zeros. - const APInt *Amt; - if (match(I->getOperand(1), m_APInt(Amt))) { - uint32_t OrigBitWidth = OrigTy->getScalarSizeInBits(); - uint32_t BitWidth = Ty->getScalarSizeInBits(); - if (Amt->getLimitedValue(BitWidth) < BitWidth && - IC.MaskedValueIsZero(I->getOperand(0), - APInt::getBitsSetFrom(OrigBitWidth, BitWidth), 0, CxtI)) { - return canEvaluateTruncated(I->getOperand(0), Ty, IC, CxtI); - } + // TODO: It is enough to check that the bits we would be shifting in are + // zero - use AmtKnownBits.getMaxValue(). + uint32_t OrigBitWidth = OrigTy->getScalarSizeInBits(); + uint32_t BitWidth = Ty->getScalarSizeInBits(); + KnownBits AmtKnownBits = + llvm::computeKnownBits(I->getOperand(1), IC.getDataLayout()); + APInt ShiftedBits = APInt::getBitsSetFrom(OrigBitWidth, BitWidth); + if (AmtKnownBits.getMaxValue().ult(BitWidth) && + IC.MaskedValueIsZero(I->getOperand(0), ShiftedBits, 0, CxtI)) { + return canEvaluateTruncated(I->getOperand(0), Ty, IC, CxtI) && + canEvaluateTruncated(I->getOperand(1), Ty, IC, CxtI); } break; } @@ -409,15 +411,15 @@ static bool canEvaluateTruncated(Value *V, Type *Ty, InstCombiner &IC, // original type and the sign bit of the truncate type are similar. // TODO: It is enough to check that the bits we would be shifting in are // similar to sign bit of the truncate type. - const APInt *Amt; - if (match(I->getOperand(1), m_APInt(Amt))) { - uint32_t OrigBitWidth = OrigTy->getScalarSizeInBits(); - uint32_t BitWidth = Ty->getScalarSizeInBits(); - if (Amt->getLimitedValue(BitWidth) < BitWidth && - OrigBitWidth - BitWidth < - IC.ComputeNumSignBits(I->getOperand(0), 0, CxtI)) - return canEvaluateTruncated(I->getOperand(0), Ty, IC, CxtI); - } + uint32_t OrigBitWidth = OrigTy->getScalarSizeInBits(); + uint32_t BitWidth = Ty->getScalarSizeInBits(); + KnownBits AmtKnownBits = + llvm::computeKnownBits(I->getOperand(1), IC.getDataLayout()); + unsigned ShiftedBits = OrigBitWidth - BitWidth; + if (AmtKnownBits.getMaxValue().ult(BitWidth) && + ShiftedBits < IC.ComputeNumSignBits(I->getOperand(0), 0, CxtI)) + return canEvaluateTruncated(I->getOperand(0), Ty, IC, CxtI) && + canEvaluateTruncated(I->getOperand(1), Ty, IC, CxtI); break; } case Instruction::Trunc: diff --git a/llvm/test/CodeGen/AArch64/arm64-vhadd.ll b/llvm/test/CodeGen/AArch64/arm64-vhadd.ll index cd650e1debf865..a5d223cc8aefbe 100644 --- a/llvm/test/CodeGen/AArch64/arm64-vhadd.ll +++ b/llvm/test/CodeGen/AArch64/arm64-vhadd.ll @@ -1,8 +1,13 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple | FileCheck %s define <8 x i8> @shadd8b(<8 x i8>* %A, <8 x i8>* %B) nounwind { -;CHECK-LABEL: shadd8b: -;CHECK: shadd.8b +; CHECK-LABEL: shadd8b: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr d0, [x0] +; CHECK-NEXT: ldr d1, [x1] +; CHECK-NEXT: shadd.8b v0, v0, v1 +; CHECK-NEXT: ret %tmp1 = load <8 x i8>, <8 x i8>* %A %tmp2 = load <8 x i8>, <8 x i8>* %B %tmp3 = call <8 x i8> @llvm.aarch64.neon.shadd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) @@ -10,8 +15,12 @@ define <8 x i8> @shadd8b(<8 x i8>* %A, <8 x i8>* %B) nounwind { } define <16 x i8> @shadd16b(<16 x i8>* %A, <16 x i8>* %B) nounwind { -;CHECK-LABEL: shadd16b: -;CHECK: shadd.16b +; CHECK-LABEL: shadd16b: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr q0, [x0] +; CHECK-NEXT: ldr q1, [x1] +; CHECK-NEXT: shadd.16b v0, v0, v1 +; CHECK-NEXT: ret %tmp1 = load <16 x i8>, <16 x i8>* %A %tmp2 = load <16 x i8>, <16 x i8>* %B %tmp3 = call <16 x i8> @llvm.aarch64.neon.shadd.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2) @@ -19,8 +28,12 @@ define <16 x i8> @shadd16b(<16 x i8>* %A, <16 x i8>* %B) nounwind { } define <4 x i16> @shadd4h(<4 x i16>* %A, <4 x i16>* %B) nounwind { -;CHECK-LABEL: shadd4h: -;CHECK: shadd.4h +; CHECK-LABEL: shadd4h: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr d0, [x0] +; CHECK-NEXT: ldr d1, [x1] +; CHECK-NEXT: shadd.4h v0, v0, v1 +; CHECK-NEXT: ret %tmp1 = load <4 x i16>, <4 x i16>* %A %tmp2 = load <4 x i16>, <4 x i16>* %B %tmp3 = call <4 x i16> @llvm.aarch64.neon.shadd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) @@ -28,8 +41,12 @@ define <4 x i16> @shadd4h(<4 x i16>* %A, <4 x i16>* %B) nounwind { } define <8 x i16> @shadd8h(<8 x i16>* %A, <8 x i16>* %B) nounwind { -;CHECK-LABEL: shadd8h: -;CHECK: shadd.8h +; CHECK-LABEL: shadd8h: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr q0, [x0] +; CHECK-NEXT: ldr q1, [x1] +; CHECK-NEXT: shadd.8h v0, v0, v1 +; CHECK-NEXT: ret %tmp1 = load <8 x i16>, <8 x i16>* %A %tmp2 = load <8 x i16>, <8 x i16>* %B %tmp3 = call <8 x i16> @llvm.aarch64.neon.shadd.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2) @@ -37,8 +54,12 @@ define <8 x i16> @shadd8h(<8 x i16>* %A, <8 x i16>* %B) nounwind { } define <2 x i32> @shadd2s(<2 x i32>* %A, <2 x i32>* %B) nounwind { -;CHECK-LABEL: shadd2s: -;CHECK: shadd.2s +; CHECK-LABEL: shadd2s: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr d0, [x0] +; CHECK-NEXT: ldr d1, [x1] +; CHECK-NEXT: shadd.2s v0, v0, v1 +; CHECK-NEXT: ret %tmp1 = load <2 x i32>, <2 x i32>* %A %tmp2 = load <2 x i32>, <2 x i32>* %B %tmp3 = call <2 x i32> @llvm.aarch64.neon.shadd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) @@ -46,8 +67,12 @@ define <2 x i32> @shadd2s(<2 x i32>* %A, <2 x i32>* %B) nounwind { } define <4 x i32> @shadd4s(<4 x i32>* %A, <4 x i32>* %B) nounwind { -;CHECK-LABEL: shadd4s: -;CHECK: shadd.4s +; CHECK-LABEL: shadd4s: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr q0, [x0] +; CHECK-NEXT: ldr q1, [x1] +; CHECK-NEXT: shadd.4s v0, v0, v1 +; CHECK-NEXT: ret %tmp1 = load <4 x i32>, <4 x i32>* %A %tmp2 = load <4 x i32>, <4 x i32>* %B %tmp3 = call <4 x i32> @llvm.aarch64.neon.shadd.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2) @@ -55,8 +80,12 @@ define <4 x i32> @shadd4s(<4 x i32>* %A, <4 x i32>* %B) nounwind { } define <8 x i8> @uhadd8b(<8 x i8>* %A, <8 x i8>* %B) nounwind { -;CHECK-LABEL: uhadd8b: -;CHECK: uhadd.8b +; CHECK-LABEL: uhadd8b: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr d0, [x0] +; CHECK-NEXT: ldr d1, [x1] +; CHECK-NEXT: uhadd.8b v0, v0, v1 +; CHECK-NEXT: ret %tmp1 = load <8 x i8>, <8 x i8>* %A %tmp2 = load <8 x i8>, <8 x i8>* %B %tmp3 = call <8 x i8> @llvm.aarch64.neon.uhadd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) @@ -64,8 +93,12 @@ define <8 x i8> @uhadd8b(<8 x i8>* %A, <8 x i8>* %B) nounwind { } define <16 x i8> @uhadd16b(<16 x i8>* %A, <16 x i8>* %B) nounwind { -;CHECK-LABEL: uhadd16b: -;CHECK: uhadd.16b +; CHECK-LABEL: uhadd16b: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr q0, [x0] +; CHECK-NEXT: ldr q1, [x1] +; CHECK-NEXT: uhadd.16b v0, v0, v1 +; CHECK-NEXT: ret %tmp1 = load <16 x i8>, <16 x i8>* %A %tmp2 = load <16 x i8>, <16 x i8>* %B %tmp3 = call <16 x i8> @llvm.aarch64.neon.uhadd.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2) @@ -73,8 +106,12 @@ define <16 x i8> @uhadd16b(<16 x i8>* %A, <16 x i8>* %B) nounwind { } define <4 x i16> @uhadd4h(<4 x i16>* %A, <4 x i16>* %B) nounwind { -;CHECK-LABEL: uhadd4h: -;CHECK: uhadd.4h +; CHECK-LABEL: uhadd4h: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr d0, [x0] +; CHECK-NEXT: ldr d1, [x1] +; CHECK-NEXT: uhadd.4h v0, v0, v1 +; CHECK-NEXT: ret %tmp1 = load <4 x i16>, <4 x i16>* %A %tmp2 = load <4 x i16>, <4 x i16>* %B %tmp3 = call <4 x i16> @llvm.aarch64.neon.uhadd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) @@ -82,8 +119,12 @@ define <4 x i16> @uhadd4h(<4 x i16>* %A, <4 x i16>* %B) nounwind { } define <8 x i16> @uhadd8h(<8 x i16>* %A, <8 x i16>* %B) nounwind { -;CHECK-LABEL: uhadd8h: -;CHECK: uhadd.8h +; CHECK-LABEL: uhadd8h: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr q0, [x0] +; CHECK-NEXT: ldr q1, [x1] +; CHECK-NEXT: uhadd.8h v0, v0, v1 +; CHECK-NEXT: ret %tmp1 = load <8 x i16>, <8 x i16>* %A %tmp2 = load <8 x i16>, <8 x i16>* %B %tmp3 = call <8 x i16> @llvm.aarch64.neon.uhadd.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2) @@ -91,8 +132,12 @@ define <8 x i16> @uhadd8h(<8 x i16>* %A, <8 x i16>* %B) nounwind { } define <2 x i32> @uhadd2s(<2 x i32>* %A, <2 x i32>* %B) nounwind { -;CHECK-LABEL: uhadd2s: -;CHECK: uhadd.2s +; CHECK-LABEL: uhadd2s: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr d0, [x0] +; CHECK-NEXT: ldr d1, [x1] +; CHECK-NEXT: uhadd.2s v0, v0, v1 +; CHECK-NEXT: ret %tmp1 = load <2 x i32>, <2 x i32>* %A %tmp2 = load <2 x i32>, <2 x i32>* %B %tmp3 = call <2 x i32> @llvm.aarch64.neon.uhadd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) @@ -100,8 +145,12 @@ define <2 x i32> @uhadd2s(<2 x i32>* %A, <2 x i32>* %B) nounwind { } define <4 x i32> @uhadd4s(<4 x i32>* %A, <4 x i32>* %B) nounwind { -;CHECK-LABEL: uhadd4s: -;CHECK: uhadd.4s +; CHECK-LABEL: uhadd4s: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr q0, [x0] +; CHECK-NEXT: ldr q1, [x1] +; CHECK-NEXT: uhadd.4s v0, v0, v1 +; CHECK-NEXT: ret %tmp1 = load <4 x i32>, <4 x i32>* %A %tmp2 = load <4 x i32>, <4 x i32>* %B %tmp3 = call <4 x i32> @llvm.aarch64.neon.uhadd.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2) @@ -125,8 +174,12 @@ declare <8 x i16> @llvm.aarch64.neon.uhadd.v8i16(<8 x i16>, <8 x i16>) nounwind declare <4 x i32> @llvm.aarch64.neon.uhadd.v4i32(<4 x i32>, <4 x i32>) nounwind readnone define <8 x i8> @srhadd8b(<8 x i8>* %A, <8 x i8>* %B) nounwind { -;CHECK-LABEL: srhadd8b: -;CHECK: srhadd.8b +; CHECK-LABEL: srhadd8b: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr d0, [x0] +; CHECK-NEXT: ldr d1, [x1] +; CHECK-NEXT: srhadd.8b v0, v0, v1 +; CHECK-NEXT: ret %tmp1 = load <8 x i8>, <8 x i8>* %A %tmp2 = load <8 x i8>, <8 x i8>* %B %tmp3 = call <8 x i8> @llvm.aarch64.neon.srhadd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) @@ -134,8 +187,12 @@ define <8 x i8> @srhadd8b(<8 x i8>* %A, <8 x i8>* %B) nounwind { } define <16 x i8> @srhadd16b(<16 x i8>* %A, <16 x i8>* %B) nounwind { -;CHECK-LABEL: srhadd16b: -;CHECK: srhadd.16b +; CHECK-LABEL: srhadd16b: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr q0, [x0] +; CHECK-NEXT: ldr q1, [x1] +; CHECK-NEXT: srhadd.16b v0, v0, v1 +; CHECK-NEXT: ret %tmp1 = load <16 x i8>, <16 x i8>* %A %tmp2 = load <16 x i8>, <16 x i8>* %B %tmp3 = call <16 x i8> @llvm.aarch64.neon.srhadd.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2) @@ -143,8 +200,12 @@ define <16 x i8> @srhadd16b(<16 x i8>* %A, <16 x i8>* %B) nounwind { } define <4 x i16> @srhadd4h(<4 x i16>* %A, <4 x i16>* %B) nounwind { -;CHECK-LABEL: srhadd4h: -;CHECK: srhadd.4h +; CHECK-LABEL: srhadd4h: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr d0, [x0] +; CHECK-NEXT: ldr d1, [x1] +; CHECK-NEXT: srhadd.4h v0, v0, v1 +; CHECK-NEXT: ret %tmp1 = load <4 x i16>, <4 x i16>* %A %tmp2 = load <4 x i16>, <4 x i16>* %B %tmp3 = call <4 x i16> @llvm.aarch64.neon.srhadd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) @@ -152,8 +213,12 @@ define <4 x i16> @srhadd4h(<4 x i16>* %A, <4 x i16>* %B) nounwind { } define <8 x i16> @srhadd8h(<8 x i16>* %A, <8 x i16>* %B) nounwind { -;CHECK-LABEL: srhadd8h: -;CHECK: srhadd.8h +; CHECK-LABEL: srhadd8h: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr q0, [x0] +; CHECK-NEXT: ldr q1, [x1] +; CHECK-NEXT: srhadd.8h v0, v0, v1 +; CHECK-NEXT: ret %tmp1 = load <8 x i16>, <8 x i16>* %A %tmp2 = load <8 x i16>, <8 x i16>* %B %tmp3 = call <8 x i16> @llvm.aarch64.neon.srhadd.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2) @@ -161,8 +226,12 @@ define <8 x i16> @srhadd8h(<8 x i16>* %A, <8 x i16>* %B) nounwind { } define <2 x i32> @srhadd2s(<2 x i32>* %A, <2 x i32>* %B) nounwind { -;CHECK-LABEL: srhadd2s: -;CHECK: srhadd.2s +; CHECK-LABEL: srhadd2s: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr d0, [x0] +; CHECK-NEXT: ldr d1, [x1] +; CHECK-NEXT: srhadd.2s v0, v0, v1 +; CHECK-NEXT: ret %tmp1 = load <2 x i32>, <2 x i32>* %A %tmp2 = load <2 x i32>, <2 x i32>* %B %tmp3 = call <2 x i32> @llvm.aarch64.neon.srhadd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) @@ -170,8 +239,12 @@ define <2 x i32> @srhadd2s(<2 x i32>* %A, <2 x i32>* %B) nounwind { } define <4 x i32> @srhadd4s(<4 x i32>* %A, <4 x i32>* %B) nounwind { -;CHECK-LABEL: srhadd4s: -;CHECK: srhadd.4s +; CHECK-LABEL: srhadd4s: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr q0, [x0] +; CHECK-NEXT: ldr q1, [x1] +; CHECK-NEXT: srhadd.4s v0, v0, v1 +; CHECK-NEXT: ret %tmp1 = load <4 x i32>, <4 x i32>* %A %tmp2 = load <4 x i32>, <4 x i32>* %B %tmp3 = call <4 x i32> @llvm.aarch64.neon.srhadd.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2) @@ -179,8 +252,12 @@ define <4 x i32> @srhadd4s(<4 x i32>* %A, <4 x i32>* %B) nounwind { } define <8 x i8> @urhadd8b(<8 x i8>* %A, <8 x i8>* %B) nounwind { -;CHECK-LABEL: urhadd8b: -;CHECK: urhadd.8b +; CHECK-LABEL: urhadd8b: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr d0, [x0] +; CHECK-NEXT: ldr d1, [x1] +; CHECK-NEXT: urhadd.8b v0, v0, v1 +; CHECK-NEXT: ret %tmp1 = load <8 x i8>, <8 x i8>* %A %tmp2 = load <8 x i8>, <8 x i8>* %B %tmp3 = call <8 x i8> @llvm.aarch64.neon.urhadd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) @@ -188,8 +265,12 @@ define <8 x i8> @urhadd8b(<8 x i8>* %A, <8 x i8>* %B) nounwind { } define <16 x i8> @urhadd16b(<16 x i8>* %A, <16 x i8>* %B) nounwind { -;CHECK-LABEL: urhadd16b: -;CHECK: urhadd.16b +; CHECK-LABEL: urhadd16b: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr q0, [x0] +; CHECK-NEXT: ldr q1, [x1] +; CHECK-NEXT: urhadd.16b v0, v0, v1 +; CHECK-NEXT: ret %tmp1 = load <16 x i8>, <16 x i8>* %A %tmp2 = load <16 x i8>, <16 x i8>* %B %tmp3 = call <16 x i8> @llvm.aarch64.neon.urhadd.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2) @@ -197,8 +278,12 @@ define <16 x i8> @urhadd16b(<16 x i8>* %A, <16 x i8>* %B) nounwind { } define <4 x i16> @urhadd4h(<4 x i16>* %A, <4 x i16>* %B) nounwind { -;CHECK-LABEL: urhadd4h: -;CHECK: urhadd.4h +; CHECK-LABEL: urhadd4h: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr d0, [x0] +; CHECK-NEXT: ldr d1, [x1] +; CHECK-NEXT: urhadd.4h v0, v0, v1 +; CHECK-NEXT: ret %tmp1 = load <4 x i16>, <4 x i16>* %A %tmp2 = load <4 x i16>, <4 x i16>* %B %tmp3 = call <4 x i16> @llvm.aarch64.neon.urhadd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) @@ -206,8 +291,12 @@ define <4 x i16> @urhadd4h(<4 x i16>* %A, <4 x i16>* %B) nounwind { } define <8 x i16> @urhadd8h(<8 x i16>* %A, <8 x i16>* %B) nounwind { -;CHECK-LABEL: urhadd8h: -;CHECK: urhadd.8h +; CHECK-LABEL: urhadd8h: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr q0, [x0] +; CHECK-NEXT: ldr q1, [x1] +; CHECK-NEXT: urhadd.8h v0, v0, v1 +; CHECK-NEXT: ret %tmp1 = load <8 x i16>, <8 x i16>* %A %tmp2 = load <8 x i16>, <8 x i16>* %B %tmp3 = call <8 x i16> @llvm.aarch64.neon.urhadd.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2) @@ -215,8 +304,12 @@ define <8 x i16> @urhadd8h(<8 x i16>* %A, <8 x i16>* %B) nounwind { } define <2 x i32> @urhadd2s(<2 x i32>* %A, <2 x i32>* %B) nounwind { -;CHECK-LABEL: urhadd2s: -;CHECK: urhadd.2s +; CHECK-LABEL: urhadd2s: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr d0, [x0] +; CHECK-NEXT: ldr d1, [x1] +; CHECK-NEXT: urhadd.2s v0, v0, v1 +; CHECK-NEXT: ret %tmp1 = load <2 x i32>, <2 x i32>* %A %tmp2 = load <2 x i32>, <2 x i32>* %B %tmp3 = call <2 x i32> @llvm.aarch64.neon.urhadd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) @@ -224,14 +317,210 @@ define <2 x i32> @urhadd2s(<2 x i32>* %A, <2 x i32>* %B) nounwind { } define <4 x i32> @urhadd4s(<4 x i32>* %A, <4 x i32>* %B) nounwind { -;CHECK-LABEL: urhadd4s: -;CHECK: urhadd.4s +; CHECK-LABEL: urhadd4s: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr q0, [x0] +; CHECK-NEXT: ldr q1, [x1] +; CHECK-NEXT: urhadd.4s v0, v0, v1 +; CHECK-NEXT: ret %tmp1 = load <4 x i32>, <4 x i32>* %A %tmp2 = load <4 x i32>, <4 x i32>* %B %tmp3 = call <4 x i32> @llvm.aarch64.neon.urhadd.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2) ret <4 x i32> %tmp3 } +define void @testLowerToSRHADD8b(<8 x i8> %src1, <8 x i8> %src2, <8 x i8>* %dest) nounwind { +; CHECK-LABEL: testLowerToSRHADD8b: +; CHECK: // %bb.0: +; CHECK-NEXT: srhadd.8b v0, v0, v1 +; CHECK-NEXT: str d0, [x0] +; CHECK-NEXT: ret + %sextsrc1 = sext <8 x i8> %src1 to <8 x i16> + %sextsrc2 = sext <8 x i8> %src2 to <8 x i16> + %add1 = add <8 x i16> %sextsrc1, %sextsrc2 + %add2 = add <8 x i16> %add1, + %resulti16 = lshr <8 x i16> %add2, + %result = trunc <8 x i16> %resulti16 to <8 x i8> + store <8 x i8> %result, <8 x i8>* %dest, align 8 + ret void +} + +define void @testLowerToSRHADD4h(<4 x i16> %src1, <4 x i16> %src2, <4 x i16>* %dest) nounwind { +; CHECK-LABEL: testLowerToSRHADD4h: +; CHECK: // %bb.0: +; CHECK-NEXT: srhadd.4h v0, v0, v1 +; CHECK-NEXT: str d0, [x0] +; CHECK-NEXT: ret + %sextsrc1 = sext <4 x i16> %src1 to <4 x i32> + %sextsrc2 = sext <4 x i16> %src2 to <4 x i32> + %add1 = add <4 x i32> %sextsrc1, %sextsrc2 + %add2 = add <4 x i32> %add1, + %resulti16 = lshr <4 x i32> %add2, + %result = trunc <4 x i32> %resulti16 to <4 x i16> + store <4 x i16> %result, <4 x i16>* %dest, align 8 + ret void +} + +define void @testLowerToSRHADD2s(<2 x i32> %src1, <2 x i32> %src2, <2 x i32>* %dest) nounwind { +; CHECK-LABEL: testLowerToSRHADD2s: +; CHECK: // %bb.0: +; CHECK-NEXT: srhadd.2s v0, v0, v1 +; CHECK-NEXT: str d0, [x0] +; CHECK-NEXT: ret + %sextsrc1 = sext <2 x i32> %src1 to <2 x i64> + %sextsrc2 = sext <2 x i32> %src2 to <2 x i64> + %add1 = add <2 x i64> %sextsrc1, %sextsrc2 + %add2 = add <2 x i64> %add1, + %resulti16 = lshr <2 x i64> %add2, + %result = trunc <2 x i64> %resulti16 to <2 x i32> + store <2 x i32> %result, <2 x i32>* %dest, align 8 + ret void +} + +define void @testLowerToSRHADD16b(<16 x i8> %src1, <16 x i8> %src2, <16 x i8>* %dest) nounwind { +; CHECK-LABEL: testLowerToSRHADD16b: +; CHECK: // %bb.0: +; CHECK-NEXT: srhadd.16b v0, v0, v1 +; CHECK-NEXT: str q0, [x0] +; CHECK-NEXT: ret + %sextsrc1 = sext <16 x i8> %src1 to <16 x i16> + %sextsrc2 = sext <16 x i8> %src2 to <16 x i16> + %add1 = add <16 x i16> %sextsrc1, %sextsrc2 + %add2 = add <16 x i16> %add1, + %resulti16 = lshr <16 x i16> %add2, + %result = trunc <16 x i16> %resulti16 to <16 x i8> + store <16 x i8> %result, <16 x i8>* %dest, align 16 + ret void +} + +define void @testLowerToSRHADD8h(<8 x i16> %src1, <8 x i16> %src2, <8 x i16>* %dest) nounwind { +; CHECK-LABEL: testLowerToSRHADD8h: +; CHECK: // %bb.0: +; CHECK-NEXT: srhadd.8h v0, v0, v1 +; CHECK-NEXT: str q0, [x0] +; CHECK-NEXT: ret + %sextsrc1 = sext <8 x i16> %src1 to <8 x i32> + %sextsrc2 = sext <8 x i16> %src2 to <8 x i32> + %add1 = add <8 x i32> %sextsrc1, %sextsrc2 + %add2 = add <8 x i32> %add1, + %resulti16 = lshr <8 x i32> %add2, + %result = trunc <8 x i32> %resulti16 to <8 x i16> + store <8 x i16> %result, <8 x i16>* %dest, align 16 + ret void +} + +define void @testLowerToSRHADD4s(<4 x i32> %src1, <4 x i32> %src2, <4 x i32>* %dest) nounwind { +; CHECK-LABEL: testLowerToSRHADD4s: +; CHECK: // %bb.0: +; CHECK-NEXT: srhadd.4s v0, v0, v1 +; CHECK-NEXT: str q0, [x0] +; CHECK-NEXT: ret + %sextsrc1 = sext <4 x i32> %src1 to <4 x i64> + %sextsrc2 = sext <4 x i32> %src2 to <4 x i64> + %add1 = add <4 x i64> %sextsrc1, %sextsrc2 + %add2 = add <4 x i64> %add1, + %resulti16 = lshr <4 x i64> %add2, + %result = trunc <4 x i64> %resulti16 to <4 x i32> + store <4 x i32> %result, <4 x i32>* %dest, align 16 + ret void +} + +define void @testLowerToURHADD8b(<8 x i8> %src1, <8 x i8> %src2, <8 x i8>* %dest) nounwind { +; CHECK-LABEL: testLowerToURHADD8b: +; CHECK: // %bb.0: +; CHECK-NEXT: urhadd.8b v0, v0, v1 +; CHECK-NEXT: str d0, [x0] +; CHECK-NEXT: ret + %zextsrc1 = zext <8 x i8> %src1 to <8 x i16> + %zextsrc2 = zext <8 x i8> %src2 to <8 x i16> + %add1 = add <8 x i16> %zextsrc1, %zextsrc2 + %add2 = add <8 x i16> %add1, + %resulti16 = lshr <8 x i16> %add2, + %result = trunc <8 x i16> %resulti16 to <8 x i8> + store <8 x i8> %result, <8 x i8>* %dest, align 8 + ret void +} + +define void @testLowerToURHADD4h(<4 x i16> %src1, <4 x i16> %src2, <4 x i16>* %dest) nounwind { +; CHECK-LABEL: testLowerToURHADD4h: +; CHECK: // %bb.0: +; CHECK-NEXT: urhadd.4h v0, v0, v1 +; CHECK-NEXT: str d0, [x0] +; CHECK-NEXT: ret + %zextsrc1 = zext <4 x i16> %src1 to <4 x i32> + %zextsrc2 = zext <4 x i16> %src2 to <4 x i32> + %add1 = add <4 x i32> %zextsrc1, %zextsrc2 + %add2 = add <4 x i32> %add1, + %resulti16 = lshr <4 x i32> %add2, + %result = trunc <4 x i32> %resulti16 to <4 x i16> + store <4 x i16> %result, <4 x i16>* %dest, align 8 + ret void +} + +define void @testLowerToURHADD2s(<2 x i32> %src1, <2 x i32> %src2, <2 x i32>* %dest) nounwind { +; CHECK-LABEL: testLowerToURHADD2s: +; CHECK: // %bb.0: +; CHECK-NEXT: urhadd.2s v0, v0, v1 +; CHECK-NEXT: str d0, [x0] +; CHECK-NEXT: ret + %zextsrc1 = zext <2 x i32> %src1 to <2 x i64> + %zextsrc2 = zext <2 x i32> %src2 to <2 x i64> + %add1 = add <2 x i64> %zextsrc1, %zextsrc2 + %add2 = add <2 x i64> %add1, + %resulti16 = lshr <2 x i64> %add2, + %result = trunc <2 x i64> %resulti16 to <2 x i32> + store <2 x i32> %result, <2 x i32>* %dest, align 8 + ret void +} + +define void @testLowerToURHADD16b(<16 x i8> %src1, <16 x i8> %src2, <16 x i8>* %dest) nounwind { +; CHECK-LABEL: testLowerToURHADD16b: +; CHECK: // %bb.0: +; CHECK-NEXT: urhadd.16b v0, v0, v1 +; CHECK-NEXT: str q0, [x0] +; CHECK-NEXT: ret + %zextsrc1 = zext <16 x i8> %src1 to <16 x i16> + %zextsrc2 = zext <16 x i8> %src2 to <16 x i16> + %add1 = add <16 x i16> %zextsrc1, %zextsrc2 + %add2 = add <16 x i16> %add1, + %resulti16 = lshr <16 x i16> %add2, + %result = trunc <16 x i16> %resulti16 to <16 x i8> + store <16 x i8> %result, <16 x i8>* %dest, align 16 + ret void +} + +define void @testLowerToURHADD8h(<8 x i16> %src1, <8 x i16> %src2, <8 x i16>* %dest) nounwind { +; CHECK-LABEL: testLowerToURHADD8h: +; CHECK: // %bb.0: +; CHECK-NEXT: urhadd.8h v0, v0, v1 +; CHECK-NEXT: str q0, [x0] +; CHECK-NEXT: ret + %zextsrc1 = zext <8 x i16> %src1 to <8 x i32> + %zextsrc2 = zext <8 x i16> %src2 to <8 x i32> + %add1 = add <8 x i32> %zextsrc1, %zextsrc2 + %add2 = add <8 x i32> %add1, + %resulti16 = lshr <8 x i32> %add2, + %result = trunc <8 x i32> %resulti16 to <8 x i16> + store <8 x i16> %result, <8 x i16>* %dest, align 16 + ret void +} + +define void @testLowerToURHADD4s(<4 x i32> %src1, <4 x i32> %src2, <4 x i32>* %dest) nounwind { +; CHECK-LABEL: testLowerToURHADD4s: +; CHECK: // %bb.0: +; CHECK-NEXT: urhadd.4s v0, v0, v1 +; CHECK-NEXT: str q0, [x0] +; CHECK-NEXT: ret + %zextsrc1 = zext <4 x i32> %src1 to <4 x i64> + %zextsrc2 = zext <4 x i32> %src2 to <4 x i64> + %add1 = add <4 x i64> %zextsrc1, %zextsrc2 + %add2 = add <4 x i64> %add1, + %resulti16 = lshr <4 x i64> %add2, + %result = trunc <4 x i64> %resulti16 to <4 x i32> + store <4 x i32> %result, <4 x i32>* %dest, align 16 + ret void +} + declare <8 x i8> @llvm.aarch64.neon.srhadd.v8i8(<8 x i8>, <8 x i8>) nounwind readnone declare <4 x i16> @llvm.aarch64.neon.srhadd.v4i16(<4 x i16>, <4 x i16>) nounwind readnone declare <2 x i32> @llvm.aarch64.neon.srhadd.v2i32(<2 x i32>, <2 x i32>) nounwind readnone diff --git a/llvm/test/Transforms/InstCombine/2008-01-21-MulTrunc.ll b/llvm/test/Transforms/InstCombine/2008-01-21-MulTrunc.ll index 999b5d58f43898..89e4a3c1aaed63 100644 --- a/llvm/test/Transforms/InstCombine/2008-01-21-MulTrunc.ll +++ b/llvm/test/Transforms/InstCombine/2008-01-21-MulTrunc.ll @@ -35,12 +35,10 @@ define <2 x i16> @test1_vec(<2 x i16> %a) { define <2 x i16> @test1_vec_nonuniform(<2 x i16> %a) { ; CHECK-LABEL: @test1_vec_nonuniform( -; CHECK-NEXT: [[B:%.*]] = zext <2 x i16> [[A:%.*]] to <2 x i32> -; CHECK-NEXT: [[C:%.*]] = lshr <2 x i32> [[B]], -; CHECK-NEXT: [[D:%.*]] = mul nuw nsw <2 x i32> [[B]], -; CHECK-NEXT: [[E:%.*]] = or <2 x i32> [[C]], [[D]] -; CHECK-NEXT: [[F:%.*]] = trunc <2 x i32> [[E]] to <2 x i16> -; CHECK-NEXT: ret <2 x i16> [[F]] +; CHECK-NEXT: [[C:%.*]] = lshr <2 x i16> [[A:%.*]], +; CHECK-NEXT: [[D:%.*]] = mul <2 x i16> [[A]], +; CHECK-NEXT: [[E:%.*]] = or <2 x i16> [[C]], [[D]] +; CHECK-NEXT: ret <2 x i16> [[E]] ; %b = zext <2 x i16> %a to <2 x i32> %c = lshr <2 x i32> %b, diff --git a/llvm/test/Transforms/InstCombine/cast.ll b/llvm/test/Transforms/InstCombine/cast.ll index 10d59bfff57f47..18b411103122c0 100644 --- a/llvm/test/Transforms/InstCombine/cast.ll +++ b/llvm/test/Transforms/InstCombine/cast.ll @@ -502,12 +502,10 @@ define <2 x i16> @test40vec(<2 x i16> %a) { define <2 x i16> @test40vec_nonuniform(<2 x i16> %a) { ; ALL-LABEL: @test40vec_nonuniform( -; ALL-NEXT: [[T:%.*]] = zext <2 x i16> [[A:%.*]] to <2 x i32> -; ALL-NEXT: [[T21:%.*]] = lshr <2 x i32> [[T]], -; ALL-NEXT: [[T5:%.*]] = shl <2 x i32> [[T]], -; ALL-NEXT: [[T32:%.*]] = or <2 x i32> [[T21]], [[T5]] -; ALL-NEXT: [[R:%.*]] = trunc <2 x i32> [[T32]] to <2 x i16> -; ALL-NEXT: ret <2 x i16> [[R]] +; ALL-NEXT: [[T21:%.*]] = lshr <2 x i16> [[A:%.*]], +; ALL-NEXT: [[T5:%.*]] = shl <2 x i16> [[A]], +; ALL-NEXT: [[T32:%.*]] = or <2 x i16> [[T21]], [[T5]] +; ALL-NEXT: ret <2 x i16> [[T32]] ; %t = zext <2 x i16> %a to <2 x i32> %t21 = lshr <2 x i32> %t, diff --git a/llvm/test/Transforms/InstCombine/trunc.ll b/llvm/test/Transforms/InstCombine/trunc.ll index 4e9f440978a59b..d8a615cc4c9a38 100644 --- a/llvm/test/Transforms/InstCombine/trunc.ll +++ b/llvm/test/Transforms/InstCombine/trunc.ll @@ -286,12 +286,11 @@ define <2 x i64> @test8_vec(<2 x i32> %A, <2 x i32> %B) { define <2 x i64> @test8_vec_nonuniform(<2 x i32> %A, <2 x i32> %B) { ; CHECK-LABEL: @test8_vec_nonuniform( -; CHECK-NEXT: [[C:%.*]] = zext <2 x i32> [[A:%.*]] to <2 x i128> -; CHECK-NEXT: [[D:%.*]] = zext <2 x i32> [[B:%.*]] to <2 x i128> -; CHECK-NEXT: [[E:%.*]] = shl <2 x i128> [[D]], -; CHECK-NEXT: [[F:%.*]] = or <2 x i128> [[E]], [[C]] -; CHECK-NEXT: [[G:%.*]] = trunc <2 x i128> [[F]] to <2 x i64> -; CHECK-NEXT: ret <2 x i64> [[G]] +; CHECK-NEXT: [[C:%.*]] = zext <2 x i32> [[A:%.*]] to <2 x i64> +; CHECK-NEXT: [[D:%.*]] = zext <2 x i32> [[B:%.*]] to <2 x i64> +; CHECK-NEXT: [[E:%.*]] = shl <2 x i64> [[D]], +; CHECK-NEXT: [[F:%.*]] = or <2 x i64> [[E]], [[C]] +; CHECK-NEXT: ret <2 x i64> [[F]] ; %C = zext <2 x i32> %A to <2 x i128> %D = zext <2 x i32> %B to <2 x i128> @@ -343,12 +342,11 @@ define i8 @test10(i32 %X) { define i64 @test11(i32 %A, i32 %B) { ; CHECK-LABEL: @test11( -; CHECK-NEXT: [[C:%.*]] = zext i32 [[A:%.*]] to i128 +; CHECK-NEXT: [[C:%.*]] = zext i32 [[A:%.*]] to i64 ; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[B:%.*]], 31 -; CHECK-NEXT: [[E:%.*]] = zext i32 [[TMP1]] to i128 -; CHECK-NEXT: [[F:%.*]] = shl i128 [[C]], [[E]] -; CHECK-NEXT: [[G:%.*]] = trunc i128 [[F]] to i64 -; CHECK-NEXT: ret i64 [[G]] +; CHECK-NEXT: [[E:%.*]] = zext i32 [[TMP1]] to i64 +; CHECK-NEXT: [[F:%.*]] = shl i64 [[C]], [[E]] +; CHECK-NEXT: ret i64 [[F]] ; %C = zext i32 %A to i128 %D = zext i32 %B to i128 @@ -360,12 +358,11 @@ define i64 @test11(i32 %A, i32 %B) { define <2 x i64> @test11_vec(<2 x i32> %A, <2 x i32> %B) { ; CHECK-LABEL: @test11_vec( -; CHECK-NEXT: [[C:%.*]] = zext <2 x i32> [[A:%.*]] to <2 x i128> +; CHECK-NEXT: [[C:%.*]] = zext <2 x i32> [[A:%.*]] to <2 x i64> ; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[B:%.*]], -; CHECK-NEXT: [[E:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i128> -; CHECK-NEXT: [[F:%.*]] = shl <2 x i128> [[C]], [[E]] -; CHECK-NEXT: [[G:%.*]] = trunc <2 x i128> [[F]] to <2 x i64> -; CHECK-NEXT: ret <2 x i64> [[G]] +; CHECK-NEXT: [[E:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64> +; CHECK-NEXT: [[F:%.*]] = shl <2 x i64> [[C]], [[E]] +; CHECK-NEXT: ret <2 x i64> [[F]] ; %C = zext <2 x i32> %A to <2 x i128> %D = zext <2 x i32> %B to <2 x i128> @@ -377,12 +374,11 @@ define <2 x i64> @test11_vec(<2 x i32> %A, <2 x i32> %B) { define <2 x i64> @test11_vec_nonuniform(<2 x i32> %A, <2 x i32> %B) { ; CHECK-LABEL: @test11_vec_nonuniform( -; CHECK-NEXT: [[C:%.*]] = zext <2 x i32> [[A:%.*]] to <2 x i128> +; CHECK-NEXT: [[C:%.*]] = zext <2 x i32> [[A:%.*]] to <2 x i64> ; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[B:%.*]], -; CHECK-NEXT: [[E:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i128> -; CHECK-NEXT: [[F:%.*]] = shl <2 x i128> [[C]], [[E]] -; CHECK-NEXT: [[G:%.*]] = trunc <2 x i128> [[F]] to <2 x i64> -; CHECK-NEXT: ret <2 x i64> [[G]] +; CHECK-NEXT: [[E:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64> +; CHECK-NEXT: [[F:%.*]] = shl <2 x i64> [[C]], [[E]] +; CHECK-NEXT: ret <2 x i64> [[F]] ; %C = zext <2 x i32> %A to <2 x i128> %D = zext <2 x i32> %B to <2 x i128> @@ -411,12 +407,11 @@ define <2 x i64> @test11_vec_undef(<2 x i32> %A, <2 x i32> %B) { define i64 @test12(i32 %A, i32 %B) { ; CHECK-LABEL: @test12( -; CHECK-NEXT: [[C:%.*]] = zext i32 [[A:%.*]] to i128 +; CHECK-NEXT: [[C:%.*]] = zext i32 [[A:%.*]] to i64 ; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[B:%.*]], 31 -; CHECK-NEXT: [[E:%.*]] = zext i32 [[TMP1]] to i128 -; CHECK-NEXT: [[F:%.*]] = lshr i128 [[C]], [[E]] -; CHECK-NEXT: [[G:%.*]] = trunc i128 [[F]] to i64 -; CHECK-NEXT: ret i64 [[G]] +; CHECK-NEXT: [[E:%.*]] = zext i32 [[TMP1]] to i64 +; CHECK-NEXT: [[F:%.*]] = lshr i64 [[C]], [[E]] +; CHECK-NEXT: ret i64 [[F]] ; %C = zext i32 %A to i128 %D = zext i32 %B to i128 @@ -428,12 +423,11 @@ define i64 @test12(i32 %A, i32 %B) { define <2 x i64> @test12_vec(<2 x i32> %A, <2 x i32> %B) { ; CHECK-LABEL: @test12_vec( -; CHECK-NEXT: [[C:%.*]] = zext <2 x i32> [[A:%.*]] to <2 x i128> +; CHECK-NEXT: [[C:%.*]] = zext <2 x i32> [[A:%.*]] to <2 x i64> ; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[B:%.*]], -; CHECK-NEXT: [[E:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i128> -; CHECK-NEXT: [[F:%.*]] = lshr <2 x i128> [[C]], [[E]] -; CHECK-NEXT: [[G:%.*]] = trunc <2 x i128> [[F]] to <2 x i64> -; CHECK-NEXT: ret <2 x i64> [[G]] +; CHECK-NEXT: [[E:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64> +; CHECK-NEXT: [[F:%.*]] = lshr <2 x i64> [[C]], [[E]] +; CHECK-NEXT: ret <2 x i64> [[F]] ; %C = zext <2 x i32> %A to <2 x i128> %D = zext <2 x i32> %B to <2 x i128> @@ -445,12 +439,11 @@ define <2 x i64> @test12_vec(<2 x i32> %A, <2 x i32> %B) { define <2 x i64> @test12_vec_nonuniform(<2 x i32> %A, <2 x i32> %B) { ; CHECK-LABEL: @test12_vec_nonuniform( -; CHECK-NEXT: [[C:%.*]] = zext <2 x i32> [[A:%.*]] to <2 x i128> +; CHECK-NEXT: [[C:%.*]] = zext <2 x i32> [[A:%.*]] to <2 x i64> ; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[B:%.*]], -; CHECK-NEXT: [[E:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i128> -; CHECK-NEXT: [[F:%.*]] = lshr <2 x i128> [[C]], [[E]] -; CHECK-NEXT: [[G:%.*]] = trunc <2 x i128> [[F]] to <2 x i64> -; CHECK-NEXT: ret <2 x i64> [[G]] +; CHECK-NEXT: [[E:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64> +; CHECK-NEXT: [[F:%.*]] = lshr <2 x i64> [[C]], [[E]] +; CHECK-NEXT: ret <2 x i64> [[F]] ; %C = zext <2 x i32> %A to <2 x i128> %D = zext <2 x i32> %B to <2 x i128> @@ -479,12 +472,11 @@ define <2 x i64> @test12_vec_undef(<2 x i32> %A, <2 x i32> %B) { define i64 @test13(i32 %A, i32 %B) { ; CHECK-LABEL: @test13( -; CHECK-NEXT: [[C:%.*]] = sext i32 [[A:%.*]] to i128 +; CHECK-NEXT: [[C:%.*]] = sext i32 [[A:%.*]] to i64 ; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[B:%.*]], 31 -; CHECK-NEXT: [[E:%.*]] = zext i32 [[TMP1]] to i128 -; CHECK-NEXT: [[F:%.*]] = ashr i128 [[C]], [[E]] -; CHECK-NEXT: [[G:%.*]] = trunc i128 [[F]] to i64 -; CHECK-NEXT: ret i64 [[G]] +; CHECK-NEXT: [[E:%.*]] = zext i32 [[TMP1]] to i64 +; CHECK-NEXT: [[F:%.*]] = ashr i64 [[C]], [[E]] +; CHECK-NEXT: ret i64 [[F]] ; %C = sext i32 %A to i128 %D = zext i32 %B to i128 @@ -496,12 +488,11 @@ define i64 @test13(i32 %A, i32 %B) { define <2 x i64> @test13_vec(<2 x i32> %A, <2 x i32> %B) { ; CHECK-LABEL: @test13_vec( -; CHECK-NEXT: [[C:%.*]] = sext <2 x i32> [[A:%.*]] to <2 x i128> +; CHECK-NEXT: [[C:%.*]] = sext <2 x i32> [[A:%.*]] to <2 x i64> ; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[B:%.*]], -; CHECK-NEXT: [[E:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i128> -; CHECK-NEXT: [[F:%.*]] = ashr <2 x i128> [[C]], [[E]] -; CHECK-NEXT: [[G:%.*]] = trunc <2 x i128> [[F]] to <2 x i64> -; CHECK-NEXT: ret <2 x i64> [[G]] +; CHECK-NEXT: [[E:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64> +; CHECK-NEXT: [[F:%.*]] = ashr <2 x i64> [[C]], [[E]] +; CHECK-NEXT: ret <2 x i64> [[F]] ; %C = sext <2 x i32> %A to <2 x i128> %D = zext <2 x i32> %B to <2 x i128> @@ -513,12 +504,11 @@ define <2 x i64> @test13_vec(<2 x i32> %A, <2 x i32> %B) { define <2 x i64> @test13_vec_nonuniform(<2 x i32> %A, <2 x i32> %B) { ; CHECK-LABEL: @test13_vec_nonuniform( -; CHECK-NEXT: [[C:%.*]] = sext <2 x i32> [[A:%.*]] to <2 x i128> +; CHECK-NEXT: [[C:%.*]] = sext <2 x i32> [[A:%.*]] to <2 x i64> ; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[B:%.*]], -; CHECK-NEXT: [[E:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i128> -; CHECK-NEXT: [[F:%.*]] = ashr <2 x i128> [[C]], [[E]] -; CHECK-NEXT: [[G:%.*]] = trunc <2 x i128> [[F]] to <2 x i64> -; CHECK-NEXT: ret <2 x i64> [[G]] +; CHECK-NEXT: [[E:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64> +; CHECK-NEXT: [[F:%.*]] = ashr <2 x i64> [[C]], [[E]] +; CHECK-NEXT: ret <2 x i64> [[F]] ; %C = sext <2 x i32> %A to <2 x i128> %D = zext <2 x i32> %B to <2 x i128> diff --git a/llvm/test/tools/yaml2obj/ELF/DWARF/debug-gnu-pubnames.yaml b/llvm/test/tools/yaml2obj/ELF/DWARF/debug-gnu-pubnames.yaml new file mode 100644 index 00000000000000..71c7981117554e --- /dev/null +++ b/llvm/test/tools/yaml2obj/ELF/DWARF/debug-gnu-pubnames.yaml @@ -0,0 +1,261 @@ +## Test that yaml2obj emits .debug_gnu_pubnames section. + +## a) Generate the '.debug_gnu_pubnames' section from the 'DWARF' entry. + +## Generate and verify a 32-bit little endian .debug_gnu_pubnames section. + +# RUN: yaml2obj --docnum=1 -DENDIAN=ELFDATA2LSB %s -o %t1.le.o +# RUN: llvm-readobj --sections --section-data %t1.le.o | \ +# RUN: FileCheck -DSIZE=32 -DADDRALIGN=1 %s --check-prefixes=SHDR,DWARF32-LE + +# SHDR: Index: 1 +# SHDR-NEXT: Name: .debug_gnu_pubnames (1) +# SHDR-NEXT: Type: SHT_PROGBITS (0x1) +# SHDR-NEXT: Flags [ (0x0) +# SHDR-NEXT: ] +# SHDR-NEXT: Address: 0x0 +# SHDR-NEXT: Offset: 0x40 +# SHDR-NEXT: Size: [[SIZE]] +# SHDR-NEXT: Link: 0 +# SHDR-NEXT: Info: 0 +# SHDR-NEXT: AddressAlignment: [[ADDRALIGN]] +# SHDR-NEXT: EntrySize: 0 +# DWARF32-LE-NEXT: SectionData ( +# DWARF32-LE-NEXT: 0000: 34120000 02003412 00002143 00007856 |4.....4...!C..xV| +## ^------- unit_length (4-byte) +## ^--- version (2-byte) +## ^-------- debug_info_offset (4-byte) +## ^-------- debug_info_length (4-byte) +## ^--- offset (4-byte) +# DWARF32-LE-NEXT: 0010: 34123061 62630021 43658730 64656600 |4.0abc.!Ce.0def.| +## ---- +## ^- descriptor (1-byte) +## ^-------- name "abc\0" +## ^-------- offset (4-byte) +## ^- descriptor (1-byte) +## ^------- name "def\0" +# DWARF32-LE-NEXT: ) + +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: [[ENDIAN]] + Type: ET_EXEC + Machine: EM_X86_64 +DWARF: + debug_gnu_pubnames: + Length: + TotalLength: 0x1234 + Version: 2 + UnitOffset: 0x1234 + UnitSize: 0x4321 + Entries: + - DieOffset: 0x12345678 + Descriptor: 0x30 + Name: abc + - DieOffset: 0x87654321 + Descriptor: 0x30 + Name: def + +## Generate and verify a 32-bit big endian .debug_gnu_pubnames section. + +# RUN: yaml2obj --docnum=1 -DENDIAN=ELFDATA2MSB %s -o %t1.be.o +# RUN: llvm-readobj --sections --section-data %t1.be.o | \ +# RUN: FileCheck -DSIZE=32 -DADDRALIGN=1 %s --check-prefixes=SHDR,DWARF32-BE + +# DWARF32-BE-NEXT: SectionData ( +# DWARF32-BE-NEXT: 0000: 00001234 00020000 12340000 43211234 |...4.....4..C!.4| +## ^------- unit_length (4-byte) +## ^--- version (2-byte) +## ^-------- debug_info_offset (4-byte) +## ^-------- debug_info_length (4-byte) +## ^--- offset (4-byte) +# DWARF32-BE-NEXT: 0010: 56783061 62630087 65432130 64656600 |Vx0abc..eC!0def.| +## ---- +## ^- descriptor (1-byte) +## ^-------- name "abc\0" +## ^-------- offset (4-byte) +## ^- descriptor (1-byte) +## ^------- name "def\0" +# DWARF32-BE-NEXT: ) + +## b) Generate the .debug_gnu_pubnames section from raw section content. + +# RUN: yaml2obj --docnum=2 %s -o %t2.o +# RUN: llvm-readobj --sections --section-data %t2.o | \ +# RUN: FileCheck %s -DADDRALIGN=0 -DSIZE=3 --check-prefixes=SHDR,ARBITRARY-CONTENT + +# ARBITRARY-CONTENT: SectionData ( +# ARBITRARY-CONTENT-NEXT: 0000: 112233 +# ARBITRARY-CONTENT-NEXT: ) + +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_EXEC + Machine: EM_X86_64 +Sections: + - Name: .debug_gnu_pubnames + Type: SHT_PROGBITS + Content: "112233" + +## c) Generate the .debug_gnu_pubnames section when the "Size" is specified. + +# RUN: yaml2obj --docnum=3 %s -o %t3.o +# RUN: llvm-readobj --sections --section-data %t3.o | \ +# RUN: FileCheck -DSIZE=16 -DADDRALIGN=0 %s --check-prefixes=SHDR,SIZE + +# SIZE: SectionData ( +# SIZE-NEXT: 0000: 00000000 00000000 00000000 00000000 |................| +# SIZE-NEXT: ) + +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_EXEC + Machine: EM_X86_64 +Sections: + - Name: .debug_gnu_pubnames + Type: SHT_PROGBITS + Size: 0x10 + +## d) Test that yaml2obj emits an error message when both the "Size" and the +## "debug_gnu_pubnames" entry are specified at the same time. + +# RUN: not yaml2obj --docnum=4 %s 2>&1 | FileCheck %s --check-prefix=ERROR + +# ERROR: yaml2obj: error: cannot specify section '.debug_gnu_pubnames' contents in the 'DWARF' entry and the 'Content' or 'Size' in the 'Sections' entry at the same time + +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_EXEC + Machine: EM_X86_64 +Sections: + - Name: .debug_gnu_pubnames + Type: SHT_PROGBITS + Size: 0x10 +DWARF: + debug_gnu_pubnames: + Length: + TotalLength: 0x1234 + Version: 2 + UnitOffset: 0x1234 + UnitSize: 0x4321 + Entries: [] + +## e) Test that yaml2obj emits an error message when both the "Content" and the +## "debug_gnu_pubnames" entry are specified at the same time. + +# RUN: not yaml2obj --docnum=5 %s 2>&1 | FileCheck %s --check-prefix=ERROR + +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_EXEC + Machine: EM_X86_64 +Sections: + - Name: .debug_gnu_pubnames + Type: SHT_PROGBITS + Content: "00" +DWARF: + debug_gnu_pubnames: + Length: + TotalLength: 0x1234 + Version: 2 + UnitOffset: 0x1234 + UnitSize: 0x4321 + Entries: [] + +## f) Test that all the properties can be overridden by the section header when +## the "debug_gnu_pubnames" entry doesn't exist. + +# RUN: yaml2obj --docnum=6 %s -o %t6.o +# RUN: llvm-readelf --sections %t6.o | FileCheck %s --check-prefix=OVERRIDDEN + +# OVERRIDDEN: [Nr] Name Type Address Off Size ES Flg Lk Inf Al +# OVERRIDDEN: [ 1] .debug_gnu_pubnames STRTAB 0000000000002020 000050 00000e 01 A 2 1 2 +# OVERRIDDEN-NEXT: [ 2] .sec STRTAB 0000000000000000 00005e 000000 00 0 0 0 + +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_EXEC + Machine: EM_X86_64 +Sections: + - Name: .debug_gnu_pubnames + Type: SHT_STRTAB ## SHT_PROGBITS by default. + Flags: [SHF_ALLOC] ## 0 by default. + Link: .sec ## 0 by default. + EntSize: 1 ## 0 by default. + Info: 1 ## 0 by default. + AddressAlign: 2 ## 0 by default. + Address: 0x2020 ## 0x00 by default. + Offset: 0x50 ## 0x40 for the first section. + Size: 0x0e ## Set the "Size" so that we can reuse the check tag "OVERRIDDEN". + - Name: .sec ## Linked by .debug_gnu_pubnames. + Type: SHT_STRTAB + +## g) Test that all the properties can be overridden by the section header when +## the "debug_gnu_pubnames" entry exists. + +# RUN: yaml2obj --docnum=7 %s -o %t7.o +# RUN: llvm-readelf --sections %t7.o | FileCheck %s --check-prefix=OVERRIDDEN + +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_EXEC + Machine: EM_X86_64 +Sections: + - Name: .debug_gnu_pubnames + Type: SHT_STRTAB ## SHT_PROGBITS by default. + Flags: [SHF_ALLOC] ## 0 by default. + Link: .sec ## 0 by default. + EntSize: 1 ## 0 by default. + Info: 1 ## 0 by default. + AddressAlign: 2 ## 1 by default. + Address: 0x2020 ## 0x00 by default. + Offset: 0x50 ## 0x40 for the first section. + - Name: .sec ## Linked by .debug_gnu_pubnames. + Type: SHT_STRTAB +DWARF: + debug_gnu_pubnames: + Length: + TotalLength: 0x1234 + Version: 2 + UnitOffset: 0x1234 + UnitSize: 0x4321 + Entries: [] + +## h) Test that yaml2obj emits an error if 'Descriptor' is missing. + +# RUN: not yaml2obj --docnum=8 %s -o %t8.o 2>&1 | FileCheck %s --check-prefix=MISSING-KEY --ignore-case + +# MISSING-KEY: YAML:260:9: error: missing required key 'Descriptor' +# MISSING-KEY-NEXT: - DieOffset: 0x12345678 +# MISSING-KEY-NEXT: ^ +# MISSING-KEY-NEXT: yaml2obj: error: failed to parse YAML input: Invalid argument + +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_EXEC + Machine: EM_X86_64 +DWARF: + debug_gnu_pubnames: + Length: + TotalLength: 0x1234 + Version: 2 + UnitOffset: 0x1234 + UnitSize: 0x4321 + Entries: + - DieOffset: 0x12345678 + Name: abc diff --git a/llvm/test/tools/yaml2obj/ELF/DWARF/debug-gnu-pubtypes.yaml b/llvm/test/tools/yaml2obj/ELF/DWARF/debug-gnu-pubtypes.yaml new file mode 100644 index 00000000000000..b88d21b5fc7af5 --- /dev/null +++ b/llvm/test/tools/yaml2obj/ELF/DWARF/debug-gnu-pubtypes.yaml @@ -0,0 +1,261 @@ +## Test that yaml2obj emits .debug_gnu_pubtypes section. + +## a) Generate the '.debug_gnu_pubtypes' section from the 'DWARF' entry. + +## Generate and verify a 32-bit little endian .debug_gnu_pubtypes section. + +# RUN: yaml2obj --docnum=1 -DENDIAN=ELFDATA2LSB %s -o %t1.le.o +# RUN: llvm-readobj --sections --section-data %t1.le.o | \ +# RUN: FileCheck -DSIZE=32 -DADDRALIGN=1 %s --check-prefixes=SHDR,DWARF32-LE + +# SHDR: Index: 1 +# SHDR-NEXT: Name: .debug_gnu_pubtypes (1) +# SHDR-NEXT: Type: SHT_PROGBITS (0x1) +# SHDR-NEXT: Flags [ (0x0) +# SHDR-NEXT: ] +# SHDR-NEXT: Address: 0x0 +# SHDR-NEXT: Offset: 0x40 +# SHDR-NEXT: Size: [[SIZE]] +# SHDR-NEXT: Link: 0 +# SHDR-NEXT: Info: 0 +# SHDR-NEXT: AddressAlignment: [[ADDRALIGN]] +# SHDR-NEXT: EntrySize: 0 +# DWARF32-LE-NEXT: SectionData ( +# DWARF32-LE-NEXT: 0000: 34120000 02003412 00002143 00007856 |4.....4...!C..xV| +## ^------- unit_length (4-byte) +## ^--- version (2-byte) +## ^-------- debug_info_offset (4-byte) +## ^-------- debug_info_length (4-byte) +## ^--- offset (4-byte) +# DWARF32-LE-NEXT: 0010: 34121261 62630021 43658734 64656600 |4..abc.!Ce.4def.| +## ---- +## ^- descriptor (1-byte) +## ^-------- name "abc\0" +## ^-------- offset (4-byte) +## ^- descriptor (1-byte) +## ^------- name "def\0" +# DWARF32-LE-NEXT: ) + +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: [[ENDIAN]] + Type: ET_EXEC + Machine: EM_X86_64 +DWARF: + debug_gnu_pubtypes: + Length: + TotalLength: 0x1234 + Version: 2 + UnitOffset: 0x1234 + UnitSize: 0x4321 + Entries: + - DieOffset: 0x12345678 + Descriptor: 0x12 + Name: abc + - DieOffset: 0x87654321 + Descriptor: 0x34 + Name: def + +## Generate and verify a 32-bit big endian .debug_gnu_pubtypes section. + +# RUN: yaml2obj --docnum=1 -DENDIAN=ELFDATA2MSB %s -o %t1.be.o +# RUN: llvm-readobj --sections --section-data %t1.be.o | \ +# RUN: FileCheck -DSIZE=32 -DADDRALIGN=1 %s --check-prefixes=SHDR,DWARF32-BE + +# DWARF32-BE-NEXT: SectionData ( +# DWARF32-BE-NEXT: 0000: 00001234 00020000 12340000 43211234 |...4.....4..C!.4| +## ^------- unit_length (4-byte) +## ^--- version (2-byte) +## ^-------- debug_info_offset (4-byte) +## ^-------- debug_info_length (4-byte) +## ^--- offset (4-byte) +# DWARF32-BE-NEXT: 0010: 56781261 62630087 65432134 64656600 |Vx.abc..eC!4def.| +## ---- +## ^- descriptor (1-byte) +## ^-------- name "abc\0" +## ^-------- offset (4-byte) +## ^- descriptor (1-byte) +## ^------- name "def\0" +# DWARF32-BE-NEXT: ) + +## b) Generate the .debug_gnu_pubtypes section from raw section content. + +# RUN: yaml2obj --docnum=2 %s -o %t2.o +# RUN: llvm-readobj --sections --section-data %t2.o | \ +# RUN: FileCheck %s -DADDRALIGN=0 -DSIZE=3 --check-prefixes=SHDR,ARBITRARY-CONTENT + +# ARBITRARY-CONTENT: SectionData ( +# ARBITRARY-CONTENT-NEXT: 0000: 112233 +# ARBITRARY-CONTENT-NEXT: ) + +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_EXEC + Machine: EM_X86_64 +Sections: + - Name: .debug_gnu_pubtypes + Type: SHT_PROGBITS + Content: "112233" + +## c) Generate the .debug_gnu_pubtypes section when the "Size" is specified. + +# RUN: yaml2obj --docnum=3 %s -o %t3.o +# RUN: llvm-readobj --sections --section-data %t3.o | \ +# RUN: FileCheck -DSIZE=16 -DADDRALIGN=0 %s --check-prefixes=SHDR,SIZE + +# SIZE: SectionData ( +# SIZE-NEXT: 0000: 00000000 00000000 00000000 00000000 |................| +# SIZE-NEXT: ) + +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_EXEC + Machine: EM_X86_64 +Sections: + - Name: .debug_gnu_pubtypes + Type: SHT_PROGBITS + Size: 0x10 + +## d) Test that yaml2obj emits an error message when both the "Size" and the +## "debug_gnu_pubtypes" entry are specified at the same time. + +# RUN: not yaml2obj --docnum=4 %s 2>&1 | FileCheck %s --check-prefix=ERROR + +# ERROR: yaml2obj: error: cannot specify section '.debug_gnu_pubtypes' contents in the 'DWARF' entry and the 'Content' or 'Size' in the 'Sections' entry at the same time + +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_EXEC + Machine: EM_X86_64 +Sections: + - Name: .debug_gnu_pubtypes + Type: SHT_PROGBITS + Size: 0x10 +DWARF: + debug_gnu_pubtypes: + Length: + TotalLength: 0x1234 + Version: 2 + UnitOffset: 0x1234 + UnitSize: 0x4321 + Entries: [] + +## e) Test that yaml2obj emits an error message when both the "Content" and the +## "debug_gnu_pubtypes" entry are specified at the same time. + +# RUN: not yaml2obj --docnum=5 %s 2>&1 | FileCheck %s --check-prefix=ERROR + +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_EXEC + Machine: EM_X86_64 +Sections: + - Name: .debug_gnu_pubtypes + Type: SHT_PROGBITS + Content: "00" +DWARF: + debug_gnu_pubtypes: + Length: + TotalLength: 0x1234 + Version: 2 + UnitOffset: 0x1234 + UnitSize: 0x4321 + Entries: [] + +## f) Test that all the properties can be overridden by the section header when +## the "debug_gnu_pubtypes" entry doesn't exist. + +# RUN: yaml2obj --docnum=6 %s -o %t6.o +# RUN: llvm-readelf --sections %t6.o | FileCheck %s --check-prefix=OVERRIDDEN + +# OVERRIDDEN: [Nr] Name Type Address Off Size ES Flg Lk Inf Al +# OVERRIDDEN: [ 1] .debug_gnu_pubtypes STRTAB 0000000000002020 000050 00000e 01 A 2 1 2 +# OVERRIDDEN-NEXT: [ 2] .sec STRTAB 0000000000000000 00005e 000000 00 0 0 0 + +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_EXEC + Machine: EM_X86_64 +Sections: + - Name: .debug_gnu_pubtypes + Type: SHT_STRTAB ## SHT_PROGBITS by default. + Flags: [SHF_ALLOC] ## 0 by default. + Link: .sec ## 0 by default. + EntSize: 1 ## 0 by default. + Info: 1 ## 0 by default. + AddressAlign: 2 ## 0 by default. + Address: 0x2020 ## 0x00 by default. + Offset: 0x50 ## 0x40 for the first section. + Size: 0x0e ## Set the "Size" so that we can reuse the check tag "OVERRIDDEN". + - Name: .sec ## Linked by .debug_gnu_pubtypes. + Type: SHT_STRTAB + +## g) Test that all the properties can be overridden by the section header when +## the "debug_gnu_pubtypes" entry exists. + +# RUN: yaml2obj --docnum=7 %s -o %t7.o +# RUN: llvm-readelf --sections %t7.o | FileCheck %s --check-prefix=OVERRIDDEN + +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_EXEC + Machine: EM_X86_64 +Sections: + - Name: .debug_gnu_pubtypes + Type: SHT_STRTAB ## SHT_PROGBITS by default. + Flags: [SHF_ALLOC] ## 0 by default. + Link: .sec ## 0 by default. + EntSize: 1 ## 0 by default. + Info: 1 ## 0 by default. + AddressAlign: 2 ## 1 by default. + Address: 0x2020 ## 0x00 by default. + Offset: 0x50 ## 0x40 for the first section. + - Name: .sec ## Linked by .debug_gnu_pubtypes. + Type: SHT_STRTAB +DWARF: + debug_gnu_pubtypes: + Length: + TotalLength: 0x1234 + Version: 2 + UnitOffset: 0x1234 + UnitSize: 0x4321 + Entries: [] + +## h) Test that yaml2obj emits an error if 'Descriptor' is missing. + +# RUN: not yaml2obj --docnum=8 %s -o %t8.o 2>&1 | FileCheck %s --check-prefix=MISSING-KEY --ignore-case + +# MISSING-KEY: YAML:260:9: error: missing required key 'Descriptor' +# MISSING-KEY-NEXT: - DieOffset: 0x12345678 +# MISSING-KEY-NEXT: ^ +# MISSING-KEY-NEXT: yaml2obj: error: failed to parse YAML input: Invalid argument + +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_EXEC + Machine: EM_X86_64 +DWARF: + debug_gnu_pubtypes: + Length: + TotalLength: 0x1234 + Version: 2 + UnitOffset: 0x1234 + UnitSize: 0x4321 + Entries: + - DieOffset: 0x12345678 + Name: abc diff --git a/llvm/unittests/ObjectYAML/DWARFYAMLTest.cpp b/llvm/unittests/ObjectYAML/DWARFYAMLTest.cpp index 138c0999c1cc1b..e93773999ad30e 100644 --- a/llvm/unittests/ObjectYAML/DWARFYAMLTest.cpp +++ b/llvm/unittests/ObjectYAML/DWARFYAMLTest.cpp @@ -45,8 +45,8 @@ TEST(DebugAddrSection, TestParseDebugAddrYAML) { Length: 0x1234 Version: 5 )"; - auto SectionsOrErr = DWARFYAML::emitDebugSections(Yaml); - EXPECT_THAT_EXPECTED(SectionsOrErr, Succeeded()); + auto DWARFOrErr = parseDWARFYAML(Yaml); + EXPECT_THAT_EXPECTED(DWARFOrErr, Succeeded()); } TEST(DebugAddrSection, TestMissingVersion) { @@ -55,8 +55,8 @@ TEST(DebugAddrSection, TestMissingVersion) { - Format: DWARF64 Length: 0x1234 )"; - auto SectionsOrErr = DWARFYAML::emitDebugSections(Yaml); - EXPECT_THAT_ERROR(SectionsOrErr.takeError(), + auto DWARFOrErr = parseDWARFYAML(Yaml); + EXPECT_THAT_ERROR(DWARFOrErr.takeError(), FailedWithMessage("missing required key 'Version'")); } @@ -68,8 +68,8 @@ TEST(DebugAddrSection, TestUnexpectedKey) { Version: 5 Blah: unexpected )"; - auto SectionsOrErr = DWARFYAML::emitDebugSections(Yaml); - EXPECT_THAT_ERROR(SectionsOrErr.takeError(), + auto DWARFOrErr = parseDWARFYAML(Yaml); + EXPECT_THAT_ERROR(DWARFOrErr.takeError(), FailedWithMessage("unknown key 'Blah'")); } diff --git a/mlir/lib/Transforms/BufferPlacement.cpp b/mlir/lib/Transforms/BufferPlacement.cpp index 577d52188351e7..71d397b47ea249 100644 --- a/mlir/lib/Transforms/BufferPlacement.cpp +++ b/mlir/lib/Transforms/BufferPlacement.cpp @@ -142,6 +142,12 @@ class BufferPlacementAliasAnalysis { this->aliases[std::get<0>(entry)].insert(std::get<1>(entry)); }; + // Add additional aliases created by view changes to the alias list. + op->walk([&](ViewLikeOpInterface viewInterface) { + aliases[viewInterface.getViewSource()].insert( + viewInterface.getOperation()->getResult(0)); + }); + // Query all branch interfaces to link block argument aliases. op->walk([&](BranchOpInterface branchInterface) { Block *parentBlock = branchInterface.getOperation()->getBlock(); diff --git a/mlir/test/Transforms/buffer-placement.mlir b/mlir/test/Transforms/buffer-placement.mlir index 225a186caeb0af..c3bce4ea545839 100644 --- a/mlir/test/Transforms/buffer-placement.mlir +++ b/mlir/test/Transforms/buffer-placement.mlir @@ -914,3 +914,22 @@ func @inner_region_control_flow_div( // CHECK-NEXT: test.region_if_yield %[[ALLOC8]] // CHECK: dealloc %[[ALLOC0]] // CHECK-NEXT: return %[[ALLOC1]] + +// ----- + +// CHECK-LABEL: func @subview +func @subview(%arg0 : index, %arg1 : index, %arg2 : memref) { + %0 = alloc() : memref<64x4xf32, offset: 0, strides: [4, 1]> + %1 = subview %0[%arg0, %arg1][%arg0, %arg1][%arg0, %arg1] : + memref<64x4xf32, offset: 0, strides: [4, 1]> + to memref + "linalg.copy"(%1, %arg2) : + (memref, memref) -> () + return +} + +// CHECK-NEXT: %[[ALLOC:.*]] = alloc() +// CHECK-NEXT: subview +// CHECK-NEXT: linalg.copy +// CHECK-NEXT: dealloc %[[ALLOC]] +// CHECK-NEXT: return