diff --git a/clang/lib/Sema/SemaOverload.cpp b/clang/lib/Sema/SemaOverload.cpp index a32bc0c84c7018..ecc4e7ee19fbc1 100644 --- a/clang/lib/Sema/SemaOverload.cpp +++ b/clang/lib/Sema/SemaOverload.cpp @@ -9374,16 +9374,22 @@ static Comparison compareEnableIfAttrs(const Sema &S, const FunctionDecl *Cand1, return Comparison::Equal; } -static bool isBetterMultiversionCandidate(const OverloadCandidate &Cand1, - const OverloadCandidate &Cand2) { +static Comparison +isBetterMultiversionCandidate(const OverloadCandidate &Cand1, + const OverloadCandidate &Cand2) { if (!Cand1.Function || !Cand1.Function->isMultiVersion() || !Cand2.Function || !Cand2.Function->isMultiVersion()) - return false; + return Comparison::Equal; - // If Cand1 is invalid, it cannot be a better match, if Cand2 is invalid, this - // is obviously better. - if (Cand1.Function->isInvalidDecl()) return false; - if (Cand2.Function->isInvalidDecl()) return true; + // If both are invalid, they are equal. If one of them is invalid, the other + // is better. + if (Cand1.Function->isInvalidDecl()) { + if (Cand2.Function->isInvalidDecl()) + return Comparison::Equal; + return Comparison::Worse; + } + if (Cand2.Function->isInvalidDecl()) + return Comparison::Better; // If this is a cpu_dispatch/cpu_specific multiversion situation, prefer // cpu_dispatch, else arbitrarily based on the identifiers. @@ -9393,16 +9399,18 @@ static bool isBetterMultiversionCandidate(const OverloadCandidate &Cand1, const auto *Cand2CPUSpec = Cand2.Function->getAttr(); if (!Cand1CPUDisp && !Cand2CPUDisp && !Cand1CPUSpec && !Cand2CPUSpec) - return false; + return Comparison::Equal; if (Cand1CPUDisp && !Cand2CPUDisp) - return true; + return Comparison::Better; if (Cand2CPUDisp && !Cand1CPUDisp) - return false; + return Comparison::Worse; if (Cand1CPUSpec && Cand2CPUSpec) { if (Cand1CPUSpec->cpus_size() != Cand2CPUSpec->cpus_size()) - return Cand1CPUSpec->cpus_size() < Cand2CPUSpec->cpus_size(); + return Cand1CPUSpec->cpus_size() < Cand2CPUSpec->cpus_size() + ? Comparison::Better + : Comparison::Worse; std::pair FirstDiff = std::mismatch( @@ -9415,7 +9423,9 @@ static bool isBetterMultiversionCandidate(const OverloadCandidate &Cand1, assert(FirstDiff.first != Cand1CPUSpec->cpus_end() && "Two different cpu-specific versions should not have the same " "identifier list, otherwise they'd be the same decl!"); - return (*FirstDiff.first)->getName() < (*FirstDiff.second)->getName(); + return (*FirstDiff.first)->getName() < (*FirstDiff.second)->getName() + ? Comparison::Better + : Comparison::Worse; } llvm_unreachable("No way to get here unless both had cpu_dispatch"); } @@ -9475,6 +9485,50 @@ bool clang::isBetterOverloadCandidate( else if (!Cand1.Viable) return false; + // [CUDA] A function with 'never' preference is marked not viable, therefore + // is never shown up here. The worst preference shown up here is 'wrong side', + // e.g. a host function called by a device host function in device + // compilation. This is valid AST as long as the host device function is not + // emitted, e.g. it is an inline function which is called only by a host + // function. A deferred diagnostic will be triggered if it is emitted. + // However a wrong-sided function is still a viable candidate here. + // + // If Cand1 can be emitted and Cand2 cannot be emitted in the current + // context, Cand1 is better than Cand2. If Cand1 can not be emitted and Cand2 + // can be emitted, Cand1 is not better than Cand2. This rule should have + // precedence over other rules. 
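As a reviewer aside, the ordering the comment above describes can be condensed into a short sketch. The enum and the "standard rank" below are illustrative stand-ins, not the actual Sema::CUDAFunctionPreference values or overload machinery:

```cpp
// Condensed sketch of the ordering described above; the enum and the
// "standard rank" are illustrative stand-ins, not the real Sema types.
enum Preference { WrongSide, HostDevice, SameSide, Native };

struct Candidate {
  Preference CudaPref; // 'never' candidates were already filtered out
  int StandardRank;    // stand-in for the usual C++ tie-breakers
};

bool isBetter(const Candidate &C1, const Candidate &C2) {
  // 1. Emittability in the current compilation mode wins outright.
  bool E1 = C1.CudaPref > WrongSide, E2 = C2.CudaPref > WrongSide;
  if (E1 != E2)
    return E1;
  // 2. Otherwise the standard C++ rules decide.
  if (C1.StandardRank != C2.StandardRank)
    return C1.StandardRank > C2.StandardRank;
  // 3. Only as a last resort, fall back to the CUDA call preference.
  return C1.CudaPref > C2.CudaPref;
}
```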
+ // + // If both Cand1 and Cand2 can be emitted, or neither can be emitted, then + // other rules should be used to determine which is better. This is because + // host/device based overloading resolution is mostly for determining + // viability of a function. If two functions are both viable, other factors + // should take precedence in preference, e.g. the standard-defined preferences + // like argument conversion ranks or enable_if partial-ordering. The + // preference for pass-object-size parameters is probably most similar to a + // type-based-overloading decision and so should take priority. + // + // If other rules cannot determine which is better, CUDA preference will be + // used again to determine which is better. + // + // TODO: Currently IdentifyCUDAPreference does not return correct values + // for functions called in global variable initializers due to missing + // correct context about device/host. Therefore we can only enforce this + // rule when there is a caller. We should enforce this rule for functions + // in global variable initializers once proper context is added. + if (S.getLangOpts().CUDA && Cand1.Function && Cand2.Function) { + if (FunctionDecl *Caller = dyn_cast(S.CurContext)) { + auto P1 = S.IdentifyCUDAPreference(Caller, Cand1.Function); + auto P2 = S.IdentifyCUDAPreference(Caller, Cand2.Function); + assert(P1 != Sema::CFP_Never && P2 != Sema::CFP_Never); + auto Cand1Emittable = P1 > Sema::CFP_WrongSide; + auto Cand2Emittable = P2 > Sema::CFP_WrongSide; + if (Cand1Emittable && !Cand2Emittable) + return true; + if (!Cand1Emittable && Cand2Emittable) + return false; + } + } + // C++ [over.match.best]p1: // // -- if F is a static member function, ICS1(F) is defined such @@ -9709,12 +9763,6 @@ bool clang::isBetterOverloadCandidate( return Cmp == Comparison::Better; } - if (S.getLangOpts().CUDA && Cand1.Function && Cand2.Function) { - FunctionDecl *Caller = dyn_cast(S.CurContext); - return S.IdentifyCUDAPreference(Caller, Cand1.Function) > - S.IdentifyCUDAPreference(Caller, Cand2.Function); - } - bool HasPS1 = Cand1.Function != nullptr && functionHasPassObjectSizeParams(Cand1.Function); bool HasPS2 = Cand2.Function != nullptr && @@ -9722,7 +9770,22 @@ bool clang::isBetterOverloadCandidate( if (HasPS1 != HasPS2 && HasPS1) return true; - return isBetterMultiversionCandidate(Cand1, Cand2); + auto MV = isBetterMultiversionCandidate(Cand1, Cand2); + if (MV == Comparison::Better) + return true; + if (MV == Comparison::Worse) + return false; + + // If other rules cannot determine which is better, CUDA preference is used + // to determine which is better. + if (S.getLangOpts().CUDA && Cand1.Function && Cand2.Function) { + if (FunctionDecl *Caller = dyn_cast(S.CurContext)) { + return S.IdentifyCUDAPreference(Caller, Cand1.Function) > + S.IdentifyCUDAPreference(Caller, Cand2.Function); + } + } + + return false; } /// Determine whether two declarations are "equivalent" for the purposes of @@ -9808,33 +9871,6 @@ OverloadCandidateSet::BestViableFunction(Sema &S, SourceLocation Loc, std::transform(begin(), end(), std::back_inserter(Candidates), [](OverloadCandidate &Cand) { return &Cand; }); - // [CUDA] HD->H or HD->D calls are technically not allowed by CUDA but - // are accepted by both clang and NVCC. However, during a particular - // compilation mode only one call variant is viable. We need to - // exclude non-viable overload candidates from consideration based - // only on their host/device attributes. 
Specifically, if one - // candidate call is WrongSide and the other is SameSide, we ignore - // the WrongSide candidate. - if (S.getLangOpts().CUDA) { - const FunctionDecl *Caller = dyn_cast(S.CurContext); - bool ContainsSameSideCandidate = - llvm::any_of(Candidates, [&](OverloadCandidate *Cand) { - // Check viable function only. - return Cand->Viable && Cand->Function && - S.IdentifyCUDAPreference(Caller, Cand->Function) == - Sema::CFP_SameSide; - }); - if (ContainsSameSideCandidate) { - auto IsWrongSideCandidate = [&](OverloadCandidate *Cand) { - // Check viable function only to avoid unnecessary data copying/moving. - return Cand->Viable && Cand->Function && - S.IdentifyCUDAPreference(Caller, Cand->Function) == - Sema::CFP_WrongSide; - }; - llvm::erase_if(Candidates, IsWrongSideCandidate); - } - } - // Find the best viable function. Best = end(); for (auto *Cand : Candidates) { diff --git a/clang/test/SemaCUDA/function-overload.cu b/clang/test/SemaCUDA/function-overload.cu index b9efd1c09e6994..b0e2852a12a755 100644 --- a/clang/test/SemaCUDA/function-overload.cu +++ b/clang/test/SemaCUDA/function-overload.cu @@ -331,9 +331,6 @@ __device__ void test_device_calls_template_fn() { // If we have a mix of HD and H-only or D-only candidates in the overload set, // normal C++ overload resolution rules apply first. template TemplateReturnTy template_vs_hd_function(T arg) -#ifdef __CUDA_ARCH__ -//expected-note@-2 {{declared here}} -#endif { return TemplateReturnTy(); } @@ -342,11 +339,13 @@ __host__ __device__ HostDeviceReturnTy template_vs_hd_function(float arg) { } __host__ __device__ void test_host_device_calls_hd_template() { - HostDeviceReturnTy ret1 = template_vs_hd_function(1.0f); - TemplateReturnTy ret2 = template_vs_hd_function(1); #ifdef __CUDA_ARCH__ - // expected-error@-2 {{reference to __host__ function 'template_vs_hd_function' in __host__ __device__ function}} + typedef HostDeviceReturnTy ExpectedReturnTy; +#else + typedef TemplateReturnTy ExpectedReturnTy; #endif + HostDeviceReturnTy ret1 = template_vs_hd_function(1.0f); + ExpectedReturnTy ret2 = template_vs_hd_function(1); } __host__ void test_host_calls_hd_template() { @@ -367,14 +366,14 @@ __device__ void test_device_calls_hd_template() { __device__ DeviceReturnTy device_only_function(int arg) { return DeviceReturnTy(); } __device__ DeviceReturnTy2 device_only_function(float arg) { return DeviceReturnTy2(); } #ifndef __CUDA_ARCH__ - // expected-note@-3 {{'device_only_function' declared here}} - // expected-note@-3 {{'device_only_function' declared here}} + // expected-note@-3 2{{'device_only_function' declared here}} + // expected-note@-3 2{{'device_only_function' declared here}} #endif __host__ HostReturnTy host_only_function(int arg) { return HostReturnTy(); } __host__ HostReturnTy2 host_only_function(float arg) { return HostReturnTy2(); } #ifdef __CUDA_ARCH__ - // expected-note@-3 {{'host_only_function' declared here}} - // expected-note@-3 {{'host_only_function' declared here}} + // expected-note@-3 2{{'host_only_function' declared here}} + // expected-note@-3 2{{'host_only_function' declared here}} #endif __host__ __device__ void test_host_device_single_side_overloading() { @@ -392,6 +391,37 @@ __host__ __device__ void test_host_device_single_side_overloading() { #endif } +// wrong-sided overloading should not cause diagnostic unless it is emitted. +// This inline function is not emitted. 
+inline __host__ __device__ void test_host_device_wrong_side_overloading_inline_no_diag() { + DeviceReturnTy ret1 = device_only_function(1); + DeviceReturnTy2 ret2 = device_only_function(1.0f); + HostReturnTy ret3 = host_only_function(1); + HostReturnTy2 ret4 = host_only_function(1.0f); +} + +// wrong-sided overloading should cause diagnostic if it is emitted. +// This inline function is emitted since it is called by an emitted function. +inline __host__ __device__ void test_host_device_wrong_side_overloading_inline_diag() { + DeviceReturnTy ret1 = device_only_function(1); + DeviceReturnTy2 ret2 = device_only_function(1.0f); +#ifndef __CUDA_ARCH__ + // expected-error@-3 {{reference to __device__ function 'device_only_function' in __host__ __device__ function}} + // expected-error@-3 {{reference to __device__ function 'device_only_function' in __host__ __device__ function}} +#endif + HostReturnTy ret3 = host_only_function(1); + HostReturnTy2 ret4 = host_only_function(1.0f); +#ifdef __CUDA_ARCH__ + // expected-error@-3 {{reference to __host__ function 'host_only_function' in __host__ __device__ function}} + // expected-error@-3 {{reference to __host__ function 'host_only_function' in __host__ __device__ function}} +#endif +} + +__host__ __device__ void test_host_device_wrong_side_overloading_inline_diag_caller() { + test_host_device_wrong_side_overloading_inline_diag(); + // expected-note@-1 {{called by 'test_host_device_wrong_side_overloading_inline_diag_caller'}} +} + // Verify that we allow overloading function templates. template __host__ T template_overload(const T &a) { return a; }; template __device__ T template_overload(const T &a) { return a; }; diff --git a/compiler-rt/lib/xray/xray_interface.cpp b/compiler-rt/lib/xray/xray_interface.cpp index 29dad43b62eba4..082aaf41155afb 100644 --- a/compiler-rt/lib/xray/xray_interface.cpp +++ b/compiler-rt/lib/xray/xray_interface.cpp @@ -295,7 +295,7 @@ XRayPatchingStatus controlPatching(bool Enable) XRAY_NEVER_INSTRUMENT { for (std::size_t I = 0; I < InstrMap.Entries; ++I) { auto &Sled = InstrMap.Sleds[I]; - auto F = Sled.Function; + auto F = Sled.function(); if (CurFun == 0) CurFun = F; if (F != CurFun) { @@ -466,7 +466,7 @@ uintptr_t __xray_function_address(int32_t FuncId) XRAY_NEVER_INSTRUMENT { SpinMutexLock Guard(&XRayInstrMapMutex); if (FuncId <= 0 || static_cast(FuncId) > XRayInstrMap.Functions) return 0; - return XRayInstrMap.SledsIndex[FuncId - 1].Begin->Function + return XRayInstrMap.SledsIndex[FuncId - 1].Begin->function() // On PPC, function entries are always aligned to 16 bytes. The beginning of a // sled might be a local entry, which is always +8 based on the global entry. // Always return the global entry. diff --git a/compiler-rt/lib/xray/xray_interface_internal.h b/compiler-rt/lib/xray/xray_interface_internal.h index cdd1b9cbe2d669..390f389b1dca66 100644 --- a/compiler-rt/lib/xray/xray_interface_internal.h +++ b/compiler-rt/lib/xray/xray_interface_internal.h @@ -29,6 +29,12 @@ struct XRaySledEntry { unsigned char AlwaysInstrument; unsigned char Version; unsigned char Padding[13]; // Need 32 bytes + uint64_t function() const { + if (Version < 2) + return Function; + // The target address is relative to the location of the Function variable. 
+ return reinterpret_cast(&Function) + Function; + } uint64_t address() const { if (Version < 2) return Address; @@ -42,6 +48,12 @@ struct XRaySledEntry { unsigned char AlwaysInstrument; unsigned char Version; unsigned char Padding[5]; // Need 16 bytes + uint32_t function() const { + if (Version < 2) + return Function; + // The target address is relative to the location of the Function variable. + return reinterpret_cast(&Function) + Function; + } uint32_t address() const { if (Version < 2) return Address; diff --git a/flang/runtime/character.cpp b/flang/runtime/character.cpp index b6a804dfa03f73..e65ac38dee874c 100644 --- a/flang/runtime/character.cpp +++ b/flang/runtime/character.cpp @@ -7,11 +7,60 @@ //===----------------------------------------------------------------------===// #include "character.h" +#include "descriptor.h" #include "terminator.h" #include #include namespace Fortran::runtime { + +template +inline int CompareToBlankPadding(const C *x, std::size_t chars) { + for (; chars-- > 0; ++x) { + if (*x < ' ') { + return -1; + } + if (*x > ' ') { + return 1; + } + } + return 0; +} + +template +static int Compare( + const C *x, const C *y, std::size_t xBytes, std::size_t yBytes) { + auto minBytes{std::min(xBytes, yBytes)}; + if constexpr (shift == 0) { + // don't use for kind=2 or =4, that would fail on little-endian machines + int cmp{std::memcmp(x, y, minBytes)}; + if (cmp < 0) { + return -1; + } + if (cmp > 0) { + return 1; + } + if (xBytes == yBytes) { + return 0; + } + x += minBytes; + y += minBytes; + } else { + for (std::size_t n{minBytes >> shift}; n-- > 0; ++x, ++y) { + if (*x < *y) { + return -1; + } + if (*x > *y) { + return 1; + } + } + } + if (int cmp{CompareToBlankPadding(x, (xBytes - minBytes) >> shift)}) { + return cmp; + } + return -CompareToBlankPadding(y, (yBytes - minBytes) >> shift); +} + extern "C" { void RTNAME(CharacterConcatenate)(Descriptor & /*temp*/, @@ -30,18 +79,43 @@ void RTNAME(CharacterAssign)(Descriptor & /*lhs*/, const Descriptor & /*rhs*/, // TODO } -std::size_t RTNAME(CharacterAppend)(char *lhs, std::size_t lhsLength, - std::size_t offset, const char *rhs, std::size_t rhsLength) { - if (auto n{std::min(lhsLength - offset, rhsLength)}) { +int RTNAME(CharacterCompareScalar)(const Descriptor &, const Descriptor &) { + // TODO real soon once there's type codes for character(kind=2 & 4) + return 0; +} + +int RTNAME(CharacterCompareScalar1)( + const char *x, const char *y, std::size_t xBytes, std::size_t yBytes) { + return Compare(x, y, xBytes, yBytes); +} + +int RTNAME(CharacterCompareScalar2)(const char16_t *x, const char16_t *y, + std::size_t xBytes, std::size_t yBytes) { + return Compare(x, y, xBytes, yBytes); +} + +int RTNAME(CharacterCompareScalar4)(const char32_t *x, const char32_t *y, + std::size_t xBytes, std::size_t yBytes) { + return Compare(x, y, xBytes, yBytes); +} + +void RTNAME(CharacterCompare)( + Descriptor &, const Descriptor &, const Descriptor &) { + // TODO real soon once there's type codes for character(kind=2 & 4) +} + +std::size_t RTNAME(CharacterAppend1)(char *lhs, std::size_t lhsBytes, + std::size_t offset, const char *rhs, std::size_t rhsBytes) { + if (auto n{std::min(lhsBytes - offset, rhsBytes)}) { std::memcpy(lhs + offset, rhs, n); offset += n; } return offset; } -void RTNAME(CharacterPad)(char *lhs, std::size_t length, std::size_t offset) { - if (length > offset) { - std::memset(lhs + offset, ' ', length - offset); +void RTNAME(CharacterPad1)(char *lhs, std::size_t bytes, std::size_t offset) { + if (bytes > offset) { + 
std::memset(lhs + offset, ' ', bytes - offset); } } } diff --git a/flang/runtime/character.h b/flang/runtime/character.h index ff182dec54457a..6705d98bc8f041 100644 --- a/flang/runtime/character.h +++ b/flang/runtime/character.h @@ -11,11 +11,13 @@ #ifndef FORTRAN_RUNTIME_CHARACTER_H_ #define FORTRAN_RUNTIME_CHARACTER_H_ -#include "descriptor.h" #include "entry-names.h" #include namespace Fortran::runtime { + +class Descriptor; + extern "C" { // Appends the corresponding (or expanded) characters of 'operand' @@ -26,8 +28,8 @@ extern "C" { void RTNAME(CharacterConcatenate)(Descriptor &temp, const Descriptor &operand, const char *sourceFile = nullptr, int sourceLine = 0); -// Convenience specialization for character scalars. -void RTNAME(CharacterConcatenateScalar)( +// Convenience specialization for ASCII scalars. +void RTNAME(CharacterConcatenateScalar1)( Descriptor &temp, const char *, std::size_t byteLength); // Assigns the value(s) of 'rhs' to 'lhs'. Handles reallocation, @@ -38,16 +40,36 @@ void RTNAME(CharacterConcatenateScalar)( void RTNAME(CharacterAssign)(Descriptor &lhs, const Descriptor &rhs, const char *sourceFile = nullptr, int sourceLine = 0); -// Special-case support for optimized scalar CHARACTER concatenation -// expressions. +// CHARACTER comparisons. The kinds must match. Like std::memcmp(), +// the result is less than zero, zero, or greater than zero if the first +// argument is less than the second, equal to the second, or greater than +// the second, respectively. The shorter argument is treated as if it were +// padded on the right with blanks. +// N.B.: Calls to the restricted specific intrinsic functions LGE, LGT, LLE, +// & LLT are converted into calls to these during lowering; they don't have +// to be able to be passed as actual procedure arguments. +int RTNAME(CharacterCompareScalar)(const Descriptor &, const Descriptor &); +int RTNAME(CharacterCompareScalar1)( + const char *x, const char *y, std::size_t xBytes, std::size_t yBytes); +int RTNAME(CharacterCompareScalar2)(const char16_t *x, const char16_t *y, + std::size_t xBytes, std::size_t yBytes); +int RTNAME(CharacterCompareScalar4)(const char32_t *x, const char32_t *y, + std::size_t xBytes, std::size_t yBytes); + +// General CHARACTER comparison; the result is a LOGICAL(KIND=1) array that +// is established and populated. +void RTNAME(CharacterCompare)( + Descriptor &result, const Descriptor &, const Descriptor &); + +// Special-case support for optimized ASCII scalar expressions. // Copies data from 'rhs' to the remaining space (lhsLength - offset) // in 'lhs', if any. Returns the new offset. Assumes independence. -std::size_t RTNAME(CharacterAppend)(char *lhs, std::size_t lhsLength, - std::size_t offset, const char *rhs, std::size_t rhsLength); +std::size_t RTNAME(CharacterAppend1)(char *lhs, std::size_t lhsBytes, + std::size_t offset, const char *rhs, std::size_t rhsBytes); // Appends any necessary spaces to a CHARACTER(KIND=1) scalar. 
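The blank-padding rule documented for the comparison entry points declared just above boils down to a few lines. A self-contained sketch for KIND=1, which mirrors but is not the runtime's templated implementation:

```cpp
#include <algorithm>
#include <cstring>

// Sketch of the blank-padding rule: the shorter operand is compared as if
// extended on the right with blanks, so e.g. "ab " and "ab" compare equal.
int blankPaddedCompare(const char *x, std::size_t xLen, const char *y,
                       std::size_t yLen) {
  std::size_t minLen = std::min(xLen, yLen);
  if (int cmp = std::memcmp(x, y, minLen))
    return cmp < 0 ? -1 : 1;
  // Compare the longer operand's tail against blanks.
  const char *tail = xLen > yLen ? x + minLen : y + minLen;
  std::size_t tailLen = std::max(xLen, yLen) - minLen;
  int sign = xLen > yLen ? 1 : -1; // whose tail is being examined
  for (std::size_t i = 0; i < tailLen; ++i) {
    unsigned char c = static_cast<unsigned char>(tail[i]);
    if (c != ' ')
      return c > ' ' ? sign : -sign;
  }
  return 0;
}
```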
-void RTNAME(CharacterPad)(char *lhs, std::size_t length, std::size_t offset); +void RTNAME(CharacterPad1)(char *lhs, std::size_t bytes, std::size_t offset); } } // namespace Fortran::runtime #endif // FORTRAN_RUNTIME_CHARACTER_H_ diff --git a/flang/unittests/Runtime/CMakeLists.txt b/flang/unittests/Runtime/CMakeLists.txt index a5297ac67821f6..4d6ac6411fe2bd 100644 --- a/flang/unittests/Runtime/CMakeLists.txt +++ b/flang/unittests/Runtime/CMakeLists.txt @@ -5,15 +5,15 @@ set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}) add_library(RuntimeTesting testing.cpp - ) +) add_executable(format-test format.cpp ) target_link_libraries(format-test - FortranRuntime RuntimeTesting + FortranRuntime LLVMSupport ) @@ -24,8 +24,8 @@ add_executable(hello-world ) target_link_libraries(hello-world - FortranRuntime RuntimeTesting + FortranRuntime LLVMSupport ) @@ -42,12 +42,24 @@ target_link_libraries(external-hello-world add_executable(list-input-test list-input.cpp - ) +) target_link_libraries(list-input-test - FortranRuntime RuntimeTesting + FortranRuntime LLVMSupport ) add_test(NAME ListInput COMMAND list-input-test) + +add_executable(character-test + character.cpp +) + +target_link_libraries(character-test + RuntimeTesting + FortranRuntime + LLVMSupport +) + +add_test(NAME CharacterTest COMMAND character-test) diff --git a/flang/unittests/Runtime/character.cpp b/flang/unittests/Runtime/character.cpp new file mode 100644 index 00000000000000..fb023473f64aaa --- /dev/null +++ b/flang/unittests/Runtime/character.cpp @@ -0,0 +1,59 @@ +// Basic sanity tests of CHARACTER API; exhaustive testing will be done +// in Fortran. + +#include "../../runtime/character.h" +#include "testing.h" +#include + +using namespace Fortran::runtime; + +static void AppendAndPad(std::size_t limit) { + char x[8]; + std::size_t xLen{0}; + std::memset(x, 0, sizeof x); + xLen = RTNAME(CharacterAppend1)(x, limit, xLen, "abc", 3); + xLen = RTNAME(CharacterAppend1)(x, limit, xLen, "DE", 2); + RTNAME(CharacterPad1)(x, limit, xLen); + if (xLen > limit) { + Fail() << "xLen " << xLen << ">" << limit << '\n'; + } + if (x[limit]) { + Fail() << "x[" << limit << "]='" << x[limit] << "'\n"; + x[limit] = '\0'; + } + if (std::memcmp(x, "abcDE ", limit)) { + Fail() << "x = '" << x << "'\n"; + } +} + +static void TestCharCompare(const char *x, const char *y, std::size_t xBytes, + std::size_t yBytes, int expect) { + int cmp{RTNAME(CharacterCompareScalar1)(x, y, xBytes, yBytes)}; + if (cmp != expect) { + char buf[2][8]; + std::memset(buf, 0, sizeof buf); + std::memcpy(buf[0], x, xBytes); + std::memcpy(buf[1], y, yBytes); + Fail() << "compare '" << buf[0] << "'(" << xBytes << ") to '" << buf[1] + << "'(" << yBytes << "), got " << cmp << ", should be " << expect + << '\n'; + } +} + +static void Compare(const char *x, const char *y, std::size_t xBytes, + std::size_t yBytes, int expect) { + TestCharCompare(x, y, xBytes, yBytes, expect); + TestCharCompare(y, x, yBytes, xBytes, -expect); +} + +int main() { + StartTests(); + for (std::size_t j{0}; j < 8; ++j) { + AppendAndPad(j); + } + Compare("abc", "abc", 3, 3, 0); + Compare("abc", "def", 3, 3, -1); + Compare("ab ", "abc", 3, 2, 0); + Compare("abc", "abc", 2, 3, -1); + return EndTests(); +} diff --git a/flang/unittests/Runtime/format.cpp b/flang/unittests/Runtime/format.cpp index c855523b427e15..87989eacebcbec 100644 --- a/flang/unittests/Runtime/format.cpp +++ b/flang/unittests/Runtime/format.cpp @@ -3,7 +3,6 @@ #include "testing.h" #include "../runtime/format-implementation.h" #include 
"../runtime/io-error.h" -#include "llvm/Support/raw_ostream.h" #include #include #include diff --git a/flang/unittests/Runtime/hello.cpp b/flang/unittests/Runtime/hello.cpp index 64ed2cbaba721a..22e7380128f358 100644 --- a/flang/unittests/Runtime/hello.cpp +++ b/flang/unittests/Runtime/hello.cpp @@ -3,7 +3,6 @@ #include "testing.h" #include "../../runtime/descriptor.h" #include "../../runtime/io-api.h" -#include "llvm/Support/raw_ostream.h" #include using namespace Fortran::runtime; diff --git a/flang/unittests/Runtime/list-input.cpp b/flang/unittests/Runtime/list-input.cpp index 9f6377656f9133..c7a660dc87aae1 100644 --- a/flang/unittests/Runtime/list-input.cpp +++ b/flang/unittests/Runtime/list-input.cpp @@ -4,7 +4,6 @@ #include "../../runtime/descriptor.h" #include "../../runtime/io-api.h" #include "../../runtime/io-error.h" -#include "llvm/Support/raw_ostream.h" #include #include diff --git a/flang/unittests/Runtime/testing.cpp b/flang/unittests/Runtime/testing.cpp index 8a31f23e9ef56b..146b37db9a5774 100644 --- a/flang/unittests/Runtime/testing.cpp +++ b/flang/unittests/Runtime/testing.cpp @@ -1,6 +1,5 @@ #include "testing.h" #include "../../runtime/terminator.h" -#include "llvm/Support/raw_ostream.h" #include #include #include diff --git a/flang/unittests/Runtime/testing.h b/flang/unittests/Runtime/testing.h index 943b6fd8d915e8..1b401aaf854336 100644 --- a/flang/unittests/Runtime/testing.h +++ b/flang/unittests/Runtime/testing.h @@ -1,8 +1,8 @@ #ifndef FORTRAN_TEST_RUNTIME_TESTING_H_ #define FORTRAN_TEST_RUNTIME_TESTING_H_ +#include "llvm/Support/raw_ostream.h" #include -#include namespace llvm { class raw_ostream; diff --git a/libcxx/utils/libcxx/test/newformat.py b/libcxx/utils/libcxx/test/newformat.py index d9d2a5073b408b..2ee6e502da6d68 100644 --- a/libcxx/utils/libcxx/test/newformat.py +++ b/libcxx/utils/libcxx/test/newformat.py @@ -13,6 +13,97 @@ import re import subprocess +def _supportsVerify(test): + """ + Determine whether clang-verify is supported for that test. + + This is done by checking whether the %{cxx} substitution supports certain + compiler flags. + """ + command = "%{{cxx}} -xc++ {} -Werror -fsyntax-only -Xclang -verify-ignore-unexpected".format(os.devnull) + command = lit.TestRunner.applySubstitutions([command], test.config.substitutions, + recursion_limit=test.config.recursiveExpansionLimit)[0] + devNull = open(os.devnull, 'w') + result = subprocess.call(command, shell=True, stdout=devNull, stderr=devNull) + return result == 0 + +def parseScript(test, preamble, fileDependencies): + """ + Extract the script from a test, with substitutions applied. + + Returns a list of commands ready to be executed. + + - test + The lit.Test to parse. + + - preamble + A list of commands to perform before any command in the test. + These commands can contain unexpanded substitutions, but they + must not be of the form 'RUN:' -- they must be proper commands + once substituted. + + - fileDependencies + A list of additional file dependencies for the test. 
+ """ + + # Get the default substitutions + tmpDir, tmpBase = lit.TestRunner.getTempPaths(test) + useExternalSh = True + substitutions = lit.TestRunner.getDefaultSubstitutions(test, tmpDir, tmpBase, + normalize_slashes=useExternalSh) + + # Add the %{build} and %{run} convenience substitutions + substitutions.append(('%{build}', '%{cxx} %s %{flags} %{compile_flags} %{link_flags} -o %t.exe')) + substitutions.append(('%{run}', '%{exec} %t.exe')) + + # Add the %{verify} substitution and the verify-support feature if Clang-verify is supported + if _supportsVerify(test): + test.config.available_features.add('verify-support') + substitutions.append(('%{verify}', '-Xclang -verify -Xclang -verify-ignore-unexpected=note -ferror-limit=0')) + + # Parse the test file, including custom directives + additionalCompileFlags = [] + fileDependencies = list(fileDependencies) + parsers = [ + lit.TestRunner.IntegratedTestKeywordParser('FILE_DEPENDENCIES:', + lit.TestRunner.ParserKind.LIST, + initial_value=fileDependencies), + lit.TestRunner.IntegratedTestKeywordParser('ADDITIONAL_COMPILE_FLAGS:', + lit.TestRunner.ParserKind.LIST, + initial_value=additionalCompileFlags) + ] + + script = list(preamble) + parsed = lit.TestRunner.parseIntegratedTestScript(test, additional_parsers=parsers, + require_script=not script) + if isinstance(parsed, lit.Test.Result): + return parsed + script += parsed + + # Add compile flags specified with ADDITIONAL_COMPILE_FLAGS. + substitutions = [(s, x + ' ' + ' '.join(additionalCompileFlags)) if s == '%{compile_flags}' + else (s, x) for (s, x) in substitutions] + + # Perform substitutions inside FILE_DEPENDENCIES lines (or injected dependencies). + # This allows using variables like %t in file dependencies. Also note that we really + # need to resolve %{file_dependencies} now, because otherwise we won't be able to + # make all paths absolute below. + fileDependencies = lit.TestRunner.applySubstitutions(fileDependencies, substitutions, + recursion_limit=test.config.recursiveExpansionLimit) + + # Add the %{file_dependencies} substitution before we perform substitutions + # inside the script. + testDir = os.path.dirname(test.getSourcePath()) + fileDependencies = [f if os.path.isabs(f) else os.path.join(testDir, f) for f in fileDependencies] + substitutions.append(('%{file_dependencies}', ' '.join(map(pipes.quote, fileDependencies)))) + + # Perform substitutions in the script itself. + script = lit.TestRunner.applySubstitutions(script, substitutions, + recursion_limit=test.config.recursiveExpansionLimit) + + return script + + class CxxStandardLibraryTest(lit.formats.TestFormat): """ Lit test format for the C++ Standard Library conformance test suite. @@ -148,15 +239,6 @@ def _checkBaseSubstitutions(self, substitutions): for s in ['%{cxx}', '%{compile_flags}', '%{link_flags}', '%{flags}', '%{exec}']: assert s in substitutions, "Required substitution {} was not provided".format(s) - # Determine whether clang-verify is supported. 
- def _supportsVerify(self, test): - command = "%{{cxx}} -xc++ {} -Werror -fsyntax-only -Xclang -verify-ignore-unexpected".format(os.devnull) - command = lit.TestRunner.applySubstitutions([command], test.config.substitutions, - recursion_limit=test.config.recursiveExpansionLimit)[0] - devNull = open(os.devnull, 'w') - result = subprocess.call(command, shell=True, stdout=devNull, stderr=devNull) - return result == 0 - def _disableWithModules(self, test): with open(test.getSourcePath(), 'rb') as f: contents = f.read() @@ -225,7 +307,7 @@ def execute(self, test, litConfig): # otherwise it's like a .compile.fail.cpp test. This is only provided # for backwards compatibility with the test suite. elif filename.endswith('.fail.cpp'): - if self._supportsVerify(test): + if _supportsVerify(test): steps = [ "%dbg(COMPILED WITH) %{cxx} %s %{flags} %{compile_flags} -fsyntax-only %{verify}" ] @@ -242,87 +324,11 @@ def addCompileFlags(self, config, *flags): string = ' '.join(flags) config.substitutions = [(s, x + ' ' + string) if s == '%{compile_flags}' else (s, x) for (s, x) in config.substitutions] - def _parseScript(self, test, preamble, fileDependencies): - """ - Extract the script from a test, with substitutions applied. - - Returns a list of commands ready to be executed. - - - test - The lit.Test to parse. - - - preamble - A list of commands to perform before any command in the test. - These commands can contain unexpanded substitutions, but they - must not be of the form 'RUN:' -- they must be proper commands - once substituted. - - - fileDependencies - A list of additional file dependencies for the test. - """ - - # Get the default substitutions - tmpDir, tmpBase = lit.TestRunner.getTempPaths(test) - useExternalSh = True - substitutions = lit.TestRunner.getDefaultSubstitutions(test, tmpDir, tmpBase, - normalize_slashes=useExternalSh) - - # Add the %{build} and %{run} convenience substitutions - substitutions.append(('%{build}', '%{cxx} %s %{flags} %{compile_flags} %{link_flags} -o %t.exe')) - substitutions.append(('%{run}', '%{exec} %t.exe')) - - # Add the %{verify} substitution and the verify-support feature if Clang-verify is supported - if self._supportsVerify(test): - test.config.available_features.add('verify-support') - substitutions.append(('%{verify}', '-Xclang -verify -Xclang -verify-ignore-unexpected=note -ferror-limit=0')) - - # Parse the test file, including custom directives - additionalCompileFlags = [] - fileDependencies = list(fileDependencies) - parsers = [ - lit.TestRunner.IntegratedTestKeywordParser('FILE_DEPENDENCIES:', - lit.TestRunner.ParserKind.LIST, - initial_value=fileDependencies), - lit.TestRunner.IntegratedTestKeywordParser('ADDITIONAL_COMPILE_FLAGS:', - lit.TestRunner.ParserKind.LIST, - initial_value=additionalCompileFlags) - ] - - script = list(preamble) - parsed = lit.TestRunner.parseIntegratedTestScript(test, additional_parsers=parsers, - require_script=not script) - if isinstance(parsed, lit.Test.Result): - return parsed - script += parsed - - # Add compile flags specified with ADDITIONAL_COMPILE_FLAGS. - substitutions = [(s, x + ' ' + ' '.join(additionalCompileFlags)) if s == '%{compile_flags}' - else (s, x) for (s, x) in substitutions] - - # Perform substitutions inside FILE_DEPENDENCIES lines (or injected dependencies). - # This allows using variables like %t in file dependencies. Also note that we really - # need to resolve %{file_dependencies} now, because otherwise we won't be able to - # make all paths absolute below. 
- fileDependencies = lit.TestRunner.applySubstitutions(fileDependencies, substitutions, - recursion_limit=test.config.recursiveExpansionLimit) - - # Add the %{file_dependencies} substitution before we perform substitutions - # inside the script. - testDir = os.path.dirname(test.getSourcePath()) - fileDependencies = [f if os.path.isabs(f) else os.path.join(testDir, f) for f in fileDependencies] - substitutions.append(('%{file_dependencies}', ' '.join(map(pipes.quote, fileDependencies)))) - - # Perform substitutions in the script itself. - script = lit.TestRunner.applySubstitutions(script, substitutions, - recursion_limit=test.config.recursiveExpansionLimit) - - return script - def _executeShTest(self, test, litConfig, steps, fileDependencies=None): if test.config.unsupported: return lit.Test.Result(lit.Test.UNSUPPORTED, 'Test is unsupported') - script = self._parseScript(test, steps, fileDependencies or []) + script = parseScript(test, steps, fileDependencies or []) if isinstance(script, lit.Test.Result): return script diff --git a/lldb/include/lldb/Utility/XcodeSDK.h b/lldb/include/lldb/Utility/XcodeSDK.h index 552c51c368443b..24ab5b1fdf7aef 100644 --- a/lldb/include/lldb/Utility/XcodeSDK.h +++ b/lldb/include/lldb/Utility/XcodeSDK.h @@ -22,6 +22,9 @@ class XcodeSDK { public: XcodeSDK() = default; + /// Initialize an XcodeSDK object with an SDK name. The SDK name is the last + /// directory component of a path one would pass to clang's -isysroot + /// parameter. For example, "MacOSX.10.14.sdk". XcodeSDK(std::string &&name) : m_name(std::move(name)) {} static XcodeSDK GetAnyMacOS() { return XcodeSDK("MacOSX.sdk"); } @@ -38,7 +41,6 @@ class XcodeSDK { numSDKTypes, unknown = -1 }; - static llvm::StringRef GetNameForType(Type type); /// The merge function follows a strict order to maintain monotonicity: /// 1. SDK with the higher SDKType wins. @@ -49,15 +51,27 @@ class XcodeSDK { XcodeSDK(const XcodeSDK&) = default; bool operator==(XcodeSDK other); - /// Return parsed SDK number, and SDK version number. - std::tuple Parse() const; + /// A parsed SDK directory name. + struct Info { + Type type = unknown; + llvm::VersionTuple version; + bool internal = false; + + Info() = default; + bool operator<(const Info &other) const; + }; + + /// Return parsed SDK type and version number. + Info Parse() const; + bool IsAppleInternalSDK() const; llvm::VersionTuple GetVersion() const; Type GetType() const; llvm::StringRef GetString() const; static bool SDKSupportsModules(Type type, llvm::VersionTuple version); static bool SDKSupportsModules(Type desired_type, const FileSpec &sdk_path); - static llvm::StringRef GetSDKNameForType(Type type); + /// Return the canonical SDK name, such as "macosx" for the macOS SDK. 
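A hypothetical round trip through the Info-based API declared here; the expected values follow the unit tests added later in this patch:

```cpp
#include "lldb/Utility/XcodeSDK.h"
#include <string>
#include <utility>

// Hypothetical usage only; the expected strings match the XcodeSDK unit
// tests added elsewhere in this patch.
std::string canonicalNameFor(std::string dirName) {
  using lldb_private::XcodeSDK;
  XcodeSDK sdk(std::move(dirName));        // e.g. "MacOSX10.15.Internal.sdk"
  XcodeSDK::Info info = sdk.Parse();       // type=MacOSX, version=10.15, internal=true
  return XcodeSDK::GetCanonicalName(info); // "macosx10.15.internal"
}
```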
+ static std::string GetCanonicalName(Info info); }; } // namespace lldb_private diff --git a/lldb/source/Host/macosx/objcxx/HostInfoMacOSX.mm b/lldb/source/Host/macosx/objcxx/HostInfoMacOSX.mm index c09339e8c67315..e495c752cb193c 100644 --- a/lldb/source/Host/macosx/objcxx/HostInfoMacOSX.mm +++ b/lldb/source/Host/macosx/objcxx/HostInfoMacOSX.mm @@ -298,37 +298,66 @@ static void ParseOSVersion(llvm::VersionTuple &version, NSString *Key) { } std::string HostInfoMacOSX::GetXcodeSDK(XcodeSDK sdk) { - std::string xcrun_cmd = "xcrun --show-sdk-path --sdk " + - XcodeSDK::GetSDKNameForType(sdk.GetType()).str(); - llvm::VersionTuple version = sdk.GetVersion(); - if (!version.empty()) - xcrun_cmd += version.getAsString(); - - int status = 0; - int signo = 0; - std::string output_str; - lldb_private::Status error = - Host::RunShellCommand(xcrun_cmd.c_str(), FileSpec(), &status, &signo, - &output_str, std::chrono::seconds(15)); - - // Check that xcrun return something useful. - if (status != 0 || output_str.empty()) - return {}; - - // Convert to a StringRef so we can manipulate the string without modifying - // the underlying data. - llvm::StringRef output(output_str); - - // Remove any trailing newline characters. - output = output.rtrim(); + XcodeSDK::Info info = sdk.Parse(); + std::string sdk_name = XcodeSDK::GetCanonicalName(info); + auto find_sdk = [](std::string sdk_name) -> std::string { + std::string xcrun_cmd = "xcrun --show-sdk-path --sdk " + sdk_name; + int status = 0; + int signo = 0; + std::string output_str; + lldb_private::Status error = + Host::RunShellCommand(xcrun_cmd.c_str(), FileSpec(), &status, &signo, + &output_str, std::chrono::seconds(15)); + + // Check that xcrun return something useful. + if (status != 0 || output_str.empty()) + return {}; + + // Convert to a StringRef so we can manipulate the string without modifying + // the underlying data. + llvm::StringRef output(output_str); + + // Remove any trailing newline characters. + output = output.rtrim(); + + // Strip any leading newline characters and everything before them. + const size_t last_newline = output.rfind('\n'); + if (last_newline != llvm::StringRef::npos) + output = output.substr(last_newline + 1); + + return output.str(); + }; + + std::string path = find_sdk(sdk_name); + while (path.empty()) { + // Try an alternate spelling of the name ("macosx10.9internal"). + if (info.type == XcodeSDK::Type::MacOSX && !info.version.empty() && + info.internal) { + llvm::StringRef fixed(sdk_name); + if (fixed.consume_back(".internal")) + sdk_name = fixed.str() + "internal"; + path = find_sdk(sdk_name); + if (!path.empty()) + break; + } + Log *log = lldb_private::GetLogIfAllCategoriesSet(LIBLLDB_LOG_HOST); + LLDB_LOGF(log, "Couldn't find SDK %s on host", sdk_name.c_str()); + + // Try without the version. + if (!info.version.empty()) { + info.version = {}; + sdk_name = XcodeSDK::GetCanonicalName(info); + path = find_sdk(sdk_name); + if (!path.empty()) + break; + } - // Strip any leading newline characters and everything before them. - const size_t last_newline = output.rfind('\n'); - if (last_newline != llvm::StringRef::npos) - output = output.substr(last_newline + 1); + LLDB_LOGF(log, "Couldn't find any matching SDK on host"); + return {}; + } // Whatever is left in output should be a valid path. 
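For reference, for an internal SDK directory such as "MacOSX10.9.Internal.sdk" the fallback loop above asks xcrun for the following `-sdk` spellings in order, stopping at the first one that resolves (illustrative list only):

```cpp
#include <string>
#include <vector>

// Illustrative only: the -sdk spellings the loop above would try for
// "MacOSX10.9.Internal.sdk", in order.
std::vector<std::string> candidateSdkNames() {
  return {
      "macosx10.9.internal", // canonical name from XcodeSDK::GetCanonicalName
      "macosx10.9internal",  // alternate spelling tried for internal SDKs
      "macosx.internal",     // canonical name again, with the version dropped
  };
}
```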
- if (!FileSystem::Instance().Exists(output)) + if (!FileSystem::Instance().Exists(path)) return {}; - return output.str(); + return path; } diff --git a/lldb/source/Utility/XcodeSDK.cpp b/lldb/source/Utility/XcodeSDK.cpp index 7ad0090f85e2d8..a34eac6b2c95b0 100644 --- a/lldb/source/Utility/XcodeSDK.cpp +++ b/lldb/source/Utility/XcodeSDK.cpp @@ -64,13 +64,24 @@ static llvm::VersionTuple ParseSDKVersion(llvm::StringRef &name) { return version; } +static bool ParseAppleInternalSDK(llvm::StringRef &name) { + return name.consume_front("Internal."); +} + +XcodeSDK::Info XcodeSDK::Parse() const { + XcodeSDK::Info info; + llvm::StringRef input(m_name); + info.type = ParseSDKName(input); + info.version = ParseSDKVersion(input); + info.internal = ParseAppleInternalSDK(input); + return info; +} -std::tuple XcodeSDK::Parse() const { +bool XcodeSDK::IsAppleInternalSDK() const { llvm::StringRef input(m_name); - XcodeSDK::Type sdk = ParseSDKName(input); - llvm::VersionTuple version = ParseSDKVersion(input); - return std::make_tuple( - std::move(sdk), std::move(version)); + ParseSDKName(input); + ParseSDKVersion(input); + return ParseAppleInternalSDK(input); } llvm::VersionTuple XcodeSDK::GetVersion() const { @@ -86,37 +97,64 @@ XcodeSDK::Type XcodeSDK::GetType() const { llvm::StringRef XcodeSDK::GetString() const { return m_name; } +bool XcodeSDK::Info::operator<(const Info &other) const { + return std::tie(type, version, internal) < + std::tie(other.type, other.version, other.internal); +} void XcodeSDK::Merge(XcodeSDK other) { // The "bigger" SDK always wins. - if (Parse() < other.Parse()) + auto l = Parse(); + auto r = other.Parse(); + if (l < r) *this = other; + else { + // The Internal flag always wins. + if (llvm::StringRef(m_name).endswith(".sdk")) + if (!l.internal && r.internal) + m_name = + m_name.substr(0, m_name.size() - 3) + std::string("Internal.sdk"); + } } -llvm::StringRef XcodeSDK::GetSDKNameForType(XcodeSDK::Type type) { - switch (type) { +std::string XcodeSDK::GetCanonicalName(XcodeSDK::Info info) { + std::string name; + switch (info.type) { case MacOSX: - return "macosx"; + name = "macosx"; + break; case iPhoneSimulator: - return "iphonesimulator"; + name = "iphonesimulator"; + break; case iPhoneOS: - return "iphoneos"; + name = "iphoneos"; + break; case AppleTVSimulator: - return "appletvsimulator"; + name = "appletvsimulator"; + break; case AppleTVOS: - return "appletvos"; + name = "appletvos"; + break; case WatchSimulator: - return "watchsimulator"; + name = "watchsimulator"; + break; case watchOS: - return "watchos"; + name = "watchos"; + break; case bridgeOS: - return "bridgeos"; + name = "bridgeos"; + break; case Linux: - return "linux"; + name = "linux"; + break; case numSDKTypes: case unknown: - return ""; + return {}; } - llvm_unreachable("unhandled switch case"); + if (!info.version.empty()) + name += info.version.getAsString(); + if (info.internal) + name += ".internal"; + return name; } bool XcodeSDK::SDKSupportsModules(XcodeSDK::Type sdk_type, @@ -147,12 +185,15 @@ bool XcodeSDK::SDKSupportsModules(XcodeSDK::Type desired_type, const llvm::StringRef sdk_name = last_path_component.GetStringRef(); const std::string sdk_name_lower = sdk_name.lower(); - const llvm::StringRef sdk_string = GetSDKNameForType(desired_type); + Info info; + info.type = desired_type; + const llvm::StringRef sdk_string = GetCanonicalName(info); if (!llvm::StringRef(sdk_name_lower).startswith(sdk_string)) return false; auto version_part = sdk_name.drop_front(sdk_string.size()); 
version_part.consume_back(".sdk"); + version_part.consume_back(".Internal"); llvm::VersionTuple version; if (version.tryParse(version_part)) diff --git a/lldb/tools/driver/Driver.cpp b/lldb/tools/driver/Driver.cpp index 670361787f1feb..56f181597b1825 100644 --- a/lldb/tools/driver/Driver.cpp +++ b/lldb/tools/driver/Driver.cpp @@ -592,57 +592,54 @@ int Driver::MainLoop() { bool quit_requested = false; bool stopped_for_crash = false; if ((commands_data != nullptr) && (commands_size != 0u)) { - bool success = true; FILE *commands_file = PrepareCommandsForSourcing(commands_data, commands_size); - if (commands_file != nullptr) { - m_debugger.SetInputFileHandle(commands_file, true); - - // Set the debugger into Sync mode when running the command file. - // Otherwise command files - // that run the target won't run in a sensible way. - bool old_async = m_debugger.GetAsync(); - m_debugger.SetAsync(false); - int num_errors = 0; - - SBCommandInterpreterRunOptions options; - options.SetStopOnError(true); - if (m_option_data.m_batch) - options.SetStopOnCrash(true); - - m_debugger.RunCommandInterpreter(handle_events, spawn_thread, options, - num_errors, quit_requested, - stopped_for_crash); - - if (m_option_data.m_batch && stopped_for_crash && - !m_option_data.m_after_crash_commands.empty()) { - SBStream crash_commands_stream; - WriteCommandsForSourcing(eCommandPlacementAfterCrash, - crash_commands_stream); - const char *crash_commands_data = crash_commands_stream.GetData(); - const size_t crash_commands_size = crash_commands_stream.GetSize(); - commands_file = PrepareCommandsForSourcing(crash_commands_data, - crash_commands_size); - if (commands_file != nullptr) { - bool local_quit_requested; - bool local_stopped_for_crash; - m_debugger.SetInputFileHandle(commands_file, true); - - m_debugger.RunCommandInterpreter(handle_events, spawn_thread, options, - num_errors, local_quit_requested, - local_stopped_for_crash); - if (local_quit_requested) - quit_requested = true; - } - } - m_debugger.SetAsync(old_async); - } else - success = false; - // Something went wrong with command pipe - if (!success) { + if (commands_file == nullptr) { + // We should have already printed an error in PrepareCommandsForSourcing. exit(1); } + + m_debugger.SetInputFileHandle(commands_file, true); + + // Set the debugger into Sync mode when running the command file. + // Otherwise command files + // that run the target won't run in a sensible way. 
+ bool old_async = m_debugger.GetAsync(); + m_debugger.SetAsync(false); + int num_errors = 0; + + SBCommandInterpreterRunOptions options; + options.SetStopOnError(true); + if (m_option_data.m_batch) + options.SetStopOnCrash(true); + + m_debugger.RunCommandInterpreter(handle_events, spawn_thread, options, + num_errors, quit_requested, + stopped_for_crash); + + if (m_option_data.m_batch && stopped_for_crash && + !m_option_data.m_after_crash_commands.empty()) { + SBStream crash_commands_stream; + WriteCommandsForSourcing(eCommandPlacementAfterCrash, + crash_commands_stream); + const char *crash_commands_data = crash_commands_stream.GetData(); + const size_t crash_commands_size = crash_commands_stream.GetSize(); + commands_file = + PrepareCommandsForSourcing(crash_commands_data, crash_commands_size); + if (commands_file != nullptr) { + bool local_quit_requested; + bool local_stopped_for_crash; + m_debugger.SetInputFileHandle(commands_file, true); + + m_debugger.RunCommandInterpreter(handle_events, spawn_thread, options, + num_errors, local_quit_requested, + local_stopped_for_crash); + if (local_quit_requested) + quit_requested = true; + } + } + m_debugger.SetAsync(old_async); } // Now set the input file handle to STDIN and run the command diff --git a/lldb/unittests/Host/HostInfoTest.cpp b/lldb/unittests/Host/HostInfoTest.cpp index ed4b7b5d39c001..d854426e489853 100644 --- a/lldb/unittests/Host/HostInfoTest.cpp +++ b/lldb/unittests/Host/HostInfoTest.cpp @@ -50,3 +50,13 @@ TEST_F(HostInfoTest, GetHostname) { std::string s("abc"); EXPECT_TRUE(HostInfo::GetHostname(s)); } + +#if defined(__APPLE__) +TEST_F(HostInfoTest, GetXcodeSDK) { + EXPECT_FALSE(HostInfo::GetXcodeSDK(XcodeSDK("MacOSX.sdk")).empty()); + // These are expected to fall back to an available version. + EXPECT_FALSE(HostInfo::GetXcodeSDK(XcodeSDK("MacOSX9999.sdk")).empty()); + // This is expected to fail. 
+ EXPECT_TRUE(HostInfo::GetXcodeSDK(XcodeSDK("CeciNestPasUnOS.sdk")).empty()); +} +#endif diff --git a/lldb/unittests/Utility/XcodeSDKTest.cpp b/lldb/unittests/Utility/XcodeSDKTest.cpp index a316516a167588..95b909e700184d 100644 --- a/lldb/unittests/Utility/XcodeSDKTest.cpp +++ b/lldb/unittests/Utility/XcodeSDKTest.cpp @@ -30,6 +30,11 @@ TEST(XcodeSDKTest, ParseTest) { EXPECT_EQ(XcodeSDK("MacOSX.sdk").GetVersion(), llvm::VersionTuple()); EXPECT_EQ(XcodeSDK("MacOSX10.9.sdk").GetVersion(), llvm::VersionTuple(10, 9)); EXPECT_EQ(XcodeSDK("MacOSX10.15.4.sdk").GetVersion(), llvm::VersionTuple(10, 15)); + EXPECT_EQ(XcodeSDK("MacOSX.sdk").IsAppleInternalSDK(), false); + EXPECT_EQ(XcodeSDK("MacOSX10.15.Internal.sdk").GetType(), XcodeSDK::MacOSX); + EXPECT_EQ(XcodeSDK("MacOSX10.15.Internal.sdk").GetVersion(), + llvm::VersionTuple(10, 15)); + EXPECT_EQ(XcodeSDK("MacOSX10.15.Internal.sdk").IsAppleInternalSDK(), true); EXPECT_EQ(XcodeSDK().GetType(), XcodeSDK::unknown); EXPECT_EQ(XcodeSDK().GetVersion(), llvm::VersionTuple()); } @@ -46,6 +51,12 @@ TEST(XcodeSDKTest, MergeTest) { EXPECT_EQ(sdk.GetVersion(), llvm::VersionTuple(1, 1)); sdk.Merge(XcodeSDK("WatchOS2.0.sdk")); EXPECT_EQ(sdk.GetVersion(), llvm::VersionTuple(2, 0)); + sdk.Merge(XcodeSDK("WatchOS1.1.Internal.sdk")); + EXPECT_EQ(sdk.GetVersion(), llvm::VersionTuple(2, 0)); + EXPECT_EQ(sdk.IsAppleInternalSDK(), true); + XcodeSDK empty; + empty.Merge(XcodeSDK("MacOSX10.14.Internal.sdk")); + EXPECT_EQ(empty.GetString(), llvm::StringRef("MacOSX10.14.Internal.sdk")); } TEST(XcodeSDKTest, SDKSupportsModules) { @@ -55,6 +66,10 @@ TEST(XcodeSDKTest, SDKSupportsModules) { FileSpec( base + "iPhoneSimulator.platform/Developer/SDKs/iPhoneSimulator12.0.sdk"))); + EXPECT_TRUE(XcodeSDK::SDKSupportsModules( + XcodeSDK::Type::iPhoneSimulator, + FileSpec(base + "iPhoneSimulator.platform/Developer/SDKs/" + "iPhoneSimulator12.0.Internal.sdk"))); EXPECT_FALSE(XcodeSDK::SDKSupportsModules( XcodeSDK::Type::iPhoneSimulator, FileSpec( @@ -68,19 +83,65 @@ TEST(XcodeSDKTest, SDKSupportsModules) { FileSpec(base + "MacOSX.platform/Developer/SDKs/MacOSX10.9.sdk"))); } -TEST(XcodeSDKTest, GetSDKNameForType) { - EXPECT_EQ("macosx", XcodeSDK::GetSDKNameForType(XcodeSDK::Type::MacOSX)); - EXPECT_EQ("iphonesimulator", - XcodeSDK::GetSDKNameForType(XcodeSDK::Type::iPhoneSimulator)); - EXPECT_EQ("iphoneos", XcodeSDK::GetSDKNameForType(XcodeSDK::Type::iPhoneOS)); - EXPECT_EQ("appletvsimulator", - XcodeSDK::GetSDKNameForType(XcodeSDK::Type::AppleTVSimulator)); - EXPECT_EQ("appletvos", - XcodeSDK::GetSDKNameForType(XcodeSDK::Type::AppleTVOS)); - EXPECT_EQ("watchsimulator", - XcodeSDK::GetSDKNameForType(XcodeSDK::Type::WatchSimulator)); - EXPECT_EQ("watchos", XcodeSDK::GetSDKNameForType(XcodeSDK::Type::watchOS)); - EXPECT_EQ("linux", XcodeSDK::GetSDKNameForType(XcodeSDK::Type::Linux)); - EXPECT_EQ("", XcodeSDK::GetSDKNameForType(XcodeSDK::Type::numSDKTypes)); - EXPECT_EQ("", XcodeSDK::GetSDKNameForType(XcodeSDK::Type::unknown)); +TEST(XcodeSDKTest, GetCanonicalName) { + XcodeSDK::Info info; + info.type = XcodeSDK::Type::MacOSX; + EXPECT_EQ("macosx", XcodeSDK::GetCanonicalName(info)); + + info.type = XcodeSDK::Type::iPhoneSimulator; + EXPECT_EQ("iphonesimulator", XcodeSDK::GetCanonicalName(info)); + + info.type = XcodeSDK::Type::iPhoneOS; + EXPECT_EQ("iphoneos", XcodeSDK::GetCanonicalName(info)); + + info.type = XcodeSDK::Type::AppleTVSimulator; + EXPECT_EQ("appletvsimulator", XcodeSDK::GetCanonicalName(info)); + + info.type = XcodeSDK::Type::AppleTVOS; + EXPECT_EQ("appletvos", 
XcodeSDK::GetCanonicalName(info)); + + info.type = XcodeSDK::Type::WatchSimulator; + EXPECT_EQ("watchsimulator", XcodeSDK::GetCanonicalName(info)); + + info.type = XcodeSDK::Type::watchOS; + EXPECT_EQ("watchos", XcodeSDK::GetCanonicalName(info)); + + info.type = XcodeSDK::Type::Linux; + EXPECT_EQ("linux", XcodeSDK::GetCanonicalName(info)); + + info.type = XcodeSDK::Type::numSDKTypes; + EXPECT_EQ("", XcodeSDK::GetCanonicalName(info)); + + info.type = XcodeSDK::Type::unknown; + EXPECT_EQ("", XcodeSDK::GetCanonicalName(info)); + + info.internal = true; + info.type = XcodeSDK::Type::MacOSX; + EXPECT_EQ("macosx.internal", XcodeSDK::GetCanonicalName(info)); + + info.type = XcodeSDK::Type::iPhoneSimulator; + EXPECT_EQ("iphonesimulator.internal", XcodeSDK::GetCanonicalName(info)); + + info.type = XcodeSDK::Type::iPhoneOS; + EXPECT_EQ("iphoneos.internal", XcodeSDK::GetCanonicalName(info)); + + info.type = XcodeSDK::Type::AppleTVSimulator; + EXPECT_EQ("appletvsimulator.internal", XcodeSDK::GetCanonicalName(info)); + + info.type = XcodeSDK::Type::AppleTVOS; + EXPECT_EQ("appletvos.internal", XcodeSDK::GetCanonicalName(info)); + + info.type = XcodeSDK::Type::WatchSimulator; + EXPECT_EQ("watchsimulator.internal", XcodeSDK::GetCanonicalName(info)); + + info.type = XcodeSDK::Type::watchOS; + EXPECT_EQ("watchos.internal", XcodeSDK::GetCanonicalName(info)); + + info.type = XcodeSDK::Type::MacOSX; + info.version = llvm::VersionTuple(10, 9); + EXPECT_EQ("macosx10.9.internal", XcodeSDK::GetCanonicalName(info)); + + info.type = XcodeSDK::Type::iPhoneOS; + info.version = llvm::VersionTuple(7, 0); + EXPECT_EQ("iphoneos7.0.internal", XcodeSDK::GetCanonicalName(info)); } diff --git a/llvm/include/llvm/ADT/SmallBitVector.h b/llvm/include/llvm/ADT/SmallBitVector.h index 14545e2b612d78..f570bac23ad517 100644 --- a/llvm/include/llvm/ADT/SmallBitVector.h +++ b/llvm/include/llvm/ADT/SmallBitVector.h @@ -287,11 +287,11 @@ class SmallBitVector { /// Returns -1 if the next unset bit is not found. int find_next_unset(unsigned Prev) const { if (isSmall()) { - ++Prev; uintptr_t Bits = getSmallBits(); // Mask in previous bits. - uintptr_t Mask = (uintptr_t(1) << Prev) - 1; - Bits |= Mask; + Bits |= (uintptr_t(1) << (Prev + 1)) - 1; + // Mask in unused bits. + Bits |= ~uintptr_t(0) << getSmallSize(); if (Bits == ~uintptr_t(0) || Prev + 1 >= getSmallSize()) return -1; diff --git a/llvm/include/llvm/CodeGen/AsmPrinter.h b/llvm/include/llvm/CodeGen/AsmPrinter.h index b4aa47981adabc..ff9ec9cfa6b84b 100644 --- a/llvm/include/llvm/CodeGen/AsmPrinter.h +++ b/llvm/include/llvm/CodeGen/AsmPrinter.h @@ -280,7 +280,7 @@ class AsmPrinter : public MachineFunctionPass { const class Function *Fn; uint8_t Version; - void emit(int, MCStreamer *, const MCExpr *, const MCSymbol *) const; + void emit(int, MCStreamer *) const; }; // All the sleds to be emitted. diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h index 24a82c431a5f2a..21e3d93ded2298 100644 --- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h +++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h @@ -725,7 +725,7 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { // If the cast is marked as legal (or promote) then assume low cost. if (SrcLT.first == DstLT.first && TLI->isOperationLegalOrPromote(ISD, DstLT.second)) - return 1; + return SrcLT.first; // Handle scalar conversions. 
if (!Src->isVectorTy() && !Dst->isVectorTy()) { diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CallLowering.h b/llvm/include/llvm/CodeGen/GlobalISel/CallLowering.h index e32de8888c06d7..4398377606636d 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/CallLowering.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/CallLowering.h @@ -141,6 +141,14 @@ class CallLowering { uint64_t Size, MachinePointerInfo &MPO, CCValAssign &VA) = 0; + /// An overload which takes an ArgInfo if additional information about + /// the arg is needed. + virtual void assignValueToAddress(const ArgInfo &Arg, Register Addr, + uint64_t Size, MachinePointerInfo &MPO, + CCValAssign &VA) { + assignValueToAddress(Arg.Regs[0], Addr, Size, MPO, VA); + } + /// Handle custom values, which may be passed into one or more of \p VAs. /// \return The number of \p VAs that have been assigned after the first /// one, and which should therefore be skipped from further @@ -152,7 +160,10 @@ class CallLowering { llvm_unreachable("Custom values not supported"); } - Register extendRegister(Register ValReg, CCValAssign &VA); + /// Extend a register to the location type given in VA, capped at extending + /// to at most MaxSize bits. If MaxSizeBits is 0 then no maximum is set. + Register extendRegister(Register ValReg, CCValAssign &VA, + unsigned MaxSizeBits = 0); virtual bool assignArg(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, const ArgInfo &Info, diff --git a/llvm/include/llvm/MC/MCObjectStreamer.h b/llvm/include/llvm/MC/MCObjectStreamer.h index 754b7e83135e53..48d00fab2fa94d 100644 --- a/llvm/include/llvm/MC/MCObjectStreamer.h +++ b/llvm/include/llvm/MC/MCObjectStreamer.h @@ -38,7 +38,7 @@ class MCObjectStreamer : public MCStreamer { bool EmitEHFrame; bool EmitDebugFrame; SmallVector PendingLabels; - SmallPtrSet PendingLabelSections; + SmallVector PendingLabelSections; unsigned CurSubsectionIdx; struct PendingMCFixup { const MCSymbol *Sym; diff --git a/llvm/include/llvm/XRay/InstrumentationMap.h b/llvm/include/llvm/XRay/InstrumentationMap.h index 5cbe5c44893b21..aae90345cbb749 100644 --- a/llvm/include/llvm/XRay/InstrumentationMap.h +++ b/llvm/include/llvm/XRay/InstrumentationMap.h @@ -50,6 +50,8 @@ struct SledEntry { /// Whether the sled was annotated to always be instrumented. 
bool AlwaysInstrument; + + unsigned char Version; }; struct YAMLXRaySledEntry { @@ -59,6 +61,7 @@ struct YAMLXRaySledEntry { SledEntry::FunctionKinds Kind; bool AlwaysInstrument; std::string FunctionName; + unsigned char Version; }; /// The InstrumentationMap represents the computed function id's and indicated @@ -120,6 +123,7 @@ template <> struct MappingTraits { IO.mapRequired("kind", Entry.Kind); IO.mapRequired("always-instrument", Entry.AlwaysInstrument); IO.mapOptional("function-name", Entry.FunctionName); + IO.mapOptional("version", Entry.Version, 0); } static constexpr bool flow = true; diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index 7b9bcb636861c9..0d31a9db415458 100644 --- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -1767,6 +1767,8 @@ void AsmPrinter::SetupMachineFunction(MachineFunction &MF) { CurExceptionSym = nullptr; bool NeedsLocalForSize = MAI->needsLocalForSize(); if (F.hasFnAttribute("patchable-function-entry") || + F.hasFnAttribute("function-instrument") || + F.hasFnAttribute("xray-instruction-threshold") || needFuncLabelsForEHOrDebugInfo(MF, MMI) || NeedsLocalForSize || MF.getTarget().Options.EmitStackSizeSection) { CurrentFnBegin = createTempSymbol("func_begin"); @@ -3174,14 +3176,7 @@ void AsmPrinterHandler::markFunctionEnd() {} // In the binary's "xray_instr_map" section, an array of these function entries // describes each instrumentation point. When XRay patches your code, the index // into this table will be given to your handler as a patch point identifier. -void AsmPrinter::XRayFunctionEntry::emit(int Bytes, MCStreamer *Out, - const MCExpr *Location, - const MCSymbol *CurrentFnSym) const { - if (Location) - Out->emitValueImpl(Location, Bytes); - else - Out->emitSymbolValue(Sled, Bytes); - Out->emitSymbolValue(CurrentFnSym, Bytes); +void AsmPrinter::XRayFunctionEntry::emit(int Bytes, MCStreamer *Out) const { auto Kind8 = static_cast(Kind); Out->emitBinaryData(StringRef(reinterpret_cast(&Kind8), 1)); Out->emitBinaryData( @@ -3234,19 +3229,31 @@ void AsmPrinter::emitXRayTable() { // Now we switch to the instrumentation map section. Because this is done // per-function, we are able to create an index entry that will represent the // range of sleds associated with a function. 
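In the emission loop below, when PCRel is set (computed earlier in this function, outside this hunk), each stored word is the target address minus the address of the word itself; the runtime accessors added earlier in this patch undo that by adding the field's own address back. A minimal decode sketch with illustrative field names:

```cpp
#include <cstdint>

// Minimal sketch of decoding a PC-relative sled entry: each stored value
// is (target - address-of-field), so adding the field's address recovers
// the absolute target.
struct RelativeSledEntry {
  uint64_t Address;  // sled address, stored relative to &Address
  uint64_t Function; // function entry, stored relative to &Function

  uint64_t sled() const {
    return reinterpret_cast<uint64_t>(&Address) + Address;
  }
  uint64_t function() const {
    return reinterpret_cast<uint64_t>(&Function) + Function;
  }
};
```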
+ auto &Ctx = OutContext; MCSymbol *SledsStart = OutContext.createTempSymbol("xray_sleds_start", true); OutStreamer->SwitchSection(InstMap); OutStreamer->emitLabel(SledsStart); for (const auto &Sled : Sleds) { - const MCExpr *Location = nullptr; if (PCRel) { - MCSymbol *Dot = OutContext.createTempSymbol(); + MCSymbol *Dot = Ctx.createTempSymbol(); OutStreamer->emitLabel(Dot); - Location = MCBinaryExpr::createSub( - MCSymbolRefExpr::create(Sled.Sled, OutContext), - MCSymbolRefExpr::create(Dot, OutContext), OutContext); + OutStreamer->emitValueImpl( + MCBinaryExpr::createSub(MCSymbolRefExpr::create(Sled.Sled, Ctx), + MCSymbolRefExpr::create(Dot, Ctx), Ctx), + WordSizeBytes); + OutStreamer->emitValueImpl( + MCBinaryExpr::createSub( + MCSymbolRefExpr::create(CurrentFnBegin, Ctx), + MCBinaryExpr::createAdd( + MCSymbolRefExpr::create(Dot, Ctx), + MCConstantExpr::create(WordSizeBytes, Ctx), Ctx), + Ctx), + WordSizeBytes); + } else { + OutStreamer->emitSymbolValue(Sled.Sled, WordSizeBytes); + OutStreamer->emitSymbolValue(CurrentFnSym, WordSizeBytes); } - Sled.emit(WordSizeBytes, OutStreamer.get(), Location, CurrentFnSym); + Sled.emit(WordSizeBytes, OutStreamer.get()); } MCSymbol *SledsEnd = OutContext.createTempSymbol("xray_sleds_end", true); OutStreamer->emitLabel(SledsEnd); diff --git a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp index 2162b2f041b4b9..70bb272796739e 100644 --- a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp +++ b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp @@ -373,7 +373,7 @@ bool CallLowering::handleAssignments(CCState &CCInfo, unsigned Offset = VA.getLocMemOffset(); MachinePointerInfo MPO; Register StackAddr = Handler.getStackAddress(Size, Offset, MPO); - Handler.assignValueToAddress(ArgReg, StackAddr, Size, MPO, VA); + Handler.assignValueToAddress(Args[i], StackAddr, Size, MPO, VA); } else { // FIXME: Support byvals and other weirdness return false; @@ -458,10 +458,19 @@ bool CallLowering::resultsCompatible(CallLoweringInfo &Info, } Register CallLowering::ValueHandler::extendRegister(Register ValReg, - CCValAssign &VA) { + CCValAssign &VA, + unsigned MaxSizeBits) { LLT LocTy{VA.getLocVT()}; - if (LocTy.getSizeInBits() == MRI.getType(ValReg).getSizeInBits()) + LLT ValTy = MRI.getType(ValReg); + if (LocTy.getSizeInBits() == ValTy.getSizeInBits()) return ValReg; + + if (LocTy.isScalar() && MaxSizeBits && MaxSizeBits < LocTy.getSizeInBits()) { + if (MaxSizeBits <= ValTy.getSizeInBits()) + return ValReg; + LocTy = LLT::scalar(MaxSizeBits); + } + switch (VA.getLocInfo()) { default: break; case CCValAssign::Full: diff --git a/llvm/lib/CodeGen/MachineInstr.cpp b/llvm/lib/CodeGen/MachineInstr.cpp index 8ee85c6229b65e..7afa61f2c4dbd4 100644 --- a/llvm/lib/CodeGen/MachineInstr.cpp +++ b/llvm/lib/CodeGen/MachineInstr.cpp @@ -1220,6 +1220,10 @@ bool MachineInstr::mayAlias(AAResults *AA, const MachineInstr &Other, if (!mayStore() && !Other.mayStore()) return false; + // Both instructions must be memory operations to be able to alias. + if (!mayLoadOrStore() || !Other.mayLoadOrStore()) + return false; + // Let the target decide if memory accesses cannot possibly overlap. 
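The two early returns added to MachineInstr::mayAlias are cheap flag checks that run before any operand or alias-analysis work: two loads can never conflict, and an instruction that touches no memory cannot alias one that does. A standalone model of just that ordering, with plain bools instead of MachineInstr:

    #include <iostream>

    struct MemFlags { bool Loads; bool Stores; };

    // Mirrors the order of the early-outs: the store check first, then the
    // "both must actually be memory operations" check, then fall through to
    // the expensive analysis (modelled here as simply returning true).
    static bool mightInterfere(MemFlags A, MemFlags B) {
      if (!A.Stores && !B.Stores)
        return false;                              // two reads never conflict
      if ((!A.Loads && !A.Stores) || (!B.Loads && !B.Stores))
        return false;                              // non-memory op can't alias
      return true;
    }

    int main() {
      std::cout << std::boolalpha
                << mightInterfere({true, false}, {true, false}) << '\n'   // false
                << mightInterfere({false, false}, {false, true}) << '\n'  // false
                << mightInterfere({true, false}, {false, true}) << '\n';  // true
    }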
if (TII->areMemAccessesTriviallyDisjoint(*this, Other)) return false; diff --git a/llvm/lib/MC/MCObjectStreamer.cpp b/llvm/lib/MC/MCObjectStreamer.cpp index fd4422fc368cec..5bb8e76713fbeb 100644 --- a/llvm/lib/MC/MCObjectStreamer.cpp +++ b/llvm/lib/MC/MCObjectStreamer.cpp @@ -59,8 +59,12 @@ void MCObjectStreamer::addPendingLabel(MCSymbol* S) { CurSection->addPendingLabel(S, CurSubsectionIdx); // Add this Section to the list of PendingLabelSections. - PendingLabelSections.insert(CurSection); - } else + auto SecIt = std::find(PendingLabelSections.begin(), + PendingLabelSections.end(), CurSection); + if (SecIt == PendingLabelSections.end()) + PendingLabelSections.push_back(CurSection); + } + else // There is no Section / Subsection for this label yet. PendingLabels.push_back(S); } diff --git a/llvm/lib/Target/AArch64/AArch64CallLowering.cpp b/llvm/lib/Target/AArch64/AArch64CallLowering.cpp index b15bbe106f17b2..f6daa5f7f9e2f2 100644 --- a/llvm/lib/Target/AArch64/AArch64CallLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64CallLowering.cpp @@ -171,17 +171,33 @@ struct OutgoingArgHandler : public CallLowering::ValueHandler { void assignValueToAddress(Register ValVReg, Register Addr, uint64_t Size, MachinePointerInfo &MPO, CCValAssign &VA) override { - if (VA.getLocInfo() == CCValAssign::LocInfo::AExt) { - Size = VA.getLocVT().getSizeInBits() / 8; - ValVReg = MIRBuilder.buildAnyExt(LLT::scalar(Size * 8), ValVReg) - .getReg(0); - } MachineFunction &MF = MIRBuilder.getMF(); auto MMO = MF.getMachineMemOperand(MPO, MachineMemOperand::MOStore, Size, inferAlignFromPtrInfo(MF, MPO)); MIRBuilder.buildStore(ValVReg, Addr, *MMO); } + void assignValueToAddress(const CallLowering::ArgInfo &Arg, Register Addr, + uint64_t Size, MachinePointerInfo &MPO, + CCValAssign &VA) override { + unsigned MaxSize = Size * 8; + // For varargs, we always want to extend them to 8 bytes, in which case + // we disable setting a max. + if (!Arg.IsFixed) + MaxSize = 0; + + Register ValVReg = VA.getLocInfo() != CCValAssign::LocInfo::FPExt + ? extendRegister(Arg.Regs[0], VA, MaxSize) + : Arg.Regs[0]; + + // If we extended we might need to adjust the MMO's Size. + const LLT RegTy = MRI.getType(ValVReg); + if (RegTy.getSizeInBytes() > Size) + Size = RegTy.getSizeInBytes(); + + assignValueToAddress(ValVReg, Addr, Size, MPO, VA); + } + bool assignArg(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, const CallLowering::ArgInfo &Info, diff --git a/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp b/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp index 25237bf50ddeb7..f8709bc8bc4b18 100644 --- a/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp +++ b/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp @@ -1144,24 +1144,11 @@ static int alignTo(int Num, int PowOf2) { return (Num + PowOf2 - 1) & ~(PowOf2 - 1); } -static bool mayAlias(MachineInstr &MIa, MachineInstr &MIb, - AliasAnalysis *AA) { - // One of the instructions must modify memory. - if (!MIa.mayStore() && !MIb.mayStore()) - return false; - - // Both instructions must be memory operations. 
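The MaxSizeBits parameter threaded into extendRegister, and used by the AArch64 outgoing-argument handler above, caps how far a value is widened before being stored to its stack slot; the handler passes 0 for variadic arguments to disable the cap so they are widened to the full location type. A self-contained sketch of that decision, with widths in bits and illustrative scenarios in main():

    #include <cassert>

    // Follows the shape of the new extendRegister logic; not LLVM code.
    static unsigned extendedWidth(unsigned ValBits, unsigned LocBits,
                                  unsigned MaxSizeBits /* 0 means no cap */) {
      if (ValBits == LocBits)
        return ValBits;                 // already the right size
      if (MaxSizeBits != 0 && MaxSizeBits < LocBits) {
        if (MaxSizeBits <= ValBits)
          return ValBits;               // cap already satisfied, leave it alone
        return MaxSizeBits;             // widen only up to the cap
      }
      return LocBits;                   // usual case: widen to the loc type
    }

    int main() {
      // Fixed i8 with a 1-byte slot: cap of 8 bits, nothing is widened.
      assert(extendedWidth(8, 64, 8) == 8);
      // Variadic i8 with the cap disabled: widened to the 64-bit loc type,
      // which is why the store (and its MMO) grows to 8 bytes.
      assert(extendedWidth(8, 64, 0) == 64);
      // Value already matches the location type: returned unchanged.
      assert(extendedWidth(32, 32, 16) == 32);
      return 0;
    }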
- if (!MIa.mayLoadOrStore() && !MIb.mayLoadOrStore()) - return false; - - return MIa.mayAlias(AA, MIb, /*UseTBAA*/false); -} - static bool mayAlias(MachineInstr &MIa, SmallVectorImpl &MemInsns, AliasAnalysis *AA) { for (MachineInstr *MIb : MemInsns) - if (mayAlias(MIa, *MIb, AA)) + if (MIa.mayAlias(AA, *MIb, /*UseTBAA*/ false)) return true; return false; @@ -1219,7 +1206,7 @@ bool AArch64LoadStoreOpt::findMatchingStore( return false; // If we encounter a store aliased with the load, return early. - if (MI.mayStore() && mayAlias(LoadMI, MI, AA)) + if (MI.mayStore() && LoadMI.mayAlias(AA, MI, /*UseTBAA*/ false)) return false; } while (MBBI != B && Count < Limit); return false; diff --git a/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp b/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp index 924296a2ff33b8..0a16a61203c8bd 100644 --- a/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp +++ b/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp @@ -265,8 +265,11 @@ class SILoadStoreOptimizer : public MachineFunctionPass { SmallPtrSet &Promoted) const; void addInstToMergeableList(const CombineInfo &CI, std::list > &MergeableInsts) const; - bool collectMergeableInsts(MachineBasicBlock &MBB, - std::list > &MergeableInsts) const; + + std::pair collectMergeableInsts( + MachineBasicBlock::iterator Begin, MachineBasicBlock::iterator End, + MemInfoMap &Visited, SmallPtrSet &AnchorList, + std::list> &MergeableInsts) const; public: static char ID; @@ -1944,31 +1947,38 @@ void SILoadStoreOptimizer::addInstToMergeableList(const CombineInfo &CI, MergeableInsts.emplace_back(1, CI); } -bool SILoadStoreOptimizer::collectMergeableInsts(MachineBasicBlock &MBB, - std::list > &MergeableInsts) const { +std::pair +SILoadStoreOptimizer::collectMergeableInsts( + MachineBasicBlock::iterator Begin, MachineBasicBlock::iterator End, + MemInfoMap &Visited, SmallPtrSet &AnchorList, + std::list> &MergeableInsts) const { bool Modified = false; - // Contain the list - MemInfoMap Visited; - // Contains the list of instructions for which constant offsets are being - // promoted to the IMM. - SmallPtrSet AnchorList; // Sort potential mergeable instructions into lists. One list per base address. unsigned Order = 0; - for (MachineInstr &MI : MBB.instrs()) { + MachineBasicBlock::iterator BlockI = Begin; + for (; BlockI != End; ++BlockI) { + MachineInstr &MI = *BlockI; + // We run this before checking if an address is mergeable, because it can produce // better code even if the instructions aren't mergeable. if (promoteConstantOffsetToImm(MI, Visited, AnchorList)) Modified = true; + // Don't combine if volatile. We also won't be able to merge across this, so + // break the search. We can look after this barrier for separate merges. + if (MI.hasOrderedMemoryRef()) { + LLVM_DEBUG(dbgs() << "Breaking search on memory fence: " << MI); + + // Search will resume after this instruction in a separate merge list. + ++BlockI; + break; + } + const InstClassEnum InstClass = getInstClass(MI.getOpcode(), *TII); if (InstClass == UNKNOWN) continue; - // Don't combine if volatile. 
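With this change collectMergeableInsts stops scanning when it reaches an instruction with an ordered memory reference and resumes after it, so merge candidates are grouped per fence-free window rather than the fence simply being skipped. A toy standalone model of that windowing, with strings standing in for machine instructions:

    #include <iostream>
    #include <string>
    #include <vector>

    int main() {
      std::vector<std::string> Block = {"store A", "store B", "fence",
                                        "store C", "store D"};
      std::vector<std::vector<std::string>> Windows;
      std::vector<std::string> Cur;
      for (const std::string &I : Block) {
        if (I == "fence") {        // hasOrderedMemoryRef() in the real pass
          Windows.push_back(Cur);  // close the current search window
          Cur.clear();             // and start a fresh one after the barrier
          continue;
        }
        Cur.push_back(I);
      }
      Windows.push_back(Cur);
      for (const auto &W : Windows) {  // prints two separate windows
        for (const auto &I : W)
          std::cout << I << "; ";
        std::cout << '\n';
      }
    }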
- if (MI.hasOrderedMemoryRef()) - continue; - CombineInfo CI; CI.setMI(MI, *TII, *STM); CI.Order = Order++; @@ -2008,7 +2018,7 @@ bool SILoadStoreOptimizer::collectMergeableInsts(MachineBasicBlock &MBB, ++I; } - return Modified; + return std::make_pair(BlockI, Modified); } // Scan through looking for adjacent LDS operations with constant offsets from @@ -2161,15 +2171,33 @@ bool SILoadStoreOptimizer::runOnMachineFunction(MachineFunction &MF) { bool Modified = false; + // Contains the list of instructions for which constant offsets are being + // promoted to the IMM. This is tracked for an entire block at time. + SmallPtrSet AnchorList; + MemInfoMap Visited; for (MachineBasicBlock &MBB : MF) { - std::list > MergeableInsts; - // First pass: Collect list of all instructions we know how to merge. - Modified |= collectMergeableInsts(MBB, MergeableInsts); - do { - OptimizeAgain = false; - Modified |= optimizeBlock(MergeableInsts); - } while (OptimizeAgain); + MachineBasicBlock::iterator SectionEnd; + for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); I != E; + I = SectionEnd) { + bool CollectModified; + std::list> MergeableInsts; + + // First pass: Collect list of all instructions we know how to merge in a + // subset of the block. + std::tie(SectionEnd, CollectModified) = + collectMergeableInsts(I, E, Visited, AnchorList, MergeableInsts); + + Modified |= CollectModified; + + do { + OptimizeAgain = false; + Modified |= optimizeBlock(MergeableInsts); + } while (OptimizeAgain); + } + + Visited.clear(); + AnchorList.clear(); } return Modified; diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.td b/llvm/lib/Target/RISCV/RISCVInstrInfo.td index 28b175a88ad3b8..1f75dd184cf058 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfo.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.td @@ -588,6 +588,18 @@ def SFENCE_VMA : RVInstR<0b0001001, 0b000, OPC_SYSTEM, (outs), let rd = 0; } +//===----------------------------------------------------------------------===// +// Debug instructions +//===----------------------------------------------------------------------===// + +let isBarrier = 1, isReturn = 1, isTerminator = 1 in { +def DRET : Priv<"dret", 0b0111101>, Sched<[]> { + let rd = 0; + let rs1 = 0; + let rs2 = 0b10010; +} +} // isBarrier = 1, isReturn = 1, isTerminator = 1 + //===----------------------------------------------------------------------===// // Assembler Pseudo Instructions (User-Level ISA, Version 2.2, Chapter 20) //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp index 2bc5fe90f84cf1..be5a375a2cfa1e 100644 --- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp @@ -202,10 +202,23 @@ class SimplifyCFGOpt { bool simplifyBranch(BranchInst *Branch, IRBuilder<> &Builder); bool simplifyUncondBranch(BranchInst *BI, IRBuilder<> &Builder); bool simplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder); + bool SimplifyCondBranchToTwoReturns(BranchInst *BI, IRBuilder<> &Builder); bool tryToSimplifyUncondBranchWithICmpInIt(ICmpInst *ICI, IRBuilder<> &Builder); + bool HoistThenElseCodeToIf(BranchInst *BI, const TargetTransformInfo &TTI); + bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB, + const TargetTransformInfo &TTI); + bool SimplifyTerminatorOnSelect(Instruction *OldTerm, Value *Cond, + BasicBlock *TrueBB, BasicBlock *FalseBB, + uint32_t TrueWeight, uint32_t FalseWeight); + bool 
SimplifyBranchOnICmpChain(BranchInst *BI, IRBuilder<> &Builder, + const DataLayout &DL); + bool SimplifySwitchOnSelect(SwitchInst *SI, SelectInst *Select); + bool SimplifyIndirectBrOnSelect(IndirectBrInst *IBI, SelectInst *SI); + bool TurnSwitchRangeIntoICmp(SwitchInst *SI, IRBuilder<> &Builder); + public: SimplifyCFGOpt(const TargetTransformInfo &TTI, const DataLayout &DL, SmallPtrSetImpl *LoopHeaders, @@ -1235,8 +1248,8 @@ static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I); /// Given a conditional branch that goes to BB1 and BB2, hoist any common code /// in the two blocks up into the branch block. The caller of this function /// guarantees that BI's block dominates BB1 and BB2. -static bool HoistThenElseCodeToIf(BranchInst *BI, - const TargetTransformInfo &TTI) { +bool SimplifyCFGOpt::HoistThenElseCodeToIf(BranchInst *BI, + const TargetTransformInfo &TTI) { // This does very trivial matching, with limited scanning, to find identical // instructions in the two blocks. In particular, we don't want to get into // O(M*N) situations here where M and N are the sizes of BB1 and BB2. As @@ -1969,8 +1982,8 @@ static Value *isSafeToSpeculateStore(Instruction *I, BasicBlock *BrBB, /// \endcode /// /// \returns true if the conditional block is removed. -static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB, - const TargetTransformInfo &TTI) { +bool SimplifyCFGOpt::SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB, + const TargetTransformInfo &TTI) { // Be conservative for now. FP select instruction can often be expensive. Value *BrCond = BI->getCondition(); if (isa(BrCond)) @@ -2462,8 +2475,8 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI, /// If we found a conditional branch that goes to two returning blocks, /// try to merge them together into one return, /// introducing a select if the return values disagree. -static bool SimplifyCondBranchToTwoReturns(BranchInst *BI, - IRBuilder<> &Builder) { +bool SimplifyCFGOpt::SimplifyCondBranchToTwoReturns(BranchInst *BI, + IRBuilder<> &Builder) { assert(BI->isConditional() && "Must be a conditional branch"); BasicBlock *TrueSucc = BI->getSuccessor(0); BasicBlock *FalseSucc = BI->getSuccessor(1); @@ -3522,10 +3535,11 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI, // Takes care of updating the successors and removing the old terminator. // Also makes sure not to introduce new successors by assuming that edges to // non-successor TrueBBs and FalseBBs aren't reachable. -static bool SimplifyTerminatorOnSelect(Instruction *OldTerm, Value *Cond, - BasicBlock *TrueBB, BasicBlock *FalseBB, - uint32_t TrueWeight, - uint32_t FalseWeight) { +bool SimplifyCFGOpt::SimplifyTerminatorOnSelect(Instruction *OldTerm, + Value *Cond, BasicBlock *TrueBB, + BasicBlock *FalseBB, + uint32_t TrueWeight, + uint32_t FalseWeight) { // Remove any superfluous successor edges from the CFG. // First, figure out which successors to preserve. // If TrueBB and FalseBB are equal, only try to preserve one copy of that @@ -3585,7 +3599,8 @@ static bool SimplifyTerminatorOnSelect(Instruction *OldTerm, Value *Cond, // (switch (select cond, X, Y)) on constant X, Y // with a branch - conditional if X and Y lead to distinct BBs, // unconditional otherwise. -static bool SimplifySwitchOnSelect(SwitchInst *SI, SelectInst *Select) { +bool SimplifyCFGOpt::SimplifySwitchOnSelect(SwitchInst *SI, + SelectInst *Select) { // Check for constant integer values in the select. 
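These SimplifyCFG hunks change only linkage and qualification: a set of file-static helpers become SimplifyCFGOpt member functions with the same bodies. The diff itself does not say why, but a common motivation for this kind of refactor is letting the helpers reach per-instance state directly instead of threading it through every call; a minimal illustration with invented names:

    #include <iostream>

    struct SimplifierOptions { bool AggressiveHoisting; };

    class Simplifier {
      SimplifierOptions Opts;

    public:
      explicit Simplifier(SimplifierOptions O) : Opts(O) {}

      // Previously a free function taking everything as parameters; as a
      // member it can consult Opts (or a TTI reference, etc.) directly.
      bool hoistCommonCode(int BlockSize) const {
        return Opts.AggressiveHoisting || BlockSize < 8;
      }
    };

    int main() {
      Simplifier S({/*AggressiveHoisting=*/true});
      std::cout << std::boolalpha << S.hoistCommonCode(100) << '\n';  // true
    }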
ConstantInt *TrueVal = dyn_cast(Select->getTrueValue()); ConstantInt *FalseVal = dyn_cast(Select->getFalseValue()); @@ -3621,7 +3636,8 @@ static bool SimplifySwitchOnSelect(SwitchInst *SI, SelectInst *Select) { // blockaddress(@fn, BlockB))) // with // (br cond, BlockA, BlockB). -static bool SimplifyIndirectBrOnSelect(IndirectBrInst *IBI, SelectInst *SI) { +bool SimplifyCFGOpt::SimplifyIndirectBrOnSelect(IndirectBrInst *IBI, + SelectInst *SI) { // Check that both operands of the select are block addresses. BlockAddress *TBA = dyn_cast(SI->getTrueValue()); BlockAddress *FBA = dyn_cast(SI->getFalseValue()); @@ -3756,8 +3772,9 @@ bool SimplifyCFGOpt::tryToSimplifyUncondBranchWithICmpInIt( /// The specified branch is a conditional branch. /// Check to see if it is branching on an or/and chain of icmp instructions, and /// fold it into a switch instruction if so. -static bool SimplifyBranchOnICmpChain(BranchInst *BI, IRBuilder<> &Builder, - const DataLayout &DL) { +bool SimplifyCFGOpt::SimplifyBranchOnICmpChain(BranchInst *BI, + IRBuilder<> &Builder, + const DataLayout &DL) { Instruction *Cond = dyn_cast(BI->getCondition()); if (!Cond) return false; @@ -4407,7 +4424,8 @@ static void createUnreachableSwitchDefault(SwitchInst *Switch) { /// Turn a switch with two reachable destinations into an integer range /// comparison and branch. -static bool TurnSwitchRangeIntoICmp(SwitchInst *SI, IRBuilder<> &Builder) { +bool SimplifyCFGOpt::TurnSwitchRangeIntoICmp(SwitchInst *SI, + IRBuilder<> &Builder) { assert(SI->getNumCases() > 1 && "Degenerate switch?"); bool HasDefault = diff --git a/llvm/lib/XRay/InstrumentationMap.cpp b/llvm/lib/XRay/InstrumentationMap.cpp index 1e9b69a5f9dccc..b095d7134a5fef 100644 --- a/llvm/lib/XRay/InstrumentationMap.cpp +++ b/llvm/lib/XRay/InstrumentationMap.cpp @@ -68,10 +68,13 @@ loadObj(StringRef Filename, object::OwningBinary &ObjFile, StringRef Contents = ""; const auto &Sections = ObjFile.getBinary()->sections(); + uint64_t Address = 0; auto I = llvm::find_if(Sections, [&](object::SectionRef Section) { Expected NameOrErr = Section.getName(); - if (NameOrErr) + if (NameOrErr) { + Address = Section.getAddress(); return *NameOrErr == "xray_instr_map"; + } consumeError(NameOrErr.takeError()); return false; }); @@ -141,6 +144,7 @@ loadObj(StringRef Filename, object::OwningBinary &ObjFile, return Address; }; + const int WordSize = 8; int32_t FuncId = 1; uint64_t CurFn = 0; for (; C != Contents.bytes_end(); C += ELF64SledEntrySize) { @@ -165,6 +169,11 @@ loadObj(StringRef Filename, object::OwningBinary &ObjFile, std::make_error_code(std::errc::executable_format_error)); Entry.Kind = Kinds[Kind]; Entry.AlwaysInstrument = Extractor.getU8(&OffsetPtr) != 0; + Entry.Version = Extractor.getU8(&OffsetPtr); + if (Entry.Version >= 2) { + Entry.Address += C - Contents.bytes_begin() + Address; + Entry.Function += C - Contents.bytes_begin() + WordSize + Address; + } // We do replicate the function id generation scheme implemented in the // XRay runtime. 
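For version >= 2 entries the loader above rebuilds absolute PCs by adding the position of the field itself: the section's load address plus the entry's byte offset, plus one extra word for the function field. A self-contained check of that arithmetic with made-up addresses:

    #include <cassert>
    #include <cstdint>

    int main() {
      const std::uint64_t WordSize = 8;
      const std::uint64_t SectionAddr = 0x500000; // address of xray_instr_map
      const std::uint64_t EntryOffset = 0x40;     // C - Contents.bytes_begin()

      // Raw self-relative words as read from a version-2 entry (invented).
      const std::int64_t RawAddress = -0xff040;   // sled PC relative to field
      const std::int64_t RawFunction = -0xff058;  // fn PC relative to next field

      std::uint64_t SledPC = RawAddress + EntryOffset + SectionAddr;
      std::uint64_t FuncPC = RawFunction + EntryOffset + WordSize + SectionAddr;

      assert(SledPC == 0x401000);
      assert(FuncPC == 0x400ff0);
      return 0;
    }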
@@ -209,8 +218,8 @@ loadYAML(sys::fs::file_t Fd, size_t FileSize, StringRef Filename, for (const auto &Y : YAMLSleds) { FunctionAddresses[Y.FuncId] = Y.Function; FunctionIds[Y.Function] = Y.FuncId; - Sleds.push_back( - SledEntry{Y.Address, Y.Function, Y.Kind, Y.AlwaysInstrument}); + Sleds.push_back(SledEntry{Y.Address, Y.Function, Y.Kind, Y.AlwaysInstrument, + Y.Version}); } return Error::success(); } diff --git a/llvm/test/Analysis/CostModel/ARM/cast.ll b/llvm/test/Analysis/CostModel/ARM/cast.ll index 5d857bc2e80280..edfe281ff53416 100644 --- a/llvm/test/Analysis/CostModel/ARM/cast.ll +++ b/llvm/test/Analysis/CostModel/ARM/cast.ll @@ -424,8 +424,8 @@ define i32 @casts() { ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r133 = fptosi <8 x float> undef to <8 x i8> ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r134 = fptoui <8 x float> undef to <8 x i16> ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r135 = fptosi <8 x float> undef to <8 x i16> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r136 = fptoui <8 x float> undef to <8 x i32> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r137 = fptosi <8 x float> undef to <8 x i32> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r136 = fptoui <8 x float> undef to <8 x i32> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r137 = fptosi <8 x float> undef to <8 x i32> ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 330 for instruction: %r138 = fptoui <8 x float> undef to <8 x i64> ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 330 for instruction: %r139 = fptosi <8 x float> undef to <8 x i64> ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 330 for instruction: %r140 = fptoui <8 x double> undef to <8 x i1> @@ -444,8 +444,8 @@ define i32 @casts() { ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %r153 = fptosi <16 x float> undef to <16 x i8> ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %r154 = fptoui <16 x float> undef to <16 x i16> ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %r155 = fptosi <16 x float> undef to <16 x i16> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r156 = fptoui <16 x float> undef to <16 x i32> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r157 = fptosi <16 x float> undef to <16 x i32> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r156 = fptoui <16 x float> undef to <16 x i32> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r157 = fptosi <16 x float> undef to <16 x i32> ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1322 for instruction: %r158 = fptoui <16 x float> undef to <16 x i64> ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1322 for instruction: %r159 = fptosi <16 x float> undef to <16 x i64> ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 661 for instruction: %r160 = fptoui <16 x double> undef to <16 x i1> @@ -504,8 +504,8 @@ define i32 @casts() { ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r213 = sitofp <8 x i8> undef to <8 x float> ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r214 = uitofp <8 x i16> undef to <8 x float> ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 
10 for instruction: %r215 = sitofp <8 x i16> undef to <8 x float> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r216 = uitofp <8 x i32> undef to <8 x float> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r217 = sitofp <8 x i32> undef to <8 x float> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r216 = uitofp <8 x i32> undef to <8 x float> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r217 = sitofp <8 x i32> undef to <8 x float> ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %r218 = uitofp <8 x i64> undef to <8 x float> ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %r219 = sitofp <8 x i64> undef to <8 x float> ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 522 for instruction: %r220 = uitofp <8 x i1> undef to <8 x double> @@ -524,8 +524,8 @@ define i32 @casts() { ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %r233 = sitofp <16 x i8> undef to <16 x float> ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %r234 = uitofp <16 x i16> undef to <16 x float> ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %r235 = sitofp <16 x i16> undef to <16 x float> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r236 = uitofp <16 x i32> undef to <16 x float> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r237 = sitofp <16 x i32> undef to <16 x float> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r236 = uitofp <16 x i32> undef to <16 x float> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r237 = sitofp <16 x i32> undef to <16 x float> ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 170 for instruction: %r238 = uitofp <16 x i64> undef to <16 x float> ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 170 for instruction: %r239 = sitofp <16 x i64> undef to <16 x float> ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1045 for instruction: %r240 = uitofp <16 x i1> undef to <16 x double> @@ -611,16 +611,16 @@ define i32 @casts() { ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r67 = uitofp i64 undef to float ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r68 = sitofp i64 undef to double ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r69 = uitofp i64 undef to double -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %q70 = sext <4 x i8> undef to <4 x i32> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %q71 = sext <8 x i8> undef to <8 x i16> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s70 = sext <4 x i8> undef to <4 x i32> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r70 = sext <8 x i8> undef to <8 x i32> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r71 = sext <16 x i8> undef to <16 x i32> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %q72 = zext <4 x i8> undef to <4 x i32> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %q73 = zext <8 x i8> undef to <8 x i16> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s72 
= zext <4 x i8> undef to <4 x i32> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r72 = zext <8 x i8> undef to <8 x i32> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r73 = zext <16 x i8> undef to <16 x i32> +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %q70 = sext <4 x i8> undef to <4 x i32> +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %q71 = sext <8 x i8> undef to <8 x i16> +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %s70 = sext <4 x i8> undef to <4 x i32> +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r70 = sext <8 x i8> undef to <8 x i32> +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r71 = sext <16 x i8> undef to <16 x i32> +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %q72 = zext <4 x i8> undef to <4 x i32> +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %q73 = zext <8 x i8> undef to <8 x i16> +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %s72 = zext <4 x i8> undef to <4 x i32> +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r72 = zext <8 x i8> undef to <8 x i32> +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r73 = zext <16 x i8> undef to <16 x i32> ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %rext_0 = sext <8 x i8> undef to <8 x i64> ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %rext_1 = zext <8 x i8> undef to <8 x i64> ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %rext_2 = sext <8 x i16> undef to <8 x i64> @@ -878,16 +878,16 @@ define i32 @casts() { ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r67 = uitofp i64 undef to float ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r68 = sitofp i64 undef to double ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r69 = uitofp i64 undef to double -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %q70 = sext <4 x i8> undef to <4 x i32> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %q71 = sext <8 x i8> undef to <8 x i16> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s70 = sext <4 x i8> undef to <4 x i32> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r70 = sext <8 x i8> undef to <8 x i32> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r71 = sext <16 x i8> undef to <16 x i32> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %q72 = zext <4 x i8> undef to <4 x i32> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %q73 = zext <8 x i8> undef to <8 x i16> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s72 = zext <4 x i8> undef to <4 x i32> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r72 = zext <8 x i8> undef to <8 x i32> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r73 = zext <16 x i8> undef to <16 x i32> +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost 
of 4 for instruction: %q70 = sext <4 x i8> undef to <4 x i32> +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %q71 = sext <8 x i8> undef to <8 x i16> +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %s70 = sext <4 x i8> undef to <4 x i32> +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r70 = sext <8 x i8> undef to <8 x i32> +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r71 = sext <16 x i8> undef to <16 x i32> +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %q72 = zext <4 x i8> undef to <4 x i32> +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %q73 = zext <8 x i8> undef to <8 x i16> +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %s72 = zext <4 x i8> undef to <4 x i32> +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r72 = zext <8 x i8> undef to <8 x i32> +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r73 = zext <16 x i8> undef to <16 x i32> ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %rext_0 = sext <8 x i8> undef to <8 x i64> ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %rext_1 = zext <8 x i8> undef to <8 x i64> ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %rext_2 = sext <8 x i16> undef to <8 x i64> @@ -1738,14 +1738,14 @@ define i32 @load_extends() { ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r9 = zext i16 %loadi16 to i64 ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r10 = sext i32 %loadi32 to i64 ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r11 = zext i32 %loadi32 to i64 -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v0 = sext <8 x i8> %loadv8i8 to <8 x i16> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v1 = zext <8 x i8> %loadv8i8 to <8 x i16> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2 = sext <4 x i8> %loadv4i8 to <4 x i32> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v3 = zext <4 x i8> %loadv4i8 to <4 x i32> +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v0 = sext <8 x i8> %loadv8i8 to <8 x i16> +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v1 = zext <8 x i8> %loadv8i8 to <8 x i16> +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2 = sext <4 x i8> %loadv4i8 to <4 x i32> +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v3 = zext <4 x i8> %loadv4i8 to <4 x i32> ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v4 = sext <2 x i8> %loadv2i8 to <2 x i64> ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v5 = zext <2 x i8> %loadv2i8 to <2 x i64> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v6 = sext <4 x i16> %loadv4i16 to <4 x i32> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v7 = zext <4 x i16> %loadv4i16 to <4 x i32> +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v6 = sext <4 x i16> %loadv4i16 to 
<4 x i32> +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v7 = zext <4 x i16> %loadv4i16 to <4 x i32> ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %v8 = sext <2 x i16> %loadv2i16 to <2 x i64> ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v9 = zext <2 x i16> %loadv2i16 to <2 x i64> ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v10 = sext <2 x i32> %loadv2i32 to <2 x i64> @@ -1774,14 +1774,14 @@ define i32 @load_extends() { ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r9 = zext i16 %loadi16 to i64 ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r10 = sext i32 %loadi32 to i64 ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r11 = zext i32 %loadi32 to i64 -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v0 = sext <8 x i8> %loadv8i8 to <8 x i16> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v1 = zext <8 x i8> %loadv8i8 to <8 x i16> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2 = sext <4 x i8> %loadv4i8 to <4 x i32> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v3 = zext <4 x i8> %loadv4i8 to <4 x i32> +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v0 = sext <8 x i8> %loadv8i8 to <8 x i16> +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v1 = zext <8 x i8> %loadv8i8 to <8 x i16> +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2 = sext <4 x i8> %loadv4i8 to <4 x i32> +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v3 = zext <4 x i8> %loadv4i8 to <4 x i32> ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v4 = sext <2 x i8> %loadv2i8 to <2 x i64> ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v5 = zext <2 x i8> %loadv2i8 to <2 x i64> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v6 = sext <4 x i16> %loadv4i16 to <4 x i32> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v7 = zext <4 x i16> %loadv4i16 to <4 x i32> +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v6 = sext <4 x i16> %loadv4i16 to <4 x i32> +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v7 = zext <4 x i16> %loadv4i16 to <4 x i32> ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %v8 = sext <2 x i16> %loadv2i16 to <2 x i64> ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v9 = zext <2 x i16> %loadv2i16 to <2 x i64> ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v10 = sext <2 x i32> %loadv2i32 to <2 x i64> diff --git a/llvm/test/Analysis/CostModel/X86/fptosi.ll b/llvm/test/Analysis/CostModel/X86/fptosi.ll index a230648eb5833d..b9f01b55501156 100644 --- a/llvm/test/Analysis/CostModel/X86/fptosi.ll +++ b/llvm/test/Analysis/CostModel/X86/fptosi.ll @@ -219,12 +219,26 @@ define i32 @fptosi_float_i64(i32 %arg) { } define i32 @fptosi_float_i32(i32 %arg) { -; CHECK-LABEL: 'fptosi_float_i32' -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = fptosi float undef to i32 -; CHECK-NEXT: 
Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = fptosi <4 x float> undef to <4 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = fptosi <8 x float> undef to <8 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I32 = fptosi <16 x float> undef to <16 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; SSE-LABEL: 'fptosi_float_i32' +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = fptosi float undef to i32 +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = fptosi <4 x float> undef to <4 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = fptosi <8 x float> undef to <8 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = fptosi <16 x float> undef to <16 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; +; AVX-LABEL: 'fptosi_float_i32' +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = fptosi float undef to i32 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = fptosi <4 x float> undef to <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = fptosi <8 x float> undef to <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I32 = fptosi <16 x float> undef to <16 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; +; AVX512-LABEL: 'fptosi_float_i32' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = fptosi float undef to i32 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = fptosi <4 x float> undef to <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = fptosi <8 x float> undef to <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I32 = fptosi <16 x float> undef to <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; %I32 = fptosi float undef to i32 %V4I32 = fptosi <4 x float> undef to <4 x i32> diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-callingconv-ios.ll b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-callingconv-ios.ll index f06cd14abe753f..eec73695808199 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-callingconv-ios.ll +++ b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-callingconv-ios.ll @@ -1,31 +1,101 @@ -; RUN: llc -O0 -stop-after=irtranslator -global-isel -verify-machineinstrs %s -o - 2>&1 | FileCheck %s +; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +; RUN: llc -O0 -stop-after=irtranslator -global-isel -verify-machineinstrs %s -o - | FileCheck %s target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" target triple = "aarch64-apple-ios9.0" -; CHECK-LABEL: name: test_varargs -; CHECK: [[ANSWER:%[0-9]+]]:_(s32) = G_CONSTANT i32 42 -; CHECK: [[D_ONE:%[0-9]+]]:_(s64) = G_FCONSTANT double 1.000000e+00 -; CHECK: [[TWELVE:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 -; CHECK: [[THREE:%[0-9]+]]:_(s8) = G_CONSTANT i8 3 -; CHECK: [[ONE:%[0-9]+]]:_(s16) = G_CONSTANT i16 1 -; CHECK: [[FOUR:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 -; CHECK: [[F_ONE:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 -; CHECK: [[TWO:%[0-9]+]]:_(s64) = G_FCONSTANT double 2.000000e+00 - -; CHECK: $w0 = COPY [[ANSWER]] -; CHECK: $d0 = 
COPY [[D_ONE]] -; CHECK: $x1 = COPY [[TWELVE]] -; CHECK: [[THREE_EXT:%[0-9]+]]:_(s64) = G_ANYEXT [[THREE]] -; CHECK: G_STORE [[THREE_EXT]](s64), {{%[0-9]+}}(p0) :: (store 8 into stack, align 1) -; CHECK: [[ONE_EXT:%[0-9]+]]:_(s64) = G_ANYEXT [[ONE]] -; CHECK: G_STORE [[ONE_EXT]](s64), {{%[0-9]+}}(p0) :: (store 8 into stack + 8, align 1) -; CHECK: [[FOUR_EXT:%[0-9]+]]:_(s64) = G_ANYEXT [[FOUR]] -; CHECK: G_STORE [[FOUR_EXT]](s64), {{%[0-9]+}}(p0) :: (store 8 into stack + 16, align 1) -; CHECK: G_STORE [[F_ONE]](s32), {{%[0-9]+}}(p0) :: (store 4 into stack + 24, align 1) -; CHECK: G_STORE [[TWO]](s64), {{%[0-9]+}}(p0) :: (store 8 into stack + 32, align 1) declare void @varargs(i32, double, i64, ...) define void @test_varargs() { + ; CHECK-LABEL: name: test_varargs + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42 + ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_FCONSTANT double 1.000000e+00 + ; CHECK: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 + ; CHECK: [[C3:%[0-9]+]]:_(s8) = G_CONSTANT i8 3 + ; CHECK: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 1 + ; CHECK: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; CHECK: [[C6:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 + ; CHECK: [[C7:%[0-9]+]]:_(s64) = G_FCONSTANT double 2.000000e+00 + ; CHECK: ADJCALLSTACKDOWN 40, 0, implicit-def $sp, implicit $sp + ; CHECK: $w0 = COPY [[C]](s32) + ; CHECK: $d0 = COPY [[C1]](s64) + ; CHECK: $x1 = COPY [[C2]](s64) + ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $sp + ; CHECK: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C8]](s64) + ; CHECK: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[C3]](s8) + ; CHECK: G_STORE [[ANYEXT]](s64), [[PTR_ADD]](p0) :: (store 8 into stack, align 1) + ; CHECK: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 + ; CHECK: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C9]](s64) + ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[C4]](s16) + ; CHECK: G_STORE [[ANYEXT1]](s64), [[PTR_ADD1]](p0) :: (store 8 into stack + 8, align 1) + ; CHECK: [[C10:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 + ; CHECK: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C10]](s64) + ; CHECK: [[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT [[C5]](s32) + ; CHECK: G_STORE [[ANYEXT2]](s64), [[PTR_ADD2]](p0) :: (store 8 into stack + 16, align 1) + ; CHECK: [[C11:%[0-9]+]]:_(s64) = G_CONSTANT i64 24 + ; CHECK: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C11]](s64) + ; CHECK: G_STORE [[C6]](s32), [[PTR_ADD3]](p0) :: (store 4 into stack + 24, align 1) + ; CHECK: [[C12:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 + ; CHECK: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C12]](s64) + ; CHECK: G_STORE [[C7]](s64), [[PTR_ADD4]](p0) :: (store 8 into stack + 32, align 1) + ; CHECK: BL @varargs, csr_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit $w0, implicit $d0, implicit $x1 + ; CHECK: ADJCALLSTACKUP 40, 0, implicit-def $sp, implicit $sp + ; CHECK: RET_ReallyLR call void(i32, double, i64, ...) 
@varargs(i32 42, double 1.0, i64 12, i8 3, i16 1, i32 4, float 1.0, double 2.0) ret void } + +declare i64 @i8i16callee(i64 %a1, i64 %a2, i64 %a3, i8 signext %a4, i16 signext %a5, i64 %a6, i64 %a7, i64 %a8, i8 signext %b1, i16 signext %b2, i8 signext %b3, i8 signext %b4) nounwind readnone noinline + +define i32 @i8i16caller() nounwind readnone { + ; CHECK-LABEL: name: i8i16caller + ; CHECK: bb.1.entry: + ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; CHECK: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 + ; CHECK: [[C3:%[0-9]+]]:_(s8) = G_CONSTANT i8 3 + ; CHECK: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 4 + ; CHECK: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 5 + ; CHECK: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 + ; CHECK: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 7 + ; CHECK: [[C8:%[0-9]+]]:_(s8) = G_CONSTANT i8 97 + ; CHECK: [[C9:%[0-9]+]]:_(s16) = G_CONSTANT i16 98 + ; CHECK: [[C10:%[0-9]+]]:_(s8) = G_CONSTANT i8 99 + ; CHECK: [[C11:%[0-9]+]]:_(s8) = G_CONSTANT i8 100 + ; CHECK: ADJCALLSTACKDOWN 6, 0, implicit-def $sp, implicit $sp + ; CHECK: $x0 = COPY [[C]](s64) + ; CHECK: $x1 = COPY [[C1]](s64) + ; CHECK: $x2 = COPY [[C2]](s64) + ; CHECK: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[C3]](s8) + ; CHECK: $w3 = COPY [[SEXT]](s32) + ; CHECK: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[C4]](s16) + ; CHECK: $w4 = COPY [[SEXT1]](s32) + ; CHECK: $x5 = COPY [[C5]](s64) + ; CHECK: $x6 = COPY [[C6]](s64) + ; CHECK: $x7 = COPY [[C7]](s64) + ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $sp + ; CHECK: [[C12:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C12]](s64) + ; CHECK: G_STORE [[C8]](s8), [[PTR_ADD]](p0) :: (store 1 into stack) + ; CHECK: [[C13:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 + ; CHECK: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C13]](s64) + ; CHECK: G_STORE [[C9]](s16), [[PTR_ADD1]](p0) :: (store 2 into stack + 2, align 1) + ; CHECK: [[C14:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 + ; CHECK: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C14]](s64) + ; CHECK: G_STORE [[C10]](s8), [[PTR_ADD2]](p0) :: (store 1 into stack + 4) + ; CHECK: [[C15:%[0-9]+]]:_(s64) = G_CONSTANT i64 5 + ; CHECK: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C15]](s64) + ; CHECK: G_STORE [[C11]](s8), [[PTR_ADD3]](p0) :: (store 1 into stack + 5) + ; CHECK: BL @i8i16callee, csr_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit $x0, implicit $x1, implicit $x2, implicit $w3, implicit $w4, implicit $x5, implicit $x6, implicit $x7, implicit-def $x0 + ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY $x0 + ; CHECK: ADJCALLSTACKUP 6, 0, implicit-def $sp, implicit $sp + ; CHECK: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64) + ; CHECK: $w0 = COPY [[TRUNC]](s32) + ; CHECK: RET_ReallyLR implicit $w0 +entry: + %call = tail call i64 @i8i16callee(i64 0, i64 1, i64 2, i8 signext 3, i16 signext 4, i64 5, i64 6, i64 7, i8 97, i16 98, i8 99, i8 100) + %conv = trunc i64 %call to i32 + ret i32 %conv +} + diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-callingconv.ll b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-callingconv.ll index 21cc3f82afdf17..cb3dd9a1809712 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-callingconv.ll +++ b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-callingconv.ll @@ -1,84 +1,100 @@ -; RUN: llc -O0 -stop-after=irtranslator -global-isel -global-isel-abort=1 -verify-machineinstrs %s -o - 2>&1 | FileCheck %s +; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +; RUN: llc -O0 
-stop-after=irtranslator -global-isel -global-isel-abort=1 -verify-machineinstrs %s -o - | FileCheck %s target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" target triple = "aarch64-linux-gnu" -; CHECK-LABEL: name: args_i32 -; CHECK: %[[ARG0:[0-9]+]]:_(s32) = COPY $w0 -; CHECK: %{{[0-9]+}}:_(s32) = COPY $w1 -; CHECK: %{{[0-9]+}}:_(s32) = COPY $w2 -; CHECK: %{{[0-9]+}}:_(s32) = COPY $w3 -; CHECK: %{{[0-9]+}}:_(s32) = COPY $w4 -; CHECK: %{{[0-9]+}}:_(s32) = COPY $w5 -; CHECK: %{{[0-9]+}}:_(s32) = COPY $w6 -; CHECK: %{{[0-9]+}}:_(s32) = COPY $w7 -; CHECK: $w0 = COPY %[[ARG0]] - define i32 @args_i32(i32 %w0, i32 %w1, i32 %w2, i32 %w3, + ; CHECK-LABEL: name: args_i32 + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: liveins: $w0, $w1, $w2, $w3, $w4, $w5, $w6, $w7 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0 + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1 + ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $w2 + ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $w3 + ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY $w4 + ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY $w5 + ; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY $w6 + ; CHECK: [[COPY7:%[0-9]+]]:_(s32) = COPY $w7 + ; CHECK: $w0 = COPY [[COPY]](s32) + ; CHECK: RET_ReallyLR implicit $w0 i32 %w4, i32 %w5, i32 %w6, i32 %w7) { ret i32 %w0 } -; CHECK-LABEL: name: args_i64 -; CHECK: %[[ARG0:[0-9]+]]:_(s64) = COPY $x0 -; CHECK: %{{[0-9]+}}:_(s64) = COPY $x1 -; CHECK: %{{[0-9]+}}:_(s64) = COPY $x2 -; CHECK: %{{[0-9]+}}:_(s64) = COPY $x3 -; CHECK: %{{[0-9]+}}:_(s64) = COPY $x4 -; CHECK: %{{[0-9]+}}:_(s64) = COPY $x5 -; CHECK: %{{[0-9]+}}:_(s64) = COPY $x6 -; CHECK: %{{[0-9]+}}:_(s64) = COPY $x7 -; CHECK: $x0 = COPY %[[ARG0]] define i64 @args_i64(i64 %x0, i64 %x1, i64 %x2, i64 %x3, + ; CHECK-LABEL: name: args_i64 + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: liveins: $x0, $x1, $x2, $x3, $x4, $x5, $x6, $x7 + ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x0 + ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY $x1 + ; CHECK: [[COPY2:%[0-9]+]]:_(s64) = COPY $x2 + ; CHECK: [[COPY3:%[0-9]+]]:_(s64) = COPY $x3 + ; CHECK: [[COPY4:%[0-9]+]]:_(s64) = COPY $x4 + ; CHECK: [[COPY5:%[0-9]+]]:_(s64) = COPY $x5 + ; CHECK: [[COPY6:%[0-9]+]]:_(s64) = COPY $x6 + ; CHECK: [[COPY7:%[0-9]+]]:_(s64) = COPY $x7 + ; CHECK: $x0 = COPY [[COPY]](s64) + ; CHECK: RET_ReallyLR implicit $x0 i64 %x4, i64 %x5, i64 %x6, i64 %x7) { ret i64 %x0 } -; CHECK-LABEL: name: args_ptrs -; CHECK: %[[ARG0:[0-9]+]]:_(p0) = COPY $x0 -; CHECK: %{{[0-9]+}}:_(p0) = COPY $x1 -; CHECK: %{{[0-9]+}}:_(p0) = COPY $x2 -; CHECK: %{{[0-9]+}}:_(p0) = COPY $x3 -; CHECK: %{{[0-9]+}}:_(p0) = COPY $x4 -; CHECK: %{{[0-9]+}}:_(p0) = COPY $x5 -; CHECK: %{{[0-9]+}}:_(p0) = COPY $x6 -; CHECK: %{{[0-9]+}}:_(p0) = COPY $x7 -; CHECK: $x0 = COPY %[[ARG0]] define i8* @args_ptrs(i8* %x0, i16* %x1, <2 x i8>* %x2, {i8, i16, i32}* %x3, + ; CHECK-LABEL: name: args_ptrs + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: liveins: $x0, $x1, $x2, $x3, $x4, $x5, $x6, $x7 + ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 + ; CHECK: [[COPY1:%[0-9]+]]:_(p0) = COPY $x1 + ; CHECK: [[COPY2:%[0-9]+]]:_(p0) = COPY $x2 + ; CHECK: [[COPY3:%[0-9]+]]:_(p0) = COPY $x3 + ; CHECK: [[COPY4:%[0-9]+]]:_(p0) = COPY $x4 + ; CHECK: [[COPY5:%[0-9]+]]:_(p0) = COPY $x5 + ; CHECK: [[COPY6:%[0-9]+]]:_(p0) = COPY $x6 + ; CHECK: [[COPY7:%[0-9]+]]:_(p0) = COPY $x7 + ; CHECK: $x0 = COPY [[COPY]](p0) + ; CHECK: RET_ReallyLR implicit $x0 [3 x float]* %x4, double* %x5, i8* %x6, i8* %x7) { ret i8* %x0 } -; CHECK-LABEL: name: args_arr -; CHECK: %[[ARG0:[0-9]+]]:_(s64) = COPY $d0 -; CHECK: $d0 = COPY %[[ARG0]] define [1 x double] @args_arr([1 
x double] %d0) { + ; CHECK-LABEL: name: args_arr + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: liveins: $d0 + ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $d0 + ; CHECK: $d0 = COPY [[COPY]](s64) + ; CHECK: RET_ReallyLR implicit $d0 ret [1 x double] %d0 } -; CHECK-LABEL: name: test_varargs -; CHECK: [[ANSWER:%[0-9]+]]:_(s32) = G_CONSTANT i32 42 -; CHECK: [[D_ONE:%[0-9]+]]:_(s64) = G_FCONSTANT double 1.000000e+00 -; CHECK: [[TWELVE:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 -; CHECK: [[THREE:%[0-9]+]]:_(s8) = G_CONSTANT i8 3 -; CHECK: [[ONE:%[0-9]+]]:_(s16) = G_CONSTANT i16 1 -; CHECK: [[FOUR:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 -; CHECK: [[F_ONE:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 -; CHECK: [[TWO:%[0-9]+]]:_(s64) = G_FCONSTANT double 2.000000e+00 - -; CHECK: $w0 = COPY [[ANSWER]] -; CHECK: $d0 = COPY [[D_ONE]] -; CHECK: $x1 = COPY [[TWELVE]] -; CHECK: [[THREE_TMP:%[0-9]+]]:_(s32) = G_ANYEXT [[THREE]] -; CHECK: $w2 = COPY [[THREE_TMP]](s32) -; CHECK: [[ONE_TMP:%[0-9]+]]:_(s32) = G_ANYEXT [[ONE]] -; CHECK: $w3 = COPY [[ONE_TMP]](s32) -; CHECK: $w4 = COPY [[FOUR]](s32) -; CHECK: $s1 = COPY [[F_ONE]](s32) -; CHECK: $d2 = COPY [[TWO]](s64) declare void @varargs(i32, double, i64, ...) define void @test_varargs() { + ; CHECK-LABEL: name: test_varargs + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42 + ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_FCONSTANT double 1.000000e+00 + ; CHECK: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 + ; CHECK: [[C3:%[0-9]+]]:_(s8) = G_CONSTANT i8 3 + ; CHECK: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 1 + ; CHECK: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; CHECK: [[C6:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 + ; CHECK: [[C7:%[0-9]+]]:_(s64) = G_FCONSTANT double 2.000000e+00 + ; CHECK: ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp + ; CHECK: $w0 = COPY [[C]](s32) + ; CHECK: $d0 = COPY [[C1]](s64) + ; CHECK: $x1 = COPY [[C2]](s64) + ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[C3]](s8) + ; CHECK: $w2 = COPY [[ANYEXT]](s32) + ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[C4]](s16) + ; CHECK: $w3 = COPY [[ANYEXT1]](s32) + ; CHECK: $w4 = COPY [[C5]](s32) + ; CHECK: $s1 = COPY [[C6]](s32) + ; CHECK: $d2 = COPY [[C7]](s64) + ; CHECK: BL @varargs, csr_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit $w0, implicit $d0, implicit $x1, implicit $w2, implicit $w3, implicit $w4, implicit $s1, implicit $d2 + ; CHECK: ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp + ; CHECK: RET_ReallyLR call void(i32, double, i64, ...) @varargs(i32 42, double 1.0, i64 12, i8 3, i16 1, i32 4, float 1.0, double 2.0) ret void } @@ -88,46 +104,123 @@ define void @test_varargs() { ; stack. The ADJCALLSTACK ops should reflect this, even if the difference is ; theoretical. 
declare void @stack_ext_needed([8 x i64], i8 signext %in) -; CHECK-LABEL: name: test_stack_ext_needed -; CHECK: ADJCALLSTACKDOWN 8 -; CHECK: BL @stack_ext_needed -; CHECK: ADJCALLSTACKUP 8 define void @test_stack_ext_needed() { + ; CHECK-LABEL: name: test_stack_ext_needed + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF + ; CHECK: [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 42 + ; CHECK: ADJCALLSTACKDOWN 8, 0, implicit-def $sp, implicit $sp + ; CHECK: $x0 = COPY [[DEF]](s64) + ; CHECK: $x1 = COPY [[DEF]](s64) + ; CHECK: $x2 = COPY [[DEF]](s64) + ; CHECK: $x3 = COPY [[DEF]](s64) + ; CHECK: $x4 = COPY [[DEF]](s64) + ; CHECK: $x5 = COPY [[DEF]](s64) + ; CHECK: $x6 = COPY [[DEF]](s64) + ; CHECK: $x7 = COPY [[DEF]](s64) + ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $sp + ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; CHECK: G_STORE [[C]](s8), [[PTR_ADD]](p0) :: (store 1 into stack) + ; CHECK: BL @stack_ext_needed, csr_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit $x0, implicit $x1, implicit $x2, implicit $x3, implicit $x4, implicit $x5, implicit $x6, implicit $x7 + ; CHECK: ADJCALLSTACKUP 8, 0, implicit-def $sp, implicit $sp + ; CHECK: RET_ReallyLR call void @stack_ext_needed([8 x i64] undef, i8 signext 42) ret void } ; Check that we can lower incoming i128 types into constituent s64 gprs. -; CHECK-LABEL: name: callee_s128 -; CHECK: liveins: $x0, $x1, $x2, $x3, $x4 -; CHECK: [[A1_P1:%[0-9]+]]:_(s64) = COPY $x0 -; CHECK: [[A1_P2:%[0-9]+]]:_(s64) = COPY $x1 -; CHECK: [[A1_MERGE:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[A1_P1]](s64), [[A1_P2]](s64) -; CHECK: [[A2_P1:%[0-9]+]]:_(s64) = COPY $x2 -; CHECK: [[A2_P2:%[0-9]+]]:_(s64) = COPY $x3 -; CHECK: [[A2_MERGE:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[A2_P1]](s64), [[A2_P2]](s64) -; CHECK: G_STORE [[A2_MERGE]](s128) define void @callee_s128(i128 %a, i128 %b, i128 *%ptr) { + ; CHECK-LABEL: name: callee_s128 + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: liveins: $x0, $x1, $x2, $x3, $x4 + ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x0 + ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY $x1 + ; CHECK: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[COPY]](s64), [[COPY1]](s64) + ; CHECK: [[COPY2:%[0-9]+]]:_(s64) = COPY $x2 + ; CHECK: [[COPY3:%[0-9]+]]:_(s64) = COPY $x3 + ; CHECK: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[COPY2]](s64), [[COPY3]](s64) + ; CHECK: [[COPY4:%[0-9]+]]:_(p0) = COPY $x4 + ; CHECK: G_STORE [[MV1]](s128), [[COPY4]](p0) :: (store 16 into %ir.ptr) + ; CHECK: RET_ReallyLR store i128 %b, i128 *%ptr ret void } ; Check we can lower outgoing s128 arguments into s64 gprs. 
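The callee_s128/caller_s128 tests here check that an i128 travels through two 64-bit GPRs: G_UNMERGE_VALUES splits it into $x0/$x1 (and $x2/$x3) at the call site and G_MERGE_VALUES rebuilds it in the callee. The same lo/hi round trip in plain C++, assuming a compiler that provides __int128 (Clang or GCC on a 64-bit target):

    #include <cassert>
    #include <cstdint>

    int main() {
      unsigned __int128 V =
          (static_cast<unsigned __int128>(0x0123456789abcdefULL) << 64) |
          0xfedcba9876543210ULL;

      // Caller side: G_UNMERGE_VALUES -> two word-sized pieces.
      std::uint64_t Lo = static_cast<std::uint64_t>(V);
      std::uint64_t Hi = static_cast<std::uint64_t>(V >> 64);

      // Callee side: G_MERGE_VALUES puts them back together.
      unsigned __int128 Rebuilt =
          (static_cast<unsigned __int128>(Hi) << 64) | Lo;

      assert(Rebuilt == V);
      return 0;
    }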
 ; Check we can lower outgoing s128 arguments into s64 gprs.
-; CHECK-LABEL: name: caller_s128
-; CHECK: [[PTR:%[0-9]+]]:_(p0) = COPY $x0
-; CHECK: [[LARGE_VAL:%[0-9]+]]:_(s128) = G_LOAD [[PTR]](p0)
-; CHECK: ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp
-; CHECK: [[A1_P1:%[0-9]+]]:_(s64), [[A1_P2:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LARGE_VAL]](s128)
-; CHECK: [[A2_P1:%[0-9]+]]:_(s64), [[A2_P2:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES %1(s128)
-; CHECK: $x0 = COPY [[A1_P1]](s64)
-; CHECK: $x1 = COPY [[A1_P2]](s64)
-; CHECK: $x2 = COPY [[A2_P1]](s64)
-; CHECK: $x3 = COPY [[A2_P2]](s64)
-; CHECK: $x4 = COPY [[PTR]](p0)
-; CHECK: BL @callee_s128, csr_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit $x0, implicit $x1, implicit $x2, implicit $x3, implicit $x4
-; CHECK: ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp
 define void @caller_s128(i128 *%ptr) {
+  ; CHECK-LABEL: name: caller_s128
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK: liveins: $x0
+  ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+  ; CHECK: [[LOAD:%[0-9]+]]:_(s128) = G_LOAD [[COPY]](p0) :: (load 16 from %ir.ptr)
+  ; CHECK: ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp
+  ; CHECK: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](s128)
+  ; CHECK: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](s128)
+  ; CHECK: $x0 = COPY [[UV]](s64)
+  ; CHECK: $x1 = COPY [[UV1]](s64)
+  ; CHECK: $x2 = COPY [[UV2]](s64)
+  ; CHECK: $x3 = COPY [[UV3]](s64)
+  ; CHECK: $x4 = COPY [[COPY]](p0)
+  ; CHECK: BL @callee_s128, csr_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit $x0, implicit $x1, implicit $x2, implicit $x3, implicit $x4
+  ; CHECK: ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp
+  ; CHECK: RET_ReallyLR
   %v = load i128, i128 *%ptr
   call void @callee_s128(i128 %v, i128 %v, i128 *%ptr)
   ret void
 }
+
+
+declare i64 @i8i16callee(i64 %a1, i64 %a2, i64 %a3, i8 signext %a4, i16 signext %a5, i64 %a6, i64 %a7, i64 %a8, i8 signext %b1, i16 signext %b2, i8 signext %b3, i8 signext %b4) nounwind readnone noinline
+
+define i32 @i8i16caller() nounwind readnone {
+  ; CHECK-LABEL: name: i8i16caller
+  ; CHECK: bb.1.entry:
+  ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+  ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+  ; CHECK: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+  ; CHECK: [[C3:%[0-9]+]]:_(s8) = G_CONSTANT i8 3
+  ; CHECK: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 4
+  ; CHECK: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 5
+  ; CHECK: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
+  ; CHECK: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 7
+  ; CHECK: [[C8:%[0-9]+]]:_(s8) = G_CONSTANT i8 97
+  ; CHECK: [[C9:%[0-9]+]]:_(s16) = G_CONSTANT i16 98
+  ; CHECK: [[C10:%[0-9]+]]:_(s8) = G_CONSTANT i8 99
+  ; CHECK: [[C11:%[0-9]+]]:_(s8) = G_CONSTANT i8 100
+  ; CHECK: ADJCALLSTACKDOWN 32, 0, implicit-def $sp, implicit $sp
+  ; CHECK: $x0 = COPY [[C]](s64)
+  ; CHECK: $x1 = COPY [[C1]](s64)
+  ; CHECK: $x2 = COPY [[C2]](s64)
+  ; CHECK: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[C3]](s8)
+  ; CHECK: $w3 = COPY [[SEXT]](s32)
+  ; CHECK: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[C4]](s16)
+  ; CHECK: $w4 = COPY [[SEXT1]](s32)
+  ; CHECK: $x5 = COPY [[C5]](s64)
+  ; CHECK: $x6 = COPY [[C6]](s64)
+  ; CHECK: $x7 = COPY [[C7]](s64)
+  ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $sp
+  ; CHECK: [[C12:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+  ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C12]](s64)
+  ; CHECK: G_STORE [[C8]](s8), [[PTR_ADD]](p0) :: (store 1 into stack)
+  ; CHECK: [[C13:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+  ; CHECK: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C13]](s64)
+  ; CHECK: G_STORE [[C9]](s16), [[PTR_ADD1]](p0) :: (store 2 into stack + 8, align 1)
+  ; CHECK: [[C14:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+  ; CHECK: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C14]](s64)
+  ; CHECK: G_STORE [[C10]](s8), [[PTR_ADD2]](p0) :: (store 1 into stack + 16)
+  ; CHECK: [[C15:%[0-9]+]]:_(s64) = G_CONSTANT i64 24
+  ; CHECK: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C15]](s64)
+  ; CHECK: G_STORE [[C11]](s8), [[PTR_ADD3]](p0) :: (store 1 into stack + 24)
+  ; CHECK: BL @i8i16callee, csr_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit $x0, implicit $x1, implicit $x2, implicit $w3, implicit $w4, implicit $x5, implicit $x6, implicit $x7, implicit-def $x0
+  ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY $x0
+  ; CHECK: ADJCALLSTACKUP 32, 0, implicit-def $sp, implicit $sp
+  ; CHECK: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64)
+  ; CHECK: $w0 = COPY [[TRUNC]](s32)
+  ; CHECK: RET_ReallyLR implicit $w0
+entry:
+  %call = tail call i64 @i8i16callee(i64 0, i64 1, i64 2, i8 signext 3, i16 signext 4, i64 5, i64 6, i64 7, i8 97, i16 98, i8 99, i8 100)
+  %conv = trunc i64 %call to i32
+  ret i32 %conv
+}
+
diff --git a/llvm/test/CodeGen/AMDGPU/fence-lds-read2-write2.ll b/llvm/test/CodeGen/AMDGPU/fence-lds-read2-write2.ll
new file mode 100644
index 00000000000000..d69f5ffac781ad
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/fence-lds-read2-write2.ll
@@ -0,0 +1,72 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
+
+@lds = internal addrspace(3) global [576 x double] undef, align 16
+
+; Stores to the same address appear in multiple places in the same
+; block. When sorted by offset, the merges would fail. We should form
+; two groupings of ds_write2_b64 on either side of the fence.
+define amdgpu_kernel void @same_address_fence_merge_write2() #0 {
+; GCN-LABEL: same_address_fence_merge_write2:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    s_mov_b32 s0, 0
+; GCN-NEXT:    v_lshlrev_b32_e32 v2, 3, v0
+; GCN-NEXT:    s_mov_b32 s1, 0x40100000
+; GCN-NEXT:    v_mov_b32_e32 v0, s0
+; GCN-NEXT:    v_mov_b32_e32 v1, s1
+; GCN-NEXT:    v_add_u32_e32 v3, 0x840, v2
+; GCN-NEXT:    v_add_u32_e32 v4, 0xc60, v2
+; GCN-NEXT:    ds_write2_b64 v2, v[0:1], v[0:1] offset1:66
+; GCN-NEXT:    ds_write2_b64 v2, v[0:1], v[0:1] offset0:132 offset1:198
+; GCN-NEXT:    ds_write2_b64 v3, v[0:1], v[0:1] offset1:66
+; GCN-NEXT:    ds_write2_b64 v4, v[0:1], v[0:1] offset1:66
+; GCN-NEXT:    s_mov_b32 s1, 0x3ff00000
+; GCN-NEXT:    v_mov_b32_e32 v0, s0
+; GCN-NEXT:    v_mov_b32_e32 v1, s1
+; GCN-NEXT:    s_waitcnt lgkmcnt(0)
+; GCN-NEXT:    s_barrier
+; GCN-NEXT:    s_waitcnt lgkmcnt(0)
+; GCN-NEXT:    ds_write2_b64 v2, v[0:1], v[0:1] offset1:66
+; GCN-NEXT:    ds_write2_b64 v2, v[0:1], v[0:1] offset0:132 offset1:198
+; GCN-NEXT:    ds_write2_b64 v3, v[0:1], v[0:1] offset1:66
+; GCN-NEXT:    ds_write2_b64 v4, v[0:1], v[0:1] offset1:66
+; GCN-NEXT:    s_endpgm
+bb:
+  %tmp = tail call i32 @llvm.amdgcn.workitem.id.x(), !range !0
+  %tmp1 = getelementptr inbounds [576 x double], [576 x double] addrspace(3)* @lds, i32 0, i32 %tmp
+  store double 4.000000e+00, double addrspace(3)* %tmp1, align 8
+  %tmp2 = getelementptr inbounds double, double addrspace(3)* %tmp1, i32 66
+  store double 4.000000e+00, double addrspace(3)* %tmp2, align 8
+  %tmp3 = getelementptr inbounds double, double addrspace(3)* %tmp1, i32 132
+  store double 4.000000e+00, double addrspace(3)* %tmp3, align 8
+  %tmp4 = getelementptr inbounds double, double addrspace(3)* %tmp1, i32 198
+  store double 4.000000e+00, double addrspace(3)* %tmp4, align 8
+  %tmp5 = getelementptr inbounds double, double addrspace(3)* %tmp1, i32 264
+  store double 4.000000e+00, double addrspace(3)* %tmp5, align 8
+  %tmp6 = getelementptr inbounds double, double addrspace(3)* %tmp1, i32 330
+  store double 4.000000e+00, double addrspace(3)* %tmp6, align 8
+  %tmp7 = getelementptr inbounds double, double addrspace(3)* %tmp1, i32 396
+  store double 4.000000e+00, double addrspace(3)* %tmp7, align 8
+  %tmp8 = getelementptr inbounds double, double addrspace(3)* %tmp1, i32 462
+  store double 4.000000e+00, double addrspace(3)* %tmp8, align 8
+  fence syncscope("workgroup") release
+  tail call void @llvm.amdgcn.s.barrier()
+  fence syncscope("workgroup") acquire
+  store double 1.000000e+00, double addrspace(3)* %tmp1, align 8
+  store double 1.000000e+00, double addrspace(3)* %tmp2, align 8
+  store double 1.000000e+00, double addrspace(3)* %tmp3, align 8
+  store double 1.000000e+00, double addrspace(3)* %tmp4, align 8
+  store double 1.000000e+00, double addrspace(3)* %tmp5, align 8
+  store double 1.000000e+00, double addrspace(3)* %tmp6, align 8
+  store double 1.000000e+00, double addrspace(3)* %tmp7, align 8
+  store double 1.000000e+00, double addrspace(3)* %tmp8, align 8
+  ret void
+}
+
+declare i32 @llvm.amdgcn.workitem.id.x() #0
+declare void @llvm.amdgcn.s.barrier() #1
+
+attributes #0 = { nounwind readnone speculatable }
+attributes #1 = { convergent nounwind }
+
+!0 = !{i32 0, i32 1024}
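The comment at the top of that new test carries the key point: LDS stores may be paired into ds_write2_b64 only within one side of the release/acquire fence. A reduced sketch of the same pattern follows; it is not part of the patch, and the global and function names are hypothetical.

; Sketch only: a cut-down form of the fence-splitting pattern above;
; @lds_sketch and @fence_split_sketch are made-up names.
@lds_sketch = internal addrspace(3) global [2 x double] undef, align 16

define amdgpu_kernel void @fence_split_sketch() {
  %p0 = getelementptr inbounds [2 x double], [2 x double] addrspace(3)* @lds_sketch, i32 0, i32 0
  %p1 = getelementptr inbounds [2 x double], [2 x double] addrspace(3)* @lds_sketch, i32 0, i32 1
  ; These two stores are candidates for one ds_write2_b64 pairing ...
  store double 4.000000e+00, double addrspace(3)* %p0, align 8
  store double 4.000000e+00, double addrspace(3)* %p1, align 8
  ; ... but nothing should be merged or sorted across the fence/barrier/fence.
  fence syncscope("workgroup") release
  call void @llvm.amdgcn.s.barrier()
  fence syncscope("workgroup") acquire
  ; These two stores form a second, independent pairing candidate.
  store double 1.000000e+00, double addrspace(3)* %p0, align 8
  store double 1.000000e+00, double addrspace(3)* %p1, align 8
  ret void
}

declare void @llvm.amdgcn.s.barrier()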
diff --git a/llvm/test/CodeGen/Mips/xray-mips-attribute-instrumentation.ll b/llvm/test/CodeGen/Mips/xray-mips-attribute-instrumentation.ll
index a7c859a1815fe1..b78909d4adb343
--- a/llvm/test/CodeGen/Mips/xray-mips-attribute-instrumentation.ll
+++ b/llvm/test/CodeGen/Mips/xray-mips-attribute-instrumentation.ll
@@ -7,8 +7,8 @@ define i32 @foo() nounwind noinline uwtable "function-instrument"="xray-always"
 ; CHECK: .p2align 2
 ; CHECK-MIPS64-LABEL: .Lxray_sled_0:
 ; CHECK-MIPS32-LABEL: $xray_sled_0:
-; CHECK-MIPS64: b .Ltmp0
-; CHECK-MIPS32: b $tmp0
+; CHECK-MIPS64: b .Ltmp1
+; CHECK-MIPS32: b $tmp1
 ; CHECK-NEXT: nop
 ; CHECK-NEXT: nop
 ; CHECK-NEXT: nop
@@ -24,15 +24,15 @@ define i32 @foo() nounwind noinline uwtable "function-instrument"="xray-always"
 ; CHECK-MIPS64: nop
 ; CHECK-MIPS64: nop
 ; CHECK-MIPS64: nop
-; CHECK-MIPS64-LABEL: .Ltmp0:
-; CHECK-MIPS32-LABEL: $tmp0:
+; CHECK-MIPS64-LABEL: .Ltmp1:
+; CHECK-MIPS32-LABEL: $tmp1:
 ; CHECK-MIPS32: addiu $25, $25, 52
   ret i32 0
 ; CHECK: .p2align 2
 ; CHECK-MIPS64-LABEL: .Lxray_sled_1:
+; CHECK-MIPS64-NEXT: b .Ltmp2
 ; CHECK-MIPS32-LABEL: $xray_sled_1:
-; CHECK-MIPS64: b .Ltmp1
-; CHECK-MIPS32: b $tmp1
+; CHECK-MIPS32-NEXT: b $tmp2
 ; CHECK-NEXT: nop
 ; CHECK-NEXT: nop
 ; CHECK-NEXT: nop
@@ -48,8 +48,8 @@ define i32 @foo() nounwind noinline uwtable "function-instrument"="xray-always"
 ; CHECK-MIPS64: nop
 ; CHECK-MIPS64: nop
 ; CHECK-MIPS64: nop
-; CHECK-MIPS64-LABEL: .Ltmp1:
-; CHECK-MIPS32-LABEL: $tmp1:
+; CHECK-MIPS64-LABEL: .Ltmp2:
+; CHECK-MIPS32-LABEL: $tmp2:
 ; CHECK-MIPS32: addiu $25, $25, 52
 }
 ; CHECK: .section xray_instr_map,{{.*}}
@@ -63,9 +63,9 @@ define i32 @foo() nounwind noinline uwtable "function-instrument"="xray-always"
 define i32 @bar(i32 %i) nounwind noinline uwtable "function-instrument"="xray-always" {
 ; CHECK: .p2align 2
 ; CHECK-MIPS64-LABEL: .Lxray_sled_2:
+; CHECK-MIPS64-NEXT: b .Ltmp4
 ; CHECK-MIPS32-LABEL: $xray_sled_2:
-; CHECK-MIPS64: b .Ltmp2
-; CHECK-MIPS32: b $tmp2
+; CHECK-MIPS32-NEXT: b $tmp4
 ; CHECK-NEXT: nop
 ; CHECK-NEXT: nop
 ; CHECK-NEXT: nop
@@ -81,8 +81,8 @@ define i32 @bar(i32 %i) nounwind noinline uwtable "function-instrument"="xray-al
 ; CHECK-MIPS64: nop
 ; CHECK-MIPS64: nop
 ; CHECK-MIPS64: nop
-; CHECK-MIPS64-LABEL: .Ltmp2:
-; CHECK-MIPS32-LABEL: $tmp2:
+; CHECK-MIPS64-LABEL: .Ltmp4:
+; CHECK-MIPS32-LABEL: $tmp4:
 ; CHECK-MIPS32: addiu $25, $25, 52
 Test:
   %cond = icmp eq i32 %i, 0
@@ -91,9 +91,9 @@ IsEqual:
   ret i32 0
 ; CHECK: .p2align 2
 ; CHECK-MIPS64-LABEL: .Lxray_sled_3:
+; CHECK-MIPS64-NEXT: b .Ltmp5
 ; CHECK-MIPS32-LABEL: $xray_sled_3:
-; CHECK-MIPS64: b .Ltmp3
-; CHECK-MIPS32: b $tmp3
+; CHECK-MIPS32-NEXT: b $tmp5
 ; CHECK-NEXT: nop
 ; CHECK-NEXT: nop
 ; CHECK-NEXT: nop
@@ -109,16 +109,16 @@ IsEqual:
 ; CHECK-MIPS64: nop
 ; CHECK-MIPS64: nop
 ; CHECK-MIPS64: nop
-; CHECK-MIPS64-LABEL: .Ltmp3:
-; CHECK-MIPS32-LABEL: $tmp3:
+; CHECK-MIPS64-LABEL: .Ltmp5:
+; CHECK-MIPS32-LABEL: $tmp5:
 ; CHECK-MIPS32: addiu $25, $25, 52
 NotEqual:
   ret i32 1
 ; CHECK: .p2align 2
 ; CHECK-MIPS64-LABEL: .Lxray_sled_4:
+; CHECK-MIPS64-NEXT: b .Ltmp6
 ; CHECK-MIPS32-LABEL: $xray_sled_4:
-; CHECK-MIPS64: b .Ltmp4
-; CHECK-MIPS32: b $tmp4
+; CHECK-MIPS32-NEXT: b $tmp6
 ; CHECK-NEXT: nop
 ; CHECK-NEXT: nop
 ; CHECK-NEXT: nop
@@ -134,8 +134,8 @@ NotEqual:
 ; CHECK-MIPS64: nop
 ; CHECK-MIPS64: nop
 ; CHECK-MIPS64: nop
-; CHECK-MIPS64-LABEL: .Ltmp4:
-; CHECK-MIPS32-LABEL: $tmp4:
+; CHECK-MIPS64-LABEL: .Ltmp6:
+; CHECK-MIPS32-LABEL: $tmp6:
 ; CHECK-MIPS32: addiu $25, $25, 52
 }
 ; CHECK: .section xray_instr_map,{{.*}}
diff --git a/llvm/test/CodeGen/PowerPC/xray-attribute-instrumentation.ll b/llvm/test/CodeGen/PowerPC/xray-attribute-instrumentation.ll
index 63c34be51ec374..f73679001158d6
--- a/llvm/test/CodeGen/PowerPC/xray-attribute-instrumentation.ll
+++ b/llvm/test/CodeGen/PowerPC/xray-attribute-instrumentation.ll
@@ -3,6 +3,8 @@
 ; RUN:     -relocation-model=pic < %s | FileCheck %s
 define i32 @foo() nounwind noinline uwtable "function-instrument"="xray-always" {
+; CHECK-LABEL: foo:
+; CHECK-NEXT: .Lfunc_begin0:
 ; CHECK-LABEL: .Ltmp0:
 ; CHECK: b .Ltmp1
 ; CHECK-NEXT: nop
@@ -26,14 +28,14 @@ define i32 @foo() nounwind noinline uwtable "function-instrument"="xray-always"
 ; CHECK: .Lxray_sleds_start0:
 ; CHECK-NEXT: .Ltmp3:
 ; CHECK-NEXT: .quad .Ltmp0-.Ltmp3
-; CHECK-NEXT: .quad foo
+; CHECK-NEXT: .quad .Lfunc_begin0-(.Ltmp3+8)
 ; CHECK-NEXT: .byte 0x00
 ; CHECK-NEXT: .byte 0x01
 ; CHECK-NEXT: .byte 0x02
 ; CHECK-NEXT: .space 13
 ; CHECK-NEXT: .Ltmp4:
 ; CHECK-NEXT: .quad .Ltmp2-.Ltmp4
-; CHECK-NEXT: .quad foo
+; CHECK-NEXT: .quad .Lfunc_begin0-(.Ltmp4+8)
 ; CHECK-NEXT: .byte 0x01
 ; CHECK-NEXT: .byte 0x01
 ; CHECK-NEXT: .byte 0x02
diff --git a/llvm/test/CodeGen/X86/xray-log-args.ll b/llvm/test/CodeGen/X86/xray-log-args.ll
index 7ec1b34094d341..812e04a483fb76
--- a/llvm/test/CodeGen/X86/xray-log-args.ll
+++ b/llvm/test/CodeGen/X86/xray-log-args.ll
@@ -6,17 +6,20 @@
 define i32 @callee(i32 %arg) nounwind noinline uwtable "function-instrument"="xray-always" "xray-log-args"="1" {
   ret i32 %arg
 }
+; CHECK-LABEL: callee:
+; CHECK-NEXT: Lfunc_begin0:
+
 ; CHECK-LABEL: Lxray_sleds_start0:
 ; CHECK-NEXT: Ltmp0:
 ; CHECK-NEXT: .quad {{\.?}}Lxray_sled_0-{{\.?}}Ltmp0
-; CHECK-NEXT: .quad {{_?}}callee
+; CHECK-NEXT: .quad {{\.?}}Lfunc_begin0-({{\.?}}Ltmp0+8)
 ; CHECK-NEXT: .byte 0x03
 ; CHECK-NEXT: .byte 0x01
 ; CHECK-NEXT: .byte 0x02
 ; CHECK: .{{(zero|space)}} 13
 ; CHECK: Ltmp1:
 ; CHECK-NEXT: .quad {{\.?}}Lxray_sled_1-{{\.?}}Ltmp1
-; CHECK-NEXT: .quad {{_?}}callee
+; CHECK-NEXT: .quad {{\.?}}Lfunc_begin0-({{\.?}}Ltmp1+8)
 ; CHECK-NEXT: .byte 0x01
 ; CHECK-NEXT: .byte 0x01
 ; CHECK-NEXT: .byte 0x02
@@ -29,14 +32,14 @@ define i32 @caller(i32 %arg) nounwind noinline uwtable "function-instrument"="xr
 ; CHECK-LABEL: Lxray_sleds_start1:
 ; CHECK-NEXT: Ltmp3:
 ; CHECK-NEXT: .quad {{\.?}}Lxray_sled_2-{{\.?}}Ltmp3
-; CHECK-NEXT: .quad {{_?}}caller
+; CHECK-NEXT: .quad {{\.?}}Lfunc_begin1-({{\.?}}Ltmp3+8)
 ; CHECK-NEXT: .byte 0x03
 ; CHECK-NEXT: .byte 0x01
 ; CHECK-NEXT: .byte 0x02
 ; CHECK: .{{(zero|space)}} 13
 ; CHECK: Ltmp4:
 ; CHECK-NEXT: .quad {{\.?}}Lxray_sled_3-{{\.?}}Ltmp4
-; CHECK-NEXT: .quad {{_?}}caller
+; CHECK-NEXT: .quad {{\.?}}Lfunc_begin1-({{\.?}}Ltmp4+8)
 ; CHECK-NEXT: .byte 0x02
 ; CHECK-NEXT: .byte 0x01
 ; CHECK-NEXT: .byte 0x02
diff --git a/llvm/test/MC/RISCV/debug-valid.s b/llvm/test/MC/RISCV/debug-valid.s
new file mode 100644
index 00000000000000..89b8f001b2cefb
--- /dev/null
+++ b/llvm/test/MC/RISCV/debug-valid.s
@@ -0,0 +1,14 @@
+# RUN: llvm-mc %s -triple=riscv32 -riscv-no-aliases -show-encoding \
+# RUN:     | FileCheck -check-prefixes=CHECK,CHECK-INST %s
+# RUN: llvm-mc %s -triple=riscv64 -riscv-no-aliases -show-encoding \
+# RUN:     | FileCheck -check-prefixes=CHECK,CHECK-INST %s
+# RUN: llvm-mc -filetype=obj -triple riscv32 < %s \
+# RUN:     | llvm-objdump -M no-aliases -d - \
+# RUN:     | FileCheck -check-prefix=CHECK-INST %s
+# RUN: llvm-mc -filetype=obj -triple riscv64 < %s \
+# RUN:     | llvm-objdump -M no-aliases -d - \
+# RUN:     | FileCheck -check-prefix=CHECK-INST %s
+
+# CHECK-INST: dret
+# CHECK: encoding: [0x73,0x00,0x20,0x7b]
+dret
diff --git a/llvm/tools/llvm-xray/xray-extract.cpp b/llvm/tools/llvm-xray/xray-extract.cpp
index 6ea0f59e5eb956..8304d2d27afa34
--- a/llvm/tools/llvm-xray/xray-extract.cpp
+++ b/llvm/tools/llvm-xray/xray-extract.cpp
@@ -63,9 +63,9 @@ void exportAsYAML(const InstrumentationMap &Map, raw_ostream &OS,
     auto FuncId = Map.getFunctionId(Sled.Function);
     if (!FuncId)
       return;
-    YAMLSleds.push_back({*FuncId, Sled.Address, Sled.Function, Sled.Kind,
-                         Sled.AlwaysInstrument,
-                         ExtractSymbolize ? FH.SymbolOrNumber(*FuncId) : ""});
+    YAMLSleds.push_back(
+        {*FuncId, Sled.Address, Sled.Function, Sled.Kind, Sled.AlwaysInstrument,
+         ExtractSymbolize ? FH.SymbolOrNumber(*FuncId) : "", Sled.Version});
   }
   Output Out(OS, nullptr, 0);
   Out << YAMLSleds;
diff --git a/llvm/unittests/ADT/BitVectorTest.cpp b/llvm/unittests/ADT/BitVectorTest.cpp
index efefd2b90be8c9..0f15a478e4523c
--- a/llvm/unittests/ADT/BitVectorTest.cpp
+++ b/llvm/unittests/ADT/BitVectorTest.cpp
@@ -297,6 +297,18 @@ TYPED_TEST(BitVectorTest, SimpleFindOpsSingleWord) {
   EXPECT_EQ(-1, A.find_last_unset());
 
   A.resize(20);
+  ASSERT_TRUE(SmallBitVectorIsSmallMode(A));
+  EXPECT_EQ(-1, A.find_first());
+  EXPECT_EQ(-1, A.find_last());
+  EXPECT_EQ(-1, A.find_next(5));
+  EXPECT_EQ(-1, A.find_next(19));
+  EXPECT_EQ(-1, A.find_prev(5));
+  EXPECT_EQ(-1, A.find_prev(20));
+  EXPECT_EQ(0, A.find_first_unset());
+  EXPECT_EQ(19, A.find_last_unset());
+  EXPECT_EQ(6, A.find_next_unset(5));
+  EXPECT_EQ(-1, A.find_next_unset(19));
+
   A.set(3);
   A.set(4);
   A.set(16);
@@ -319,6 +331,19 @@ TYPED_TEST(BitVectorTest, SimpleFindOpsSingleWord) {
   EXPECT_EQ(5, A.find_next_unset(4));
   EXPECT_EQ(13, A.find_next_unset(12));
   EXPECT_EQ(17, A.find_next_unset(15));
+
+  A.set();
+  ASSERT_TRUE(SmallBitVectorIsSmallMode(A));
+  EXPECT_EQ(0, A.find_first());
+  EXPECT_EQ(19, A.find_last());
+  EXPECT_EQ(6, A.find_next(5));
+  EXPECT_EQ(-1, A.find_next(19));
+  EXPECT_EQ(4, A.find_prev(5));
+  EXPECT_EQ(19, A.find_prev(20));
+  EXPECT_EQ(-1, A.find_first_unset());
+  EXPECT_EQ(-1, A.find_last_unset());
+  EXPECT_EQ(-1, A.find_next_unset(5));
+  EXPECT_EQ(-1, A.find_next_unset(19));
 }
 
 TEST(BitVectorTest, FindInRangeMultiWord) {
diff --git a/openmp/libomptarget/src/device.cpp b/openmp/libomptarget/src/device.cpp
index 765dd54fe5ca64..b613cd2eccb9f6
--- a/openmp/libomptarget/src/device.cpp
+++ b/openmp/libomptarget/src/device.cpp
@@ -157,6 +157,7 @@ void *DeviceTy::getOrAllocTgtPtr(void *HstPtrBegin, void *HstPtrBase,
                                  bool UpdateRefCount, bool HasCloseModifier) {
   void *rc = NULL;
   IsHostPtr = false;
+  IsNew = false;
   DataMapMtx.lock();
   LookupResult lr = lookupMapping(HstPtrBegin, Size);