diff --git a/lld/COFF/Driver.cpp b/lld/COFF/Driver.cpp index 3560f1066f290a..3c9f675be65ed2 100644 --- a/lld/COFF/Driver.cpp +++ b/lld/COFF/Driver.cpp @@ -2008,6 +2008,12 @@ void LinkerDriver::link(ArrayRef argsArr) { while (run()); } + // Create wrapped symbols for -wrap option. + std::vector wrapped = addWrappedSymbols(args); + // Load more object files that might be needed for wrapped symbols. + if (!wrapped.empty()) + while (run()); + if (config->autoImport) { // MinGW specific. // Load any further object files that might be needed for doing automatic @@ -2051,6 +2057,10 @@ void LinkerDriver::link(ArrayRef argsArr) { // references to the symbols we use from them. run(); + // Apply symbol renames for -wrap. + if (!wrapped.empty()) + wrapSymbols(wrapped); + // Resolve remaining undefined symbols and warn about imported locals. symtab->resolveRemainingUndefines(); if (errorCount()) diff --git a/lld/COFF/InputFiles.h b/lld/COFF/InputFiles.h index 26a6e5b7b70d96..f657d8f0a808dd 100644 --- a/lld/COFF/InputFiles.h +++ b/lld/COFF/InputFiles.h @@ -148,6 +148,8 @@ class ObjFile : public InputFile { ArrayRef getGuardLJmpChunks() { return guardLJmpChunks; } ArrayRef getSymbols() { return symbols; } + MutableArrayRef getMutableSymbols() { return symbols; } + ArrayRef getDebugSection(StringRef secName); // Returns a Symbol object for the symbolIndex'th symbol in the diff --git a/lld/COFF/LTO.cpp b/lld/COFF/LTO.cpp index bb44819e60f8d5..1fa685fb4620ed 100644 --- a/lld/COFF/LTO.cpp +++ b/lld/COFF/LTO.cpp @@ -139,6 +139,11 @@ void BitcodeCompiler::add(BitcodeFile &f) { r.VisibleToRegularObj = sym->isUsedInRegularObj; if (r.Prevailing) undefine(sym); + + // We tell LTO to not apply interprocedural optimization for wrapped + // (with -wrap) symbols because otherwise LTO would inline them while + // their values are still not final. + r.LinkerRedefined = !sym->canInline; } checkError(ltoObj->add(std::move(f.obj), resols)); } diff --git a/lld/COFF/MinGW.cpp b/lld/COFF/MinGW.cpp index e24cdca6ee34c8..f7f45464bcf540 100644 --- a/lld/COFF/MinGW.cpp +++ b/lld/COFF/MinGW.cpp @@ -7,9 +7,14 @@ //===----------------------------------------------------------------------===// #include "MinGW.h" +#include "Driver.h" +#include "InputFiles.h" #include "SymbolTable.h" #include "lld/Common/ErrorHandler.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/DenseSet.h" #include "llvm/Object/COFF.h" +#include "llvm/Support/Parallel.h" #include "llvm/Support/Path.h" #include "llvm/Support/raw_ostream.h" @@ -173,3 +178,73 @@ void lld::coff::writeDefFile(StringRef name) { os << "\n"; } } + +static StringRef mangle(Twine sym) { + assert(config->machine != IMAGE_FILE_MACHINE_UNKNOWN); + if (config->machine == I386) + return saver.save("_" + sym); + return saver.save(sym); +} + +// Handles -wrap option. +// +// This function instantiates wrapper symbols. At this point, they seem +// like they are not being used at all, so we explicitly set some flags so +// that LTO won't eliminate them. +std::vector +lld::coff::addWrappedSymbols(opt::InputArgList &args) { + std::vector v; + DenseSet seen; + + for (auto *arg : args.filtered(OPT_wrap)) { + StringRef name = arg->getValue(); + if (!seen.insert(name).second) + continue; + + Symbol *sym = symtab->findUnderscore(name); + if (!sym) + continue; + + Symbol *real = symtab->addUndefined(mangle("__real_" + name)); + Symbol *wrap = symtab->addUndefined(mangle("__wrap_" + name)); + v.push_back({sym, real, wrap}); + + // These symbols may seem undefined initially, but don't bail out + // at symtab->reportUnresolvable() due to them, but let wrapSymbols + // below sort things out before checking finally with + // symtab->resolveRemainingUndefines(). + sym->deferUndefined = true; + real->deferUndefined = true; + // We want to tell LTO not to inline symbols to be overwritten + // because LTO doesn't know the final symbol contents after renaming. + real->canInline = false; + sym->canInline = false; + + // Tell LTO not to eliminate these symbols. + sym->isUsedInRegularObj = true; + if (!isa(wrap)) + wrap->isUsedInRegularObj = true; + } + return v; +} + +// Do renaming for -wrap by updating pointers to symbols. +// +// When this function is executed, only InputFiles and symbol table +// contain pointers to symbol objects. We visit them to replace pointers, +// so that wrapped symbols are swapped as instructed by the command line. +void lld::coff::wrapSymbols(ArrayRef wrapped) { + DenseMap map; + for (const WrappedSymbol &w : wrapped) { + map[w.sym] = w.wrap; + map[w.real] = w.sym; + } + + // Update pointers in input files. + parallelForEach(ObjFile::instances, [&](ObjFile *file) { + MutableArrayRef syms = file->getMutableSymbols(); + for (size_t i = 0, e = syms.size(); i != e; ++i) + if (Symbol *s = map.lookup(syms[i])) + syms[i] = s; + }); +} diff --git a/lld/COFF/MinGW.h b/lld/COFF/MinGW.h index 3d7a186aa19985..2f2bd119c33d2e 100644 --- a/lld/COFF/MinGW.h +++ b/lld/COFF/MinGW.h @@ -12,7 +12,10 @@ #include "Config.h" #include "Symbols.h" #include "lld/Common/LLVM.h" +#include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/StringSet.h" +#include "llvm/Option/ArgList.h" +#include namespace lld { namespace coff { @@ -36,6 +39,24 @@ class AutoExporter { void writeDefFile(StringRef name); +// The -wrap option is a feature to rename symbols so that you can write +// wrappers for existing functions. If you pass `-wrap:foo`, all +// occurrences of symbol `foo` are resolved to `__wrap_foo` (so, you are +// expected to write `__wrap_foo` function as a wrapper). The original +// symbol becomes accessible as `__real_foo`, so you can call that from your +// wrapper. +// +// This data structure is instantiated for each -wrap option. +struct WrappedSymbol { + Symbol *sym; + Symbol *real; + Symbol *wrap; +}; + +std::vector addWrappedSymbols(llvm::opt::InputArgList &args); + +void wrapSymbols(ArrayRef wrapped); + } // namespace coff } // namespace lld diff --git a/lld/COFF/Options.td b/lld/COFF/Options.td index d27e95f9bd600f..a69fd7032e9ee7 100644 --- a/lld/COFF/Options.td +++ b/lld/COFF/Options.td @@ -252,6 +252,7 @@ def print_symbol_order: P< "print-symbol-order", "Print a symbol order specified by /call-graph-ordering-file and " "/call-graph-profile-sort into the specified file">; +def wrap : P_priv<"wrap">; // Flags for debugging def lldmap : F<"lldmap">; diff --git a/lld/COFF/SymbolTable.cpp b/lld/COFF/SymbolTable.cpp index 173e32f628ef39..024a408ca45458 100644 --- a/lld/COFF/SymbolTable.cpp +++ b/lld/COFF/SymbolTable.cpp @@ -390,7 +390,7 @@ void SymbolTable::reportUnresolvable() { for (auto &i : symMap) { Symbol *sym = i.second; auto *undef = dyn_cast(sym); - if (!undef) + if (!undef || sym->deferUndefined) continue; if (undef->getWeakAlias()) continue; @@ -402,7 +402,7 @@ void SymbolTable::reportUnresolvable() { } if (name.contains("_PchSym_")) continue; - if (config->mingw && impSymbol(name)) + if (config->autoImport && impSymbol(name)) continue; undefs.insert(sym); } @@ -482,6 +482,7 @@ std::pair SymbolTable::insert(StringRef name) { sym = reinterpret_cast(make()); sym->isUsedInRegularObj = false; sym->pendingArchiveLoad = false; + sym->canInline = true; inserted = true; } return {sym, inserted}; diff --git a/lld/COFF/Symbols.h b/lld/COFF/Symbols.h index 117e80e708da26..cdb7427dc77ce1 100644 --- a/lld/COFF/Symbols.h +++ b/lld/COFF/Symbols.h @@ -103,8 +103,8 @@ class Symbol { explicit Symbol(Kind k, StringRef n = "") : symbolKind(k), isExternal(true), isCOMDAT(false), writtenToSymtab(false), pendingArchiveLoad(false), isGCRoot(false), - isRuntimePseudoReloc(false), nameSize(n.size()), - nameData(n.empty() ? nullptr : n.data()) {} + isRuntimePseudoReloc(false), deferUndefined(false), canInline(true), + nameSize(n.size()), nameData(n.empty() ? nullptr : n.data()) {} const unsigned symbolKind : 8; unsigned isExternal : 1; @@ -130,6 +130,16 @@ class Symbol { unsigned isRuntimePseudoReloc : 1; + // True if we want to allow this symbol to be undefined in the early + // undefined check pass in SymbolTable::reportUnresolvable(), as it + // might be fixed up later. + unsigned deferUndefined : 1; + + // False if LTO shouldn't inline whatever this symbol points to. If a symbol + // is overwritten after LTO, LTO shouldn't inline the symbol because it + // doesn't know the final contents of the symbol. + uint8_t canInline : 1; + protected: // Symbol name length. Assume symbol lengths fit in a 32-bit integer. uint32_t nameSize; @@ -468,7 +478,9 @@ void replaceSymbol(Symbol *s, ArgT &&... arg) { "SymbolUnion not aligned enough"); assert(static_cast(static_cast(nullptr)) == nullptr && "Not a Symbol"); + bool canInline = s->canInline; new (s) T(std::forward(arg)...); + s->canInline = canInline; } } // namespace coff diff --git a/lld/MinGW/Driver.cpp b/lld/MinGW/Driver.cpp index 0a138d8a2303d0..fae5cb77ec5d01 100644 --- a/lld/MinGW/Driver.cpp +++ b/lld/MinGW/Driver.cpp @@ -377,6 +377,8 @@ bool mingw::link(ArrayRef argsArr, bool canExitEarly, add("-includeoptional:" + StringRef(a->getValue())); for (auto *a : args.filtered(OPT_delayload)) add("-delayload:" + StringRef(a->getValue())); + for (auto *a : args.filtered(OPT_wrap)) + add("-wrap:" + StringRef(a->getValue())); std::vector searchPaths; for (auto *a : args.filtered(OPT_L)) { diff --git a/lld/MinGW/Options.td b/lld/MinGW/Options.td index 0604b458193cf7..7bc5936d58d662 100644 --- a/lld/MinGW/Options.td +++ b/lld/MinGW/Options.td @@ -91,6 +91,8 @@ defm whole_archive: B<"whole-archive", def v: Flag<["-"], "v">, HelpText<"Display the version number">; def verbose: F<"verbose">, HelpText<"Verbose mode">; def version: F<"version">, HelpText<"Display the version number and exit">; +defm wrap: Eq<"wrap", "Use wrapper functions for symbol">, + MetaVarName<"">; // LLD specific options def _HASH_HASH_HASH : Flag<["-"], "###">, diff --git a/lld/test/COFF/wrap-i386.s b/lld/test/COFF/wrap-i386.s new file mode 100644 index 00000000000000..fd1710f8c3cc5c --- /dev/null +++ b/lld/test/COFF/wrap-i386.s @@ -0,0 +1,49 @@ +// REQUIRES: x86 +// RUN: split-file %s %t.dir +// RUN: llvm-mc -filetype=obj -triple=i686-win32-gnu %t.dir/main.s -o %t.main.obj +// RUN: llvm-mc -filetype=obj -triple=i686-win32-gnu %t.dir/other.s -o %t.other.obj + +// RUN: lld-link -out:%t.exe %t.main.obj %t.other.obj -entry:entry -subsystem:console -debug:symtab -safeseh:no -wrap:foo -wrap:nosuchsym +// RUN: llvm-objdump -d --print-imm-hex %t.exe | FileCheck %s + +// CHECK: <_entry>: +// CHECK-NEXT: movl $0x11010, %edx +// CHECK-NEXT: movl $0x11010, %edx +// CHECK-NEXT: movl $0x11000, %edx + +// RUN: llvm-readobj --symbols %t.exe > %t.dump +// RUN: FileCheck --check-prefix=SYM1 %s < %t.dump +// RUN: FileCheck --check-prefix=SYM2 %s < %t.dump +// RUN: FileCheck --check-prefix=SYM3 %s < %t.dump + +// _foo = 0xffc11000 = 4290842624 +// ___wrap_foo = ffc11010 = 4290842640 +// SYM1: Name: _foo +// SYM1-NEXT: Value: 4290842624 +// SYM1-NEXT: Section: IMAGE_SYM_ABSOLUTE +// SYM1-NEXT: BaseType: Null +// SYM1-NEXT: ComplexType: Null +// SYM1-NEXT: StorageClass: External +// SYM2: Name: ___wrap_foo +// SYM2-NEXT: Value: 4290842640 +// SYM2-NEXT: Section: IMAGE_SYM_ABSOLUTE +// SYM2-NEXT: BaseType: Null +// SYM2-NEXT: ComplexType: Null +// SYM2-NEXT: StorageClass: External +// SYM3-NOT: Name: ___real_foo + +#--- main.s +.global _entry +_entry: + movl $_foo, %edx + movl $___wrap_foo, %edx + movl $___real_foo, %edx + +#--- other.s +.global _foo +.global ___wrap_foo +.global ___real_foo + +_foo = 0x11000 +___wrap_foo = 0x11010 +___real_foo = 0x11020 diff --git a/lld/test/COFF/wrap-import.ll b/lld/test/COFF/wrap-import.ll new file mode 100644 index 00000000000000..c9a72de36e32b8 --- /dev/null +++ b/lld/test/COFF/wrap-import.ll @@ -0,0 +1,36 @@ +// REQUIRES: x86 + +// Check that wrapping works when the wrapped symbol is imported from a +// different DLL. + +// RUN: split-file %s %t.dir +// RUN: llc %t.dir/main.ll -o %t.main.obj --filetype=obj +// RUN: llvm-as %t.dir/main.ll -o %t.main.bc +// RUN: llvm-mc -filetype=obj -triple=x86_64-win32-gnu %t.dir/lib.s -o %t.lib.obj + +// RUN: lld-link -dll -out:%t.lib.dll %t.lib.obj -noentry -export:func -implib:%t.lib.lib +// RUN: lld-link -out:%t.exe %t.main.obj %t.lib.lib -entry:entry -subsystem:console -wrap:func +// RUN: lld-link -out:%t.exe %t.main.bc %t.lib.lib -entry:entry -subsystem:console -wrap:func + +#--- main.ll +target datalayout = "e-m:w-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-w64-windows-gnu" + +declare void @func() + +define void @entry() { + call void @func() + ret void +} + +declare void @__real_func() + +define void @__wrap_func() { + call void @__real_func() + ret void +} + +#--- lib.s +.global func +func: + ret diff --git a/lld/test/COFF/wrap-lto-1.ll b/lld/test/COFF/wrap-lto-1.ll new file mode 100644 index 00000000000000..dc2a99f485a40f --- /dev/null +++ b/lld/test/COFF/wrap-lto-1.ll @@ -0,0 +1,36 @@ +; REQUIRES: x86 +; LTO +; RUN: llvm-as %s -o %t.obj +; RUN: lld-link -out:%t.exe %t.obj -entry:entry -subsystem:console -wrap:bar -debug:symtab -lldsavetemps +; RUN: cat %t.exe.resolution.txt | FileCheck -check-prefix=RESOLS %s + +; ThinLTO +; RUN: opt -module-summary %s -o %t.obj +; RUN: lld-link -out:%t.exe %t.obj -entry:entry -subsystem:console -wrap:bar -debug:symtab -lldsavetemps +; RUN: cat %t.exe.resolution.txt | FileCheck -check-prefix=RESOLS %s + +; Make sure that the 'r' (linker redefined) bit is set for bar and __real_bar +; in the resolutions file. The calls to bar and __real_bar will be routed to +; __wrap_bar and bar, respectively. So they cannot be inlined. +; RESOLS: ,bar,pxr{{$}} +; RESOLS: ,__real_bar,xr{{$}} +; RESOLS: ,__wrap_bar,px{{$}} + +target datalayout = "e-m:w-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-w64-windows-gnu" + +define void @bar() { + ret void +} + +define void @entry() { + call void @bar() + ret void +} + +declare void @__real_bar() + +define void @__wrap_bar() { + call void @__real_bar() + ret void +} diff --git a/lld/test/COFF/wrap-lto-2.ll b/lld/test/COFF/wrap-lto-2.ll new file mode 100644 index 00000000000000..bad611726f0956 --- /dev/null +++ b/lld/test/COFF/wrap-lto-2.ll @@ -0,0 +1,84 @@ +; REQUIRES: x86 +; RUN: split-file %s %t.dir +;; LTO +; RUN: llvm-as %t.dir/main.ll -o %t.main.bc +; RUN: llvm-as %t.dir/wrap.ll -o %t.wrap.bc +; RUN: llvm-as %t.dir/other.ll -o %t.other.bc +; RUN: rm -f %t.bc.lib +; RUN: llvm-ar rcs %t.bc.lib %t.wrap.bc %t.other.bc +;; ThinLTO +; RUN: opt -module-summary %t.dir/main.ll -o %t.main.thin +; RUN: opt -module-summary %t.dir/wrap.ll -o %t.wrap.thin +; RUN: opt -module-summary %t.dir/other.ll -o %t.other.thin +; RUN: rm -f %t.thin.lib +; RUN: llvm-ar rcs %t.thin.lib %t.wrap.thin %t.other.thin +;; Object +; RUN: llc %t.dir/main.ll -o %t.main.obj --filetype=obj +; RUN: llc %t.dir/wrap.ll -o %t.wrap.obj --filetype=obj +; RUN: llc %t.dir/other.ll -o %t.other.obj --filetype=obj +; RUN: rm -f %t.obj.lib +; RUN: llvm-ar rcs %t.obj.lib %t.wrap.obj %t.other.obj + +;; This test verifies that -wrap works correctly for inter-module references to +;; the wrapped symbol, when LTO or ThinLTO is involved. It checks for various +;; combinations of bitcode and regular objects. + +;; LTO + LTO +; RUN: lld-link -out:%t.bc-bc.exe %t.main.bc -libpath:%T %t.bc.lib -entry:entry -subsystem:console -wrap:bar -debug:symtab -lldsavetemps +; RUN: llvm-objdump -d %t.bc-bc.exe | FileCheck %s --check-prefixes=CHECK,JMP + +;; LTO + Object +; RUN: lld-link -out:%t.bc-obj.exe %t.main.bc -libpath:%T %t.obj.lib -entry:entry -subsystem:console -wrap:bar -debug:symtab -lldsavetemps +; RUN: llvm-objdump -d %t.bc-obj.exe | FileCheck %s --check-prefixes=CHECK,JMP + +;; Object + LTO +; RUN: lld-link -out:%t.obj-bc.exe %t.main.obj -libpath:%T %t.bc.lib -entry:entry -subsystem:console -wrap:bar -debug:symtab -lldsavetemps +; RUN: llvm-objdump -d %t.obj-bc.exe | FileCheck %s --check-prefixes=CHECK,CALL + +;; ThinLTO + ThinLTO +; RUN: lld-link -out:%t.thin-thin.exe %t.main.thin -libpath:%T %t.thin.lib -entry:entry -subsystem:console -wrap:bar -debug:symtab -lldsavetemps +; RUN: llvm-objdump -d %t.thin-thin.exe | FileCheck %s --check-prefixes=CHECK,JMP + +;; ThinLTO + Object +; RUN: lld-link -out:%t.thin-obj.exe %t.main.thin -libpath:%T %t.obj.lib -entry:entry -subsystem:console -wrap:bar -debug:symtab -lldsavetemps +; RUN: llvm-objdump -d %t.thin-obj.exe | FileCheck %s --check-prefixes=CHECK,JMP + +;; Object + ThinLTO +; RUN: lld-link -out:%t.obj-thin.exe %t.main.obj -libpath:%T %t.thin.lib -entry:entry -subsystem:console -wrap:bar -debug:symtab -lldsavetemps +; RUN: llvm-objdump -d %t.obj-thin.exe | FileCheck %s --check-prefixes=CHECK,CALL + +;; Make sure that calls in entry() are not eliminated and that bar is +;; routed to __wrap_bar. + +; CHECK: : +; CHECK: {{jmp|callq}}{{.*}}<__wrap_bar> + +;--- main.ll +target datalayout = "e-m:w-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-w64-windows-gnu" + +declare void @bar() + +define void @entry() { + call void @bar() + ret void +} + +;--- wrap.ll +target datalayout = "e-m:w-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-w64-windows-gnu" + +declare void @other() + +define void @__wrap_bar() { + call void @other() + ret void +} + +;--- other.ll +target datalayout = "e-m:w-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-w64-windows-gnu" + +define void @other() { + ret void +} diff --git a/lld/test/COFF/wrap-real-missing.s b/lld/test/COFF/wrap-real-missing.s new file mode 100644 index 00000000000000..47b53d5d5fcf36 --- /dev/null +++ b/lld/test/COFF/wrap-real-missing.s @@ -0,0 +1,21 @@ +// REQUIRES: x86 +// RUN: llvm-mc -filetype=obj -triple=x86_64-win32-gnu %s -o %t.obj + +// RUN: not lld-link -lldmingw -out:%t.exe %t.obj -entry:entry -subsystem:console -wrap:foo 2>&1 | FileCheck %s + +// Check that we error out properly with an undefined symbol, if +// __real_foo is referenced and missing, even if the -lldmingw flag is set +// (which otherwise tolerates certain cases of references to missing +// sections, to tolerate certain GCC pecularities). + +// CHECK: error: undefined symbol: foo + +.global entry +entry: + call foo + ret + +.global __wrap_foo +__wrap_foo: + call __real_foo + ret diff --git a/lld/test/COFF/wrap-with-archive.s b/lld/test/COFF/wrap-with-archive.s new file mode 100644 index 00000000000000..96b244a65a45c9 --- /dev/null +++ b/lld/test/COFF/wrap-with-archive.s @@ -0,0 +1,29 @@ +// REQUIRES: x86 +// RUN: split-file %s %t.dir +// RUN: llvm-mc -filetype=obj -triple=x86_64-win32-gnu %t.dir/main.s -o %t.main.obj +// RUN: llvm-mc -filetype=obj -triple=x86_64-win32-gnu %t.dir/wrap.s -o %t.wrap.obj +// RUN: llvm-mc -filetype=obj -triple=x86_64-win32-gnu %t.dir/other.s -o %t.other.obj +// RUN: rm -f %t.lib +// RUN: llvm-ar rcs %t.lib %t.wrap.obj %t.other.obj + +// RUN: lld-link -out:%t.exe %t.main.obj -libpath:%T %t.lib -entry:entry -subsystem:console -wrap:foo + +// Note: No real definition of foo exists here, but that works fine as long +// as there's no actual references to __real_foo. + +#--- main.s +.global entry +entry: + call foo + ret + +#--- wrap.s +.global __wrap_foo +__wrap_foo: + call other_func + ret + +#--- other.s +.global other_func +other_func: + ret diff --git a/lld/test/COFF/wrap.s b/lld/test/COFF/wrap.s new file mode 100644 index 00000000000000..d0afb7f14cdc5f --- /dev/null +++ b/lld/test/COFF/wrap.s @@ -0,0 +1,51 @@ +// REQUIRES: x86 +// RUN: split-file %s %t.dir +// RUN: llvm-mc -filetype=obj -triple=x86_64-win32-gnu %t.dir/main.s -o %t.main.obj +// RUN: llvm-mc -filetype=obj -triple=x86_64-win32-gnu %t.dir/other.s -o %t.other.obj + +// RUN: lld-link -out:%t.exe %t.main.obj %t.other.obj -entry:entry -subsystem:console -debug:symtab -wrap:foo -wrap:nosuchsym +// RUN: llvm-objdump -d --print-imm-hex %t.exe | FileCheck %s +// RUN: lld-link -out:%t.exe %t.main.obj %t.other.obj -entry:entry -subsystem:console -debug:symtab -wrap:foo -wrap:foo -wrap:nosuchsym +// RUN: llvm-objdump -d --print-imm-hex %t.exe | FileCheck %s + +// CHECK: : +// CHECK-NEXT: movl $0x11010, %edx +// CHECK-NEXT: movl $0x11010, %edx +// CHECK-NEXT: movl $0x11000, %edx + +// RUN: llvm-readobj --symbols %t.exe > %t.dump +// RUN: FileCheck --check-prefix=SYM1 %s < %t.dump +// RUN: FileCheck --check-prefix=SYM2 %s < %t.dump +// RUN: FileCheck --check-prefix=SYM3 %s < %t.dump + +// foo = 0xC0011000 = 3221295104 +// __wrap_foo = 0xC0011010 = 3221295120 +// SYM1: Name: foo +// SYM1-NEXT: Value: 3221295104 +// SYM1-NEXT: Section: IMAGE_SYM_ABSOLUTE +// SYM1-NEXT: BaseType: Null +// SYM1-NEXT: ComplexType: Null +// SYM1-NEXT: StorageClass: External +// SYM2: Name: __wrap_foo +// SYM2-NEXT: Value: 3221295120 +// SYM2-NEXT: Section: IMAGE_SYM_ABSOLUTE +// SYM2-NEXT: BaseType: Null +// SYM2-NEXT: ComplexType: Null +// SYM2-NEXT: StorageClass: External +// SYM3-NOT: Name: __real_foo + +#--- main.s +.global entry +entry: + movl $foo, %edx + movl $__wrap_foo, %edx + movl $__real_foo, %edx + +#--- other.s +.global foo +.global __wrap_foo +.global __real_foo + +foo = 0x11000 +__wrap_foo = 0x11010 +__real_foo = 0x11020 diff --git a/lld/test/MinGW/driver.test b/lld/test/MinGW/driver.test index 4ae8ac7f547cdb..015c2696392308 100644 --- a/lld/test/MinGW/driver.test +++ b/lld/test/MinGW/driver.test @@ -281,3 +281,7 @@ ALLOW_MULTIPLE_DEFINITION: -force:multiple RUN: ld.lld -### -m i386pep foo.o --allow-multiple-definition --no-allow-multiple-definition | FileCheck -check-prefix NO_ALLOW_MULTIPLE_DEFINITION %s RUN: ld.lld -### -m i386pep foo.o -allow-multiple-definition -no-allow-multiple-definition | FileCheck -check-prefix NO_ALLOW_MULTIPLE_DEFINITION %s NO_ALLOW_MULTIPLE_DEFINITION-NOT: -force:multiple + +RUN: ld.lld -### -m i386pep foo.o -wrap foo1 --wrap foo2 | FileCheck -check-prefix WRAP %s +RUN: ld.lld -### -m i386pep foo.o -wrap=foo1 --wrap=foo2 | FileCheck -check-prefix WRAP %s +WRAP: -wrap:foo1 -wrap:foo2 diff --git a/lldb/source/Plugins/Process/FreeBSDRemote/NativeProcessFreeBSD.cpp b/lldb/source/Plugins/Process/FreeBSDRemote/NativeProcessFreeBSD.cpp index e99d38f57eea1c..c234c0e023fbcf 100644 --- a/lldb/source/Plugins/Process/FreeBSDRemote/NativeProcessFreeBSD.cpp +++ b/lldb/source/Plugins/Process/FreeBSDRemote/NativeProcessFreeBSD.cpp @@ -93,7 +93,7 @@ NativeProcessFreeBSD::Factory::Launch(ProcessLaunchInfo &launch_info, pid, launch_info.GetPTY().ReleasePrimaryFileDescriptor(), native_delegate, Info.GetArchitecture(), mainloop)); - status = process_up->ReinitializeThreads(); + status = process_up->SetupTrace(); if (status.Fail()) return status.ToError(); @@ -125,6 +125,10 @@ NativeProcessFreeBSD::Factory::Attach( if (!status.Success()) return status.ToError(); + status = process_up->SetupTrace(); + if (status.Fail()) + return status.ToError(); + return std::move(process_up); } @@ -191,14 +195,26 @@ void NativeProcessFreeBSD::MonitorSIGTRAP(lldb::pid_t pid) { return; } assert(info.pl_event == PL_EVENT_SIGNAL); - // TODO: do we need to handle !PL_FLAG_SI? - assert(info.pl_flags & PL_FLAG_SI); - assert(info.pl_siginfo.si_signo == SIGTRAP); - - LLDB_LOG(log, "got SIGTRAP, pid = {0}, lwpid = {1}, si_code = {2}", pid, - info.pl_lwpid, info.pl_siginfo.si_code); + LLDB_LOG(log, "got SIGTRAP, pid = {0}, lwpid = {1}", pid, info.pl_lwpid); NativeThreadFreeBSD *thread = nullptr; + + if (info.pl_flags & (PL_FLAG_BORN | PL_FLAG_EXITED)) { + if (info.pl_flags & PL_FLAG_BORN) { + LLDB_LOG(log, "monitoring new thread, tid = {0}", info.pl_lwpid); + AddThread(info.pl_lwpid); + } else /*if (info.pl_flags & PL_FLAG_EXITED)*/ { + LLDB_LOG(log, "thread exited, tid = {0}", info.pl_lwpid); + RemoveThread(info.pl_lwpid); + } + + Status error = + PtraceWrapper(PT_CONTINUE, pid, reinterpret_cast(1), 0); + if (error.Fail()) + SetState(StateType::eStateInvalid); + return; + } + if (info.pl_lwpid > 0) { for (const auto &t : m_threads) { if (t->GetID() == static_cast(info.pl_lwpid)) { @@ -212,19 +228,23 @@ void NativeProcessFreeBSD::MonitorSIGTRAP(lldb::pid_t pid) { info.pl_lwpid); } - switch (info.pl_siginfo.si_code) { - case TRAP_BRKPT: - if (thread) { - thread->SetStoppedByBreakpoint(); - FixupBreakpointPCAsNeeded(*thread); + if (info.pl_flags & PL_FLAG_SI) { + assert(info.pl_siginfo.si_signo == SIGTRAP); + + switch (info.pl_siginfo.si_code) { + case TRAP_BRKPT: + if (thread) { + thread->SetStoppedByBreakpoint(); + FixupBreakpointPCAsNeeded(*thread); + } + SetState(StateType::eStateStopped, true); + break; + case TRAP_TRACE: + if (thread) + thread->SetStoppedByTrace(); + SetState(StateType::eStateStopped, true); + break; } - SetState(StateType::eStateStopped, true); - break; - case TRAP_TRACE: - if (thread) - thread->SetStoppedByTrace(); - SetState(StateType::eStateStopped, true); - break; } } @@ -743,6 +763,21 @@ NativeProcessFreeBSD::GetAuxvData() const { return buf; } +Status NativeProcessFreeBSD::SetupTrace() { + // Enable event reporting + int events; + Status status = + PtraceWrapper(PT_GET_EVENT_MASK, GetID(), &events, sizeof(events)); + if (status.Fail()) + return status; + events |= PTRACE_LWP; + status = PtraceWrapper(PT_SET_EVENT_MASK, GetID(), &events, sizeof(events)); + if (status.Fail()) + return status; + + return ReinitializeThreads(); +} + Status NativeProcessFreeBSD::ReinitializeThreads() { // Clear old threads m_threads.clear(); diff --git a/lldb/source/Plugins/Process/FreeBSDRemote/NativeProcessFreeBSD.h b/lldb/source/Plugins/Process/FreeBSDRemote/NativeProcessFreeBSD.h index 8b4ae98894745a..5900048610f5d2 100644 --- a/lldb/source/Plugins/Process/FreeBSDRemote/NativeProcessFreeBSD.h +++ b/lldb/source/Plugins/Process/FreeBSDRemote/NativeProcessFreeBSD.h @@ -107,6 +107,7 @@ class NativeProcessFreeBSD : public NativeProcessELF { void SigchldHandler(); Status Attach(); + Status SetupTrace(); Status ReinitializeThreads(); }; diff --git a/lldb/source/Plugins/Process/FreeBSDRemote/NativeRegisterContextFreeBSD.cpp b/lldb/source/Plugins/Process/FreeBSDRemote/NativeRegisterContextFreeBSD.cpp index 2a7dc3dbc44190..2a2995e548b6f6 100644 --- a/lldb/source/Plugins/Process/FreeBSDRemote/NativeRegisterContextFreeBSD.cpp +++ b/lldb/source/Plugins/Process/FreeBSDRemote/NativeRegisterContextFreeBSD.cpp @@ -25,11 +25,6 @@ NativeRegisterContextFreeBSD::NativeRegisterContextFreeBSD( RegisterInfoInterface *reg_info_interface_p) : NativeRegisterContextRegisterInfo(native_thread, reg_info_interface_p) {} -Status NativeRegisterContextFreeBSD::DoRegisterSet(int ptrace_req, void *buf) { - return NativeProcessFreeBSD::PtraceWrapper(ptrace_req, GetProcessPid(), buf, - m_thread.GetID()); -} - NativeProcessFreeBSD &NativeRegisterContextFreeBSD::GetProcess() { return static_cast(m_thread.GetProcess()); } diff --git a/lldb/source/Plugins/Process/FreeBSDRemote/NativeRegisterContextFreeBSD.h b/lldb/source/Plugins/Process/FreeBSDRemote/NativeRegisterContextFreeBSD.h index db32e216c9253d..0f7b1e95c52af2 100644 --- a/lldb/source/Plugins/Process/FreeBSDRemote/NativeRegisterContextFreeBSD.h +++ b/lldb/source/Plugins/Process/FreeBSDRemote/NativeRegisterContextFreeBSD.h @@ -37,7 +37,6 @@ class NativeRegisterContextFreeBSD : public NativeRegisterContextRegisterInfo { virtual Status ClearWatchpointHit(uint32_t wp_index) = 0; protected: - Status DoRegisterSet(int req, void *buf); virtual NativeProcessFreeBSD &GetProcess(); virtual ::pid_t GetProcessPid(); }; diff --git a/lldb/source/Plugins/Process/FreeBSDRemote/NativeRegisterContextFreeBSD_x86_64.cpp b/lldb/source/Plugins/Process/FreeBSDRemote/NativeRegisterContextFreeBSD_x86_64.cpp index fd8e600ca25f2c..3460f53a2934d8 100644 --- a/lldb/source/Plugins/Process/FreeBSDRemote/NativeRegisterContextFreeBSD_x86_64.cpp +++ b/lldb/source/Plugins/Process/FreeBSDRemote/NativeRegisterContextFreeBSD_x86_64.cpp @@ -429,15 +429,19 @@ int NativeRegisterContextFreeBSD_x86_64::GetSetForNativeRegNum( Status NativeRegisterContextFreeBSD_x86_64::ReadRegisterSet(uint32_t set) { switch (set) { case GPRegSet: - return DoRegisterSet(PT_GETREGS, &m_gpr); + return NativeProcessFreeBSD::PtraceWrapper(PT_GETREGS, m_thread.GetID(), + &m_gpr); case FPRegSet: #if defined(__x86_64__) - return DoRegisterSet(PT_GETFPREGS, &m_fpr); + return NativeProcessFreeBSD::PtraceWrapper(PT_GETFPREGS, m_thread.GetID(), + &m_fpr); #else - return DoRegisterSet(PT_GETXMMREGS, &m_fpr); + return NativeProcessFreeBSD::PtraceWrapper(PT_GETXMMREGS, m_thread.GetID(), + &m_fpr); #endif case DBRegSet: - return DoRegisterSet(PT_GETDBREGS, &m_dbr); + return NativeProcessFreeBSD::PtraceWrapper(PT_GETDBREGS, m_thread.GetID(), + &m_dbr); case XSaveRegSet: { struct ptrace_xstate_info info; Status ret = NativeProcessFreeBSD::PtraceWrapper( @@ -466,15 +470,19 @@ Status NativeRegisterContextFreeBSD_x86_64::ReadRegisterSet(uint32_t set) { Status NativeRegisterContextFreeBSD_x86_64::WriteRegisterSet(uint32_t set) { switch (set) { case GPRegSet: - return DoRegisterSet(PT_SETREGS, &m_gpr); + return NativeProcessFreeBSD::PtraceWrapper(PT_SETREGS, m_thread.GetID(), + &m_gpr); case FPRegSet: #if defined(__x86_64__) - return DoRegisterSet(PT_SETFPREGS, &m_fpr); + return NativeProcessFreeBSD::PtraceWrapper(PT_SETFPREGS, m_thread.GetID(), + &m_fpr); #else - return DoRegisterSet(PT_SETXMMREGS, &m_fpr); + return NativeProcessFreeBSD::PtraceWrapper(PT_SETXMMREGS, m_thread.GetID(), + &m_fpr); #endif case DBRegSet: - return DoRegisterSet(PT_SETDBREGS, &m_dbr); + return NativeProcessFreeBSD::PtraceWrapper(PT_SETDBREGS, m_thread.GetID(), + &m_dbr); case XSaveRegSet: // ReadRegisterSet() must always be called before WriteRegisterSet(). assert(m_xsave.size() > 0); diff --git a/lldb/source/Plugins/Process/FreeBSDRemote/NativeThreadFreeBSD.cpp b/lldb/source/Plugins/Process/FreeBSDRemote/NativeThreadFreeBSD.cpp index 1517e7ff8ab5d5..d1f28387829979 100644 --- a/lldb/source/Plugins/Process/FreeBSDRemote/NativeThreadFreeBSD.cpp +++ b/lldb/source/Plugins/Process/FreeBSDRemote/NativeThreadFreeBSD.cpp @@ -40,32 +40,27 @@ NativeThreadFreeBSD::NativeThreadFreeBSD(NativeProcessFreeBSD &process, m_stop_description() {} Status NativeThreadFreeBSD::Resume() { - Status ret = NativeProcessFreeBSD::PtraceWrapper(PT_RESUME, m_process.GetID(), - nullptr, GetID()); + Status ret = NativeProcessFreeBSD::PtraceWrapper(PT_RESUME, GetID()); if (!ret.Success()) return ret; - ret = NativeProcessFreeBSD::PtraceWrapper(PT_CLEARSTEP, m_process.GetID(), - nullptr, GetID()); + ret = NativeProcessFreeBSD::PtraceWrapper(PT_CLEARSTEP, GetID()); if (ret.Success()) SetRunning(); return ret; } Status NativeThreadFreeBSD::SingleStep() { - Status ret = NativeProcessFreeBSD::PtraceWrapper(PT_RESUME, m_process.GetID(), - nullptr, GetID()); + Status ret = NativeProcessFreeBSD::PtraceWrapper(PT_RESUME, GetID()); if (!ret.Success()) return ret; - ret = NativeProcessFreeBSD::PtraceWrapper(PT_SETSTEP, m_process.GetID(), - nullptr, GetID()); + ret = NativeProcessFreeBSD::PtraceWrapper(PT_SETSTEP, GetID()); if (ret.Success()) SetStepping(); return ret; } Status NativeThreadFreeBSD::Suspend() { - Status ret = NativeProcessFreeBSD::PtraceWrapper( - PT_SUSPEND, m_process.GetID(), nullptr, GetID()); + Status ret = NativeProcessFreeBSD::PtraceWrapper(PT_SUSPEND, GetID()); if (ret.Success()) SetStopped(); return ret; diff --git a/llvm/docs/TableGen/ProgRef.rst b/llvm/docs/TableGen/ProgRef.rst index d59dc1b54d7644..0454d5abbba67a 100644 --- a/llvm/docs/TableGen/ProgRef.rst +++ b/llvm/docs/TableGen/ProgRef.rst @@ -206,12 +206,13 @@ TableGen provides "bang operators" that have a wide variety of uses: .. productionlist:: BangOperator: one of - : !add !and !cast !con !dag - : !empty !eq !foldl !foreach !ge - : !getop !gt !head !if !isa - : !le !listconcat !listsplat !lt !mul - : !ne !or !setop !shl !size - : !sra !srl !strconcat !subst !tail + : !add !and !cast !con !dag + : !empty !eq !foldl !foreach !ge + : !getop !gt !head !if !isa + : !le !listconcat !listsplat !lt !mul + : !ne !not !or !setop !shl + : !size !sra !srl !strconcat !subst + : !tail !xor The ``!cond`` operator has a slightly different syntax compared to other bang operators, so it is defined separately: @@ -1447,7 +1448,8 @@ and non-0 as true. ``!and(``\ *a*\ ``,`` *b*\ ``, ...)`` This operator does a bitwise AND on *a*, *b*, etc., and produces the - result. + result. A logical AND can be performed if all the arguments are either + 0 or 1. ``!cast<``\ *type*\ ``>(``\ *a*\ ``)`` This operator performs a cast on *a* and produces the result. @@ -1605,9 +1607,15 @@ and non-0 as true. The arguments must be ``bit``, ``int``, or ``string`` values. Use ``!cast`` to compare other types of objects. +``!not(``\ *a*\ ``)`` + This operator performs a logical NOT on *a*, which must be + an integer. The argument 0 results in 1 (true); any other + argument results in 0 (false). + ``!or(``\ *a*\ ``,`` *b*\ ``, ...)`` This operator does a bitwise OR on *a*, *b*, etc., and produces the - result. + result. A logical OR can be performed if all the arguments are either + 0 or 1. ``!setop(``\ *dag*\ ``,`` *op*\ ``)`` This operator produces a DAG node with the same arguments as *dag*, but with its @@ -1655,6 +1663,11 @@ and non-0 as true. This operator produces a new list with all the elements of the list *a* except for the zeroth one. (See also ``!head``.) +``!xor(``\ *a*\ ``,`` *b*\ ``, ...)`` + This operator does a bitwise EXCLUSIVE OR on *a*, *b*, etc., and produces + the result. A logical XOR can be performed if all the arguments are either + 0 or 1. + Appendix B: Sample Record ========================= diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPGridValues.h b/llvm/include/llvm/Frontend/OpenMP/OMPGridValues.h index 3ae4a2edbf96f5..41827181e30cf1 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMPGridValues.h +++ b/llvm/include/llvm/Frontend/OpenMP/OMPGridValues.h @@ -1,9 +1,8 @@ //====--- OMPGridValues.h - Language-specific address spaces --*- C++ -*-====// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// /// diff --git a/llvm/include/llvm/TableGen/Record.h b/llvm/include/llvm/TableGen/Record.h index c7009e400ebc2a..c0eb88b56be302 100644 --- a/llvm/include/llvm/TableGen/Record.h +++ b/llvm/include/llvm/TableGen/Record.h @@ -753,7 +753,7 @@ class OpInit : public TypedInit { /// class UnOpInit : public OpInit, public FoldingSetNode { public: - enum UnaryOp : uint8_t { CAST, HEAD, TAIL, SIZE, EMPTY, GETOP }; + enum UnaryOp : uint8_t { CAST, NOT, HEAD, TAIL, SIZE, EMPTY, GETOP }; private: Init *LHS; @@ -802,7 +802,7 @@ class UnOpInit : public OpInit, public FoldingSetNode { /// !op (X, Y) - Combine two inits. class BinOpInit : public OpInit, public FoldingSetNode { public: - enum BinaryOp : uint8_t { ADD, MUL, AND, OR, SHL, SRA, SRL, LISTCONCAT, + enum BinaryOp : uint8_t { ADD, MUL, AND, OR, XOR, SHL, SRA, SRL, LISTCONCAT, LISTSPLAT, STRCONCAT, CONCAT, EQ, NE, LE, LT, GE, GT, SETOP }; diff --git a/llvm/lib/TableGen/Record.cpp b/llvm/lib/TableGen/Record.cpp index 2a46449b213fe8..f191638083c4f5 100644 --- a/llvm/lib/TableGen/Record.cpp +++ b/llvm/lib/TableGen/Record.cpp @@ -761,6 +761,12 @@ Init *UnOpInit::Fold(Record *CurRec, bool IsFinal) const { return NewInit; break; + case NOT: + if (IntInit *LHSi = + dyn_cast_or_null(LHS->convertInitializerTo(IntRecTy::get()))) + return IntInit::get(LHSi->getValue() ? 0 : 1); + break; + case HEAD: if (ListInit *LHSl = dyn_cast(LHS)) { assert(!LHSl->empty() && "Empty list in head"); @@ -820,6 +826,7 @@ std::string UnOpInit::getAsString() const { std::string Result; switch (getOpcode()) { case CAST: Result = "!cast<" + getType()->getAsString() + ">"; break; + case NOT: Result = "!not"; break; case HEAD: Result = "!head"; break; case TAIL: Result = "!tail"; break; case SIZE: Result = "!size"; break; @@ -1014,6 +1021,7 @@ Init *BinOpInit::Fold(Record *CurRec) const { case MUL: case AND: case OR: + case XOR: case SHL: case SRA: case SRL: { @@ -1029,7 +1037,8 @@ Init *BinOpInit::Fold(Record *CurRec) const { case ADD: Result = LHSv + RHSv; break; case MUL: Result = LHSv * RHSv; break; case AND: Result = LHSv & RHSv; break; - case OR: Result = LHSv | RHSv; break; + case OR: Result = LHSv | RHSv; break; + case XOR: Result = LHSv ^ RHSv; break; case SHL: Result = (uint64_t)LHSv << (uint64_t)RHSv; break; case SRA: Result = LHSv >> RHSv; break; case SRL: Result = (uint64_t)LHSv >> (uint64_t)RHSv; break; @@ -1060,6 +1069,7 @@ std::string BinOpInit::getAsString() const { case MUL: Result = "!mul"; break; case AND: Result = "!and"; break; case OR: Result = "!or"; break; + case XOR: Result = "!xor"; break; case SHL: Result = "!shl"; break; case SRA: Result = "!sra"; break; case SRL: Result = "!srl"; break; diff --git a/llvm/lib/TableGen/TGLexer.cpp b/llvm/lib/TableGen/TGLexer.cpp index d9111f0068c2b5..5db0ecb4b0edad 100644 --- a/llvm/lib/TableGen/TGLexer.cpp +++ b/llvm/lib/TableGen/TGLexer.cpp @@ -563,8 +563,10 @@ tgtok::TokKind TGLexer::LexExclaim() { .Case("dag", tgtok::XDag) .Case("add", tgtok::XADD) .Case("mul", tgtok::XMUL) + .Case("not", tgtok::XNOT) .Case("and", tgtok::XAND) .Case("or", tgtok::XOR) + .Case("xor", tgtok::XXOR) .Case("shl", tgtok::XSHL) .Case("sra", tgtok::XSRA) .Case("srl", tgtok::XSRL) diff --git a/llvm/lib/TableGen/TGLexer.h b/llvm/lib/TableGen/TGLexer.h index 49f37890043f03..534c31382f6359 100644 --- a/llvm/lib/TableGen/TGLexer.h +++ b/llvm/lib/TableGen/TGLexer.h @@ -51,9 +51,10 @@ namespace tgtok { MultiClass, String, Defset, Defvar, If, Then, ElseKW, // !keywords. - XConcat, XADD, XMUL, XAND, XOR, XSRA, XSRL, XSHL, XListConcat, XListSplat, - XStrConcat, XCast, XSubst, XForEach, XFoldl, XHead, XTail, XSize, XEmpty, - XIf, XCond, XEq, XIsA, XDag, XNe, XLe, XLt, XGe, XGt, XSetOp, XGetOp, + XConcat, XADD, XMUL, XNOT, XAND, XOR, XXOR, XSRA, XSRL, XSHL, + XListConcat, XListSplat, XStrConcat, XCast, XSubst, XForEach, XFoldl, + XHead, XTail, XSize, XEmpty, XIf, XCond, XEq, XIsA, XDag, XNe, XLe, + XLt, XGe, XGt, XSetOp, XGetOp, // Integer value. IntVal, diff --git a/llvm/lib/TableGen/TGParser.cpp b/llvm/lib/TableGen/TGParser.cpp index eb5053b9c48014..98443c97a1594f 100644 --- a/llvm/lib/TableGen/TGParser.cpp +++ b/llvm/lib/TableGen/TGParser.cpp @@ -908,6 +908,7 @@ Init *TGParser::ParseOperation(Record *CurRec, RecTy *ItemType) { default: TokError("unknown operation"); return nullptr; + case tgtok::XNOT: case tgtok::XHead: case tgtok::XTail: case tgtok::XSize: @@ -930,6 +931,11 @@ Init *TGParser::ParseOperation(Record *CurRec, RecTy *ItemType) { return nullptr; } + break; + case tgtok::XNOT: + Lex.Lex(); // eat the operation + Code = UnOpInit::NOT; + Type = IntRecTy::get(); break; case tgtok::XHead: Lex.Lex(); // eat the operation @@ -1070,6 +1076,7 @@ Init *TGParser::ParseOperation(Record *CurRec, RecTy *ItemType) { case tgtok::XMUL: case tgtok::XAND: case tgtok::XOR: + case tgtok::XXOR: case tgtok::XSRA: case tgtok::XSRL: case tgtok::XSHL: @@ -1095,6 +1102,7 @@ Init *TGParser::ParseOperation(Record *CurRec, RecTy *ItemType) { case tgtok::XMUL: Code = BinOpInit::MUL; break; case tgtok::XAND: Code = BinOpInit::AND; break; case tgtok::XOR: Code = BinOpInit::OR; break; + case tgtok::XXOR: Code = BinOpInit::XOR; break; case tgtok::XSRA: Code = BinOpInit::SRA; break; case tgtok::XSRL: Code = BinOpInit::SRL; break; case tgtok::XSHL: Code = BinOpInit::SHL; break; @@ -1122,6 +1130,7 @@ Init *TGParser::ParseOperation(Record *CurRec, RecTy *ItemType) { break; case tgtok::XAND: case tgtok::XOR: + case tgtok::XXOR: case tgtok::XSRA: case tgtok::XSRL: case tgtok::XSHL: @@ -1239,9 +1248,9 @@ Init *TGParser::ParseOperation(Record *CurRec, RecTy *ItemType) { return nullptr; } if (Code != BinOpInit::ADD && Code != BinOpInit::AND && - Code != BinOpInit::OR && Code != BinOpInit::SRA && - Code != BinOpInit::SRL && Code != BinOpInit::SHL && - Code != BinOpInit::MUL) + Code != BinOpInit::OR && Code != BinOpInit::XOR && + Code != BinOpInit::SRA && Code != BinOpInit::SRL && + Code != BinOpInit::SHL && Code != BinOpInit::MUL) ArgType = Resolved; } @@ -1278,7 +1287,7 @@ Init *TGParser::ParseOperation(Record *CurRec, RecTy *ItemType) { if (Code == BinOpInit::STRCONCAT || Code == BinOpInit::LISTCONCAT || Code == BinOpInit::CONCAT || Code == BinOpInit::ADD || Code == BinOpInit::AND || Code == BinOpInit::OR || - Code == BinOpInit::MUL) { + Code == BinOpInit::XOR || Code == BinOpInit::MUL) { while (InitList.size() > 2) { Init *RHS = InitList.pop_back_val(); RHS = (BinOpInit::get(Code, InitList.back(), RHS, Type))->Fold(CurRec); @@ -2084,8 +2093,10 @@ Init *TGParser::ParseSimpleValue(Record *CurRec, RecTy *ItemType, case tgtok::XDag: case tgtok::XADD: case tgtok::XMUL: + case tgtok::XNOT: case tgtok::XAND: case tgtok::XOR: + case tgtok::XXOR: case tgtok::XSRA: case tgtok::XSRL: case tgtok::XSHL: diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp index e7a90a71e76f54..7fb28dbaf5b2bf 100644 --- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp @@ -802,20 +802,29 @@ void SIRegisterInfo::buildSpillLoadStore(MachineBasicBlock::iterator MI, SrcDstRegState |= getKillRegState(IsKill); } + // Make sure the whole register is defined if there are undef components by + // adding an implicit def of the super-reg on the first instruction. + const bool NeedSuperRegDef = NumSubRegs > 1 && IsStore && i == 0; + auto MIB = spillVGPRtoAGPR(ST, MI, Index, i, SubReg, IsKill); if (!MIB.getInstr()) { unsigned FinalReg = SubReg; - if (hasAGPRs(RC)) { + + const bool IsAGPR = hasAGPRs(RC); + if (IsAGPR) { if (!TmpReg) { assert(RS && "Needs to have RegScavenger to spill an AGPR!"); // FIXME: change to scavengeRegisterBackwards() TmpReg = RS->scavengeRegister(&AMDGPU::VGPR_32RegClass, MI, 0); RS->setRegUsed(TmpReg); } - if (IsStore) - BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_ACCVGPR_READ_B32), TmpReg) + if (IsStore) { + auto AccRead = BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_ACCVGPR_READ_B32), TmpReg) .addReg(SubReg, getKillRegState(IsKill)); + if (NeedSuperRegDef) + AccRead.addReg(ValueReg, RegState::ImplicitDefine); + } SubReg = TmpReg; } @@ -841,14 +850,21 @@ void SIRegisterInfo::buildSpillLoadStore(MachineBasicBlock::iterator MI, .addImm(0) // swz .addMemOperand(NewMMO); + if (!IsAGPR && NeedSuperRegDef) + MIB.addReg(ValueReg, RegState::ImplicitDefine); + if (!IsStore && TmpReg != AMDGPU::NoRegister) MIB = BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_ACCVGPR_WRITE_B32), FinalReg) .addReg(TmpReg, RegState::Kill); + } else { + if (NeedSuperRegDef) + MIB.addReg(ValueReg, RegState::ImplicitDefine); } - if (NumSubRegs > 1) + if (NumSubRegs > 1) { MIB.addReg(ValueReg, RegState::Implicit | SrcDstRegState); + } } if (UninitStackPtrOffset) { diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoV.td b/llvm/lib/Target/RISCV/RISCVInstrInfoV.td index f0c9fcae971123..c8393a6fa7f90b 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoV.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoV.td @@ -216,7 +216,7 @@ class VALUrVV funct6, RISCVVFormat opv, string opcodestr> (ins VRegOp:$vs1, VRegOp:$vs2, VMaskOp:$vm), opcodestr, "$vd, $vs1, $vs2$vm">; -// op vd, vs1, vs2 +// op vd, vs2, vs1 class VALUVVNoVm funct6, RISCVVFormat opv, string opcodestr> : RVInstVV(B); - if (!BCst) - return nullptr; - ConstantInt *CCst = dyn_cast(C); - if (!CCst) - return nullptr; - ConstantInt *DCst = dyn_cast(D); - if (!DCst) - return nullptr; - ConstantInt *ECst = dyn_cast(E); - if (!ECst) + ConstantInt *BCst, *CCst, *DCst, *ECst; + if (!match(B, m_ConstantInt(BCst)) || !match(C, m_ConstantInt(CCst)) || + !match(D, m_ConstantInt(DCst)) || !match(E, m_ConstantInt(ECst))) return nullptr; ICmpInst::Predicate NewCC = IsAnd ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_NE; @@ -672,11 +664,8 @@ static Value *foldLogOpOfMaskedICmps(ICmpInst *LHS, ICmpInst *RHS, bool IsAnd, // Remaining cases assume at least that B and D are constant, and depend on // their actual values. This isn't strictly necessary, just a "handle the // easy cases for now" decision. - ConstantInt *BCst = dyn_cast(B); - if (!BCst) - return nullptr; - ConstantInt *DCst = dyn_cast(D); - if (!DCst) + ConstantInt *BCst, *DCst; + if (!match(B, m_ConstantInt(BCst)) || !match(D, m_ConstantInt(DCst))) return nullptr; if (Mask & (Mask_NotAllZeros | BMask_NotAllOnes)) { @@ -717,11 +706,8 @@ static Value *foldLogOpOfMaskedICmps(ICmpInst *LHS, ICmpInst *RHS, bool IsAnd, // We can't simply use C and E because we might actually handle // (icmp ne (A & B), B) & (icmp eq (A & D), D) // with B and D, having a single bit set. - ConstantInt *CCst = dyn_cast(C); - if (!CCst) - return nullptr; - ConstantInt *ECst = dyn_cast(E); - if (!ECst) + ConstantInt *CCst, *ECst; + if (!match(C, m_ConstantInt(CCst)) || !match(E, m_ConstantInt(ECst))) return nullptr; if (PredL != NewCC) CCst = cast(ConstantExpr::getXor(BCst, CCst)); @@ -870,9 +856,10 @@ Value *InstCombinerImpl::foldAndOrOfICmpsOfAndWithPow2(ICmpInst *LHS, return nullptr; // TODO support vector splats - ConstantInt *LHSC = dyn_cast(LHS->getOperand(1)); - ConstantInt *RHSC = dyn_cast(RHS->getOperand(1)); - if (!LHSC || !RHSC || !LHSC->isZero() || !RHSC->isZero()) + if (!match(LHS->getOperand(1), m_ConstantInt()) || + !match(RHS->getOperand(1), m_ConstantInt()) || + !match(LHS->getOperand(1), m_Zero()) || + !match(RHS->getOperand(1), m_Zero())) return nullptr; Value *A, *B, *C, *D; @@ -1244,9 +1231,10 @@ Value *InstCombinerImpl::foldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS, // This only handles icmp of constants: (icmp1 A, C1) & (icmp2 B, C2). Value *LHS0 = LHS->getOperand(0), *RHS0 = RHS->getOperand(0); - ConstantInt *LHSC = dyn_cast(LHS->getOperand(1)); - ConstantInt *RHSC = dyn_cast(RHS->getOperand(1)); - if (!LHSC || !RHSC) + + ConstantInt *LHSC, *RHSC; + if (!match(LHS->getOperand(1), m_ConstantInt(LHSC)) || + !match(RHS->getOperand(1), m_ConstantInt(RHSC))) return nullptr; if (LHSC == RHSC && PredL == PredR) { @@ -1844,14 +1832,15 @@ Instruction *InstCombinerImpl::visitAnd(BinaryOperator &I) { } } - if (ConstantInt *AndRHS = dyn_cast(Op1)) { + ConstantInt *AndRHS; + if (match(Op1, m_ConstantInt(AndRHS))) { const APInt &AndRHSMask = AndRHS->getValue(); // Optimize a variety of ((val OP C1) & C2) combinations... if (BinaryOperator *Op0I = dyn_cast(Op0)) { // ((C1 OP zext(X)) & C2) -> zext((C1-X) & C2) if C2 fits in the bitwidth // of X and OP behaves well when given trunc(C1) and X. - // TODO: Do this for vectors by using m_APInt isntead of m_ConstantInt. + // TODO: Do this for vectors by using m_APInt instead of m_ConstantInt. switch (Op0I->getOpcode()) { default: break; @@ -2621,9 +2610,9 @@ Instruction *InstCombinerImpl::visitOr(BinaryOperator &I) { Value *A, *B, *C, *D; if (match(Op0, m_And(m_Value(A), m_Value(C))) && match(Op1, m_And(m_Value(B), m_Value(D)))) { - ConstantInt *C1 = dyn_cast(C); - ConstantInt *C2 = dyn_cast(D); - if (C1 && C2) { // (A & C1)|(B & C2) + // (A & C1)|(B & C2) + ConstantInt *C1, *C2; + if (match(C, m_ConstantInt(C1)) && match(D, m_ConstantInt(C2))) { Value *V1 = nullptr, *V2 = nullptr; if ((C1->getValue() & C2->getValue()).isNullValue()) { // ((V | N) & C1) | (V & C2) --> (V|N) & (C1|C2) @@ -2814,7 +2803,7 @@ Instruction *InstCombinerImpl::visitOr(BinaryOperator &I) { // ORs in the hopes that we'll be able to simplify it this way. // (X|C) | V --> (X|V) | C ConstantInt *CI; - if (Op0->hasOneUse() && !isa(Op1) && + if (Op0->hasOneUse() && !match(Op1, m_ConstantInt()) && match(Op0, m_Or(m_Value(A), m_ConstantInt(CI)))) { Value *Inner = Builder.CreateOr(A, Op1); Inner->takeName(Op0); @@ -2835,18 +2824,17 @@ Instruction *InstCombinerImpl::visitOr(BinaryOperator &I) { } } - // or(ashr(subNSW(Y, X), ScalarSizeInBits(Y)-1), X) --> X s> Y ? -1 : X. + // or(ashr(subNSW(Y, X), ScalarSizeInBits(Y) - 1), X) --> X s> Y ? -1 : X. { Value *X, *Y; - const APInt *ShAmt; Type *Ty = I.getType(); - if (match(&I, m_c_Or(m_OneUse(m_AShr(m_NSWSub(m_Value(Y), m_Value(X)), - m_APInt(ShAmt))), - m_Deferred(X))) && - *ShAmt == Ty->getScalarSizeInBits() - 1) { + if (match(&I, m_c_Or(m_OneUse(m_AShr( + m_NSWSub(m_Value(Y), m_Value(X)), + m_SpecificInt(Ty->getScalarSizeInBits() - 1))), + m_Deferred(X)))) { Value *NewICmpInst = Builder.CreateICmpSGT(X, Y); - return SelectInst::Create(NewICmpInst, ConstantInt::getAllOnesValue(Ty), - X); + Value *AllOnes = ConstantInt::getAllOnesValue(Ty); + return SelectInst::Create(NewICmpInst, AllOnes, X); } } diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp index b3e0ef185da34b..1b8519b0d62bfb 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp @@ -278,26 +278,25 @@ Value *InstCombinerImpl::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, // are flipping are known to be set, then the xor is just resetting those // bits to zero. We can just knock out bits from the 'and' and the 'xor', // simplifying both of them. - if (Instruction *LHSInst = dyn_cast(I->getOperand(0))) + if (Instruction *LHSInst = dyn_cast(I->getOperand(0))) { + ConstantInt *AndRHS, *XorRHS; if (LHSInst->getOpcode() == Instruction::And && LHSInst->hasOneUse() && - isa(I->getOperand(1)) && - isa(LHSInst->getOperand(1)) && + match(I->getOperand(1), m_ConstantInt(XorRHS)) && + match(LHSInst->getOperand(1), m_ConstantInt(AndRHS)) && (LHSKnown.One & RHSKnown.One & DemandedMask) != 0) { - ConstantInt *AndRHS = cast(LHSInst->getOperand(1)); - ConstantInt *XorRHS = cast(I->getOperand(1)); APInt NewMask = ~(LHSKnown.One & RHSKnown.One & DemandedMask); Constant *AndC = - ConstantInt::get(I->getType(), NewMask & AndRHS->getValue()); + ConstantInt::get(I->getType(), NewMask & AndRHS->getValue()); Instruction *NewAnd = BinaryOperator::CreateAnd(I->getOperand(0), AndC); InsertNewInstWith(NewAnd, *I); Constant *XorC = - ConstantInt::get(I->getType(), NewMask & XorRHS->getValue()); + ConstantInt::get(I->getType(), NewMask & XorRHS->getValue()); Instruction *NewXor = BinaryOperator::CreateXor(NewAnd, XorC); return InsertNewInstWith(NewXor, *I); } - + } break; } case Instruction::Select: { diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement-stack-lower.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement-stack-lower.ll index 4f9668f8d36970..6fd99b8406d758 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement-stack-lower.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement-stack-lower.ll @@ -59,6 +59,7 @@ define i32 @v_extract_v64i32_varidx(<64 x i32> addrspace(1)* %ptr, i32 %idx) { ; GCN-NEXT: s_sub_u32 s32, s32, 0x10000 ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: buffer_store_dword v35, off, s[0:3], s33 offset:576 ; 4-byte Folded Spill +; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: buffer_store_dword v36, off, s[0:3], s33 offset:580 ; 4-byte Folded Spill ; GCN-NEXT: buffer_store_dword v37, off, s[0:3], s33 offset:584 ; 4-byte Folded Spill ; GCN-NEXT: buffer_store_dword v38, off, s[0:3], s33 offset:588 ; 4-byte Folded Spill @@ -78,6 +79,7 @@ define i32 @v_extract_v64i32_varidx(<64 x i32> addrspace(1)* %ptr, i32 %idx) { ; GCN-NEXT: global_load_dwordx4 v[43:46], v[59:60], off ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: buffer_store_dword v43, off, s[0:3], s33 offset:512 ; 4-byte Folded Spill +; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: buffer_store_dword v44, off, s[0:3], s33 offset:516 ; 4-byte Folded Spill ; GCN-NEXT: buffer_store_dword v45, off, s[0:3], s33 offset:520 ; 4-byte Folded Spill ; GCN-NEXT: buffer_store_dword v46, off, s[0:3], s33 offset:524 ; 4-byte Folded Spill @@ -342,6 +344,7 @@ define i16 @v_extract_v128i16_varidx(<128 x i16> addrspace(1)* %ptr, i32 %idx) { ; GCN-NEXT: s_sub_u32 s32, s32, 0x10000 ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: buffer_store_dword v35, off, s[0:3], s33 offset:576 ; 4-byte Folded Spill +; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: buffer_store_dword v36, off, s[0:3], s33 offset:580 ; 4-byte Folded Spill ; GCN-NEXT: buffer_store_dword v37, off, s[0:3], s33 offset:584 ; 4-byte Folded Spill ; GCN-NEXT: buffer_store_dword v38, off, s[0:3], s33 offset:588 ; 4-byte Folded Spill @@ -361,6 +364,7 @@ define i16 @v_extract_v128i16_varidx(<128 x i16> addrspace(1)* %ptr, i32 %idx) { ; GCN-NEXT: global_load_dwordx4 v[43:46], v[59:60], off ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: buffer_store_dword v43, off, s[0:3], s33 offset:512 ; 4-byte Folded Spill +; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: buffer_store_dword v44, off, s[0:3], s33 offset:516 ; 4-byte Folded Spill ; GCN-NEXT: buffer_store_dword v45, off, s[0:3], s33 offset:520 ; 4-byte Folded Spill ; GCN-NEXT: buffer_store_dword v46, off, s[0:3], s33 offset:524 ; 4-byte Folded Spill @@ -630,6 +634,7 @@ define i64 @v_extract_v32i64_varidx(<32 x i64> addrspace(1)* %ptr, i32 %idx) { ; GCN-NEXT: s_sub_u32 s32, s32, 0x10000 ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: buffer_store_dword v35, off, s[0:3], s33 offset:576 ; 4-byte Folded Spill +; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: buffer_store_dword v36, off, s[0:3], s33 offset:580 ; 4-byte Folded Spill ; GCN-NEXT: buffer_store_dword v37, off, s[0:3], s33 offset:584 ; 4-byte Folded Spill ; GCN-NEXT: buffer_store_dword v38, off, s[0:3], s33 offset:588 ; 4-byte Folded Spill @@ -649,6 +654,7 @@ define i64 @v_extract_v32i64_varidx(<32 x i64> addrspace(1)* %ptr, i32 %idx) { ; GCN-NEXT: global_load_dwordx4 v[43:46], v[59:60], off ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: buffer_store_dword v43, off, s[0:3], s33 offset:512 ; 4-byte Folded Spill +; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: buffer_store_dword v44, off, s[0:3], s33 offset:516 ; 4-byte Folded Spill ; GCN-NEXT: buffer_store_dword v45, off, s[0:3], s33 offset:520 ; 4-byte Folded Spill ; GCN-NEXT: buffer_store_dword v46, off, s[0:3], s33 offset:524 ; 4-byte Folded Spill diff --git a/llvm/test/CodeGen/AMDGPU/spill-agpr-partially-undef.mir b/llvm/test/CodeGen/AMDGPU/spill-agpr-partially-undef.mir new file mode 100644 index 00000000000000..bdb4d042b45200 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/spill-agpr-partially-undef.mir @@ -0,0 +1,70 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -verify-machineinstrs -run-pass=prologepilog %s -o - | FileCheck -check-prefix=CHECK -check-prefix=GFX908 %s + +--- +name: spill_a64_kill +tracksRegLiveness: true +stack: + - { id: 0, type: spill-slot, size: 8, alignment: 4 } +machineFunctionInfo: + scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3' + stackPtrOffsetReg: '$sgpr32' + frameOffsetReg: '$sgpr33' +body: | + bb.0: + liveins: $agpr0_agpr1 + + ; CHECK-LABEL: name: spill_a64_kill + ; CHECK: liveins: $agpr0_agpr1 + ; CHECK: $vgpr0 = V_ACCVGPR_READ_B32 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1 + ; CHECK: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1 :: (store 4 into %stack.0, addrspace 5) + ; CHECK: $vgpr0 = V_ACCVGPR_READ_B32 killed $agpr1, implicit $exec + ; CHECK: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, 0, 0, implicit $exec, implicit killed $agpr0_agpr1 :: (store 4 into %stack.0 + 4, addrspace 5) + SI_SPILL_A64_SAVE killed $agpr0_agpr1, %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, implicit $exec :: (store 8 into %stack.0, addrspace 5) +... + +# Make sure there's no verifier error on the undef spill component when the value is killed. + +--- +name: spill_a64_undef_sub1_killed +tracksRegLiveness: true +stack: + - { id: 0, type: spill-slot, size: 8, alignment: 4 } +machineFunctionInfo: + scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3' + stackPtrOffsetReg: '$sgpr32' + frameOffsetReg: '$sgpr33' +body: | + bb.0: + liveins: $agpr0 + + ; CHECK-LABEL: name: spill_a64_undef_sub1_killed + ; CHECK: liveins: $agpr0 + ; CHECK: $vgpr0 = V_ACCVGPR_READ_B32 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1 + ; CHECK: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1 :: (store 4 into %stack.0, addrspace 5) + ; CHECK: $vgpr0 = V_ACCVGPR_READ_B32 killed $agpr1, implicit $exec + ; CHECK: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, 0, 0, implicit $exec, implicit killed $agpr0_agpr1 :: (store 4 into %stack.0 + 4, addrspace 5) + SI_SPILL_A64_SAVE killed $agpr0_agpr1, %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, implicit $exec :: (store 8 into %stack.0, addrspace 5) +... + +--- +name: spill_a64_undef_sub0_killed +tracksRegLiveness: true +stack: + - { id: 0, type: spill-slot, size: 8, alignment: 4 } +machineFunctionInfo: + scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3' + stackPtrOffsetReg: '$sgpr32' + frameOffsetReg: '$sgpr33' +body: | + bb.0: + liveins: $agpr1 + + ; CHECK-LABEL: name: spill_a64_undef_sub0_killed + ; CHECK: liveins: $agpr1 + ; CHECK: $vgpr0 = V_ACCVGPR_READ_B32 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1 + ; CHECK: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1 :: (store 4 into %stack.0, addrspace 5) + ; CHECK: $vgpr0 = V_ACCVGPR_READ_B32 killed $agpr1, implicit $exec + ; CHECK: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, 0, 0, implicit $exec, implicit killed $agpr0_agpr1 :: (store 4 into %stack.0 + 4, addrspace 5) + SI_SPILL_A64_SAVE killed $agpr0_agpr1, %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, implicit $exec :: (store 8 into %stack.0, addrspace 5) +... diff --git a/llvm/test/CodeGen/AMDGPU/spill-agpr.mir b/llvm/test/CodeGen/AMDGPU/spill-agpr.mir index c817b977eb9d44..6d5af02301e2f5 100644 --- a/llvm/test/CodeGen/AMDGPU/spill-agpr.mir +++ b/llvm/test/CodeGen/AMDGPU/spill-agpr.mir @@ -78,7 +78,7 @@ body: | ; EXPANDED: successors: %bb.1(0x80000000) ; EXPANDED: liveins: $vgpr0, $vgpr1 ; EXPANDED: S_NOP 0, implicit-def renamable $agpr0_agpr1 - ; EXPANDED: $vgpr0 = V_ACCVGPR_READ_B32 killed $agpr0, implicit $exec, implicit $agpr0_agpr1 + ; EXPANDED: $vgpr0 = V_ACCVGPR_READ_B32 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1, implicit $agpr0_agpr1 ; EXPANDED: $vgpr1 = V_ACCVGPR_READ_B32 killed $agpr1, implicit $exec, implicit killed $agpr0_agpr1 ; EXPANDED: S_CBRANCH_SCC1 %bb.1, implicit undef $scc ; EXPANDED: bb.1: @@ -220,7 +220,7 @@ body: | ; EXPANDED: successors: %bb.1(0x80000000) ; EXPANDED: liveins: $vgpr0, $vgpr1, $vgpr2 ; EXPANDED: S_NOP 0, implicit-def renamable $agpr0_agpr1_agpr2 - ; EXPANDED: $vgpr0 = V_ACCVGPR_READ_B32 killed $agpr0, implicit $exec, implicit $agpr0_agpr1_agpr2 + ; EXPANDED: $vgpr0 = V_ACCVGPR_READ_B32 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2, implicit $agpr0_agpr1_agpr2 ; EXPANDED: $vgpr1 = V_ACCVGPR_READ_B32 killed $agpr1, implicit $exec, implicit $agpr0_agpr1_agpr2 ; EXPANDED: $vgpr2 = V_ACCVGPR_READ_B32 killed $agpr2, implicit $exec, implicit killed $agpr0_agpr1_agpr2 ; EXPANDED: S_CBRANCH_SCC1 %bb.1, implicit undef $scc @@ -269,7 +269,7 @@ body: | ; EXPANDED: successors: %bb.1(0x80000000) ; EXPANDED: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; EXPANDED: S_NOP 0, implicit-def renamable $agpr0_agpr1_agpr2_agpr3 - ; EXPANDED: $vgpr0 = V_ACCVGPR_READ_B32 killed $agpr0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3 + ; EXPANDED: $vgpr0 = V_ACCVGPR_READ_B32 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3, implicit $agpr0_agpr1_agpr2_agpr3 ; EXPANDED: $vgpr1 = V_ACCVGPR_READ_B32 killed $agpr1, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3 ; EXPANDED: $vgpr2 = V_ACCVGPR_READ_B32 killed $agpr2, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3 ; EXPANDED: $vgpr3 = V_ACCVGPR_READ_B32 killed $agpr3, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3 @@ -320,7 +320,7 @@ body: | ; EXPANDED: successors: %bb.1(0x80000000) ; EXPANDED: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; EXPANDED: S_NOP 0, implicit-def renamable $agpr0_agpr1_agpr2_agpr3_agpr4 - ; EXPANDED: $vgpr0 = V_ACCVGPR_READ_B32 killed $agpr0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4 + ; EXPANDED: $vgpr0 = V_ACCVGPR_READ_B32 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4, implicit $agpr0_agpr1_agpr2_agpr3_agpr4 ; EXPANDED: $vgpr1 = V_ACCVGPR_READ_B32 killed $agpr1, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4 ; EXPANDED: $vgpr2 = V_ACCVGPR_READ_B32 killed $agpr2, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4 ; EXPANDED: $vgpr3 = V_ACCVGPR_READ_B32 killed $agpr3, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4 @@ -373,7 +373,7 @@ body: | ; EXPANDED: successors: %bb.1(0x80000000) ; EXPANDED: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; EXPANDED: S_NOP 0, implicit-def renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 - ; EXPANDED: $vgpr0 = V_ACCVGPR_READ_B32 killed $agpr0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 + ; EXPANDED: $vgpr0 = V_ACCVGPR_READ_B32 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 ; EXPANDED: $vgpr1 = V_ACCVGPR_READ_B32 killed $agpr1, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 ; EXPANDED: $vgpr2 = V_ACCVGPR_READ_B32 killed $agpr2, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 ; EXPANDED: $vgpr3 = V_ACCVGPR_READ_B32 killed $agpr3, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 @@ -428,7 +428,7 @@ body: | ; EXPANDED: successors: %bb.1(0x80000000) ; EXPANDED: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7 ; EXPANDED: S_NOP 0, implicit-def renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 - ; EXPANDED: $vgpr0 = V_ACCVGPR_READ_B32 killed $agpr0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 + ; EXPANDED: $vgpr0 = V_ACCVGPR_READ_B32 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 ; EXPANDED: $vgpr1 = V_ACCVGPR_READ_B32 killed $agpr1, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 ; EXPANDED: $vgpr2 = V_ACCVGPR_READ_B32 killed $agpr2, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 ; EXPANDED: $vgpr3 = V_ACCVGPR_READ_B32 killed $agpr3, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 @@ -487,7 +487,7 @@ body: | ; EXPANDED: successors: %bb.1(0x80000000) ; EXPANDED: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15 ; EXPANDED: S_NOP 0, implicit-def renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; EXPANDED: $vgpr0 = V_ACCVGPR_READ_B32 killed $agpr0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 + ; EXPANDED: $vgpr0 = V_ACCVGPR_READ_B32 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 ; EXPANDED: $vgpr1 = V_ACCVGPR_READ_B32 killed $agpr1, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 ; EXPANDED: $vgpr2 = V_ACCVGPR_READ_B32 killed $agpr2, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 ; EXPANDED: $vgpr3 = V_ACCVGPR_READ_B32 killed $agpr3, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 @@ -562,7 +562,7 @@ body: | ; EXPANDED: successors: %bb.1(0x80000000) ; EXPANDED: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31 ; EXPANDED: S_NOP 0, implicit-def renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; EXPANDED: $vgpr0 = V_ACCVGPR_READ_B32 killed $agpr0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 + ; EXPANDED: $vgpr0 = V_ACCVGPR_READ_B32 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 ; EXPANDED: $vgpr1 = V_ACCVGPR_READ_B32 killed $agpr1, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 ; EXPANDED: $vgpr2 = V_ACCVGPR_READ_B32 killed $agpr2, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 ; EXPANDED: $vgpr3 = V_ACCVGPR_READ_B32 killed $agpr3, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 diff --git a/llvm/test/CodeGen/AMDGPU/spill-reg-tuple-super-reg-use.mir b/llvm/test/CodeGen/AMDGPU/spill-reg-tuple-super-reg-use.mir index b0bcdb45f51101..d95ee977615a28 100644 --- a/llvm/test/CodeGen/AMDGPU/spill-reg-tuple-super-reg-use.mir +++ b/llvm/test/CodeGen/AMDGPU/spill-reg-tuple-super-reg-use.mir @@ -79,11 +79,11 @@ body: | ; GCN-LABEL: name: spill_vgpr128_use_subreg ; GCN: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7 ; GCN: renamable $vgpr1 = COPY $vgpr2 - ; GCN: BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr100_sgpr101_sgpr102_sgpr103, $sgpr32, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3 :: (store 4 into %stack.0, addrspace 5) + ; GCN: BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr100_sgpr101_sgpr102_sgpr103, $sgpr32, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $vgpr0_vgpr1_vgpr2_vgpr3 :: (store 4 into %stack.0, addrspace 5) ; GCN: BUFFER_STORE_DWORD_OFFSET $vgpr1, $sgpr100_sgpr101_sgpr102_sgpr103, $sgpr32, 4, 0, 0, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3 :: (store 4 into %stack.0 + 4, addrspace 5) ; GCN: BUFFER_STORE_DWORD_OFFSET $vgpr2, $sgpr100_sgpr101_sgpr102_sgpr103, $sgpr32, 8, 0, 0, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3 :: (store 4 into %stack.0 + 8, addrspace 5) ; GCN: BUFFER_STORE_DWORD_OFFSET $vgpr3, $sgpr100_sgpr101_sgpr102_sgpr103, $sgpr32, 12, 0, 0, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3 :: (store 4 into %stack.0 + 12, addrspace 5) - ; GCN: renamable $vgpr8 = COPY $vgpr2 + ; GCN: renamable $vgpr8 = COPY killed renamable $vgpr1 ; GCN: S_ENDPGM 0, implicit $vgpr8 renamable $vgpr1 = COPY $vgpr2 SI_SPILL_V128_SAVE renamable $vgpr0_vgpr1_vgpr2_vgpr3, %stack.0, $sgpr100_sgpr101_sgpr102_sgpr103, $sgpr32, 0, implicit $exec :: (store 16 into %stack.0, align 4, addrspace 5) @@ -108,7 +108,7 @@ body: | ; GCN-LABEL: name: spill_vgpr128_use_kill ; GCN: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7 ; GCN: renamable $vgpr1 = COPY $vgpr2 - ; GCN: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr100_sgpr101_sgpr102_sgpr103, $sgpr32, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3 :: (store 4 into %stack.0, addrspace 5) + ; GCN: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr100_sgpr101_sgpr102_sgpr103, $sgpr32, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $vgpr0_vgpr1_vgpr2_vgpr3 :: (store 4 into %stack.0, addrspace 5) ; GCN: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr100_sgpr101_sgpr102_sgpr103, $sgpr32, 4, 0, 0, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3 :: (store 4 into %stack.0 + 4, addrspace 5) ; GCN: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr100_sgpr101_sgpr102_sgpr103, $sgpr32, 8, 0, 0, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3 :: (store 4 into %stack.0 + 8, addrspace 5) ; GCN: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr100_sgpr101_sgpr102_sgpr103, $sgpr32, 12, 0, 0, 0, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3 :: (store 4 into %stack.0 + 12, addrspace 5) diff --git a/llvm/test/CodeGen/AMDGPU/vgpr-spill.mir b/llvm/test/CodeGen/AMDGPU/vgpr-spill.mir new file mode 100644 index 00000000000000..240f6778bff879 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/vgpr-spill.mir @@ -0,0 +1,126 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs -run-pass=prologepilog %s -o - | FileCheck -check-prefix=CHECK -check-prefix=GCN64 %s + +--- +name: spill_v32 +tracksRegLiveness: true +stack: + - { id: 0, type: spill-slot, size: 4, alignment: 4 } +machineFunctionInfo: + scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3' + stackPtrOffsetReg: '$sgpr32' + frameOffsetReg: '$sgpr33' +body: | + bb.0: + liveins: $vgpr0 + + ; CHECK-LABEL: name: spill_v32 + ; CHECK: liveins: $vgpr0 + ; CHECK: BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.0, addrspace 5) + ; CHECK: S_NOP 0, implicit $vgpr0 + SI_SPILL_V32_SAVE $vgpr0, %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, implicit $exec :: (store 4 into %stack.0, addrspace 5) + S_NOP 0, implicit $vgpr0 +... + +--- +name: spill_v32_kill +tracksRegLiveness: true +stack: + - { id: 0, type: spill-slot, size: 4, alignment: 4 } +machineFunctionInfo: + scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3' + stackPtrOffsetReg: '$sgpr32' + frameOffsetReg: '$sgpr33' +body: | + bb.0: + liveins: $vgpr0 + + ; CHECK-LABEL: name: spill_v32_kill + ; CHECK: liveins: $vgpr0 + ; CHECK: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.0, addrspace 5) + SI_SPILL_V32_SAVE killed $vgpr0, %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, implicit $exec :: (store 4 into %stack.0, addrspace 5) +... + +--- +name: spill_v64 +tracksRegLiveness: true +stack: + - { id: 0, type: spill-slot, size: 8, alignment: 4 } +machineFunctionInfo: + scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3' + stackPtrOffsetReg: '$sgpr32' + frameOffsetReg: '$sgpr33' +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; CHECK-LABEL: name: spill_v64 + ; CHECK: liveins: $vgpr0_vgpr1 + ; CHECK: BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $vgpr0_vgpr1 :: (store 4 into %stack.0, addrspace 5) + ; CHECK: BUFFER_STORE_DWORD_OFFSET $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1 :: (store 4 into %stack.0 + 4, addrspace 5) + ; CHECK: S_NOP 0, implicit $vgpr0_vgpr1 + SI_SPILL_V64_SAVE $vgpr0_vgpr1, %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, implicit $exec :: (store 8 into %stack.0, addrspace 5) + S_NOP 0, implicit $vgpr0_vgpr1 +... + +--- +name: spill_v64_kill +tracksRegLiveness: true +stack: + - { id: 0, type: spill-slot, size: 8, alignment: 4 } +machineFunctionInfo: + scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3' + stackPtrOffsetReg: '$sgpr32' + frameOffsetReg: '$sgpr33' +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; CHECK-LABEL: name: spill_v64_kill + ; CHECK: liveins: $vgpr0_vgpr1 + ; CHECK: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $vgpr0_vgpr1 :: (store 4 into %stack.0, addrspace 5) + ; CHECK: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1 :: (store 4 into %stack.0 + 4, addrspace 5) + SI_SPILL_V64_SAVE killed $vgpr0_vgpr1, %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, implicit $exec :: (store 8 into %stack.0, addrspace 5) +... + +# Make sure there's no verifier error on the undef spill component when the value is killed. + +--- +name: spill_v64_undef_sub1_killed +tracksRegLiveness: true +stack: + - { id: 0, type: spill-slot, size: 8, alignment: 4 } +machineFunctionInfo: + scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3' + stackPtrOffsetReg: '$sgpr32' + frameOffsetReg: '$sgpr33' +body: | + bb.0: + liveins: $vgpr0 + + ; CHECK-LABEL: name: spill_v64_undef_sub1_killed + ; CHECK: liveins: $vgpr0 + ; CHECK: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $vgpr0_vgpr1 :: (store 4 into %stack.0, addrspace 5) + ; CHECK: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1 :: (store 4 into %stack.0 + 4, addrspace 5) + SI_SPILL_V64_SAVE killed $vgpr0_vgpr1, %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, implicit $exec :: (store 8 into %stack.0, addrspace 5) +... + +--- +name: spill_v64_undef_sub0_killed +tracksRegLiveness: true +stack: + - { id: 0, type: spill-slot, size: 8, alignment: 4 } +machineFunctionInfo: + scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3' + stackPtrOffsetReg: '$sgpr32' + frameOffsetReg: '$sgpr33' +body: | + bb.0: + liveins: $vgpr1 + + ; CHECK-LABEL: name: spill_v64_undef_sub0_killed + ; CHECK: liveins: $vgpr1 + ; CHECK: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $vgpr0_vgpr1 :: (store 4 into %stack.0, addrspace 5) + ; CHECK: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1 :: (store 4 into %stack.0 + 4, addrspace 5) + SI_SPILL_V64_SAVE killed $vgpr0_vgpr1, %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, implicit $exec :: (store 8 into %stack.0, addrspace 5) +... diff --git a/llvm/test/CodeGen/X86/ctpop-combine.ll b/llvm/test/CodeGen/X86/ctpop-combine.ll index 2a686d39382409..027ad93173dfa2 100644 --- a/llvm/test/CodeGen/X86/ctpop-combine.ll +++ b/llvm/test/CodeGen/X86/ctpop-combine.ll @@ -161,12 +161,10 @@ define i32 @ctpop_ne_one(i64 %x) nounwind readnone { ret i32 %conv } -define i1 @ctpop_trunc_non_power2(i255 %x) { +define i1 @ctpop_trunc_non_power2(i255 %x) nounwind { ; CHECK-LABEL: ctpop_trunc_non_power2: ; CHECK: # %bb.0: ; CHECK-NEXT: pushq %rbx -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: .cfi_offset %rbx, -16 ; CHECK-NEXT: movabsq $9223372036854775807, %r8 # imm = 0x7FFFFFFFFFFFFFFF ; CHECK-NEXT: movq %rcx, %r9 ; CHECK-NEXT: andq %r8, %r9 @@ -193,7 +191,6 @@ define i1 @ctpop_trunc_non_power2(i255 %x) { ; CHECK-NEXT: setne %al ; CHECK-NEXT: andb %cl, %al ; CHECK-NEXT: popq %rbx -; CHECK-NEXT: .cfi_def_cfa_offset 8 ; CHECK-NEXT: retq %a = call i255 @llvm.ctpop.i255(i255 %x) %b = trunc i255 %a to i8 ; largest value from ctpop is 255, fits in 8 bits. diff --git a/llvm/test/TableGen/arithmetic.td b/llvm/test/TableGen/arithmetic.td index 50007c5f18a266..db268eef2f857e 100644 --- a/llvm/test/TableGen/arithmetic.td +++ b/llvm/test/TableGen/arithmetic.td @@ -7,6 +7,7 @@ // CHECK: bits<8> add = { 0, 1, 0, 0, 0, 0, 0, 0 }; // CHECK: bits<8> and = { 0, 0, 0, 0, 0, 0, 0, 1 }; // CHECK: bits<8> or = { 0, 0, 1, 1, 1, 1, 1, 1 }; +// CHECK: bits<8> xor = { 0, 0, 1, 1, 1, 1, 1, 0 }; // CHECK: bits<8> srl = { 0, 0, 0, 1, 1, 1, 1, 1 }; // CHECK: bits<8> sra = { 0, 0, 0, 1, 1, 1, 1, 1 }; // CHECK: bits<8> shl = { 0, 1, 1, 1, 1, 1, 1, 0 }; @@ -17,6 +18,7 @@ class A a, bits<2> b> { bits<8> add = !add(a, b); bits<8> and = !and(a, b); bits<8> or = !or(a, b); + bits<8> xor = !xor(a, b); bits<8> srl = !srl(a, b); bits<8> sra = !sra(a, b); bits<8> shl = !shl(a, b); diff --git a/llvm/test/TableGen/if.td b/llvm/test/TableGen/if.td index 1fbee6966ff38b..a3148494dd1394 100644 --- a/llvm/test/TableGen/if.td +++ b/llvm/test/TableGen/if.td @@ -97,6 +97,39 @@ def E2d : E2<0>; def EXd1 : EX<1, E1d, E2d>; def EXd2 : EX<0, E1d, E2d>; +// CHECK: def Not1 +// CHECK: Result1a = "OK" +// CHECK: Result1b = "OK" +// CHECK: Result1c = "OK" +// CHECK: Result1d = "OK" +// CHECK: Result1e = "OK" +// CHECK: def Not2 +// CHECK: Result2a = "OK" +// CHECK: Result2b = "OK" +// CHECK: Result2c = "OK" +// CHECK: Result2d = "OK" + +def Not1 { + bit true = 1; + string Result1a = !if(true, "OK", "not OK"); + string Result1b = !if(!not(true), "not OK", "OK"); + + bit false = 0; + string Result1c = !if(false, "not OK", "OK"); + string Result1d = !if(!not(false), "OK", "not OK"); + string Result1e = !if(!not(!not(false)), "not OK", "OK"); +} + +def Not2 { + int one = 1; + string Result2a = !if(one, "OK", "not OK"); + string Result2b = !if(!not(one), "not OK", "OK"); + + int zero = 0; + string Result2c = !if(zero, "not OK", "OK"); + string Result2d = !if(!not(zero), "OK", "not OK"); +} + // CHECK: def One // CHECK-NEXT: list first = [1, 2, 3]; // CHECK-NEXT: list rest = [1, 2, 3]; diff --git a/llvm/test/TableGen/math.td b/llvm/test/TableGen/math.td index f7bda1ed13779f..24a0e53a7916fa 100644 --- a/llvm/test/TableGen/math.td +++ b/llvm/test/TableGen/math.td @@ -71,6 +71,12 @@ def v1a : Int; // CHECK: Value = 84 def v84 : Int; +// CHECK: def v9 +// CHECK: Value = 9 +def v9 : Int; + // CHECK: def v924 // CHECK: Value = 924 def v924 : Int; + + diff --git a/llvm/test/tools/llvm-symbolizer/pdb/missing_pdb.test b/llvm/test/tools/llvm-symbolizer/pdb/missing_pdb.test index b83b46a752133c..7e7fa79c4b382d 100644 --- a/llvm/test/tools/llvm-symbolizer/pdb/missing_pdb.test +++ b/llvm/test/tools/llvm-symbolizer/pdb/missing_pdb.test @@ -1,4 +1,4 @@ -RUN: llvm-symbolizer 0x401000 0x401001 -obj="%p/Inputs/missing_pdb.exe" 2>%t.err \ +RUN: not llvm-symbolizer 0x401000 0x401001 -obj="%p/Inputs/missing_pdb.exe" 2>%t.err \ RUN: | FileCheck %s RUN: FileCheck --check-prefix=ERROR %s < %t.err diff --git a/llvm/utils/TableGen/RISCVCompressInstEmitter.cpp b/llvm/utils/TableGen/RISCVCompressInstEmitter.cpp index f298e639bf7fcf..b8464418320bdc 100644 --- a/llvm/utils/TableGen/RISCVCompressInstEmitter.cpp +++ b/llvm/utils/TableGen/RISCVCompressInstEmitter.cpp @@ -884,13 +884,7 @@ void RISCVCompressInstEmitter::emitCompressInstEmitter(raw_ostream &o, } void RISCVCompressInstEmitter::run(raw_ostream &o) { - Record *CompressClass = Records.getClass("CompressPat"); - assert(CompressClass && "Compress class definition missing!"); - std::vector Insts; - for (const auto &D : Records.getDefs()) { - if (D.second->isSubClassOf(CompressClass)) - Insts.push_back(D.second.get()); - } + std::vector Insts = Records.getAllDerivedDefinitions("CompressPat"); // Process the CompressPat definitions, validating them as we do so. for (unsigned i = 0, e = Insts.size(); i != e; ++i) diff --git a/mlir/include/mlir/Dialect/StandardOps/IR/Ops.td b/mlir/include/mlir/Dialect/StandardOps/IR/Ops.td index 6b60c2f79b0109..c024e19b500916 100644 --- a/mlir/include/mlir/Dialect/StandardOps/IR/Ops.td +++ b/mlir/include/mlir/Dialect/StandardOps/IR/Ops.td @@ -3097,6 +3097,7 @@ def SubViewOp : BaseOpWithOffsetSizesAndStrides< }]; let hasCanonicalizer = 1; + let hasFolder = 1; } //===----------------------------------------------------------------------===// diff --git a/mlir/lib/Dialect/StandardOps/IR/Ops.cpp b/mlir/lib/Dialect/StandardOps/IR/Ops.cpp index 82058fdcc03c83..8fe45cbb1a1371 100644 --- a/mlir/lib/Dialect/StandardOps/IR/Ops.cpp +++ b/mlir/lib/Dialect/StandardOps/IR/Ops.cpp @@ -2531,8 +2531,10 @@ parseListOfOperandsOrIntegers(OpAsmParser &parser, OperationState &result, if (failed(parser.parseLSquare())) return failure(); // 0-D. - if (succeeded(parser.parseOptionalRSquare())) + if (succeeded(parser.parseOptionalRSquare())) { + result.addAttribute(attrName, parser.getBuilder().getArrayAttr({})); return success(); + } SmallVector attrVals; while (true) { @@ -3333,6 +3335,13 @@ void SubViewOp::getCanonicalizationPatterns(OwningRewritePatternList &results, SubViewOpMemRefCastFolder>(context); } +OpFoldResult SubViewOp::fold(ArrayRef operands) { + if (getResultRank() == 0 && getSourceRank() == 0) + return getViewSource(); + + return {}; +} + //===----------------------------------------------------------------------===// // SubTensorOp //===----------------------------------------------------------------------===// diff --git a/mlir/test/IR/core-ops.mlir b/mlir/test/IR/core-ops.mlir index 219c3bc84d5706..da7394eae7846c 100644 --- a/mlir/test/IR/core-ops.mlir +++ b/mlir/test/IR/core-ops.mlir @@ -827,6 +827,9 @@ func @memref_subview(%arg0 : index, %arg1 : index, %arg2 : index) { %21 = subview %20[0, 0, 0][1, 16, 4][1, 1, 1] : memref<8x16x4xf32> to memref<16x4xf32> %22 = subview %20[3, 4, 2][1, 6, 3][1, 1, 1] : memref<8x16x4xf32> to memref<6x3xf32, offset: 210, strides: [4, 1]> + + %23 = alloc() : memref + %78 = subview %23[] [] [] : memref to memref return } diff --git a/mlir/test/Transforms/constant-fold.mlir b/mlir/test/Transforms/constant-fold.mlir index 36fa234213ea1e..c75c89877830eb 100644 --- a/mlir/test/Transforms/constant-fold.mlir +++ b/mlir/test/Transforms/constant-fold.mlir @@ -744,3 +744,12 @@ func @splat_fold() -> (vector<4xf32>, tensor<4xf32>) { // CHECK-NEXT: [[T:%.*]] = constant dense<1.000000e+00> : tensor<4xf32> // CHECK-NEXT: return [[V]], [[T]] : vector<4xf32>, tensor<4xf32> } + +// ----- + +// CHECK-LABEL: func @subview_scalar_fold +func @subview_scalar_fold(%arg0: memref) -> memref { + // CHECK-NOT: subview + %c = subview %arg0[] [] [] : memref to memref + return %c : memref +} diff --git a/openmp/libomptarget/plugins/amdgpu/CMakeLists.txt b/openmp/libomptarget/plugins/amdgpu/CMakeLists.txt index 7483e4e5c0eae1..3882b777f5b1e7 100644 --- a/openmp/libomptarget/plugins/amdgpu/CMakeLists.txt +++ b/openmp/libomptarget/plugins/amdgpu/CMakeLists.txt @@ -16,7 +16,7 @@ # as of rocm-3.7, hsa is installed with cmake packages and kmt is found via hsa find_package(hsa-runtime64 QUIET 1.2.0 HINTS ${CMAKE_INSTALL_PREFIX} PATHS /opt/rocm) if (NOT ${hsa-runtime64_FOUND}) - libomptarget_say("Not building HSA plugin: hsa-runtime64 not found") + libomptarget_say("Not building AMDGPU plugin: hsa-runtime64 not found") return() endif() @@ -26,9 +26,15 @@ if(NOT LIBOMPTARGET_DEP_LIBELF_FOUND) endif() if(NOT CMAKE_SYSTEM_PROCESSOR MATCHES "(x86_64)|(ppc64le)|(aarch64)$" AND CMAKE_SYSTEM_NAME MATCHES "Linux") - libomptarget_say("Not building amdgpu plugin: only support amdgpu in Linux x86_64, ppc64le, or aarch64 hosts.") + libomptarget_say("Not building AMDGPU plugin: only support AMDGPU in Linux x86_64, ppc64le, or aarch64 hosts.") return() endif() + +if (NOT LLVM_MAIN_INCLUDE_DIR) + libomptarget_say("Not building AMDGPU plugin: Missing definition for LLVM_MAIN_INCLUDE_DIR") + return() +endif() + libomptarget_say("Building amdgpu offloading plugin") ################################################################################ @@ -44,6 +50,7 @@ endif() include_directories( ${CMAKE_CURRENT_SOURCE_DIR}/impl + ${LLVM_MAIN_INCLUDE_DIR} ) add_library(omptarget.rtl.amdgpu SHARED @@ -54,7 +61,7 @@ add_library(omptarget.rtl.amdgpu SHARED impl/system.cpp impl/utils.cpp impl/msgpack.cpp - src/rtl.cpp + src/rtl.cpp ) # Install plugin under the lib destination folder. diff --git a/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp b/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp index 9ba27560d140c6..e0509a5f2b32f1 100644 --- a/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp +++ b/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp @@ -38,66 +38,7 @@ #include "Debug.h" #include "omptargetplugin.h" -// Get static gpu grid values from clang target-specific constants managed -// in the header file llvm/Frontend/OpenMP/OMPGridValues.h -// Copied verbatim to meet the requirement that libomptarget builds without -// a copy of llvm checked out nearby -namespace llvm { -namespace omp { -enum GVIDX { - /// The maximum number of workers in a kernel. - /// (THREAD_ABSOLUTE_LIMIT) - (GV_Warp_Size), might be issue for blockDim.z - GV_Threads, - /// The size reserved for data in a shared memory slot. - GV_Slot_Size, - /// The default value of maximum number of threads in a worker warp. - GV_Warp_Size, - /// Alternate warp size for some AMDGCN architectures. Same as GV_Warp_Size - /// for NVPTX. - GV_Warp_Size_32, - /// The number of bits required to represent the max number of threads in warp - GV_Warp_Size_Log2, - /// GV_Warp_Size * GV_Slot_Size, - GV_Warp_Slot_Size, - /// the maximum number of teams. - GV_Max_Teams, - /// Global Memory Alignment - GV_Mem_Align, - /// (~0u >> (GV_Warp_Size - GV_Warp_Size_Log2)) - GV_Warp_Size_Log2_Mask, - // An alternative to the heavy data sharing infrastructure that uses global - // memory is one that uses device __shared__ memory. The amount of such space - // (in bytes) reserved by the OpenMP runtime is noted here. - GV_SimpleBufferSize, - // The absolute maximum team size for a working group - GV_Max_WG_Size, - // The default maximum team size for a working group - GV_Default_WG_Size, - // This is GV_Max_WG_Size / GV_WarpSize. 32 for NVPTX and 16 for AMDGCN. - GV_Max_Warp_Number, - /// The slot size that should be reserved for a working warp. - /// (~0u >> (GV_Warp_Size - GV_Warp_Size_Log2)) - GV_Warp_Size_Log2_MaskL -}; - -static constexpr unsigned AMDGPUGpuGridValues[] = { - 448, // GV_Threads - 256, // GV_Slot_Size - 64, // GV_Warp_Size - 32, // GV_Warp_Size_32 - 6, // GV_Warp_Size_Log2 - 64 * 256, // GV_Warp_Slot_Size - 128, // GV_Max_Teams - 256, // GV_Mem_Align - 63, // GV_Warp_Size_Log2_Mask - 896, // GV_SimpleBufferSize - 1024, // GV_Max_WG_Size, - 256, // GV_Defaut_WG_Size - 1024 / 64, // GV_Max_WG_Size / GV_WarpSize - 63 // GV_Warp_Size_Log2_MaskL -}; -} // namespace omp -} // namespace llvm +#include "llvm/Frontend/OpenMP/OMPGridValues.h" #ifndef TARGET_NAME #define TARGET_NAME AMDHSA