Skip to content

Commit

Permalink
merge main into amd-staging
Browse files Browse the repository at this point in the history
Change-Id: Ifd894fe72a12b977fa159bde1b206637f42dffa9
  • Loading branch information
Jenkins committed Nov 10, 2024
2 parents 9a0cb31 + 7085ac8 commit e553a98
Show file tree
Hide file tree
Showing 35 changed files with 1,352 additions and 484 deletions.
8 changes: 8 additions & 0 deletions clang/test/Driver/riscv-profiles.c
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@
// RVA22U64: "-target-feature" "+f"
// RVA22U64: "-target-feature" "+d"
// RVA22U64: "-target-feature" "+c"
// RVA22U64: "-target-feature" "+b"
// RVA22U64: "-target-feature" "+zic64b"
// RVA22U64: "-target-feature" "+zicbom"
// RVA22U64: "-target-feature" "+zicbop"
Expand All @@ -83,6 +84,7 @@
// RVA22S64: "-target-feature" "+f"
// RVA22S64: "-target-feature" "+d"
// RVA22S64: "-target-feature" "+c"
// RVA22S64: "-target-feature" "+b"
// RVA22S64: "-target-feature" "+zic64b"
// RVA22S64: "-target-feature" "+zicbom"
// RVA22S64: "-target-feature" "+zicbop"
Expand Down Expand Up @@ -118,6 +120,7 @@
// RVA23U64: "-target-feature" "+f"
// RVA23U64: "-target-feature" "+d"
// RVA23U64: "-target-feature" "+c"
// RVA23U64: "-target-feature" "+b"
// RVA23U64: "-target-feature" "+v"
// RVA23U64: "-target-feature" "+zic64b"
// RVA23U64: "-target-feature" "+zicbom"
Expand Down Expand Up @@ -156,6 +159,7 @@
// RVA23S64: "-target-feature" "+f"
// RVA23S64: "-target-feature" "+d"
// RVA23S64: "-target-feature" "+c"
// RVA23S64: "-target-feature" "+b"
// RVA23S64: "-target-feature" "+v"
// RVA23S64: "-target-feature" "+h"
// RVA23S64: "-target-feature" "+zic64b"
Expand Down Expand Up @@ -217,6 +221,7 @@
// RVB23U64: "-target-feature" "+f"
// RVB23U64: "-target-feature" "+d"
// RVB23U64: "-target-feature" "+c"
// RVB23U64: "-target-feature" "+b"
// RVB23U64: "-target-feature" "+zic64b"
// RVB23U64: "-target-feature" "+zicbom"
// RVB23U64: "-target-feature" "+zicbop"
Expand Down Expand Up @@ -249,6 +254,7 @@
// RVB23S64: "-target-feature" "+f"
// RVB23S64: "-target-feature" "+d"
// RVB23S64: "-target-feature" "+c"
// RVB23S64: "-target-feature" "+b"
// RVB23S64: "-target-feature" "+zic64b"
// RVB23S64: "-target-feature" "+zicbom"
// RVB23S64: "-target-feature" "+zicbop"
Expand Down Expand Up @@ -290,6 +296,7 @@
// RUN: %clang --target=riscv32 -### -c %s 2>&1 -march=rvm23u32 -menable-experimental-extensions \
// RUN: | FileCheck -check-prefix=RVM23U32 %s
// RVM23U32: "-target-feature" "+m"
// RVM23U32: "-target-feature" "+b"
// RVM23U32: "-target-feature" "+zicbop"
// RVM23U32: "-target-feature" "+zicond"
// RVM23U32: "-target-feature" "+zicsr"
Expand All @@ -309,6 +316,7 @@
// PROFILE-WITH-ADDITIONAL: "-target-feature" "+f"
// PROFILE-WITH-ADDITIONAL: "-target-feature" "+d"
// PROFILE-WITH-ADDITIONAL: "-target-feature" "+c"
// PROFILE-WITH-ADDITIONAL: "-target-feature" "+b"
// PROFILE-WITH-ADDITIONAL: "-target-feature" "+zicbom"
// PROFILE-WITH-ADDITIONAL: "-target-feature" "+zicbop"
// PROFILE-WITH-ADDITIONAL: "-target-feature" "+zicboz"
Expand Down
6 changes: 3 additions & 3 deletions compiler-rt/lib/ctx_profile/CtxInstrContextNode.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,9 @@
//==============================================================================
//
// NOTE!
// llvm/lib/ProfileData/CtxInstrContextNode.h and
// llvm/include/llvm/ProfileData/CtxInstrContextNode.h and
// compiler-rt/lib/ctx_profile/CtxInstrContextNode.h
// must be exact copies of each other
// must be exact copies of each other.
//
// compiler-rt creates these objects as part of the instrumentation runtime for
// contextual profiling. LLVM only consumes them to convert a contextual tree
Expand Down Expand Up @@ -114,4 +114,4 @@ class ContextNode final {
};
} // namespace ctx_profile
} // namespace llvm
#endif
#endif
12 changes: 7 additions & 5 deletions lld/test/ELF/aarch64-feature-pac.s
Original file line number Diff line number Diff line change
Expand Up @@ -76,12 +76,14 @@
# PACDYN-NOT: 0x0000000070000001 (AARCH64_BTI_PLT)
# PACDYN-NOT: 0x0000000070000003 (AARCH64_PAC_PLT)

## Turn on PAC entries with the -z pac-plt command line option. There are no
## warnings in this case as the choice to use PAC in PLT entries is orthogonal
## to the choice of using PAC in relocatable objects. The presence of the PAC
## .note.gnu.property is an indication of preference by the relocatable object.
## Turn on PAC entries with the -z pac-plt command line option. For files w/o
## GNU_PROPERTY_AARCH64_FEATURE_1_PAC set in GNU_PROPERTY_AARCH64_FEATURE_1_AND
## property, emit a warning.

# RUN: ld.lld %t.o %t2.o -z pac-plt %t.so -o %tpacplt.exe 2>&1 | FileCheck -DFILE=%t2.o --check-prefix WARN %s

# WARN: warning: [[FILE]]: -z pac-plt: file does not have GNU_PROPERTY_AARCH64_FEATURE_1_PAC property

# RUN: ld.lld %t.o %t2.o -z pac-plt %t.so -o %tpacplt.exe
# RUN: llvm-readelf -n %tpacplt.exe | FileCheck --check-prefix=PACPROP %s
# RUN: llvm-readelf --dynamic-table %tpacplt.exe | FileCheck --check-prefix PACDYN2 %s
# RUN: llvm-objdump --no-print-imm-hex -d --mattr=+v8.3a --no-show-raw-insn %tpacplt.exe | FileCheck --check-prefix PACPLT %s
Expand Down
64 changes: 64 additions & 0 deletions llvm/docs/NVPTXUsage.rst
Original file line number Diff line number Diff line change
Expand Up @@ -599,6 +599,70 @@ described in the ``s2g.tile`` mode intrinsics above.
For more information, refer PTX ISA
`<https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#data-movement-and-conversion-instructions-cp-async-bulk-tensor>`_.

'``llvm.nvvm.cp.async.bulk.tensor.prefetch.tile.[1-5]d``'
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

Syntax:
"""""""

.. code-block:: llvm
declare void @llvm.nvvm.cp.async.bulk.tensor.prefetch.tile.1d(ptr %tensor_map, i32 %d0, i64 %ch, i1 %flag_ch)
declare void @llvm.nvvm.cp.async.bulk.tensor.prefetch.tile.2d(..., i32 %d0, i32 %d1, ...)
declare void @llvm.nvvm.cp.async.bulk.tensor.prefetch.tile.3d(..., i32 %d0, i32 %d1, i32 %d2, ...)
declare void @llvm.nvvm.cp.async.bulk.tensor.prefetch.tile.4d(..., i32 %d0, i32 %d1, i32 %d2, i32 %d3, ...)
declare void @llvm.nvvm.cp.async.bulk.tensor.prefetch.tile.5d(..., i32 %d0, i32 %d1, i32 %d2, i32 %d3, i32 %d4, ...)
Overview:
"""""""""

The '``@llvm.nvvm.cp.async.bulk.tensor.prefetch.tile.[1-5]d``' intrinsics
correspond to the ``cp.async.bulk.prefetch.tensor.[1-5]d.L2.global*`` set
of PTX instructions. These instructions initiate an asynchronous prefetch
of tensor data from global memory to the L2 cache. In tile mode, the
multi-dimensional layout of the source tensor is preserved at the destination.
The dimension of the tensor data ranges from 1d to 5d with the coordinates
specified by the ``i32 %d0 ... i32 %d4`` arguments.

* The last argument to these intrinsics is a boolean flag
indicating support for cache_hint. This flag argument must
be a compile-time constant. When set, it indicates a valid
cache_hint (``i64 %ch``) and generates the ``.L2::cache_hint``
variant of the PTX instruction.

For more information, refer PTX ISA
`<https://docs.nvidia.com/cuda/parallel-thread-execution/#data-movement-and-conversion-instructions-cp-async-bulk-prefetch-tensor>`_.

'``llvm.nvvm.cp.async.bulk.tensor.prefetch.im2col.[1-5]d``'
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

Syntax:
"""""""

.. code-block:: llvm
declare void @llvm.nvvm.cp.async.bulk.tensor.prefetch.im2col.3d(ptr %tensor_map, i32 %d0, i32 %d1, i32 %d2, i16 %im2col0, i64 %ch, i1 %flag_ch)
declare void @llvm.nvvm.cp.async.bulk.tensor.prefetch.im2col.4d(..., i32 %d0, i32 %d1, i32 %d2, i32 %d3, i16 %im2col0, i16 %im2col1, ...)
declare void @llvm.nvvm.cp.async.bulk.tensor.prefetch.im2col.5d(..., i32 %d0, i32 %d1, i32 %d2, i32 %d3, i32 %d4, i16 %im2col0, i16 %im2col1, i16 %im2col2, ...)
Overview:
"""""""""

The '``@llvm.nvvm.cp.async.bulk.tensor.prefetch.im2col.[1-5]d``' intrinsics
correspond to the ``cp.async.bulk.prefetch.tensor.[1-5]d.L2.global*`` set
of PTX instructions. These instructions initiate an asynchronous prefetch
of tensor data from global memory to the L2 cache. In im2col mode, some
dimensions of the source tensor are unrolled into a single dimensional
column at the destination. In this mode, the tensor has to be at least
three-dimensional. Along with the tensor coordinates, im2col offsets are
also specified (denoted by ``i16 im2col0...i16 %im2col2``). The number
of im2col offsets is two less than the number of dimensions of the tensor
operation. The last argument to these intrinsics is a boolean flag, with
the same functionality as described in the ``tile`` mode intrinsics above.

For more information, refer PTX ISA
`<https://docs.nvidia.com/cuda/parallel-thread-execution/#data-movement-and-conversion-instructions-cp-async-bulk-prefetch-tensor>`_.

Other Intrinsics
----------------

Expand Down
7 changes: 7 additions & 0 deletions llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -1102,6 +1102,13 @@ class LegalizeRuleSet {
return minScalar(TypeIdx, MinTy).maxScalar(TypeIdx, MaxTy);
}

LegalizeRuleSet &clampScalar(bool Pred, unsigned TypeIdx, const LLT MinTy,
const LLT MaxTy) {
if (!Pred)
return *this;
return clampScalar(TypeIdx, MinTy, MaxTy);
}

/// Limit the range of scalar sizes to MinTy and MaxTy.
LegalizeRuleSet &clampScalarOrElt(unsigned TypeIdx, const LLT MinTy,
const LLT MaxTy) {
Expand Down
97 changes: 2 additions & 95 deletions llvm/include/llvm/ExecutionEngine/Orc/Core.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,9 @@
#include "llvm/ADT/IntrusiveRefCntPtr.h"
#include "llvm/ExecutionEngine/JITLink/JITLinkDylib.h"
#include "llvm/ExecutionEngine/JITSymbol.h"
#include "llvm/ExecutionEngine/Orc/CoreContainers.h"
#include "llvm/ExecutionEngine/Orc/ExecutorProcessControl.h"
#include "llvm/ExecutionEngine/Orc/MaterializationUnit.h"
#include "llvm/ExecutionEngine/Orc/Shared/ExecutorAddress.h"
#include "llvm/ExecutionEngine/Orc/Shared/ExecutorSymbolDef.h"
#include "llvm/ExecutionEngine/Orc/Shared/WrapperFunctionUtils.h"
Expand All @@ -39,7 +41,6 @@ namespace orc {
// Forward declare some classes.
class AsynchronousSymbolQuery;
class ExecutionSession;
class MaterializationUnit;
class MaterializationResponsibility;
class JITDylib;
class ResourceTracker;
Expand Down Expand Up @@ -109,23 +110,6 @@ class ResourceManager {
ResourceKey SrcK) = 0;
};

/// A set of symbol names (represented by SymbolStringPtrs for
// efficiency).
using SymbolNameSet = DenseSet<SymbolStringPtr>;

/// A vector of symbol names.
using SymbolNameVector = std::vector<SymbolStringPtr>;

/// A map from symbol names (as SymbolStringPtrs) to JITSymbols
/// (address/flags pairs).
using SymbolMap = DenseMap<SymbolStringPtr, ExecutorSymbolDef>;

/// A map from symbol names (as SymbolStringPtrs) to JITSymbolFlags.
using SymbolFlagsMap = DenseMap<SymbolStringPtr, JITSymbolFlags>;

/// A map from JITDylibs to sets of symbols.
using SymbolDependenceMap = DenseMap<JITDylib *, SymbolNameSet>;

/// Lookup flags that apply to each dylib in the search order for a lookup.
///
/// If MatchHiddenSymbolsOnly is used (the default) for a given dylib, then
Expand Down Expand Up @@ -682,83 +666,6 @@ class MaterializationResponsibility {
SymbolStringPtr InitSymbol;
};

/// A MaterializationUnit represents a set of symbol definitions that can
/// be materialized as a group, or individually discarded (when
/// overriding definitions are encountered).
///
/// MaterializationUnits are used when providing lazy definitions of symbols to
/// JITDylibs. The JITDylib will call materialize when the address of a symbol
/// is requested via the lookup method. The JITDylib will call discard if a
/// stronger definition is added or already present.
class MaterializationUnit {
friend class ExecutionSession;
friend class JITDylib;

public:
static char ID;

struct Interface {
Interface() = default;
Interface(SymbolFlagsMap InitalSymbolFlags, SymbolStringPtr InitSymbol)
: SymbolFlags(std::move(InitalSymbolFlags)),
InitSymbol(std::move(InitSymbol)) {
assert((!this->InitSymbol || this->SymbolFlags.count(this->InitSymbol)) &&
"If set, InitSymbol should appear in InitialSymbolFlags map");
}

SymbolFlagsMap SymbolFlags;
SymbolStringPtr InitSymbol;
};

MaterializationUnit(Interface I)
: SymbolFlags(std::move(I.SymbolFlags)),
InitSymbol(std::move(I.InitSymbol)) {}
virtual ~MaterializationUnit() = default;

/// Return the name of this materialization unit. Useful for debugging
/// output.
virtual StringRef getName() const = 0;

/// Return the set of symbols that this source provides.
const SymbolFlagsMap &getSymbols() const { return SymbolFlags; }

/// Returns the initialization symbol for this MaterializationUnit (if any).
const SymbolStringPtr &getInitializerSymbol() const { return InitSymbol; }

/// Implementations of this method should materialize all symbols
/// in the materialzation unit, except for those that have been
/// previously discarded.
virtual void
materialize(std::unique_ptr<MaterializationResponsibility> R) = 0;

/// Called by JITDylibs to notify MaterializationUnits that the given symbol
/// has been overridden.
void doDiscard(const JITDylib &JD, const SymbolStringPtr &Name) {
SymbolFlags.erase(Name);
if (InitSymbol == Name) {
DEBUG_WITH_TYPE("orc", {
dbgs() << "In " << getName() << ": discarding init symbol \""
<< *Name << "\"\n";
});
InitSymbol = nullptr;
}
discard(JD, std::move(Name));
}

protected:
SymbolFlagsMap SymbolFlags;
SymbolStringPtr InitSymbol;

private:
virtual void anchor();

/// Implementations of this method should discard the given symbol
/// from the source (e.g. if the source is an LLVM IR Module and the
/// symbol is a function, delete the function body or mark it available
/// externally).
virtual void discard(const JITDylib &JD, const SymbolStringPtr &Name) = 0;
};

/// A MaterializationUnit implementation for pre-existing absolute symbols.
///
/// All symbols will be resolved and marked ready as soon as the unit is
Expand Down
47 changes: 47 additions & 0 deletions llvm/include/llvm/ExecutionEngine/Orc/CoreContainers.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
//===---- CoreContainers.h - Symbol Containers for Core APIs ----*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Symbol container types for core ORC APIs.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_EXECUTIONENGINE_ORC_CORECONTAINERS_H
#define LLVM_EXECUTIONENGINE_ORC_CORECONTAINERS_H

#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ExecutionEngine/JITSymbol.h"
#include "llvm/ExecutionEngine/Orc/Shared/ExecutorSymbolDef.h"
#include "llvm/ExecutionEngine/Orc/SymbolStringPool.h"

#include <vector>

namespace llvm::orc {

class JITDylib;

/// A set of symbol names (represented by SymbolStringPtrs for
// efficiency).
using SymbolNameSet = DenseSet<SymbolStringPtr>;

/// A vector of symbol names.
using SymbolNameVector = std::vector<SymbolStringPtr>;

/// A map from symbol names (as SymbolStringPtrs) to JITSymbols
/// (address/flags pairs).
using SymbolMap = DenseMap<SymbolStringPtr, ExecutorSymbolDef>;

/// A map from symbol names (as SymbolStringPtrs) to JITSymbolFlags.
using SymbolFlagsMap = DenseMap<SymbolStringPtr, JITSymbolFlags>;

/// A map from JITDylibs to sets of symbols.
using SymbolDependenceMap = DenseMap<JITDylib *, SymbolNameSet>;

} // End namespace llvm::orc

#endif // LLVM_EXECUTIONENGINE_ORC_CORECONTAINERS_H
Loading

0 comments on commit e553a98

Please sign in to comment.