forked from GPUOpen-Drivers/llvm-project
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merged master:d0b312955f12 into amd-gfx:4c4f72e10e54
Local branch amd-gfx 4c4f72e Merged master:22cbe40fa997 into amd-gfx:b419d0d534fb Remote branch master d0b3129 [libomptarget] Implement host plugin for amdgpu
- Loading branch information
Showing
24 changed files
with
5,203 additions
and
4 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Binary file added
BIN
+480 Bytes
llvm/test/ExecutionEngine/JITLink/X86/Inputs/MachO_x86-64_was_private_extern.o
Binary file not shown.
9 changes: 9 additions & 0 deletions
9
llvm/test/ExecutionEngine/JITLink/X86/MachO_x86-64_was_private_extern.test
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
# RUN: llvm-jitlink -noexec %S/Inputs/MachO_x86-64_was_private_extern.o | ||
# | ||
# Perform a no-exec link of MachO_x86-64_was_private_extern.o and verify that | ||
# it does not generate any errors despite the presence of a 'was private | ||
# extern' symbol (N_PEXT set, N_EXT unset). | ||
# | ||
# The test case for this is a relocatable object file rather than assembly, | ||
# because 'was private extern' symbols are only produced by running objects | ||
# through ld64's 'ld -r' mode, and we can't assume ld64 is available everywhere. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,84 @@ | ||
##===----------------------------------------------------------------------===## | ||
# | ||
# The LLVM Compiler Infrastructure | ||
# | ||
# This file is dual licensed under the MIT and the University of Illinois Open | ||
# Source Licenses. See LICENSE.txt for details. | ||
# | ||
##===----------------------------------------------------------------------===## | ||
# | ||
# Build a plugin for an AMDGPU machine if available. | ||
# | ||
##===----------------------------------------------------------------------===## | ||
|
||
################################################################################ | ||
|
||
# The plugin is linked against libelf, so skip it when libelf was not found.
if(NOT LIBOMPTARGET_DEP_LIBELF_FOUND)
  libomptarget_say("Not building AMDGPU plugin: LIBELF not found")
  return()
endif()

# Every HSA header and library path below is derived from ROCM_DIR.
if(NOT ROCM_DIR)
  libomptarget_say("Not building AMDGPU plugin: ROCM_DIR is not set")
  return()
endif()

set(LIBOMPTARGET_DEP_LIBHSA_INCLUDE_DIRS ${ROCM_DIR}/hsa/include ${ROCM_DIR}/hsa/include/hsa)
set(LIBOMPTARGET_DEP_LIBHSA_LIBRARIES_DIRS ${ROCM_DIR}/hsa/lib)
set(LIBOMPTARGET_DEP_LIBHSAKMT_LIBRARIES_DIRS ${ROCM_DIR}/lib)

mark_as_advanced(LIBOMPTARGET_DEP_LIBHSA_INCLUDE_DIRS LIBOMPTARGET_DEP_LIBHSA_LIBRARIES_DIRS)

# Only Linux hosts running on x86_64, ppc64le or aarch64 are supported.
# The conjunction must be parenthesized: if() evaluates NOT before AND, so
# "NOT <cpu-ok> AND <is-linux>" would only reject unsupported CPUs on Linux
# and would let every non-Linux host fall through to the build.
if(NOT (CMAKE_SYSTEM_PROCESSOR MATCHES "(x86_64)|(ppc64le)|(aarch64)$" AND CMAKE_SYSTEM_NAME MATCHES "Linux"))
  libomptarget_say("Not building amdgpu plugin: only support amdgpu in Linux x86_64, ppc64le, or aarch64 hosts.")
  return()
endif()
libomptarget_say("Building amdgpu offloading plugin using ROCM_DIR = ${ROCM_DIR}")

# Echo the derived search paths to ease diagnosing a wrong ROCM_DIR.
libomptarget_say("LIBOMPTARGET_DEP_LIBHSA_INCLUDE_DIRS: ${LIBOMPTARGET_DEP_LIBHSA_INCLUDE_DIRS}")
libomptarget_say("LIBOMPTARGET_DEP_LIBHSA_LIBRARIES_DIRS: ${LIBOMPTARGET_DEP_LIBHSA_LIBRARIES_DIRS}")
libomptarget_say("LIBOMPTARGET_DEP_LIBHSAKMT_LIBRARIES_DIRS: ${LIBOMPTARGET_DEP_LIBHSAKMT_LIBRARIES_DIRS}")
|
||
################################################################################ | ||
# Build the plugin as a shared library from the sources under impl/ plus
# src/rtl.cpp.
add_library(omptarget.rtl.amdgpu SHARED
  impl/atmi.cpp
  impl/atmi_interop_hsa.cpp
  impl/data.cpp
  impl/machine.cpp
  impl/system.cpp
  impl/utils.cpp
  impl/msgpack.cpp
  src/rtl.cpp
)

# Compile definitions, attached to the plugin target rather than to the whole
# directory:
#   TARGET_NAME       suffix for the runtime messaging dumps
#   LITTLEENDIAN_CPU  defined on ppc64le and aarch64 hosts
#   DEBUG             defined for Debug configurations; the generator
#                     expression also covers multi-config generators, where
#                     CMAKE_BUILD_TYPE is empty at configure time
target_compile_definitions(omptarget.rtl.amdgpu PRIVATE TARGET_NAME=AMDGPU)
if(CMAKE_SYSTEM_PROCESSOR MATCHES "(ppc64le)|(aarch64)$")
  target_compile_definitions(omptarget.rtl.amdgpu PRIVATE LITTLEENDIAN_CPU=1)
endif()
target_compile_definitions(omptarget.rtl.amdgpu PRIVATE $<$<CONFIG:Debug>:DEBUG>)

# HSA headers from the ROCm installation plus the plugin's own impl/ headers.
target_include_directories(omptarget.rtl.amdgpu PRIVATE
  ${LIBOMPTARGET_DEP_LIBHSA_INCLUDE_DIRS}
  ${CMAKE_CURRENT_SOURCE_DIR}/impl
)

# Install plugin under the lib destination folder.
# When we build for debug, OPENMP_LIBDIR_SUFFIX gets set to -debug
install(TARGETS omptarget.rtl.amdgpu LIBRARY DESTINATION "lib${OPENMP_LIBDIR_SUFFIX}")

# Nothing links against the plugin, so all link items are PRIVATE.
# The version script restricts exported symbols to those listed in ../exports,
# and -z,defs makes unresolved symbols a link-time error.
target_link_libraries(
  omptarget.rtl.amdgpu
  PRIVATE
  -lpthread -ldl -Wl,-rpath,${OPENMP_INSTALL_LIBDIR}
  -L${LIBOMPTARGET_DEP_LIBHSA_LIBRARIES_DIRS} -L${LIBOMPTARGET_DEP_LIBHSAKMT_LIBRARIES_DIRS} -lhsa-runtime64 -lhsakmt -Wl,-rpath,${LIBOMPTARGET_DEP_LIBHSA_LIBRARIES_DIRS},-rpath,${LIBOMPTARGET_DEP_LIBHSAKMT_LIBRARIES_DIRS}
  -lelf
  "-Wl,--version-script=${CMAKE_CURRENT_SOURCE_DIR}/../exports"
  "-Wl,-z,defs"
)

# Report to the parent scope that we are building a plugin for amdgpu
set(LIBOMPTARGET_SYSTEM_TARGETS "${LIBOMPTARGET_SYSTEM_TARGETS} amdgcn-amd-amdhsa" PARENT_SCOPE)
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,44 @@ | ||
/*===-------------------------------------------------------------------------- | ||
* ATMI (Asynchronous Task and Memory Interface) | ||
* | ||
* This file is distributed under the MIT License. See LICENSE.txt for details. | ||
*===------------------------------------------------------------------------*/ | ||
#include "rt.h" | ||
/* | ||
* Initialize/Finalize | ||
*/ | ||
// Public entry point that forwards to core::Runtime::Initialize() and returns
// its status unchanged.
atmi_status_t atmi_init() {
  atmi_status_t status = core::Runtime::Initialize();
  return status;
}
|
||
// Public entry point that forwards to core::Runtime::Finalize() and returns
// its status unchanged.
atmi_status_t atmi_finalize() {
  atmi_status_t status = core::Runtime::Finalize();
  return status;
}
|
||
/* | ||
* Machine Info | ||
*/ | ||
// Returns whatever machine description pointer
// core::Runtime::GetMachineInfo() reports.
atmi_machine_t *atmi_machine_get_info() {
  atmi_machine_t *machine = core::Runtime::GetMachineInfo();
  return machine;
}
|
||
/* | ||
* Modules | ||
*/ | ||
// Forwards a module registration request to the runtime instance returned by
// core::Runtime::getInstance(). All arguments, including the
// on_deserialized_data callback and its cb_state, are passed through
// untouched, and the runtime's status is returned as-is.
atmi_status_t atmi_module_register_from_memory_to_place(
    void *module_bytes, size_t module_size, atmi_place_t place,
    atmi_status_t (*on_deserialized_data)(void *data, size_t size,
                                          void *cb_state),
    void *cb_state) {
  auto &&runtime = core::Runtime::getInstance();
  return runtime.RegisterModuleFromMemory(module_bytes, module_size, place,
                                          on_deserialized_data, cb_state);
}
|
||
/* | ||
* Data | ||
*/ | ||
// Forwards to core::Runtime::Memcpy(dest, src, size) and returns its status.
atmi_status_t atmi_memcpy(void *dest, const void *src, size_t size) {
  atmi_status_t status = core::Runtime::Memcpy(dest, src, size);
  return status;
}
|
||
// Forwards to core::Runtime::Memfree(ptr) and returns its status.
atmi_status_t atmi_free(void *ptr) {
  atmi_status_t status = core::Runtime::Memfree(ptr);
  return status;
}
|
||
// Forwards to core::Runtime::Malloc(ptr, size, place) and returns its status.
atmi_status_t atmi_malloc(void **ptr, size_t size, atmi_mem_place_t place) {
  atmi_status_t status = core::Runtime::Malloc(ptr, size, place);
  return status;
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,203 @@ | ||
/*===-------------------------------------------------------------------------- | ||
* ATMI (Asynchronous Task and Memory Interface) | ||
* | ||
* This file is distributed under the MIT License. See LICENSE.txt for details. | ||
*===------------------------------------------------------------------------*/ | ||
#ifndef INCLUDE_ATMI_H_ | ||
#define INCLUDE_ATMI_H_ | ||
|
||
#define ROCM_VERSION_MAJOR 3 | ||
#define ROCM_VERSION_MINOR 2 | ||
|
||
/** \defgroup enumerations Enumerated Types | ||
* @{ | ||
*/ | ||
|
||
/**
 * @brief Status codes.
 */
typedef enum atmi_status_t {
  /** The function has been executed successfully. */
  ATMI_STATUS_SUCCESS = 0,
  /** An undocumented error has occurred. */
  ATMI_STATUS_UNKNOWN = 1,
  /** A generic error has occurred. */
  ATMI_STATUS_ERROR = 2
} atmi_status_t;
|
||
/**
 * @brief Device Types.
 *
 * Each basic type (CPU, iGPU, dGPU) uses its own hexadecimal digit, and the
 * combined values are bitwise ORs of the basic ones.
 */
typedef enum atmi_devtype_s {
  ATMI_DEVTYPE_CPU = 0x0001,
  /** Integrated GPU */
  ATMI_DEVTYPE_iGPU = 0x0010,
  /** Discrete GPU */
  ATMI_DEVTYPE_dGPU = 0x0100,
  /** Any GPU */
  ATMI_DEVTYPE_GPU = ATMI_DEVTYPE_iGPU | ATMI_DEVTYPE_dGPU,
  /** Union of all device types (0x111) */
  ATMI_DEVTYPE_ALL = ATMI_DEVTYPE_CPU | ATMI_DEVTYPE_GPU
} atmi_devtype_t;
|
||
/**
 * @brief Memory Access Type.
 */
typedef enum atmi_memtype_s {
  /** Fine-grained memory */
  ATMI_MEMTYPE_FINE_GRAINED = 0,
  /** Coarse-grained memory */
  ATMI_MEMTYPE_COARSE_GRAINED = 1,
  /** Either of the above */
  ATMI_MEMTYPE_ANY = 2
} atmi_memtype_t;
|
||
/**
 * @brief ATMI Memory Fences for Tasks.
 */
typedef enum atmi_task_fence_scope_s {
  /** No memory fence applied; external fences have to be applied around the
   *  task launch/completion. */
  ATMI_FENCE_SCOPE_NONE = 0,
  /** The fence is applied to the device. */
  ATMI_FENCE_SCOPE_DEVICE = 1,
  /** The fence is applied to the entire system. */
  ATMI_FENCE_SCOPE_SYSTEM = 2
} atmi_task_fence_scope_t;
|
||
/** @} */ | ||
|
||
/** \defgroup common Common ATMI Structures | ||
* @{ | ||
*/ | ||
|
||
/** | ||
* @brief ATMI Compute Place | ||
*/ | ||
typedef struct atmi_place_s { | ||
/** | ||
* The node in a cluster where computation should occur. | ||
* Default is node_id = 0 for local computations. | ||
*/ | ||
unsigned int node_id; | ||
/** | ||
* Device type: CPU, GPU or DSP | ||
*/ | ||
atmi_devtype_t type; | ||
/** | ||
* The device ordinal number ordered by runtime; -1 for any | ||
*/ | ||
int device_id; | ||
} atmi_place_t; | ||
|
||
/** | ||
* @brief ATMI Memory Place | ||
*/ | ||
typedef struct atmi_mem_place_s { | ||
/** | ||
* The node in a cluster where computation should occur. | ||
* Default is node_id = 0 for local computations. | ||
*/ | ||
unsigned int node_id; | ||
/** | ||
* Device type: CPU, GPU or DSP | ||
*/ | ||
atmi_devtype_t dev_type; | ||
/** | ||
* The device ordinal number ordered by runtime; -1 for any | ||
*/ | ||
int dev_id; | ||
// atmi_memtype_t mem_type; // Fine grained or Coarse grained | ||
/** | ||
* The memory space/region ordinal number ordered by runtime; -1 for any | ||
*/ | ||
int mem_id; | ||
} atmi_mem_place_t; | ||
|
||
/** | ||
* @brief ATMI Memory Space/region Structure | ||
*/ | ||
typedef struct atmi_memory_s { | ||
/** | ||
* Memory capacity | ||
*/ | ||
unsigned long int capacity; | ||
/** | ||
* Memory type | ||
*/ | ||
atmi_memtype_t type; | ||
} atmi_memory_t; | ||
|
||
/** | ||
* @brief ATMI Device Structure | ||
*/ | ||
typedef struct atmi_device_s { | ||
/** | ||
* Device type: CPU, GPU or DSP | ||
*/ | ||
atmi_devtype_t type; | ||
/** | ||
* The number of compute cores | ||
*/ | ||
unsigned int core_count; | ||
/** | ||
* The number of memory spaces/regions that are accessible | ||
* from this device | ||
*/ | ||
unsigned int memory_count; | ||
/** | ||
* Array of memory spaces/regions that are accessible | ||
* from this device. | ||
*/ | ||
atmi_memory_t *memories; | ||
} atmi_device_t; | ||
|
||
/** | ||
* @brief ATMI Machine Structure | ||
*/ | ||
typedef struct atmi_machine_s { | ||
/** | ||
* The number of devices categorized by the device type | ||
*/ | ||
unsigned int device_count_by_type[ATMI_DEVTYPE_ALL]; | ||
/** | ||
* The device structures categorized by the device type | ||
*/ | ||
atmi_device_t *devices_by_type[ATMI_DEVTYPE_ALL]; | ||
} atmi_machine_t; | ||
|
||
// Convenience initializers for the ATMI place structures. Each macro expands
// to a brace-enclosed designated initializer, so it is meant to be used on
// the right-hand side of a declaration, e.g.
//   atmi_mem_place_t p = ATMI_MEM_PLACE_GPU(0, 1);

// Compute places (atmi_place_t) on a given node.
#define ATMI_PLACE_CPU(node, cpu_id)                                           \
  { .node_id = node, .type = ATMI_DEVTYPE_CPU, .device_id = cpu_id }
#define ATMI_PLACE_GPU(node, gpu_id)                                           \
  { .node_id = node, .type = ATMI_DEVTYPE_GPU, .device_id = gpu_id }

// Memory places (atmi_mem_place_t) with an explicit memory region ordinal.
#define ATMI_MEM_PLACE_CPU_MEM(node, cpu_id, cpu_mem_id)                       \
  {                                                                            \
    .node_id = node, .dev_type = ATMI_DEVTYPE_CPU, .dev_id = cpu_id,           \
    .mem_id = cpu_mem_id                                                       \
  }
#define ATMI_MEM_PLACE_GPU_MEM(node, gpu_id, gpu_mem_id)                       \
  {                                                                            \
    .node_id = node, .dev_type = ATMI_DEVTYPE_GPU, .dev_id = gpu_id,           \
    .mem_id = gpu_mem_id                                                       \
  }

// Memory places that accept any memory region (mem_id = -1).
#define ATMI_MEM_PLACE_CPU(node, cpu_id) ATMI_MEM_PLACE_CPU_MEM(node, cpu_id, -1)
#define ATMI_MEM_PLACE_GPU(node, gpu_id) ATMI_MEM_PLACE_GPU_MEM(node, gpu_id, -1)

// Memory place on node 0 with caller-chosen device type, device and region.
#define ATMI_MEM_PLACE(d_type, d_id, m_id)                                     \
  { .node_id = 0, .dev_type = d_type, .dev_id = d_id, .mem_id = m_id }
|
||
#endif // INCLUDE_ATMI_H_ |
Oops, something went wrong.