Skip to content

Commit

Permalink
Merged master:d0b312955f12 into amd-gfx:4c4f72e10e54
Browse files Browse the repository at this point in the history
Local branch amd-gfx 4c4f72e Merged master:22cbe40fa997 into amd-gfx:b419d0d534fb
Remote branch master d0b3129 [libomptarget] Implement host plugin for amdgpu
  • Loading branch information
Sw authored and Sw committed Aug 15, 2020
2 parents 4c4f72e + d0b3129 commit 5ffe73c
Show file tree
Hide file tree
Showing 24 changed files with 5,203 additions and 4 deletions.
4 changes: 1 addition & 3 deletions llvm/lib/ExecutionEngine/JITLink/MachOLinkGraphBuilder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -64,10 +64,8 @@ Linkage MachOLinkGraphBuilder::getLinkage(uint16_t Desc) {
}

Scope MachOLinkGraphBuilder::getScope(StringRef Name, uint8_t Type) {
if (Type & MachO::N_PEXT)
return Scope::Hidden;
if (Type & MachO::N_EXT) {
if (Name.startswith("l"))
if ((Type & MachO::N_PEXT) || Name.startswith("l"))
return Scope::Hidden;
else
return Scope::Default;
Expand Down
Binary file not shown.
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# RUN: llvm-jitlink -noexec %S/Inputs/MachO_x86-64_was_private_extern.o
#
# Perform a no-exec link of MachO_x86-64_was_private_extern.o and verify that
# it does not generate any errors despite the presence of a 'was private
# extern' symbol (N_PEXT set, N_EXT unset).
#
# The test case for this is a relocatable object file rather than assembly as
# objects must be run through ld64's 'ld -r' mode to produce them and we can't
# assume that that is available everywhere.
1 change: 1 addition & 0 deletions openmp/libomptarget/plugins/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@ endif()
endmacro()

add_subdirectory(aarch64)
add_subdirectory(amdgpu)
add_subdirectory(cuda)
add_subdirectory(ppc64)
add_subdirectory(ppc64le)
Expand Down
84 changes: 84 additions & 0 deletions openmp/libomptarget/plugins/amdgpu/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
##===----------------------------------------------------------------------===##
#
# The LLVM Compiler Infrastructure
#
# This file is dual licensed under the MIT and the University of Illinois Open
# Source Licenses. See LICENSE.txt for details.
#
##===----------------------------------------------------------------------===##
#
# Build a plugin for an AMDGPU machine if available.
#
##===----------------------------------------------------------------------===##

################################################################################

# Prerequisite 1: libelf. The plugin is linked with -lelf, so stop processing
# this directory when LIBOMPTARGET_DEP_LIBELF_FOUND is false.
if(NOT LIBOMPTARGET_DEP_LIBELF_FOUND)
  libomptarget_say("Not building AMDGPU plugin: LIBELF not found")
  return()
endif()

# Prerequisite 2: a ROCm installation prefix. All HSA include and library
# paths used by this file are derived from ROCM_DIR.
if(NOT ROCM_DIR)
  libomptarget_say("Not building AMDGPU plugin: ROCM_DIR is not set")
  return()
endif()

# HSA header and library locations, all relative to the ROCm prefix.
set(LIBOMPTARGET_DEP_LIBHSA_INCLUDE_DIRS
  ${ROCM_DIR}/hsa/include
  ${ROCM_DIR}/hsa/include/hsa
)
set(LIBOMPTARGET_DEP_LIBHSA_LIBRARIES_DIRS ${ROCM_DIR}/hsa/lib)
set(LIBOMPTARGET_DEP_LIBHSAKMT_LIBRARIES_DIRS ${ROCM_DIR}/lib)

mark_as_advanced(
  LIBOMPTARGET_DEP_LIBHSA_INCLUDE_DIRS
  LIBOMPTARGET_DEP_LIBHSA_LIBRARIES_DIRS
)

# The plugin is only supported on Linux hosts running x86_64, ppc64le or
# aarch64. In if(), NOT binds tighter than AND, so the whole conjunction must
# be parenthesized before it is negated. Without the parentheses the guard
# only triggered on Linux with an unsupported processor and let every
# non-Linux host through, contradicting the message below.
if(NOT (CMAKE_SYSTEM_PROCESSOR MATCHES "(x86_64)|(ppc64le)|(aarch64)$"
        AND CMAKE_SYSTEM_NAME MATCHES "Linux"))
  libomptarget_say("Not building amdgpu plugin: only support amdgpu in Linux x86_64, ppc64le, or aarch64 hosts.")
  return()
endif()
# Report the ROCm prefix in use and the HSA paths derived from it.
# NOTE(review): the LIBHSA_LIBRARIES_DIRS message lacks the ':' separator that
# the other two messages have; it is emitted text, so it is left unchanged.
libomptarget_say("Building amdgpu offloading plugin using ROCM_DIR = ${ROCM_DIR}")

libomptarget_say("LIBOMPTARGET_DEP_LIBHSA_INCLUDE_DIRS: ${LIBOMPTARGET_DEP_LIBHSA_INCLUDE_DIRS}")
libomptarget_say("LIBOMPTARGET_DEP_LIBHSA_LIBRARIES_DIRS ${LIBOMPTARGET_DEP_LIBHSA_LIBRARIES_DIRS}")
libomptarget_say("LIBOMPTARGET_DEP_LIBHSAKMT_LIBRARIES_DIRS: ${LIBOMPTARGET_DEP_LIBHSAKMT_LIBRARIES_DIRS}")

################################################################################
# Define the suffix for the runtime messaging dumps.
add_definitions(-DTARGET_NAME=AMDGPU)

# LITTLEENDIAN_CPU=1 is defined only when the host processor name ends in
# ppc64le or aarch64.
if(CMAKE_SYSTEM_PROCESSOR MATCHES "(ppc64le)|(aarch64)$")
  add_definitions(-DLITTLEENDIAN_CPU=1)
endif()

# DEBUG is defined when the configured build type matches "Debug".
if(CMAKE_BUILD_TYPE MATCHES Debug)
  add_definitions(-DDEBUG)
endif()

# Header search path: the HSA headers plus this plugin's impl/ directory.
include_directories(
  ${LIBOMPTARGET_DEP_LIBHSA_INCLUDE_DIRS}
  ${CMAKE_CURRENT_SOURCE_DIR}/impl
)

# The plugin itself: a shared library built from the sources under impl/ and
# the runtime-library entry points in src/rtl.cpp.
add_library(omptarget.rtl.amdgpu SHARED
  impl/atmi.cpp
  impl/atmi_interop_hsa.cpp
  impl/data.cpp
  impl/machine.cpp
  impl/system.cpp
  impl/utils.cpp
  impl/msgpack.cpp
  src/rtl.cpp
)

# Install the plugin under the lib destination folder.
# When we build for debug, OPENMP_LIBDIR_SUFFIX gets set to -debug.
install(
  TARGETS omptarget.rtl.amdgpu
  LIBRARY DESTINATION "lib${OPENMP_LIBDIR_SUFFIX}"
)

# Link items are passed through verbatim, in this order: system libraries,
# the OpenMP install rpath, the HSA runtime and thunk libraries with their
# search paths and rpaths, libelf, then the shared export map and the
# "no undefined symbols" check.
target_link_libraries(
  omptarget.rtl.amdgpu
  -lpthread
  -ldl
  -Wl,-rpath,${OPENMP_INSTALL_LIBDIR}
  -L${LIBOMPTARGET_DEP_LIBHSA_LIBRARIES_DIRS}
  -L${LIBOMPTARGET_DEP_LIBHSAKMT_LIBRARIES_DIRS}
  -lhsa-runtime64
  -lhsakmt
  -Wl,-rpath,${LIBOMPTARGET_DEP_LIBHSA_LIBRARIES_DIRS},-rpath,${LIBOMPTARGET_DEP_LIBHSAKMT_LIBRARIES_DIRS}
  -lelf
  "-Wl,--version-script=${CMAKE_CURRENT_SOURCE_DIR}/../exports"
  "-Wl,-z,defs"
)

# Report to the parent scope that we are building a plugin for amdgpu by
# appending the amdgcn-amd-amdhsa triple to LIBOMPTARGET_SYSTEM_TARGETS.
# The value is extended as a single space-separated string, not a CMake list.
set(LIBOMPTARGET_SYSTEM_TARGETS "${LIBOMPTARGET_SYSTEM_TARGETS} amdgcn-amd-amdhsa" PARENT_SCOPE)

44 changes: 44 additions & 0 deletions openmp/libomptarget/plugins/amdgpu/impl/atmi.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
/*===--------------------------------------------------------------------------
* ATMI (Asynchronous Task and Memory Interface)
*
* This file is distributed under the MIT License. See LICENSE.txt for details.
*===------------------------------------------------------------------------*/
#include "rt.h"
/*
* Initialize/Finalize
*/
/* Public entry point: forwards to core::Runtime::Initialize() and returns its
 * status unchanged. */
atmi_status_t atmi_init() {
  return core::Runtime::Initialize();
}

/* Public entry point: forwards to core::Runtime::Finalize() and returns its
 * status unchanged. */
atmi_status_t atmi_finalize() {
  return core::Runtime::Finalize();
}

/*
* Machine Info
*/
/* Returns the machine description pointer obtained from
 * core::Runtime::GetMachineInfo(). */
atmi_machine_t *atmi_machine_get_info() {
  atmi_machine_t *machine = core::Runtime::GetMachineInfo();
  return machine;
}

/*
* Modules
*/
/* Registers a module held in memory for the given place.
 *
 * All arguments (the module image and its size, the target place, the
 * on_deserialized_data callback and its opaque cb_state) are handed unchanged
 * to RegisterModuleFromMemory() on the runtime instance returned by
 * core::Runtime::getInstance(); that call's status is returned as-is. */
atmi_status_t atmi_module_register_from_memory_to_place(
    void *module_bytes, size_t module_size, atmi_place_t place,
    atmi_status_t (*on_deserialized_data)(void *data, size_t size,
                                          void *cb_state),
    void *cb_state) {
  return core::Runtime::getInstance().RegisterModuleFromMemory(
      module_bytes, module_size, place, on_deserialized_data, cb_state);
}

/*
* Data
*/
/* Copies size bytes from src to dest by forwarding to
 * core::Runtime::Memcpy(); returns that call's status. */
atmi_status_t atmi_memcpy(void *dest, const void *src, size_t size) {
  return core::Runtime::Memcpy(dest, src, size);
}

/* Hands ptr to core::Runtime::Memfree() and returns that call's status. */
atmi_status_t atmi_free(void *ptr) {
  return core::Runtime::Memfree(ptr);
}

/* Requests size bytes at the given memory place by forwarding to
 * core::Runtime::Malloc(), which receives ptr as its output argument; returns
 * that call's status. */
atmi_status_t atmi_malloc(void **ptr, size_t size, atmi_mem_place_t place) {
  return core::Runtime::Malloc(ptr, size, place);
}
203 changes: 203 additions & 0 deletions openmp/libomptarget/plugins/amdgpu/impl/atmi.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,203 @@
/*===--------------------------------------------------------------------------
* ATMI (Asynchronous Task and Memory Interface)
*
* This file is distributed under the MIT License. See LICENSE.txt for details.
*===------------------------------------------------------------------------*/
#ifndef INCLUDE_ATMI_H_
#define INCLUDE_ATMI_H_

#define ROCM_VERSION_MAJOR 3
#define ROCM_VERSION_MINOR 2

/** \defgroup enumerations Enumerated Types
* @{
*/

/**
* @brief Status codes returned by the ATMI entry points.
*/
typedef enum atmi_status_t {
/**
* The function has been executed successfully.
*/
ATMI_STATUS_SUCCESS = 0,
/**
* An undocumented error has occurred.
*/
ATMI_STATUS_UNKNOWN = 1,
/**
* A generic error has occurred.
*/
ATMI_STATUS_ERROR = 2,
} atmi_status_t;

/**
* @brief Device Types.
*
* CPU, iGPU and dGPU each occupy a distinct bit, so the values can be
* combined: ATMI_DEVTYPE_GPU is iGPU | dGPU (0x0110) and ATMI_DEVTYPE_ALL
* (0x111) equals CPU | iGPU | dGPU.
* ATMI_DEVTYPE_ALL is also used as the array length in atmi_machine_t, so
* changing any value here changes that structure's layout.
*/
typedef enum atmi_devtype_s {
ATMI_DEVTYPE_CPU = 0x0001,
ATMI_DEVTYPE_iGPU = 0x0010, // Integrated GPU
ATMI_DEVTYPE_dGPU = 0x0100, // Discrete GPU
ATMI_DEVTYPE_GPU = ATMI_DEVTYPE_iGPU | ATMI_DEVTYPE_dGPU, // Any GPU
ATMI_DEVTYPE_ALL = 0x111 // Union of all device types
} atmi_devtype_t;

/**
* @brief Memory Access Type.
*
* ATMI_MEMTYPE_ANY has no explicit initializer and therefore takes the next
* value after ATMI_MEMTYPE_COARSE_GRAINED, i.e. 2.
*/
typedef enum atmi_memtype_s {
ATMI_MEMTYPE_FINE_GRAINED = 0,
ATMI_MEMTYPE_COARSE_GRAINED = 1,
ATMI_MEMTYPE_ANY
} atmi_memtype_t;

/**
* @brief Scope of the memory fence applied to an ATMI task.
*/
typedef enum atmi_task_fence_scope_s {
/**
* No memory fence applied; external fences have to be applied around the task
* launch/completion.
*/
ATMI_FENCE_SCOPE_NONE = 0,
/**
* The fence is applied to the device.
*/
ATMI_FENCE_SCOPE_DEVICE = 1,
/**
* The fence is applied to the entire system.
*/
ATMI_FENCE_SCOPE_SYSTEM = 2
} atmi_task_fence_scope_t;

/** @} */

/** \defgroup common Common ATMI Structures
* @{
*/

/**
* @brief ATMI Compute Place
*
* Identifies a compute device by cluster node, device type and device ordinal.
*/
typedef struct atmi_place_s {
/**
* The node in a cluster where computation should occur.
* Default is node_id = 0 for local computations.
*/
unsigned int node_id;
/**
* Device type: an atmi_devtype_t value (CPU or one of the GPU kinds).
*/
atmi_devtype_t type;
/**
* The device ordinal number ordered by runtime; -1 for any
*/
int device_id;
} atmi_place_t;

/**
* @brief ATMI Memory Place
*
* Identifies a memory space/region by cluster node, device type, device
* ordinal and memory ordinal.
*/
typedef struct atmi_mem_place_s {
/**
* The node in a cluster where computation should occur.
* Default is node_id = 0 for local computations.
*/
unsigned int node_id;
/**
* Device type: an atmi_devtype_t value (CPU or one of the GPU kinds).
*/
atmi_devtype_t dev_type;
/**
* The device ordinal number ordered by runtime; -1 for any
*/
int dev_id;
// atmi_memtype_t mem_type; // Fine grained or Coarse grained
/**
* The memory space/region ordinal number ordered by runtime; -1 for any
*/
int mem_id;
} atmi_mem_place_t;

/**
* @brief ATMI Memory Space/region Structure
*/
typedef struct atmi_memory_s {
/**
* Memory capacity.
* NOTE(review): the unit is not stated in this header (presumably bytes) -
* confirm against the code that fills this field.
*/
unsigned long int capacity;
/**
* Memory type (an atmi_memtype_t value).
*/
atmi_memtype_t type;
} atmi_memory_t;

/**
* @brief ATMI Device Structure
*/
typedef struct atmi_device_s {
/**
* Device type: an atmi_devtype_t value (CPU or one of the GPU kinds).
*/
atmi_devtype_t type;
/**
* The number of compute cores
*/
unsigned int core_count;
/**
* The number of memory spaces/regions that are accessible
* from this device
*/
unsigned int memory_count;
/**
* Array of memory spaces/regions that are accessible
* from this device.
* NOTE(review): presumably holds memory_count entries - confirm against the
* code that allocates it.
*/
atmi_memory_t *memories;
} atmi_device_t;

/**
* @brief ATMI Machine Structure
*
* Both arrays are sized by ATMI_DEVTYPE_ALL, which is 0x111, so each has 273
* slots.
* NOTE(review): presumably indexed directly by atmi_devtype_t value (which
* would leave most slots unused) - confirm against the code that reads them.
*/
typedef struct atmi_machine_s {
/**
* The number of devices categorized by the device type
*/
unsigned int device_count_by_type[ATMI_DEVTYPE_ALL];
/**
* The device structures categorized by the device type
*/
atmi_device_t *devices_by_type[ATMI_DEVTYPE_ALL];
} atmi_machine_t;

// Below are some helper macros that can be used to set up
// some of the ATMI data structures.
//
// Each macro expands to a brace-enclosed initializer using designated
// initializers: the ATMI_PLACE_* macros name the fields of atmi_place_t and
// the ATMI_MEM_PLACE* macros name the fields of atmi_mem_place_t.
// - *_CPU / *_GPU fix the device type to ATMI_DEVTYPE_CPU / ATMI_DEVTYPE_GPU.
// - ATMI_MEM_PLACE_CPU / ATMI_MEM_PLACE_GPU set mem_id to -1.
// - ATMI_MEM_PLACE always sets node_id to 0.
// NOTE(review): designated initializers are a C99 feature and are only
// standard in C++ from C++20 onwards - confirm that the compilers used for
// C++ sources which expand these macros accept them.
#define ATMI_PLACE_CPU(node, cpu_id) \
{ .node_id = node, .type = ATMI_DEVTYPE_CPU, .device_id = cpu_id }
#define ATMI_PLACE_GPU(node, gpu_id) \
{ .node_id = node, .type = ATMI_DEVTYPE_GPU, .device_id = gpu_id }
#define ATMI_MEM_PLACE_CPU(node, cpu_id) \
{ \
.node_id = node, .dev_type = ATMI_DEVTYPE_CPU, .dev_id = cpu_id, \
.mem_id = -1 \
}
#define ATMI_MEM_PLACE_GPU(node, gpu_id) \
{ \
.node_id = node, .dev_type = ATMI_DEVTYPE_GPU, .dev_id = gpu_id, \
.mem_id = -1 \
}
#define ATMI_MEM_PLACE_CPU_MEM(node, cpu_id, cpu_mem_id) \
{ \
.node_id = node, .dev_type = ATMI_DEVTYPE_CPU, .dev_id = cpu_id, \
.mem_id = cpu_mem_id \
}
#define ATMI_MEM_PLACE_GPU_MEM(node, gpu_id, gpu_mem_id) \
{ \
.node_id = node, .dev_type = ATMI_DEVTYPE_GPU, .dev_id = gpu_id, \
.mem_id = gpu_mem_id \
}
#define ATMI_MEM_PLACE(d_type, d_id, m_id) \
{ .node_id = 0, .dev_type = d_type, .dev_id = d_id, .mem_id = m_id }

#endif // INCLUDE_ATMI_H_
Loading

0 comments on commit 5ffe73c

Please sign in to comment.