Skip to content

Commit

Permalink
Merge pull request #50 from amdadvtech/feature/ORO-0-only-bundle
Browse files Browse the repository at this point in the history
Feature/oro 0 only bundle
  • Loading branch information
takahiroharada authored Mar 7, 2023
2 parents 7c9ad91 + 94ff379 commit c17bf53
Show file tree
Hide file tree
Showing 48 changed files with 36 additions and 80 deletions.
3 changes: 3 additions & 0 deletions Orochi/Orochi.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -338,6 +338,9 @@ oroError OROAPI oroGetDeviceProperties(oroDeviceProp* props, oroDevice dev)
e = cuDeviceGetAttribute( &props->computeMode, CU_DEVICE_ATTRIBUTE_COMPUTE_MODE, deviceId );
e = cuDeviceGetAttribute( &props->concurrentKernels, CU_DEVICE_ATTRIBUTE_CONCURRENT_KERNELS, deviceId );
e = cuDeviceGetAttribute( &props->ECCEnabled, CU_DEVICE_ATTRIBUTE_ECC_ENABLED, deviceId );
e = cuDeviceGetAttribute( &props->major, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, deviceId );
e = cuDeviceGetAttribute( &props->minor, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR, deviceId );

return oroSuccess;
}
return oroErrorUnknown;
Expand Down
2 changes: 2 additions & 0 deletions UnitTest/bitcodes/generate_bitcodes.bat
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
rem Build the unit-test device code with hipcc, once per source file.
rem Each command is device-only (--cuda-device-only), passes one --offload-arch
rem per listed gfx target, and requests bundled LLVM output via
rem --gpu-bundle-output and -emit-llvm with -fgpu-rdc enabled.
rem The inputs are read from the parent directory (../).
rem NOTE(review): -c is given twice on each command line; presumably redundant - confirm.
call hipcc --cuda-device-only --offload-arch=gfx1030 --offload-arch=gfx1031 --offload-arch=gfx1032 --offload-arch=gfx1033 --offload-arch=gfx1034 --offload-arch=gfx1035 --offload-arch=gfx1036 --offload-arch=gfx1010 --offload-arch=gfx1011 --offload-arch=gfx1012 --offload-arch=gfx1013 --offload-arch=gfx900 --offload-arch=gfx906 -fgpu-rdc -c --gpu-bundle-output -c -emit-llvm ../moduleTestKernel.cpp
call hipcc --cuda-device-only --offload-arch=gfx1030 --offload-arch=gfx1031 --offload-arch=gfx1032 --offload-arch=gfx1033 --offload-arch=gfx1034 --offload-arch=gfx1035 --offload-arch=gfx1036 --offload-arch=gfx1010 --offload-arch=gfx1011 --offload-arch=gfx1012 --offload-arch=gfx1013 --offload-arch=gfx900 --offload-arch=gfx906 -fgpu-rdc -c --gpu-bundle-output -c -emit-llvm ../moduleTestFunc.cpp
28 changes: 0 additions & 28 deletions UnitTest/bitcodes/generate_bitcodes.sh
Original file line number Diff line number Diff line change
@@ -1,30 +1,2 @@
hipcc --cuda-device-only --offload-arch=gfx1030 --offload-arch=gfx1031 --offload-arch=gfx1032 --offload-arch=gfx1033 --offload-arch=gfx1034 --offload-arch=gfx1035 --offload-arch=gfx1036 --offload-arch=gfx1010 --offload-arch=gfx1011 --offload-arch=gfx1012 --offload-arch=gfx1013 --offload-arch=gfx900 --offload-arch=gfx906 -fgpu-rdc -c --gpu-bundle-output -c -emit-llvm ../moduleTestKernel.cpp

hipcc --cuda-device-only --offload-arch=gfx1030 --offload-arch=gfx1031 --offload-arch=gfx1032 --offload-arch=gfx1033 --offload-arch=gfx1034 --offload-arch=gfx1035 --offload-arch=gfx1036 --offload-arch=gfx1010 --offload-arch=gfx1011 --offload-arch=gfx1012 --offload-arch=gfx1013 --offload-arch=gfx900 --offload-arch=gfx906 -fgpu-rdc -c --gpu-bundle-output -c -emit-llvm ../moduleTestFunc.cpp
hipcc --offload-arch=gfx1030 ../moduleTestFunc.cpp -c -fgpu-rdc --cuda-device-only
hipcc --offload-arch=gfx1031 ../moduleTestFunc.cpp -c -fgpu-rdc --cuda-device-only
hipcc --offload-arch=gfx1032 ../moduleTestFunc.cpp -c -fgpu-rdc --cuda-device-only
hipcc --offload-arch=gfx1033 ../moduleTestFunc.cpp -c -fgpu-rdc --cuda-device-only
hipcc --offload-arch=gfx1034 ../moduleTestFunc.cpp -c -fgpu-rdc --cuda-device-only
hipcc --offload-arch=gfx1035 ../moduleTestFunc.cpp -c -fgpu-rdc --cuda-device-only
hipcc --offload-arch=gfx1036 ../moduleTestFunc.cpp -c -fgpu-rdc --cuda-device-only
hipcc --offload-arch=gfx1010 ../moduleTestFunc.cpp -c -fgpu-rdc --cuda-device-only
hipcc --offload-arch=gfx1011 ../moduleTestFunc.cpp -c -fgpu-rdc --cuda-device-only
hipcc --offload-arch=gfx1012 ../moduleTestFunc.cpp -c -fgpu-rdc --cuda-device-only
hipcc --offload-arch=gfx1013 ../moduleTestFunc.cpp -c -fgpu-rdc --cuda-device-only
hipcc --offload-arch=gfx900 ../moduleTestFunc.cpp -c -fgpu-rdc --cuda-device-only
hipcc --offload-arch=gfx906 ../moduleTestFunc.cpp -c -fgpu-rdc --cuda-device-only

hipcc --offload-arch=gfx1030 ../moduleTestKernel.cpp -c -fgpu-rdc --cuda-device-only
hipcc --offload-arch=gfx1031 ../moduleTestKernel.cpp -c -fgpu-rdc --cuda-device-only
hipcc --offload-arch=gfx1032 ../moduleTestKernel.cpp -c -fgpu-rdc --cuda-device-only
hipcc --offload-arch=gfx1033 ../moduleTestKernel.cpp -c -fgpu-rdc --cuda-device-only
hipcc --offload-arch=gfx1034 ../moduleTestKernel.cpp -c -fgpu-rdc --cuda-device-only
hipcc --offload-arch=gfx1035 ../moduleTestKernel.cpp -c -fgpu-rdc --cuda-device-only
hipcc --offload-arch=gfx1036 ../moduleTestKernel.cpp -c -fgpu-rdc --cuda-device-only
hipcc --offload-arch=gfx1010 ../moduleTestKernel.cpp -c -fgpu-rdc --cuda-device-only
hipcc --offload-arch=gfx1011 ../moduleTestKernel.cpp -c -fgpu-rdc --cuda-device-only
hipcc --offload-arch=gfx1012 ../moduleTestKernel.cpp -c -fgpu-rdc --cuda-device-only
hipcc --offload-arch=gfx1013 ../moduleTestKernel.cpp -c -fgpu-rdc --cuda-device-only
hipcc --offload-arch=gfx900 ../moduleTestKernel.cpp -c -fgpu-rdc --cuda-device-only
hipcc --offload-arch=gfx906 ../moduleTestKernel.cpp -c -fgpu-rdc --cuda-device-only
7 changes: 0 additions & 7 deletions UnitTest/bitcodes/generate_bitcodes_gfx1100.bat

This file was deleted.

6 changes: 0 additions & 6 deletions UnitTest/bitcodes/generate_bitcodes_gfx1100.sh

This file was deleted.

7 changes: 0 additions & 7 deletions UnitTest/bitcodes/generate_bitcodes_gfx1102.bat

This file was deleted.

6 changes: 0 additions & 6 deletions UnitTest/bitcodes/generate_bitcodes_gfx1102.sh

This file was deleted.

2 changes: 2 additions & 0 deletions UnitTest/bitcodes/generate_bitcodes_nvidia.bat
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
rem Build the NVIDIA fatbins for the unit tests from the shared .cpp sources.
rem -x cu makes nvcc treat the .cpp inputs as CUDA sources; -fatbin, --device-c
rem and -arch=all are passed unchanged for both files.
nvcc -x cu -fatbin --device-c -arch=all ../moduleTestFunc.cpp
nvcc -x cu -fatbin --device-c -arch=all ../moduleTestKernel.cpp
6 changes: 2 additions & 4 deletions UnitTest/bitcodes/generate_bitcodes_nvidia.sh
Original file line number Diff line number Diff line change
@@ -1,4 +1,2 @@
nvcc -arch=compute_80 -code="sm_80,sm_86,sm_87" -fatbin --device-c ../moduleTestFunc.cu
nvcc -arch=compute_80 -code="sm_80,sm_86,sm_87" -fatbin --device-c ../moduleTestKernel.cu
nvcc -cubin --device-c -arch=sm_80 ../moduleTestFunc.cu
nvcc -cubin --device-c -arch=sm_80 ../moduleTestKernel.cu
# The dedicated .cu copies of the test sources no longer exist, so build the
# fatbins from the shared .cpp sources and tell nvcc to treat them as CUDA
# (-x cu). This keeps the script in step with generate_bitcodes_nvidia.bat.
nvcc -x cu -fatbin --device-c -arch=all ../moduleTestFunc.cpp
nvcc -x cu -fatbin --device-c -arch=all ../moduleTestKernel.cpp
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file removed UnitTest/bitcodes/moduleTestFunc.cubin
Binary file not shown.
Binary file removed UnitTest/bitcodes/moduleTestFunc.fatbin
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file removed UnitTest/bitcodes/moduleTestKernel.cubin
Binary file not shown.
Binary file removed UnitTest/bitcodes/moduleTestKernel.fatbin
Binary file not shown.
23 changes: 14 additions & 9 deletions UnitTest/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -149,7 +149,7 @@ void loadFile( const char* path, std::vector<char>& dst )
f.close();
}
}

#if 0
TEST_F( OroTestBase, linkBc )
{
oroDeviceProp props;
Expand Down Expand Up @@ -228,7 +228,7 @@ TEST_F( OroTestBase, linkBc )
ORORTCCHECK( oroModuleUnload( module ) );
}
}

#endif
TEST_F( OroTestBase, link )
{
oroDeviceProp props;
Expand All @@ -237,8 +237,10 @@ TEST_F( OroTestBase, link )
std::vector<char> data1;
const bool isAmd = oroGetCurAPI( 0 ) == ORO_API_HIP;

std::string arch = "-arch=sm_" + std::to_string( props.major ) + std::string( "0" );

std::vector<const char*> opts = isAmd ? std::vector<const char *>({ "-fgpu-rdc", "-c", "--cuda-device-only" })
: std::vector<const char *>({ "--device-c", "-arch=sm_80" });
: std::vector<const char *>({ "--device-c", arch.c_str() });
{
std::string code;
OrochiUtils::readSourceCode( "../UnitTest/moduleTestKernel.h", code );
Expand Down Expand Up @@ -310,7 +312,7 @@ TEST_F( OroTestBase, link )
ORORTCCHECK( oroModuleUnload( module ) );
}
}

#if 0
TEST_F( OroTestBase, link_addFile )
{
oroDeviceProp props;
Expand Down Expand Up @@ -385,6 +387,7 @@ TEST_F( OroTestBase, link_addFile )
ORORTCCHECK( oroModuleUnload( module ) );
}
}
#endif

TEST_F( OroTestBase, link_null_name )
{
Expand All @@ -393,9 +396,9 @@ TEST_F( OroTestBase, link_null_name )
std::vector<char> data0;
std::vector<char> data1;
const bool isAmd = oroGetCurAPI( 0 ) == ORO_API_HIP;

std::string arch = "-arch=sm_" + std::to_string( props.major ) + std::string( "0" );
std::vector<const char*> opts = isAmd ? std::vector<const char *>({ "-fgpu-rdc", "-c", "--cuda-device-only" })
: std::vector<const char *>({ "--device-c", "-arch=sm_80" });
: std::vector<const char *>({ "--device-c", arch.c_str() });
{
std::string code;
OrochiUtils::readSourceCode( "../UnitTest/moduleTestKernel.h", code );
Expand Down Expand Up @@ -526,14 +529,14 @@ TEST_F( OroTestBase, link_bundledBc_with_bc )
std::vector<char> data0;
std::vector<char> data1;
const bool isAmd = oroGetCurAPI( 0 ) == ORO_API_HIP;

std::string arch = "-arch=sm_" + std::to_string( props.major ) + std::string( "0" );
{
std::string bcFile = isAmd ? "../UnitTest/bitcodes/moduleTestFunc-hip-amdgcn-amd-amdhsa.bc" : "../UnitTest/bitcodes/moduleTestFunc.fatbin";
loadFile( bcFile.c_str(), data1 );
}
{
std::vector<const char*> opts = isAmd ? std::vector<const char *>({ "-fgpu-rdc", "-c", "--cuda-device-only" })
: std::vector<const char *>({ "--device-c", "-arch=sm_80" });
: std::vector<const char *>({ "--device-c", arch.c_str() });
std::string code;
OrochiUtils::readSourceCode( "../UnitTest/moduleTestKernel.h", code );
OrochiUtils::getData( m_device, code.c_str(), "../UnitTest/moduleTestKernel.h", &opts, data0 );
Expand Down Expand Up @@ -607,6 +610,7 @@ TEST_F( OroTestBase, link_bundledBc_with_bc_loweredName )
std::vector<char> data0;
std::vector<char> data1;
const bool isAmd = oroGetCurAPI( 0 ) == ORO_API_HIP;
std::string arch = "-arch=sm_" + std::to_string( props.major ) + std::string( "0" );
const char* funcName = "testKernel<0>";
std::string loweredNameStr;
orortcProgram prog;
Expand All @@ -616,7 +620,8 @@ TEST_F( OroTestBase, link_bundledBc_with_bc_loweredName )
loadFile( bcFile.c_str(), data1 );
}
{
std::vector<const char*> opts = isAmd ? std::vector<const char*>( { "-fgpu-rdc", "-c", "--cuda-device-only" } ) : std::vector<const char*>( { "--device-c", "-arch=sm_80" } );
std::vector<const char*> opts = isAmd ? std::vector<const char*>( { "-fgpu-rdc", "-c", "--cuda-device-only" } )
: std::vector<const char*>( { "--device-c", arch.c_str() } );
std::string code;

OrochiUtils::readSourceCode( "../UnitTest/moduleTestKernel_loweredName.h", code );
Expand Down
2 changes: 2 additions & 0 deletions UnitTest/moduleTestFunc.cpp
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
#if !defined( __CUDACC__ )
#include <hip/hip_runtime.h>
#endif

__device__ void setInfo( int *x )
{
Expand Down
5 changes: 0 additions & 5 deletions UnitTest/moduleTestFunc.cu

This file was deleted.

6 changes: 0 additions & 6 deletions UnitTest/moduleTestKernel.cu

This file was deleted.

4 changes: 4 additions & 0 deletions UnitTest/premake5.lua
Original file line number Diff line number Diff line change
Expand Up @@ -19,3 +19,7 @@ project "Unittest"
files { "../contrib/gtest-1.6.0/gtest-all.cc" }
sysincludedirs{ "../contrib/gtest-1.6.0/" }
defines { "GTEST_HAS_TR1_TUPLE=0" }
-- When premake is invoked with --kernelcompile, regenerate the precompiled
-- kernels used by the unit tests. The generator scripts in ./bitcodes/ exist
-- as both .bat and .sh, so run the flavour the host shell can execute instead
-- of always invoking the Windows batch files.
if _OPTIONS["kernelcompile"] then
	-- package.config begins with the host directory separator ("\" on Windows).
	local isWindowsHost = package.config:sub( 1, 1 ) == "\\"
	for _, script in ipairs( { "generate_bitcodes", "generate_bitcodes_nvidia" } ) do
		if isWindowsHost then
			os.execute( "cd ./bitcodes/ && " .. script .. ".bat" )
		else
			os.execute( "cd ./bitcodes/ && sh ./" .. script .. ".sh" )
		end
	end
end
4 changes: 2 additions & 2 deletions contrib/hipew/src/hipew.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -480,8 +480,8 @@ void hipewInit( int* resultDriver, int* resultRtc, hipuint32_t flags )
_LIBRARY_FIND_CHECKED( rtcLib, hiprtcGetProgramLog );
_LIBRARY_FIND_CHECKED( rtcLib, hiprtcGetProgramLogSize );
_LIBRARY_FIND_CHECKED( rtcLib, hiprtcGetCode );
// _LIBRARY_FIND_CHECKED( rtcLib, hiprtcGetBitcodeSize );
// _LIBRARY_FIND_CHECKED( rtcLib, hiprtcGetBitcode );
_LIBRARY_FIND_CHECKED( rtcLib, hiprtcGetBitcodeSize );
_LIBRARY_FIND_CHECKED( rtcLib, hiprtcGetBitcode );
_LIBRARY_FIND_CHECKED( rtcLib, hiprtcGetCodeSize );
_LIBRARY_FIND_CHECKED( rtcLib, hiprtcLinkCreate );
_LIBRARY_FIND_CHECKED( rtcLib, hiprtcLinkAddFile );
Expand Down
5 changes: 5 additions & 0 deletions premake5.lua
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,11 @@ newoption {
description = "Use precompiled kernels"
}

-- Command-line switch (--kernelcompile) registered with premake; scripts read it
-- back through _OPTIONS["kernelcompile"].
newoption( {
	trigger = "kernelcompile",
	description = "Compile kernels used for unit test",
} )

function copydir(src_dir, dst_dir, filter, single_dst_dir)
if not os.isdir(src_dir) then
printError("'%s' is not an existing directory!", src_dir)
Expand Down

0 comments on commit c17bf53

Please sign in to comment.