Skip to content

Commit

Permalink
Merge pull request #52 from RadeonOpenCompute/develop
Browse files Browse the repository at this point in the history
rocBLAS first public release
  • Loading branch information
tingxingdong authored Nov 11, 2016
2 parents a434ea9 + 82efeec commit 0b38599
Show file tree
Hide file tree
Showing 78 changed files with 513 additions and 1,140 deletions.
24 changes: 12 additions & 12 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ project( rocblas-superbuild NONE )
# Everything is initially off, so that the cache is not initialized until the user elects to build
option( BUILD_LIBRARY "Build rocBLAS library" OFF )
option( BUILD_CLIENTS "Build rocBLAS clients" OFF )
option( BUILD_WITH_COBALT "Building rocBLAS with Cobalt or not" ON )
option( BUILD_WITH_TENSILE "Building rocBLAS with Tensile or not" ON )

# BUILD_SHARED_LIBS is a cmake built-in; we make it an explicit option such that it shows in cmake-gui
option( BUILD_SHARED_LIBS "Build rocBLAS as a shared library" OFF )
Expand Down Expand Up @@ -85,19 +85,19 @@ endif( )


if( BUILD_LIBRARY )
if( BUILD_WITH_COBALT )
if( BUILD_WITH_TENSILE )
# defines
# Cobalt_INCLUDE_DIRS
# CobaltLib_LIBRARIES
# CobaltLogger_LIBRARIES
# Tensile_INCLUDE_DIRS
# TensileLib_LIBRARIES
# TensileLogger_LIBRARIES

include( cmake/external-Cobalt.cmake )
list( APPEND rocblas_dependencies Cobalt )
message( STATUS "Cobalt_ROOT=${Cobalt_ROOT}" )
include( cmake/external-Tensile.cmake )
list( APPEND rocblas_dependencies Tensile )
message( STATUS "Tensile_ROOT=${Tensile_ROOT}" )

set(COBALT_CMAKE_ARGS
-DCMAKE_PREFIX_PATH=${Cobalt_ROOT}
-DBUILD_WITH_COBALT=${BUILD_WITH_COBALT}
set(TENSILE_CMAKE_ARGS
-DCMAKE_PREFIX_PATH=${Tensile_ROOT}
-DBUILD_WITH_TENSILE=${BUILD_WITH_TENSILE}
)
endif()

Expand All @@ -106,7 +106,7 @@ if( BUILD_LIBRARY )
-DHOST_TOOLCHAIN_FILE=${HOST_TOOLCHAIN_FILE}
-DDEVICE_TOOLCHAIN_FILE=${DEVICE_TOOLCHAIN_FILE}
-DBUILD_SHARED_LIBS=${BUILD_SHARED_LIBS}
${COBALT_CMAKE_ARGS}
${TENSILE_CMAKE_ARGS}
)

# Build the library as an external project
Expand Down
2 changes: 1 addition & 1 deletion Jenkinsfile
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ node('rocm-1.3 && fiji')
stage("configure clang release") {
// withEnv(['CXXFLAGS=-I /usr/include/c++/4.8 -I /usr/include/x86_64-linux-gnu/c++/4.8 -I /usr/include/x86_64-linux-gnu', 'HIP_PATH=/opt/rocm/hip']) {
// --amdgpu-target=AMD:AMDGPU:8:0:3
sh "cmake -DCMAKE_BUILD_TYPE=Release -DBUILD_LIBRARY=ON -DBUILD_CLIENTS=ON -DBUILD_CLIENTS_SAMPLES=ON -DBUILD_CLIENTS_TESTS=ON -DBUILD_WITH_COBALT=ON -DHIP_ROOT=/opt/rocm/hip -DBOOST_ROOT=/opt/boost/clang ${scm_dir}"
sh "cmake -DCMAKE_BUILD_TYPE=Release -DBUILD_LIBRARY=ON -DBUILD_CLIENTS=ON -DBUILD_CLIENTS_SAMPLES=ON -DBUILD_CLIENTS_TESTS=ON -DBUILD_WITH_TENSILE=ON -DHIP_ROOT=/opt/rocm/hip -DBOOST_ROOT=/opt/boost/clang ${scm_dir}"
// }
}

Expand Down
7 changes: 7 additions & 0 deletions LICENSE.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
Copyright © 2016 Advanced Micro Devices, Inc.

Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ Except a few routines (like TRSM) having memory allocation inside preventing asy
The [wiki][] has helpful information about building rocblas library, samples and testing files.
[wiki]: https://github.com/clMathLibraries/rocBLAS/wiki
[wiki]: https://github.com/RadeonOpenCompute/rocBLAS/wiki
[ROCm]: https://radeonopencompute.github.io/
Expand Down
8 changes: 8 additions & 0 deletions clients/benchmarks/client.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
#include <iostream>
#include <stdio.h>
#include <boost/program_options.hpp>

#include "rocblas.h"
#include "utility.h"
#include "testing_scal.hpp"
Expand All @@ -15,6 +16,7 @@
#include "testing_amax.hpp"
#include "testing_gemv.hpp"
#include "testing_trtri.hpp"
#include "testing_gemm.hpp"

namespace po = boost::program_options;

Expand Down Expand Up @@ -126,6 +128,12 @@ int main(int argc, char *argv[])
else if (precision == 'd')
testing_gemv<double>( argus );
}
else if (function == "gemm"){
if (precision == 's')
testing_gemm<float>( argus );
else if (precision == 'd')
testing_gemm<double>( argus );
}
else if (function == "trtri"){
if (precision == 's')
testing_trtri<float>( argus );
Expand Down
19 changes: 11 additions & 8 deletions clients/gtest/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -101,16 +101,19 @@ find_package( GTest REQUIRED )
find_package( cblas REQUIRED CONFIG )
find_package( rocblas REQUIRED CONFIG )

set(Tensile_TEST_SRC
gemm_gtest.cpp
trsm_gtest.cpp
)


set(rocblas_test_source
rocblas_gtest_main.cpp
blas1_gtest.cpp
gemm_gtest.cpp
trsm_gtest.cpp )

set(rocblas_test_includes
../include/utility.h
${Tensile_TEST_SRC}
)


set( rocblas_benchmark_common
../common/utility.cpp
../common/cblas_interface.cpp
Expand All @@ -119,11 +122,11 @@ set( rocblas_benchmark_common
../common/unit.cpp
)

add_executable( rocblas-test ${rocblas_test_source} ${rocblas_test_includes} ${rocblas_benchmark_common} )
add_executable( rocblas-test ${rocblas_test_source} ${rocblas_benchmark_common} )

# Try to test for specific compiler features if cmake version is recent enough
if( CMAKE_VERSION VERSION_GREATER "3.0" )
target_compile_features( rocblas-test PRIVATE cxx_static_assert cxx_nullptr cxx_lambdas cxx_auto_type )
target_compile_features( rocblas-test PRIVATE cxx_static_assert cxx_nullptr cxx_lambdas cxx_auto_type)
else( )
# Otherwise, just try to compile the library with a standards flag
if( CMAKE_COMPILER_IS_GNUCXX OR ( CMAKE_CXX_COMPILER_ID MATCHES "Clang" ) )
Expand All @@ -134,7 +137,7 @@ else( )
endif( )
endif( )

target_compile_definitions( rocblas-test PRIVATE GTEST_USE_OWN_TR1_TUPLE=1 )
target_compile_definitions( rocblas-test PRIVATE GTEST_USE_OWN_TR1_TUPLE=1 -DGOOGLE_TEST)
target_include_directories( rocblas-test
PRIVATE
# $<BUILD_INTERFACE:${Boost_INCLUDE_DIRS}>
Expand Down
125 changes: 0 additions & 125 deletions clients/gtest/Makefile

This file was deleted.

Empty file removed clients/include/delete.me
Empty file.
19 changes: 11 additions & 8 deletions clients/include/testing_gemm.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,6 @@ rocblas_status testing_gemm(Arguments argus)
gpu_time_used = get_time_us();// in microseconds
}

#if 0
//library interface
status = rocblas_gemm<T>(handle, transA, transB,
M, N, K,
Expand All @@ -116,7 +115,6 @@ rocblas_status testing_gemm(Arguments argus)
hipFree(dC);
return status;
}
#endif
// sleep(1);
if(argus.timing){
gpu_time_used = get_time_us() - gpu_time_used;
Expand Down Expand Up @@ -148,29 +146,36 @@ rocblas_status testing_gemm(Arguments argus)
cblas_gflops = gemm_gflop_count<T>(M, N, K) / cpu_time_used * 1e6;
}

for(int i=0;i<min(N, 4);i++)
for(int j=0;j<min(M,4);j++)
{
printf("matrix C col %d, row %d, CPU result=%f, GPU result=%f\n", i, j, hC_copy[j+i*ldc], hC[j+i*ldc]);
}

//enable unit check; note that unit check is not invasive, but norm check is,
// so the order of unit check and norm check cannot be interchanged
if(argus.unit_check){
unit_check_general<T>(M, N, lda, hC_copy.data(), hC.data());
unit_check_general<T>(M, N, ldc, hC_copy.data(), hC.data());
}

//if norm check is enabled; note that norm check is invasive
//any typeinfo(T) will not work here, because template deduction is resolved at compile time
if(argus.norm_check){
rocblas_error = norm_check_general<T>('F', M, N, lda, hC_copy.data(), hC.data());
rocblas_error = norm_check_general<T>('F', M, N, ldc, hC_copy.data(), hC.data());
}

}// end of if unit/norm check

if(argus.timing){
//only norm_check returns a norm error; unit check does not return anything
cout << "M, N, K, lda, ldb, ldc, rocblas-Gflops (us) ";
cout << "Shape, M, N, K, lda, ldb, ldc, rocblas-Gflops (us) ";
if(argus.norm_check){
cout << "CPU-Gflops(us), norm-error" ;
}
cout << endl;

cout << "GG," << M <<','<< N <<',' << K <<',' << lda <<','<< ldb <<',' << ldc <<',' << rocblas_gflops << "(" << gpu_time_used << "),";
cout << argus.transA_option << argus.transB_option << ',' << M <<','<< N <<',' << K <<',' << lda <<','<< ldb <<','
<< ldc <<',' << rocblas_gflops << "(" << gpu_time_used << "),";

if(argus.norm_check){
cout << cblas_gflops << "(" << cpu_time_used << "),";
Expand Down Expand Up @@ -286,7 +291,6 @@ rocblas_status range_testing_gemm(Arguments argus)
gpu_time_used = get_time_us();// in microseconds
rocblas_gflops = gemm_gflop_count<T> (size, size, size) / gpu_time_used * 1e6 ;

#if 0
//library interface
status = rocblas_gemm<T>(handle, transA, transB,
size, size, size,
Expand All @@ -300,7 +304,6 @@ rocblas_status range_testing_gemm(Arguments argus)
hipFree(dC);
return status;
}
#endif

gpu_time_used = get_time_us() - gpu_time_used;

Expand Down
2 changes: 2 additions & 0 deletions clients/include/utility.h
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@
#ifndef _TESTING_UTILITY_H_
#define _TESTING_UTILITY_H_

#include <stdio.h>
#include <stdlib.h>
#include <vector>
#include "rocblas.h"
#include <sys/time.h>
Expand Down
Loading

0 comments on commit 0b38599

Please sign in to comment.