Skip to content

Commit

Permalink
Merge pull request #19 from listenlink/upstream
Browse files Browse the repository at this point in the history
intelblas_gemm clean patch
  • Loading branch information
ptillet authored Feb 16, 2017
2 parents 74e9a03 + 461b83d commit 9b97c56
Show file tree
Hide file tree
Showing 18 changed files with 219,389 additions and 57,504 deletions.
2 changes: 0 additions & 2 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -47,8 +47,6 @@ foreach(FILE ${LIBISAAC_SRC})
set(LIBISAAC_SRC_STR "${_TMP} ${LIBISAAC_SRC_STR}")
endforeach()



#Include directories
set(INCLUDE_DIRECTORIES_STR)
get_property(INCLUDE_DIRECTORIES_LST DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} PROPERTY INCLUDE_DIRECTORIES)
Expand Down
6 changes: 6 additions & 0 deletions include/isaac/driver/dispatch.h
Original file line number Diff line number Diff line change
Expand Up @@ -116,8 +116,11 @@ class dispatch
static cl_int clGetKernelWorkGroupInfo(cl_kernel, cl_device_id, cl_kernel_work_group_info, size_t, void *, size_t *);
static cl_kernel clCreateKernel(cl_program, const char *, cl_int *);
static cl_mem clCreateBuffer(cl_context, cl_mem_flags, size_t, void *, cl_int *);
static cl_mem clCreateImage(cl_context, cl_mem_flags, const cl_image_format *, const cl_image_desc *, void *, cl_int *);
static cl_program clCreateProgramWithSource(cl_context, cl_uint, const char **, const size_t *, cl_int *);
static cl_int clReleaseKernel(cl_kernel);
static cl_int clEnqueueCopyBufferToImage(cl_command_queue, cl_mem, cl_mem, size_t, const size_t *, const size_t *, cl_uint, const cl_event *, cl_event *);
static cl_int clSetEventCallback(cl_event, cl_int, void (CL_CALLBACK * /* pfn_notify */)(cl_event, cl_int, void *), void *);

//CUDA
static CUresult cuCtxDestroy_v2(CUcontext ctx);
Expand Down Expand Up @@ -202,8 +205,11 @@ class dispatch
static void* clGetKernelWorkGroupInfo_;
static void* clCreateKernel_;
static void* clCreateBuffer_;
static void* clCreateImage_;
static void* clCreateProgramWithSource_;
static void* clReleaseKernel_;
static void* clEnqueueCopyBufferToImage_;
static void* clSetEventCallback_;

//CUDA
static void* cuCtxDestroy_v2_;
Expand Down
33 changes: 33 additions & 0 deletions include/isaac/jit/generation/gemm.h
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,39 @@ class cublas_gemm : public external_base
bool init_;
};

class intelblas_gemm : public external_base
{
bool init();
public:
intelblas_gemm(char A_trans, char B_trans);
int is_invalid(expression_tree const &, driver::Device const &) const;
std::vector<int_t> input_sizes(expression_tree const & expressions) const;
void enqueue(driver::CommandQueue & queue, driver::Program const &, std::string const &, runtime::execution_handler const & h);
expression_type type() const;
private:
std::string generate_impl(std::string const & suffix, expression_tree const &, driver::Device const & device, symbolic::symbols_table const &) const;
const char A_trans_;
const char B_trans_;
bool init_;
};

class intelblas_gemm_image : public external_base
{
bool init();
public:
intelblas_gemm_image(char A_trans, char B_trans);
int is_invalid(expression_tree const &, driver::Device const &) const;
std::vector<int_t> input_sizes(expression_tree const & expressions) const;
void enqueue(driver::CommandQueue & queue, driver::Program const &, std::string const &, runtime::execution_handler const & h);
expression_type type() const;
private:
std::string generate_impl(std::string const & suffix, expression_tree const &, driver::Device const & device, symbolic::symbols_table const &) const;
const char A_trans_;
const char B_trans_;
bool init_;
};


class gemm : public parameterized_base
{
private:
Expand Down
4 changes: 2 additions & 2 deletions lib/api/blas/clBLAS.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -278,8 +278,8 @@ extern "C"
}\
sc::int_t As1 = (sc::int_t)M, As2 = (sc::int_t)K;\
sc::int_t Bs1 = (sc::int_t)K, Bs2 = (sc::int_t)N;\
if(transA==clblasTrans) std::swap(As1, As2);\
if(transB==clblasTrans) std::swap(Bs1, Bs2);\
if(transA==clblasTrans || transA==clblasConjTrans) std::swap(As1, As2);\
if(transB==clblasTrans || transB==clblasConjTrans) std::swap(Bs1, Bs2);\
/*Struct*/\
sc::array A(As1, As2, TYPE_ISAAC, sc::driver::Buffer(mA, false), (sc::int_t)offA, (sc::int_t)lda);\
sc::array B(Bs1, Bs2, TYPE_ISAAC, sc::driver::Buffer(mB, false), (sc::int_t)offB, (sc::int_t)ldb);\
Expand Down
3 changes: 3 additions & 0 deletions lib/driver/device.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -301,6 +301,9 @@ std::string Device::infos() const
return oss.str();
}

Device::handle_type const & Device::handle() const
{ return h_; }

// Properties
#define WRAP_ATTRIBUTE(ret, fname, CUNAME, CLNAME) \
ret Device::fname() const\
Expand Down
8 changes: 8 additions & 0 deletions lib/driver/dispatch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,9 @@ cl_int dispatch::clBuildProgram(cl_program a, cl_uint b, const cl_device_id * c,
cl_context dispatch::clCreateContext(const cl_context_properties * a, cl_uint b, const cl_device_id * c, void (*d)(const char *, const void *, size_t, void *), void * e, cl_int * f)
{ return f_impl<dispatch::clinit>(opencl_, dispatch::clCreateContext, dispatch::clCreateContext_, "clCreateContext", a, b, c, d, e, f); }

cl_int dispatch::clSetEventCallback(cl_event event, cl_int a, void(CL_CALLBACK *pfn_notify)(cl_event, cl_int, void *), void * arg)
{ return f_impl<dispatch::clinit>(opencl_, dispatch::clSetEventCallback, dispatch::clSetEventCallback_, "clSetEventCallback", event, a, pfn_notify, arg); }

OCL_DEFINE9(cl_int, clEnqueueNDRangeKernel, cl_command_queue, cl_kernel, cl_uint, const size_t*, const size_t*, const size_t*, cl_uint, const cl_event*, cl_event*)
OCL_DEFINE4(cl_int, clSetKernelArg, cl_kernel, cl_uint, size_t, const void *)
OCL_DEFINE1(cl_int, clReleaseMemObject, cl_mem)
Expand Down Expand Up @@ -171,8 +174,10 @@ OCL_DEFINE5(cl_int, clGetKernelInfo, cl_kernel, cl_kernel_info, size_t, void *,
OCL_DEFINE6(cl_int, clGetKernelWorkGroupInfo, cl_kernel, cl_device_id, cl_kernel_work_group_info, size_t, void *, size_t *)
OCL_DEFINE3(cl_kernel, clCreateKernel, cl_program, const char *, cl_int *)
OCL_DEFINE5(cl_mem, clCreateBuffer, cl_context, cl_mem_flags, size_t, void *, cl_int *)
OCL_DEFINE6(cl_mem, clCreateImage, cl_context, cl_mem_flags, const cl_image_format *, const cl_image_desc *, void *, cl_int *)
OCL_DEFINE5(cl_program, clCreateProgramWithSource, cl_context, cl_uint, const char **, const size_t *, cl_int *)
OCL_DEFINE1(cl_int, clReleaseKernel, cl_kernel)
OCL_DEFINE9(cl_int, clEnqueueCopyBufferToImage, cl_command_queue, cl_mem, cl_mem, size_t, const size_t *, const size_t *, cl_uint, const cl_event *, cl_event *)

//CUDA
CUDA_DEFINE1(CUresult, cuCtxDestroy_v2, CUcontext)
Expand Down Expand Up @@ -291,8 +296,11 @@ void* dispatch::clGetKernelInfo_;
void* dispatch::clGetKernelWorkGroupInfo_;
void* dispatch::clCreateKernel_;
void* dispatch::clCreateBuffer_;
void* dispatch::clCreateImage_;
void* dispatch::clCreateProgramWithSource_;
void* dispatch::clReleaseKernel_;
void* dispatch::clEnqueueCopyBufferToImage_;
void* dispatch::clSetEventCallback_;

//CUDA
void* dispatch::cuCtxDestroy_v2_;
Expand Down
3 changes: 3 additions & 0 deletions lib/driver/kernel.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,9 @@ void Kernel::setSizeArg(unsigned int index, size_t N)
}
}

Kernel::handle_type const & Kernel::handle() const
{ return h_; }

}

}
Expand Down
1 change: 0 additions & 1 deletion lib/jit/generation/elementwise_2d.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,6 @@
#include "tools/loop.hpp"
#include "tools/vector_types.hpp"


namespace isaac
{
namespace templates
Expand Down
Loading

0 comments on commit 9b97c56

Please sign in to comment.