Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

intermediate work on sparse matrix, nnet3bin now compiles again #36

Merged
merged 1 commit into from
Jul 30, 2015
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion src/cudamatrix/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,8 @@ TESTFILES = cu-vector-test cu-matrix-test cu-math-test cu-test cu-sp-matrix-test


OBJFILES = cu-device.o cu-math.o cu-matrix.o cu-packed-matrix.o cu-sp-matrix.o \
cu-vector.o cu-common.o cu-tp-matrix.o cu-rand.o cu-block-matrix.o
cu-vector.o cu-common.o cu-tp-matrix.o cu-rand.o cu-block-matrix.o \
cu-sparse-matrix.o
ifeq ($(CUDA), true)
OBJFILES += cu-kernels.o cu-randkernels.o
endif
Expand Down
9 changes: 9 additions & 0 deletions src/cudamatrix/cu-kernels-ansi.h
Original file line number Diff line number Diff line change
Expand Up @@ -303,6 +303,15 @@ void cuda_copy_from_mat_ff_trans(dim3 Gr, dim3 Bl, float* mat_out, const float*
void cuda_copy_from_mat_fd_trans(dim3 Gr, dim3 Bl, float *mat_out, const double* mat_in, MatrixDim d_out, MatrixDim d_in);
void cuda_copy_from_mat_dd_trans(dim3 Gr, dim3 Bl, double *mat_out, const double* mat_in, MatrixDim d_out, MatrixDim d_in);

// Copy a sparse matrix, passed as an array of MatrixElement entries, into a
// dense matrix.  The two-letter suffix names the element type of the dense
// output followed by that of the sparse input ('f' = float, 'd' = double);
// the "_trans" variants take the same arguments.
//   Gr, Bl   - grid and block dimensions for the launch
//   mat_out  - dense destination data, described by d_out
//   smat_in  - array of sparse elements
//   d_in     - count of entries in smat_in (the callers in cu-matrix.cc pass
//              the sparse matrix's NumElements())
void cuda_copy_from_smat_ff(dim3 Gr, dim3 Bl, float* mat_out, const MatrixElement<float>* smat_in, MatrixDim d_out, MatrixIndexT_cuda d_in);
void cuda_copy_from_smat_fd(dim3 Gr, dim3 Bl, float* mat_out, const MatrixElement<double>* smat_in, MatrixDim d_out, MatrixIndexT_cuda d_in);
void cuda_copy_from_smat_df(dim3 Gr, dim3 Bl, double* mat_out, const MatrixElement<float>* smat_in, MatrixDim d_out, MatrixIndexT_cuda d_in);
void cuda_copy_from_smat_dd(dim3 Gr, dim3 Bl, double* mat_out, const MatrixElement<double>* smat_in, MatrixDim d_out, MatrixIndexT_cuda d_in);
void cuda_copy_from_smat_ff_trans(dim3 Gr, dim3 Bl, float* mat_out, const MatrixElement<float>* smat_in, MatrixDim d_out, MatrixIndexT_cuda d_in);
void cuda_copy_from_smat_fd_trans(dim3 Gr, dim3 Bl, float* mat_out, const MatrixElement<double>* smat_in, MatrixDim d_out, MatrixIndexT_cuda d_in);
void cuda_copy_from_smat_df_trans(dim3 Gr, dim3 Bl, double* mat_out, const MatrixElement<float>* smat_in, MatrixDim d_out, MatrixIndexT_cuda d_in);
void cuda_copy_from_smat_dd_trans(dim3 Gr, dim3 Bl, double* mat_out, const MatrixElement<double>* smat_in, MatrixDim d_out, MatrixIndexT_cuda d_in);

void cudaD_matrix_add_elements(dim3 Gr, dim3 Bl, double *data, MatrixDim dim, double alpha, MatrixElement<double>* x, int s);
void cudaD_comp_obj_deriv(dim3 Gr,dim3 Bl, MatrixElement<double>* x, int s, const double* z, MatrixDim d, double* z2, MatrixDim d2, double* t);

Expand Down
42 changes: 42 additions & 0 deletions src/cudamatrix/cu-kernels.cu
Original file line number Diff line number Diff line change
Expand Up @@ -254,6 +254,24 @@ static void _copy_from_mat_trans(Real* mat_out, const OtherReal* mat_in, MatrixD
mat_out[index_out] = static_cast<Real>(mat_in[index_in]);
}

// Scatters the entries of a sparse matrix into the dense matrix mat_out.
// smat_in holds d_in (row, column, weight) entries; the thread with global
// x-index i handles entry i and stores its weight at
// mat_out[row * d_out.stride + column].  Threads whose index is past the end
// of smat_in do nothing.  Positions of mat_out that no entry refers to are
// not written by this kernel.
template<typename Real, typename OtherReal>
__global__
static void _copy_from_smat(Real* mat_out, const MatrixElement<OtherReal>* smat_in, MatrixDim d_out, MatrixIndexT_cuda d_in) {
  const int i = blockIdx.x * blockDim.x + threadIdx.x;
  if (i < d_in) {
    const MatrixElement<OtherReal>& elem = smat_in[i];
    mat_out[elem.row * d_out.stride + elem.column] =
        static_cast<Real>(elem.weight);
  }
}

// Transposed counterpart of _copy_from_smat: the thread with global x-index
// i stores the weight of entry i of smat_in at
// mat_out[column * d_out.stride + row], i.e. with the roles of the entry's
// row and column swapped.  Threads whose index is past the d_in entries do
// nothing, and positions of mat_out that no entry refers to are not written.
template<typename Real, typename OtherReal>
__global__
static void _copy_from_smat_trans(Real* mat_out, const MatrixElement<OtherReal>* smat_in, MatrixDim d_out, MatrixIndexT_cuda d_in) {
  const int i = blockIdx.x * blockDim.x + threadIdx.x;
  if (i < d_in) {
    const MatrixElement<OtherReal>& elem = smat_in[i];
    mat_out[elem.column * d_out.stride + elem.row] =
        static_cast<Real>(elem.weight);
  }
}

template<typename Real>
__global__
static void _transpose_matrix(Real* mat, MatrixDim d) {
Expand Down Expand Up @@ -2907,3 +2925,27 @@ void cuda_copy_from_mat_dd_trans(dim3 Gr, dim3 Bl, double *mat_out, const double
_copy_from_mat_trans<<<Gr,Bl>>>(mat_out,mat_in,d_out,d_in);
}

// Launchers for the _copy_from_smat kernel, one per combination of dense
// output type and sparse input type.  The suffix reads <output><input>, with
// 'f' for float and 'd' for double; the template arguments are spelled out
// so the instantiation selected by each launcher is visible at a glance.
void cuda_copy_from_smat_ff(dim3 Gr, dim3 Bl, float* mat_out, const MatrixElement<float>* smat_in, MatrixDim d_out, MatrixIndexT_cuda d_in) {
  _copy_from_smat<float, float><<<Gr, Bl>>>(mat_out, smat_in, d_out, d_in);
}

void cuda_copy_from_smat_fd(dim3 Gr, dim3 Bl, float* mat_out, const MatrixElement<double>* smat_in, MatrixDim d_out, MatrixIndexT_cuda d_in) {
  _copy_from_smat<float, double><<<Gr, Bl>>>(mat_out, smat_in, d_out, d_in);
}

void cuda_copy_from_smat_df(dim3 Gr, dim3 Bl, double* mat_out, const MatrixElement<float>* smat_in, MatrixDim d_out, MatrixIndexT_cuda d_in) {
  _copy_from_smat<double, float><<<Gr, Bl>>>(mat_out, smat_in, d_out, d_in);
}

void cuda_copy_from_smat_dd(dim3 Gr, dim3 Bl, double* mat_out, const MatrixElement<double>* smat_in, MatrixDim d_out, MatrixIndexT_cuda d_in) {
  _copy_from_smat<double, double><<<Gr, Bl>>>(mat_out, smat_in, d_out, d_in);
}
// Launchers for the _copy_from_smat_trans kernel, one per combination of
// dense output type and sparse input type.  The letters before "_trans" read
// <output><input>, with 'f' for float and 'd' for double; the template
// arguments are spelled out to make the chosen instantiation explicit.
void cuda_copy_from_smat_ff_trans(dim3 Gr, dim3 Bl, float* mat_out, const MatrixElement<float>* smat_in, MatrixDim d_out, MatrixIndexT_cuda d_in) {
  _copy_from_smat_trans<float, float><<<Gr, Bl>>>(mat_out, smat_in, d_out, d_in);
}

void cuda_copy_from_smat_fd_trans(dim3 Gr, dim3 Bl, float* mat_out, const MatrixElement<double>* smat_in, MatrixDim d_out, MatrixIndexT_cuda d_in) {
  _copy_from_smat_trans<float, double><<<Gr, Bl>>>(mat_out, smat_in, d_out, d_in);
}

void cuda_copy_from_smat_df_trans(dim3 Gr, dim3 Bl, double* mat_out, const MatrixElement<float>* smat_in, MatrixDim d_out, MatrixIndexT_cuda d_in) {
  _copy_from_smat_trans<double, float><<<Gr, Bl>>>(mat_out, smat_in, d_out, d_in);
}

void cuda_copy_from_smat_dd_trans(dim3 Gr, dim3 Bl, double* mat_out, const MatrixElement<double>* smat_in, MatrixDim d_out, MatrixIndexT_cuda d_in) {
  _copy_from_smat_trans<double, double><<<Gr, Bl>>>(mat_out, smat_in, d_out, d_in);
}
26 changes: 26 additions & 0 deletions src/cudamatrix/cu-kernels.h
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,32 @@ inline void cuda_copy_from_mat_trans(dim3 Gr, dim3 Bl, double* mat_out, const fl
cuda_copy_from_mat_df_trans(Gr, Bl, mat_out, mat_in, d_out, d_in);
}

// Overloads of cuda_copy_from_smat: the element types of the dense output
// and of the sparse input pick the matching type-suffixed entry point
// (_ff, _fd, _df or _dd), to which all arguments are forwarded unchanged.
inline void cuda_copy_from_smat(dim3 Gr, dim3 Bl,
                                float* mat_out,
                                const MatrixElement<float>* smat_in,
                                MatrixDim d_out, MatrixIndexT_cuda d_in) {
  cuda_copy_from_smat_ff(Gr, Bl, mat_out, smat_in, d_out, d_in);
}

inline void cuda_copy_from_smat(dim3 Gr, dim3 Bl,
                                float* mat_out,
                                const MatrixElement<double>* smat_in,
                                MatrixDim d_out, MatrixIndexT_cuda d_in) {
  cuda_copy_from_smat_fd(Gr, Bl, mat_out, smat_in, d_out, d_in);
}

inline void cuda_copy_from_smat(dim3 Gr, dim3 Bl,
                                double* mat_out,
                                const MatrixElement<float>* smat_in,
                                MatrixDim d_out, MatrixIndexT_cuda d_in) {
  cuda_copy_from_smat_df(Gr, Bl, mat_out, smat_in, d_out, d_in);
}

inline void cuda_copy_from_smat(dim3 Gr, dim3 Bl,
                                double* mat_out,
                                const MatrixElement<double>* smat_in,
                                MatrixDim d_out, MatrixIndexT_cuda d_in) {
  cuda_copy_from_smat_dd(Gr, Bl, mat_out, smat_in, d_out, d_in);
}

// Overloads of cuda_copy_from_smat_trans: the element types of the dense
// output and of the sparse input pick the matching type-suffixed entry point
// (_ff_trans, _fd_trans, _df_trans or _dd_trans), to which all arguments are
// forwarded unchanged.
inline void cuda_copy_from_smat_trans(dim3 Gr, dim3 Bl,
                                      float* mat_out,
                                      const MatrixElement<float>* smat_in,
                                      MatrixDim d_out, MatrixIndexT_cuda d_in) {
  cuda_copy_from_smat_ff_trans(Gr, Bl, mat_out, smat_in, d_out, d_in);
}

inline void cuda_copy_from_smat_trans(dim3 Gr, dim3 Bl,
                                      float* mat_out,
                                      const MatrixElement<double>* smat_in,
                                      MatrixDim d_out, MatrixIndexT_cuda d_in) {
  cuda_copy_from_smat_fd_trans(Gr, Bl, mat_out, smat_in, d_out, d_in);
}

inline void cuda_copy_from_smat_trans(dim3 Gr, dim3 Bl,
                                      double* mat_out,
                                      const MatrixElement<float>* smat_in,
                                      MatrixDim d_out, MatrixIndexT_cuda d_in) {
  cuda_copy_from_smat_df_trans(Gr, Bl, mat_out, smat_in, d_out, d_in);
}

inline void cuda_copy_from_smat_trans(dim3 Gr, dim3 Bl,
                                      double* mat_out,
                                      const MatrixElement<double>* smat_in,
                                      MatrixDim d_out, MatrixIndexT_cuda d_in) {
  cuda_copy_from_smat_dd_trans(Gr, Bl, mat_out, smat_in, d_out, d_in);
}

// Single-precision (float) overloads: each one forwards its arguments
// unchanged to the correspondingly named cudaF_* entry point.
inline void cuda_copy_col_from_vec(int Gr, int Bl, float* mat, const float* v, int col, MatrixDim d) { cudaF_copy_col_from_vec(Gr,Bl,mat,v,col,d); }
inline void cuda_apply_exp(dim3 Gr, dim3 Bl, float* mat, MatrixDim d) { cudaF_apply_exp(Gr,Bl,mat,d); }
inline void cuda_apply_pow(dim3 Gr, dim3 Bl, float* mat, float power, MatrixDim dim) { cudaF_apply_pow(Gr,Bl,mat,power,dim); }
Expand Down
1 change: 1 addition & 0 deletions src/cudamatrix/cu-matrix-lib.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
#include "cudamatrix/cu-matrix.h"
#include "cudamatrix/cu-sp-matrix.h"
#include "cudamatrix/cu-tp-matrix.h"
#include "cudamatrix/cu-sparse-matrix.h"
#include "cudamatrix/cu-block-matrix.h"
#include "cudamatrix/cu-rand.h"

Expand Down
51 changes: 47 additions & 4 deletions src/cudamatrix/cu-matrix.cc
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,6 @@
#include "cudamatrix/cu-sp-matrix.h"
#include "cudamatrix/cu-tp-matrix.h"
#include "cudamatrix/cu-block-matrix.h"
#include "cudamatrix/cu-sparse-matrix.h"
#include "cudamatrix/cublas-wrappers.h"

namespace kaldi {
Expand Down Expand Up @@ -255,6 +254,53 @@ template
void CuMatrixBase<double>::CopyFromMat<double>(const CuMatrixBase<double> &M,
MatrixTransposeType Trans);

// Copies the sparse matrix M into this dense matrix; if trans is anything
// other than kNoTrans, the transpose of M is copied instead.  This matrix
// must already have the dimensions of the (possibly transposed) source,
// which is asserted below.  Every position for which M stores no element
// ends up as zero.
template <typename Real>
template <typename OtherReal>
void CuMatrixBase<Real>::CopyFromSmat(const CuSparseMatrix<OtherReal> &M,
                                      MatrixTransposeType trans) {
  // Sanity check.
  if (trans == kNoTrans) {
    KALDI_ASSERT(M.NumRows() == num_rows_ && M.NumCols() == num_cols_);
  } else {
    KALDI_ASSERT(M.NumCols() == num_rows_ && M.NumRows() == num_cols_);
  }
#if HAVE_CUDA == 1
  if (CuDevice::Instantiate().Enabled()) {
    Timer tim;
    // The copy kernels write only the positions named by the stored
    // elements of M, so the destination has to be cleared first; otherwise
    // whatever was previously in this matrix would survive at every position
    // where M has no entry.
    this->SetZero();
    // With no stored elements the grid would have zero blocks, which is not
    // a valid launch configuration; the zeroed matrix is already the result.
    if (M.NumElements() > 0) {
      dim3 dimBlock(CU1DBLOCK, 1);
      dim3 dimGrid(n_blocks(M.NumElements(), CU1DBLOCK), 1);
      if (trans == kNoTrans) {
        cuda_copy_from_smat(dimGrid, dimBlock, this->data_,
                            M.Data(), this->Dim(), M.NumElements());
      } else {
        cuda_copy_from_smat_trans(dimGrid, dimBlock, this->data_,
                                  M.Data(), this->Dim(), M.NumElements());
      }
    }
    CuDevice::Instantiate().AccuProfile(__func__, tim.Elapsed());
  } else
#endif
  {
    // No GPU in use: delegate to the host-side matrix implementation.
    Mat().CopyFromSmat(M.Mat(), trans);
  }
}

// Instantiate the template above for every combination of destination
// element type (CuMatrixBase<float> / CuMatrixBase<double>) and source
// element type (CuSparseMatrix<float> / CuSparseMatrix<double>).
template
void CuMatrixBase<float>::CopyFromSmat<float>(const CuSparseMatrix<float> &M,
MatrixTransposeType trans);

template
void CuMatrixBase<float>::CopyFromSmat<double>(const CuSparseMatrix<double> &M,
MatrixTransposeType trans);

template
void CuMatrixBase<double>::CopyFromSmat<float>(const CuSparseMatrix<float> &M,
MatrixTransposeType trans);

template
void CuMatrixBase<double>::CopyFromSmat<double>(const CuSparseMatrix<double> &M,
MatrixTransposeType trans);

template<typename Real>
template<typename OtherReal>
void CuMatrixBase<Real>::CopyFromTp(const CuTpMatrix<OtherReal> &M,
Expand Down Expand Up @@ -2240,9 +2286,6 @@ void CuMatrixBase<Real>::CopyFromGeneralMat(const GeneralMatrix &src,
return;
}
#endif
Matrix<BaseFloat> mat(trans == kNoTrans ? smat.NumRows() : smat.NumCols(),
trans == kNoTrans ? smat.NumCols() : smat.NumRows(),
kUndefined);
Mat().CopyFromSmat(smat, trans);
return;
}
Expand Down
1 change: 1 addition & 0 deletions src/cudamatrix/cu-matrix.h
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
#include "cudamatrix/cu-matrixdim.h"
#include "cudamatrix/cu-common.h"
#include "cudamatrix/cu-value.h"
#include "cudamatrix/cu-sparse-matrix.h"
#include "matrix/matrix-common.h"
#include "matrix/kaldi-matrix.h"
#include "matrix/sparse-matrix.h"
Expand Down
Loading