Skip to content

Commit

Permalink
Merge pull request #36 from chenguoguo/guoguo-nnet3
Browse files Browse the repository at this point in the history
intermediate work on sparse matrix, nnet3bin now compiles again
  • Loading branch information
danpovey committed Jul 30, 2015
2 parents 53baf84 + 2e9f6f5 commit b1a372d
Show file tree
Hide file tree
Showing 12 changed files with 260 additions and 2,414 deletions.
3 changes: 2 additions & 1 deletion src/cudamatrix/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,8 @@ TESTFILES = cu-vector-test cu-matrix-test cu-math-test cu-test cu-sp-matrix-test


OBJFILES = cu-device.o cu-math.o cu-matrix.o cu-packed-matrix.o cu-sp-matrix.o \
cu-vector.o cu-common.o cu-tp-matrix.o cu-rand.o cu-block-matrix.o
cu-vector.o cu-common.o cu-tp-matrix.o cu-rand.o cu-block-matrix.o \
cu-sparse-matrix.o
ifeq ($(CUDA), true)
OBJFILES += cu-kernels.o cu-randkernels.o
endif
Expand Down
9 changes: 9 additions & 0 deletions src/cudamatrix/cu-kernels-ansi.h
Original file line number Diff line number Diff line change
Expand Up @@ -303,6 +303,15 @@ void cuda_copy_from_mat_ff_trans(dim3 Gr, dim3 Bl, float* mat_out, const float*
void cuda_copy_from_mat_fd_trans(dim3 Gr, dim3 Bl, float *mat_out, const double* mat_in, MatrixDim d_out, MatrixDim d_in);
void cuda_copy_from_mat_dd_trans(dim3 Gr, dim3 Bl, double *mat_out, const double* mat_in, MatrixDim d_out, MatrixDim d_in);

// Sparse-to-dense copy wrappers.  The two-letter suffix encodes the element
// types: the first letter is the type of the dense destination `mat_out`, the
// second the type of the sparse source `smat_in` (f = float, d = double).
// `d_out` describes the dense destination; `d_in` is the number of
// MatrixElement entries in `smat_in`.  The `_trans` variants write each entry
// at the transposed (column, row) position.
void cuda_copy_from_smat_ff(dim3 Gr, dim3 Bl, float* mat_out, const MatrixElement<float>* smat_in, MatrixDim d_out, MatrixIndexT_cuda d_in);
void cuda_copy_from_smat_fd(dim3 Gr, dim3 Bl, float* mat_out, const MatrixElement<double>* smat_in, MatrixDim d_out, MatrixIndexT_cuda d_in);
void cuda_copy_from_smat_df(dim3 Gr, dim3 Bl, double* mat_out, const MatrixElement<float>* smat_in, MatrixDim d_out, MatrixIndexT_cuda d_in);
void cuda_copy_from_smat_dd(dim3 Gr, dim3 Bl, double* mat_out, const MatrixElement<double>* smat_in, MatrixDim d_out, MatrixIndexT_cuda d_in);
void cuda_copy_from_smat_ff_trans(dim3 Gr, dim3 Bl, float* mat_out, const MatrixElement<float>* smat_in, MatrixDim d_out, MatrixIndexT_cuda d_in);
void cuda_copy_from_smat_fd_trans(dim3 Gr, dim3 Bl, float* mat_out, const MatrixElement<double>* smat_in, MatrixDim d_out, MatrixIndexT_cuda d_in);
void cuda_copy_from_smat_df_trans(dim3 Gr, dim3 Bl, double* mat_out, const MatrixElement<float>* smat_in, MatrixDim d_out, MatrixIndexT_cuda d_in);
void cuda_copy_from_smat_dd_trans(dim3 Gr, dim3 Bl, double* mat_out, const MatrixElement<double>* smat_in, MatrixDim d_out, MatrixIndexT_cuda d_in);

void cudaD_matrix_add_elements(dim3 Gr, dim3 Bl, double *data, MatrixDim dim, double alpha, MatrixElement<double>* x, int s);
void cudaD_comp_obj_deriv(dim3 Gr,dim3 Bl, MatrixElement<double>* x, int s, const double* z, MatrixDim d, double* z2, MatrixDim d2, double* t);

Expand Down
42 changes: 42 additions & 0 deletions src/cudamatrix/cu-kernels.cu
Original file line number Diff line number Diff line change
Expand Up @@ -254,6 +254,24 @@ static void _copy_from_mat_trans(Real* mat_out, const OtherReal* mat_in, MatrixD
mat_out[index_out] = static_cast<Real>(mat_in[index_in]);
}

// Scatters the entries of a sparse matrix into a dense matrix.
//
// Launch layout: 1-D grid of 1-D blocks; the thread with global index i
// handles smat_in[i].  Threads with i >= d_in do nothing.
//
//   mat_out  dense destination, addressed as row * d_out.stride + column
//   smat_in  array of (row, column, weight) entries
//   d_out    dimensions of the destination (only `stride` is read here)
//   d_in     number of entries in smat_in
//
// Only the positions named by the entries are written; every other element
// of mat_out is left untouched.
template<typename Real, typename OtherReal>
__global__
static void _copy_from_smat(Real* mat_out, const MatrixElement<OtherReal>* smat_in, MatrixDim d_out, MatrixIndexT_cuda d_in) {
  const int elem = blockIdx.x * blockDim.x + threadIdx.x;
  if (elem < d_in) {
    const MatrixElement<OtherReal>& entry = smat_in[elem];
    const int offset = entry.row * d_out.stride + entry.column;
    mat_out[offset] = static_cast<Real>(entry.weight);
  }
}

// Scatters the entries of a sparse matrix into a dense matrix, transposed:
// the entry (row, column, weight) is written at dense position (column, row).
//
// Launch layout: 1-D grid of 1-D blocks; the thread with global index i
// handles smat_in[i].  Threads with i >= d_in do nothing.
//
//   mat_out  dense destination, addressed as column * d_out.stride + row
//   smat_in  array of (row, column, weight) entries
//   d_out    dimensions of the destination (only `stride` is read here)
//   d_in     number of entries in smat_in
//
// Only the positions named by the entries are written; every other element
// of mat_out is left untouched.
template<typename Real, typename OtherReal>
__global__
static void _copy_from_smat_trans(Real* mat_out, const MatrixElement<OtherReal>* smat_in, MatrixDim d_out, MatrixIndexT_cuda d_in) {
  const int elem = blockIdx.x * blockDim.x + threadIdx.x;
  if (elem < d_in) {
    const MatrixElement<OtherReal>& entry = smat_in[elem];
    const int offset = entry.column * d_out.stride + entry.row;
    mat_out[offset] = static_cast<Real>(entry.weight);
  }
}

template<typename Real>
__global__
static void _transpose_matrix(Real* mat, MatrixDim d) {
Expand Down Expand Up @@ -2907,3 +2925,27 @@ void cuda_copy_from_mat_dd_trans(dim3 Gr, dim3 Bl, double *mat_out, const double
_copy_from_mat_trans<<<Gr,Bl>>>(mat_out,mat_in,d_out,d_in);
}

// Launches _copy_from_smat for a float destination and a float sparse source.
void cuda_copy_from_smat_ff(dim3 Gr, dim3 Bl, float* mat_out, const MatrixElement<float>* smat_in, MatrixDim d_out, MatrixIndexT_cuda d_in) {
  _copy_from_smat<float, float><<<Gr, Bl>>>(mat_out, smat_in, d_out, d_in);
}
// Launches _copy_from_smat for a float destination and a double sparse source.
void cuda_copy_from_smat_fd(dim3 Gr, dim3 Bl, float* mat_out, const MatrixElement<double>* smat_in, MatrixDim d_out, MatrixIndexT_cuda d_in) {
  _copy_from_smat<float, double><<<Gr, Bl>>>(mat_out, smat_in, d_out, d_in);
}
// Launches _copy_from_smat for a double destination and a float sparse source.
void cuda_copy_from_smat_df(dim3 Gr, dim3 Bl, double* mat_out, const MatrixElement<float>* smat_in, MatrixDim d_out, MatrixIndexT_cuda d_in) {
  _copy_from_smat<double, float><<<Gr, Bl>>>(mat_out, smat_in, d_out, d_in);
}
// Launches _copy_from_smat for a double destination and a double sparse source.
void cuda_copy_from_smat_dd(dim3 Gr, dim3 Bl, double* mat_out, const MatrixElement<double>* smat_in, MatrixDim d_out, MatrixIndexT_cuda d_in) {
  _copy_from_smat<double, double><<<Gr, Bl>>>(mat_out, smat_in, d_out, d_in);
}
// Launches _copy_from_smat_trans for a float destination and a float sparse source.
void cuda_copy_from_smat_ff_trans(dim3 Gr, dim3 Bl, float* mat_out, const MatrixElement<float>* smat_in, MatrixDim d_out, MatrixIndexT_cuda d_in) {
  _copy_from_smat_trans<float, float><<<Gr, Bl>>>(mat_out, smat_in, d_out, d_in);
}
// Launches _copy_from_smat_trans for a float destination and a double sparse source.
void cuda_copy_from_smat_fd_trans(dim3 Gr, dim3 Bl, float* mat_out, const MatrixElement<double>* smat_in, MatrixDim d_out, MatrixIndexT_cuda d_in) {
  _copy_from_smat_trans<float, double><<<Gr, Bl>>>(mat_out, smat_in, d_out, d_in);
}
// Launches _copy_from_smat_trans for a double destination and a float sparse source.
void cuda_copy_from_smat_df_trans(dim3 Gr, dim3 Bl, double* mat_out, const MatrixElement<float>* smat_in, MatrixDim d_out, MatrixIndexT_cuda d_in) {
  _copy_from_smat_trans<double, float><<<Gr, Bl>>>(mat_out, smat_in, d_out, d_in);
}
// Launches _copy_from_smat_trans for a double destination and a double sparse source.
void cuda_copy_from_smat_dd_trans(dim3 Gr, dim3 Bl, double* mat_out, const MatrixElement<double>* smat_in, MatrixDim d_out, MatrixIndexT_cuda d_in) {
  _copy_from_smat_trans<double, double><<<Gr, Bl>>>(mat_out, smat_in, d_out, d_in);
}
26 changes: 26 additions & 0 deletions src/cudamatrix/cu-kernels.h
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,32 @@ inline void cuda_copy_from_mat_trans(dim3 Gr, dim3 Bl, double* mat_out, const fl
cuda_copy_from_mat_df_trans(Gr, Bl, mat_out, mat_in, d_out, d_in);
}

// Overload for a float destination and a float sparse source; forwards to
// the _ff wrapper.
inline void cuda_copy_from_smat(dim3 Gr, dim3 Bl,
                                float* mat_out,
                                const MatrixElement<float>* smat_in,
                                MatrixDim d_out,
                                MatrixIndexT_cuda d_in) {
  cuda_copy_from_smat_ff(Gr, Bl, mat_out, smat_in, d_out, d_in);
}
// Overload for a float destination and a double sparse source; forwards to
// the _fd wrapper.
inline void cuda_copy_from_smat(dim3 Gr, dim3 Bl,
                                float* mat_out,
                                const MatrixElement<double>* smat_in,
                                MatrixDim d_out,
                                MatrixIndexT_cuda d_in) {
  cuda_copy_from_smat_fd(Gr, Bl, mat_out, smat_in, d_out, d_in);
}
// Overload for a double destination and a float sparse source; forwards to
// the _df wrapper.
inline void cuda_copy_from_smat(dim3 Gr, dim3 Bl,
                                double* mat_out,
                                const MatrixElement<float>* smat_in,
                                MatrixDim d_out,
                                MatrixIndexT_cuda d_in) {
  cuda_copy_from_smat_df(Gr, Bl, mat_out, smat_in, d_out, d_in);
}
// Overload for a double destination and a double sparse source; forwards to
// the _dd wrapper.
inline void cuda_copy_from_smat(dim3 Gr, dim3 Bl,
                                double* mat_out,
                                const MatrixElement<double>* smat_in,
                                MatrixDim d_out,
                                MatrixIndexT_cuda d_in) {
  cuda_copy_from_smat_dd(Gr, Bl, mat_out, smat_in, d_out, d_in);
}

// Transposing overload for a float destination and a float sparse source;
// forwards to the _ff_trans wrapper.
inline void cuda_copy_from_smat_trans(dim3 Gr, dim3 Bl,
                                      float* mat_out,
                                      const MatrixElement<float>* smat_in,
                                      MatrixDim d_out,
                                      MatrixIndexT_cuda d_in) {
  cuda_copy_from_smat_ff_trans(Gr, Bl, mat_out, smat_in, d_out, d_in);
}
// Transposing overload for a float destination and a double sparse source;
// forwards to the _fd_trans wrapper.
inline void cuda_copy_from_smat_trans(dim3 Gr, dim3 Bl,
                                      float* mat_out,
                                      const MatrixElement<double>* smat_in,
                                      MatrixDim d_out,
                                      MatrixIndexT_cuda d_in) {
  cuda_copy_from_smat_fd_trans(Gr, Bl, mat_out, smat_in, d_out, d_in);
}
// Transposing overload for a double destination and a float sparse source;
// forwards to the _df_trans wrapper.
inline void cuda_copy_from_smat_trans(dim3 Gr, dim3 Bl,
                                      double* mat_out,
                                      const MatrixElement<float>* smat_in,
                                      MatrixDim d_out,
                                      MatrixIndexT_cuda d_in) {
  cuda_copy_from_smat_df_trans(Gr, Bl, mat_out, smat_in, d_out, d_in);
}
// Transposing overload for a double destination and a double sparse source;
// forwards to the _dd_trans wrapper.
inline void cuda_copy_from_smat_trans(dim3 Gr, dim3 Bl,
                                      double* mat_out,
                                      const MatrixElement<double>* smat_in,
                                      MatrixDim d_out,
                                      MatrixIndexT_cuda d_in) {
  cuda_copy_from_smat_dd_trans(Gr, Bl, mat_out, smat_in, d_out, d_in);
}

inline void cuda_copy_col_from_vec(int Gr, int Bl, float* mat, const float* v, int col, MatrixDim d) { cudaF_copy_col_from_vec(Gr,Bl,mat,v,col,d); }
inline void cuda_apply_exp(dim3 Gr, dim3 Bl, float* mat, MatrixDim d) { cudaF_apply_exp(Gr,Bl,mat,d); }
inline void cuda_apply_pow(dim3 Gr, dim3 Bl, float* mat, float power, MatrixDim dim) { cudaF_apply_pow(Gr,Bl,mat,power,dim); }
Expand Down
1 change: 1 addition & 0 deletions src/cudamatrix/cu-matrix-lib.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
#include "cudamatrix/cu-matrix.h"
#include "cudamatrix/cu-sp-matrix.h"
#include "cudamatrix/cu-tp-matrix.h"
#include "cudamatrix/cu-sparse-matrix.h"
#include "cudamatrix/cu-block-matrix.h"
#include "cudamatrix/cu-rand.h"

Expand Down
51 changes: 47 additions & 4 deletions src/cudamatrix/cu-matrix.cc
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,6 @@
#include "cudamatrix/cu-sp-matrix.h"
#include "cudamatrix/cu-tp-matrix.h"
#include "cudamatrix/cu-block-matrix.h"
#include "cudamatrix/cu-sparse-matrix.h"
#include "cudamatrix/cublas-wrappers.h"

namespace kaldi {
Expand Down Expand Up @@ -255,6 +254,53 @@ template
void CuMatrixBase<double>::CopyFromMat<double>(const CuMatrixBase<double> &M,
MatrixTransposeType Trans);

// Copies the sparse matrix M (or its transpose when trans != kNoTrans) into
// this dense matrix.
//
// The dimensions must already agree: M is num_rows_ x num_cols_ for
// kNoTrans, or num_cols_ x num_rows_ otherwise; this is enforced with
// KALDI_ASSERT.
//
// On the GPU path the scatter kernel writes only the entries stored in M, so
// the destination is cleared first; otherwise positions that are zero in M
// would keep whatever this matrix held before the call.  When M stores no
// entries the kernel is not launched at all, because a grid of zero blocks
// is an invalid launch configuration.
//
// Without CUDA (or when the device is not enabled) the copy is delegated to
// the host-side matrix's CopyFromSmat.
template <typename Real>
template <typename OtherReal>
void CuMatrixBase<Real>::CopyFromSmat(const CuSparseMatrix<OtherReal> &M,
                                      MatrixTransposeType trans) {
  // Sanity check.
  if (trans == kNoTrans) {
    KALDI_ASSERT(M.NumRows() == num_rows_ && M.NumCols() == num_cols_);
  } else {
    KALDI_ASSERT(M.NumCols() == num_rows_ && M.NumRows() == num_cols_);
  }
#if HAVE_CUDA == 1
  if (CuDevice::Instantiate().Enabled()) {
    Timer tim;
    // The kernel touches only the stored entries; everything else must be 0.
    this->SetZero();
    const MatrixIndexT num_elements = M.NumElements();
    if (num_elements > 0) {
      dim3 dimBlock(CU1DBLOCK, 1);
      dim3 dimGrid(n_blocks(num_elements, CU1DBLOCK), 1);
      if (trans == kNoTrans) {
        cuda_copy_from_smat(dimGrid, dimBlock, this->data_,
                            M.Data(), this->Dim(), num_elements);
      } else {
        cuda_copy_from_smat_trans(dimGrid, dimBlock, this->data_,
                                  M.Data(), this->Dim(), num_elements);
      }
      // Kernel launches do not return a status; surface launch errors here.
      CU_SAFE_CALL(cudaGetLastError());
    }
    CuDevice::Instantiate().AccuProfile(__func__, tim.Elapsed());
  } else
#endif
  {
    Mat().CopyFromSmat(M.Mat(), trans);
  }
}

// Explicit instantiations of CuMatrixBase<Real>::CopyFromSmat<OtherReal> for
// every float/double combination of destination (Real) and sparse source
// (OtherReal) element type.
template
void CuMatrixBase<float>::CopyFromSmat<float>(const CuSparseMatrix<float> &M,
MatrixTransposeType trans);

template
void CuMatrixBase<float>::CopyFromSmat<double>(const CuSparseMatrix<double> &M,
MatrixTransposeType trans);

template
void CuMatrixBase<double>::CopyFromSmat<float>(const CuSparseMatrix<float> &M,
MatrixTransposeType trans);

template
void CuMatrixBase<double>::CopyFromSmat<double>(const CuSparseMatrix<double> &M,
MatrixTransposeType trans);

template<typename Real>
template<typename OtherReal>
void CuMatrixBase<Real>::CopyFromTp(const CuTpMatrix<OtherReal> &M,
Expand Down Expand Up @@ -2240,9 +2286,6 @@ void CuMatrixBase<Real>::CopyFromGeneralMat(const GeneralMatrix &src,
return;
}
#endif
Matrix<BaseFloat> mat(trans == kNoTrans ? smat.NumRows() : smat.NumCols(),
trans == kNoTrans ? smat.NumCols() : smat.NumRows(),
kUndefined);
Mat().CopyFromSmat(smat, trans);
return;
}
Expand Down
1 change: 1 addition & 0 deletions src/cudamatrix/cu-matrix.h
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
#include "cudamatrix/cu-matrixdim.h"
#include "cudamatrix/cu-common.h"
#include "cudamatrix/cu-value.h"
#include "cudamatrix/cu-sparse-matrix.h"
#include "matrix/matrix-common.h"
#include "matrix/kaldi-matrix.h"
#include "matrix/sparse-matrix.h"
Expand Down
Loading

0 comments on commit b1a372d

Please sign in to comment.