Skip to content

Commit

Permalink
basis - add GPU ApplyAddAtPoints
Browse files Browse the repository at this point in the history
  • Loading branch information
jeremylt committed Aug 12, 2024
1 parent 3b2de1f commit aca9cec
Show file tree
Hide file tree
Showing 7 changed files with 80 additions and 20 deletions.
22 changes: 18 additions & 4 deletions backends/cuda-ref/ceed-cuda-ref-basis.c
Original file line number Diff line number Diff line change
Expand Up @@ -100,8 +100,8 @@ static int CeedBasisApplyAdd_Cuda(CeedBasis basis, const CeedInt num_elem, CeedT
//------------------------------------------------------------------------------
// Basis apply - tensor AtPoints
//------------------------------------------------------------------------------
static int CeedBasisApplyAtPoints_Cuda(CeedBasis basis, const CeedInt num_elem, const CeedInt *num_points, CeedTransposeMode t_mode,
CeedEvalMode eval_mode, CeedVector x_ref, CeedVector u, CeedVector v) {
static int CeedBasisApplyAtPointsCore_Cuda(CeedBasis basis, bool apply_add, const CeedInt num_elem, const CeedInt *num_points,
CeedTransposeMode t_mode, CeedEvalMode eval_mode, CeedVector x_ref, CeedVector u, CeedVector v) {
Ceed ceed;
CeedInt Q_1d, dim, max_num_points = num_points[0];
const CeedInt is_transpose = t_mode == CEED_TRANSPOSE;
Expand Down Expand Up @@ -172,10 +172,11 @@ static int CeedBasisApplyAtPoints_Cuda(CeedBasis basis, const CeedInt num_elem,
CeedCallBackend(CeedVectorGetArrayRead(x_ref, CEED_MEM_DEVICE, &d_x));
if (u != CEED_VECTOR_NONE) CeedCallBackend(CeedVectorGetArrayRead(u, CEED_MEM_DEVICE, &d_u));
else CeedCheck(eval_mode == CEED_EVAL_WEIGHT, ceed, CEED_ERROR_BACKEND, "An input vector is required for this CeedEvalMode");
CeedCallBackend(CeedVectorGetArrayWrite(v, CEED_MEM_DEVICE, &d_v));
if (apply_add) CeedCallBackend(CeedVectorGetArray(v, CEED_MEM_DEVICE, &d_v));
else CeedCallBackend(CeedVectorGetArrayWrite(v, CEED_MEM_DEVICE, &d_v));

// Clear v for transpose operation
if (is_transpose) {
if (is_transpose && !apply_add) {
CeedSize length;

CeedCallBackend(CeedVectorGetLength(v, &length));
Expand Down Expand Up @@ -214,6 +215,18 @@ static int CeedBasisApplyAtPoints_Cuda(CeedBasis basis, const CeedInt num_elem,
return CEED_ERROR_SUCCESS;
}

static int CeedBasisApplyAtPoints_Cuda(CeedBasis basis, const CeedInt num_elem, const CeedInt *num_points, CeedTransposeMode t_mode,
CeedEvalMode eval_mode, CeedVector x_ref, CeedVector u, CeedVector v) {
CeedCallBackend(CeedBasisApplyAtPointsCore_Cuda(basis, false, num_elem, num_points, t_mode, eval_mode, x_ref, u, v));
return CEED_ERROR_SUCCESS;
}

static int CeedBasisApplyAddAtPoints_Cuda(CeedBasis basis, const CeedInt num_elem, const CeedInt *num_points, CeedTransposeMode t_mode,
CeedEvalMode eval_mode, CeedVector x_ref, CeedVector u, CeedVector v) {
CeedCallBackend(CeedBasisApplyAtPointsCore_Cuda(basis, true, num_elem, num_points, t_mode, eval_mode, x_ref, u, v));
return CEED_ERROR_SUCCESS;
}

//------------------------------------------------------------------------------
// Basis apply - non-tensor
//------------------------------------------------------------------------------
Expand Down Expand Up @@ -403,6 +416,7 @@ int CeedBasisCreateTensorH1_Cuda(CeedInt dim, CeedInt P_1d, CeedInt Q_1d, const
CeedCallBackend(CeedSetBackendFunction(ceed, "Basis", basis, "Apply", CeedBasisApply_Cuda));
CeedCallBackend(CeedSetBackendFunction(ceed, "Basis", basis, "ApplyAdd", CeedBasisApplyAdd_Cuda));
CeedCallBackend(CeedSetBackendFunction(ceed, "Basis", basis, "ApplyAtPoints", CeedBasisApplyAtPoints_Cuda));
CeedCallBackend(CeedSetBackendFunction(ceed, "Basis", basis, "ApplyAddAtPoints", CeedBasisApplyAddAtPoints_Cuda));
CeedCallBackend(CeedSetBackendFunction(ceed, "Basis", basis, "Destroy", CeedBasisDestroy_Cuda));
return CEED_ERROR_SUCCESS;
}
Expand Down
21 changes: 17 additions & 4 deletions backends/cuda-shared/ceed-cuda-shared-basis.c
Original file line number Diff line number Diff line change
Expand Up @@ -206,8 +206,8 @@ static int CeedBasisApplyAddTensor_Cuda_shared(CeedBasis basis, const CeedInt nu
//------------------------------------------------------------------------------
// Basis apply - tensor AtPoints
//------------------------------------------------------------------------------
static int CeedBasisApplyAtPoints_Cuda_shared(CeedBasis basis, const CeedInt num_elem, const CeedInt *num_points, CeedTransposeMode t_mode,
CeedEvalMode eval_mode, CeedVector x_ref, CeedVector u, CeedVector v) {
static int CeedBasisApplyAtPointsCore_Cuda_shared(CeedBasis basis, bool apply_add, const CeedInt num_elem, const CeedInt *num_points,
CeedTransposeMode t_mode, CeedEvalMode eval_mode, CeedVector x_ref, CeedVector u, CeedVector v) {
Ceed ceed;
CeedInt Q_1d, dim, max_num_points = num_points[0];
const CeedInt is_transpose = t_mode == CEED_TRANSPOSE;
Expand Down Expand Up @@ -278,10 +278,11 @@ static int CeedBasisApplyAtPoints_Cuda_shared(CeedBasis basis, const CeedInt num
CeedCallBackend(CeedVectorGetArrayRead(x_ref, CEED_MEM_DEVICE, &d_x));
if (u != CEED_VECTOR_NONE) CeedCallBackend(CeedVectorGetArrayRead(u, CEED_MEM_DEVICE, &d_u));
else CeedCheck(eval_mode == CEED_EVAL_WEIGHT, ceed, CEED_ERROR_BACKEND, "An input vector is required for this CeedEvalMode");
CeedCallBackend(CeedVectorGetArrayWrite(v, CEED_MEM_DEVICE, &d_v));
if (apply_add) CeedCallBackend(CeedVectorGetArray(v, CEED_MEM_DEVICE, &d_v));
else CeedCallBackend(CeedVectorGetArrayWrite(v, CEED_MEM_DEVICE, &d_v));

// Clear v for transpose operation
if (is_transpose) {
if (is_transpose && !apply_add) {
CeedSize length;

CeedCallBackend(CeedVectorGetLength(v, &length));
Expand Down Expand Up @@ -320,6 +321,18 @@ static int CeedBasisApplyAtPoints_Cuda_shared(CeedBasis basis, const CeedInt num
return CEED_ERROR_SUCCESS;
}

static int CeedBasisApplyAtPoints_Cuda_shared(CeedBasis basis, const CeedInt num_elem, const CeedInt *num_points, CeedTransposeMode t_mode,
CeedEvalMode eval_mode, CeedVector x_ref, CeedVector u, CeedVector v) {
CeedCallBackend(CeedBasisApplyAtPointsCore_Cuda_shared(basis, false, num_elem, num_points, t_mode, eval_mode, x_ref, u, v));
return CEED_ERROR_SUCCESS;
}

static int CeedBasisApplyAddAtPoints_Cuda_shared(CeedBasis basis, const CeedInt num_elem, const CeedInt *num_points, CeedTransposeMode t_mode,
CeedEvalMode eval_mode, CeedVector x_ref, CeedVector u, CeedVector v) {
CeedCallBackend(CeedBasisApplyAtPointsCore_Cuda_shared(basis, true, num_elem, num_points, t_mode, eval_mode, x_ref, u, v));
return CEED_ERROR_SUCCESS;
}

//------------------------------------------------------------------------------
// Destroy basis
//------------------------------------------------------------------------------
Expand Down
22 changes: 18 additions & 4 deletions backends/hip-ref/ceed-hip-ref-basis.c
Original file line number Diff line number Diff line change
Expand Up @@ -98,8 +98,8 @@ static int CeedBasisApplyAdd_Hip(CeedBasis basis, const CeedInt num_elem, CeedTr
//------------------------------------------------------------------------------
// Basis apply - tensor AtPoints
//------------------------------------------------------------------------------
static int CeedBasisApplyAtPoints_Hip(CeedBasis basis, const CeedInt num_elem, const CeedInt *num_points, CeedTransposeMode t_mode,
CeedEvalMode eval_mode, CeedVector x_ref, CeedVector u, CeedVector v) {
static int CeedBasisApplyAtPointsCore_Hip(CeedBasis basis, bool apply_add, const CeedInt num_elem, const CeedInt *num_points,
CeedTransposeMode t_mode, CeedEvalMode eval_mode, CeedVector x_ref, CeedVector u, CeedVector v) {
Ceed ceed;
CeedInt Q_1d, dim, max_num_points = num_points[0];
const CeedInt is_transpose = t_mode == CEED_TRANSPOSE;
Expand Down Expand Up @@ -170,10 +170,11 @@ static int CeedBasisApplyAtPoints_Hip(CeedBasis basis, const CeedInt num_elem, c
CeedCallBackend(CeedVectorGetArrayRead(x_ref, CEED_MEM_DEVICE, &d_x));
if (u != CEED_VECTOR_NONE) CeedCallBackend(CeedVectorGetArrayRead(u, CEED_MEM_DEVICE, &d_u));
else CeedCheck(eval_mode == CEED_EVAL_WEIGHT, ceed, CEED_ERROR_BACKEND, "An input vector is required for this CeedEvalMode");
CeedCallBackend(CeedVectorGetArrayWrite(v, CEED_MEM_DEVICE, &d_v));
if (apply_add) CeedCallBackend(CeedVectorGetArray(v, CEED_MEM_DEVICE, &d_v));
else CeedCallBackend(CeedVectorGetArrayWrite(v, CEED_MEM_DEVICE, &d_v));

// Clear v for transpose operation
if (is_transpose) {
if (is_transpose && !apply_add) {
CeedSize length;

CeedCallBackend(CeedVectorGetLength(v, &length));
Expand Down Expand Up @@ -212,6 +213,18 @@ static int CeedBasisApplyAtPoints_Hip(CeedBasis basis, const CeedInt num_elem, c
return CEED_ERROR_SUCCESS;
}

static int CeedBasisApplyAtPoints_Hip(CeedBasis basis, const CeedInt num_elem, const CeedInt *num_points, CeedTransposeMode t_mode,
CeedEvalMode eval_mode, CeedVector x_ref, CeedVector u, CeedVector v) {
CeedCallBackend(CeedBasisApplyAtPointsCore_Hip(basis, false, num_elem, num_points, t_mode, eval_mode, x_ref, u, v));
return CEED_ERROR_SUCCESS;
}

static int CeedBasisApplyAddAtPoints_Hip(CeedBasis basis, const CeedInt num_elem, const CeedInt *num_points, CeedTransposeMode t_mode,
CeedEvalMode eval_mode, CeedVector x_ref, CeedVector u, CeedVector v) {
CeedCallBackend(CeedBasisApplyAtPointsCore_Hip(basis, true, num_elem, num_points, t_mode, eval_mode, x_ref, u, v));
return CEED_ERROR_SUCCESS;
}

//------------------------------------------------------------------------------
// Basis apply - non-tensor
//------------------------------------------------------------------------------
Expand Down Expand Up @@ -401,6 +414,7 @@ int CeedBasisCreateTensorH1_Hip(CeedInt dim, CeedInt P_1d, CeedInt Q_1d, const C
CeedCallBackend(CeedSetBackendFunction(ceed, "Basis", basis, "Apply", CeedBasisApply_Hip));
CeedCallBackend(CeedSetBackendFunction(ceed, "Basis", basis, "ApplyAdd", CeedBasisApplyAdd_Hip));
CeedCallBackend(CeedSetBackendFunction(ceed, "Basis", basis, "ApplyAtPoints", CeedBasisApplyAtPoints_Hip));
CeedCallBackend(CeedSetBackendFunction(ceed, "Basis", basis, "ApplyAddAtPoints", CeedBasisApplyAddAtPoints_Hip));
CeedCallBackend(CeedSetBackendFunction(ceed, "Basis", basis, "Destroy", CeedBasisDestroy_Hip));
return CEED_ERROR_SUCCESS;
}
Expand Down
22 changes: 18 additions & 4 deletions backends/hip-shared/ceed-hip-shared-basis.c
Original file line number Diff line number Diff line change
Expand Up @@ -265,8 +265,8 @@ int CeedBasisApplyAddTensor_Hip_shared(CeedBasis basis, const CeedInt num_elem,
//------------------------------------------------------------------------------
// Basis apply - tensor AtPoints
//------------------------------------------------------------------------------
static int CeedBasisApplyAtPoints_Hip_shared(CeedBasis basis, const CeedInt num_elem, const CeedInt *num_points, CeedTransposeMode t_mode,
CeedEvalMode eval_mode, CeedVector x_ref, CeedVector u, CeedVector v) {
static int CeedBasisApplyAtPoints_Hip_shared(CeedBasis basis, bool apply_add, const CeedInt num_elem, const CeedInt *num_points,
CeedTransposeMode t_mode, CeedEvalMode eval_mode, CeedVector x_ref, CeedVector u, CeedVector v) {
Ceed ceed;
CeedInt Q_1d, dim, max_num_points = num_points[0];
const CeedInt is_transpose = t_mode == CEED_TRANSPOSE;
Expand Down Expand Up @@ -337,10 +337,11 @@ static int CeedBasisApplyAtPoints_Hip_shared(CeedBasis basis, const CeedInt num_
CeedCallBackend(CeedVectorGetArrayRead(x_ref, CEED_MEM_DEVICE, &d_x));
if (u != CEED_VECTOR_NONE) CeedCallBackend(CeedVectorGetArrayRead(u, CEED_MEM_DEVICE, &d_u));
else CeedCheck(eval_mode == CEED_EVAL_WEIGHT, ceed, CEED_ERROR_BACKEND, "An input vector is required for this CeedEvalMode");
CeedCallBackend(CeedVectorGetArrayWrite(v, CEED_MEM_DEVICE, &d_v));
if (apply_add) CeedCallBackend(CeedVectorGetArray(v, CEED_MEM_DEVICE, &d_v));
else CeedCallBackend(CeedVectorGetArrayWrite(v, CEED_MEM_DEVICE, &d_v));

// Clear v for transpose operation
if (is_transpose) {
if (is_transpose && !apply_add) {
CeedSize length;

CeedCallBackend(CeedVectorGetLength(v, &length));
Expand Down Expand Up @@ -379,6 +380,18 @@ static int CeedBasisApplyAtPoints_Hip_shared(CeedBasis basis, const CeedInt num_
return CEED_ERROR_SUCCESS;
}

static int CeedBasisApplyNonTensor_Hip_shared(CeedBasis basis, const CeedInt num_elem, CeedTransposeMode t_mode, CeedEvalMode eval_mode, CeedVector u,
CeedVector v) {
CeedCallBackend(CeedBasisApplyNonTensorCore_Hip_shared(basis, false, num_elem, t_mode, eval_mode, u, v));
return CEED_ERROR_SUCCESS;
}

static int CeedBasisApplyAddNonTensor_Hip_shared(CeedBasis basis, const CeedInt num_elem, CeedTransposeMode t_mode, CeedEvalMode eval_mode,
CeedVector u, CeedVector v) {
CeedCallBackend(CeedBasisApplyNonTensorCore_Hip_shared(basis, true, num_elem, t_mode, eval_mode, u, v));
return CEED_ERROR_SUCCESS;
}

//------------------------------------------------------------------------------
// Destroy basis
//------------------------------------------------------------------------------
Expand Down Expand Up @@ -469,6 +482,7 @@ int CeedBasisCreateTensorH1_Hip_shared(CeedInt dim, CeedInt P_1d, CeedInt Q_1d,
CeedCallBackend(CeedSetBackendFunction(ceed, "Basis", basis, "Apply", CeedBasisApplyTensor_Hip_shared));
CeedCallBackend(CeedSetBackendFunction(ceed, "Basis", basis, "ApplyAdd", CeedBasisApplyAddTensor_Hip_shared));
CeedCallBackend(CeedSetBackendFunction(ceed, "Basis", basis, "ApplyAtPoints", CeedBasisApplyAtPoints_Hip_shared));
CeedCallBackend(CeedSetBackendFunction(ceed, "Basis", basis, "ApplyAddAtPoints", CeedBasisApplyAddAtPoints_Hip_shared));
CeedCallBackend(CeedSetBackendFunction(ceed, "Basis", basis, "Destroy", CeedBasisDestroy_Hip_shared));
return CEED_ERROR_SUCCESS;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,8 @@ extern "C" __global__ void InterpAtPoints(const CeedInt num_elem, const CeedInt
CeedScalar v_k = 0;

for (CeedInt b = 0; b < Q; b++) v_k += s_chebyshev_interp_1d[j + b * BASIS_P_1D] * in[(a * Q + b) * post + c];
out[k] = v_k;
if (d == BASIS_DIM - 1) out[k] += v_k;
else out[k] = v_k;
}
post *= P;
}
Expand Down Expand Up @@ -283,7 +284,8 @@ extern "C" __global__ void GradAtPoints(const CeedInt num_elem, const CeedInt is
CeedScalar v_k = 0;

for (CeedInt b = 0; b < Q; b++) v_k += s_chebyshev_interp_1d[j + b * BASIS_P_1D] * in[(a * Q + b) * post + c];
out[k] = v_k;
if (d == BASIS_DIM - 1) out[k] += v_k;
else out[k] = v_k;
}
post *= P;
}
Expand Down
6 changes: 4 additions & 2 deletions include/ceed/jit-source/hip/hip-ref-basis-tensor-at-points.h
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,8 @@ extern "C" __global__ void InterpAtPoints(const CeedInt num_elem, const CeedInt
CeedScalar v_k = 0;

for (CeedInt b = 0; b < Q; b++) v_k += s_chebyshev_interp_1d[j + b * BASIS_P_1D] * in[(a * Q + b) * post + c];
out[k] = v_k;
if (d == BASIS_DIM - 1) out[k] += v_k;
else out[k] = v_k;
}
post *= P;
}
Expand Down Expand Up @@ -283,7 +284,8 @@ extern "C" __global__ void GradAtPoints(const CeedInt num_elem, const CeedInt is
CeedScalar v_k = 0;

for (CeedInt b = 0; b < Q; b++) v_k += s_chebyshev_interp_1d[j + b * BASIS_P_1D] * in[(a * Q + b) * post + c];
out[k] = v_k;
if (d == BASIS_DIM - 1) out[k] += v_k;
else out[k] = v_k;
}
post *= P;
}
Expand Down
1 change: 1 addition & 0 deletions interface/ceed-basis.c
Original file line number Diff line number Diff line change
Expand Up @@ -1950,6 +1950,7 @@ int CeedBasisApplyAtPoints(CeedBasis basis, CeedInt num_elem, const CeedInt *num
**/
int CeedBasisApplyAddAtPoints(CeedBasis basis, CeedInt num_elem, const CeedInt *num_points, CeedTransposeMode t_mode, CeedEvalMode eval_mode,
CeedVector x_ref, CeedVector u, CeedVector v) {
CeedCheck(t_mode == CEED_TRANSPOSE, CeedBasisReturnCeed(basis), CEED_ERROR_UNSUPPORTED, "CeedBasisApplyAddAtPoints only supports CEED_TRANSPOSE");
CeedCall(CeedBasisApplyAtPointsCheckDims(basis, num_elem, num_points, t_mode, eval_mode, x_ref, u, v));
if (basis->ApplyAddAtPoints) {
CeedCall(basis->ApplyAddAtPoints(basis, num_elem, num_points, t_mode, eval_mode, x_ref, u, v));
Expand Down

0 comments on commit aca9cec

Please sign in to comment.