Skip to content

Commit

Permalink
Merge pull request #1231 from CEED/jeremy/consistency
Browse files Browse the repository at this point in the history
Consistency fixes
  • Loading branch information
jeremylt authored Jun 20, 2023
2 parents 2e6856d + 9ffb25e commit 4b35598
Show file tree
Hide file tree
Showing 58 changed files with 312 additions and 330 deletions.
6 changes: 3 additions & 3 deletions backends/cuda-gen/ceed-cuda-gen-operator-build.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
//------------------------------------------------------------------------------
// Build single operator kernel
//------------------------------------------------------------------------------
extern "C" int CeedCudaGenOperatorBuild(CeedOperator op) {
extern "C" int CeedOperatorBuildKernel_Cuda_gen(CeedOperator op) {
using std::ostringstream;
using std::string;
bool is_setup_done;
Expand Down Expand Up @@ -682,8 +682,8 @@ extern "C" int CeedCudaGenOperatorBuild(CeedOperator op) {
CeedDebug256(ceed, 2, "Generated Operator Kernels:\n");
CeedDebug(ceed, code.str().c_str());

CeedCallBackend(CeedCompileCuda(ceed, code.str().c_str(), &data->module, 1, "T_1D", CeedIntMax(Q_1d, data->max_P_1d)));
CeedCallBackend(CeedGetKernelCuda(ceed, data->module, operator_name.c_str(), &data->op));
CeedCallBackend(CeedCompile_Cuda(ceed, code.str().c_str(), &data->module, 1, "T_1D", CeedIntMax(Q_1d, data->max_P_1d)));
CeedCallBackend(CeedGetKernel_Cuda(ceed, data->module, operator_name.c_str(), &data->op));

CeedCallBackend(CeedOperatorSetSetupDone(op));
return CEED_ERROR_SUCCESS;
Expand Down
2 changes: 1 addition & 1 deletion backends/cuda-gen/ceed-cuda-gen-operator-build.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,6 @@
#ifndef _ceed_cuda_gen_operator_build_h
#define _ceed_cuda_gen_operator_build_h

CEED_INTERN int CeedCudaGenOperatorBuild(CeedOperator op);
CEED_INTERN int CeedOperatorBuildKernel_Cuda_gen(CeedOperator op);

#endif // _ceed_cuda_gen_operator_build_h
4 changes: 2 additions & 2 deletions backends/cuda-gen/ceed-cuda-gen-operator.c
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,7 @@ static int CeedOperatorApplyAdd_Cuda_gen(CeedOperator op, CeedVector input_vec,
CeedVector vec, output_vecs[CEED_FIELD_MAX] = {NULL};

// Creation of the operator
CeedCallBackend(CeedCudaGenOperatorBuild(op));
CeedCallBackend(CeedOperatorBuildKernel_Cuda_gen(op));

// Input vectors
for (CeedInt i = 0; i < num_input_fields; i++) {
Expand Down Expand Up @@ -173,7 +173,7 @@ static int CeedOperatorApplyAdd_Cuda_gen(CeedOperator op, CeedVector input_vec,
CeedChkBackend(BlockGridCalculate(num_elem, min_grid_size / cuda_data->device_prop.multiProcessorCount, max_threads_per_block,
cuda_data->device_prop.maxThreadsDim[2], cuda_data->device_prop.warpSize, block, &grid));
CeedInt shared_mem = block[0] * block[1] * block[2] * sizeof(CeedScalar);
CeedCallBackend(CeedRunKernelDimSharedCuda(ceed, data->op, grid, block[0], block[1], block[2], shared_mem, opargs));
CeedCallBackend(CeedRunKernelDimShared_Cuda(ceed, data->op, grid, block[0], block[1], block[2], shared_mem, opargs));

// Restore input arrays
for (CeedInt i = 0; i < num_input_fields; i++) {
Expand Down
4 changes: 2 additions & 2 deletions backends/cuda-gen/ceed-cuda-gen.c
Original file line number Diff line number Diff line change
Expand Up @@ -18,15 +18,15 @@
//------------------------------------------------------------------------------
static int CeedInit_Cuda_gen(const char *resource, Ceed ceed) {
char *resource_root;
CeedCallBackend(CeedCudaGetResourceRoot(ceed, resource, &resource_root));
CeedCallBackend(CeedGetResourceRoot(ceed, resource, ":", &resource_root));
CeedCheck(!strcmp(resource_root, "/gpu/cuda") || !strcmp(resource_root, "/gpu/cuda/gen"), ceed, CEED_ERROR_BACKEND,
"Cuda backend cannot use resource: %s", resource);
CeedCallBackend(CeedFree(&resource_root));

Ceed_Cuda *data;
CeedCallBackend(CeedCalloc(1, &data));
CeedCallBackend(CeedSetData(ceed, data));
CeedCallBackend(CeedCudaInit(ceed, resource));
CeedCallBackend(CeedInit_Cuda(ceed, resource));

Ceed ceedshared;
CeedCall(CeedInit("/gpu/cuda/shared", &ceedshared));
Expand Down
38 changes: 19 additions & 19 deletions backends/cuda-ref/ceed-cuda-ref-basis.c
Original file line number Diff line number Diff line change
Expand Up @@ -51,18 +51,18 @@ int CeedBasisApply_Cuda(CeedBasis basis, const CeedInt num_elem, CeedTransposeMo
void *interp_args[] = {(void *)&num_elem, (void *)&transpose, &data->d_interp_1d, &d_u, &d_v};
CeedInt block_size = CeedIntMin(CeedIntPow(Q_1d, dim), max_block_size);

CeedCallBackend(CeedRunKernelCuda(ceed, data->Interp, num_elem, block_size, interp_args));
CeedCallBackend(CeedRunKernel_Cuda(ceed, data->Interp, num_elem, block_size, interp_args));
} break;
case CEED_EVAL_GRAD: {
void *grad_args[] = {(void *)&num_elem, (void *)&transpose, &data->d_interp_1d, &data->d_grad_1d, &d_u, &d_v};
CeedInt block_size = max_block_size;

CeedCallBackend(CeedRunKernelCuda(ceed, data->Grad, num_elem, block_size, grad_args));
CeedCallBackend(CeedRunKernel_Cuda(ceed, data->Grad, num_elem, block_size, grad_args));
} break;
case CEED_EVAL_WEIGHT: {
void *weight_args[] = {(void *)&num_elem, (void *)&data->d_q_weight_1d, &d_v};
const int grid_size = num_elem;
CeedCallBackend(CeedRunKernelDimCuda(ceed, data->Weight, grid_size, Q_1d, dim >= 2 ? Q_1d : 1, 1, weight_args));
CeedCallBackend(CeedRunKernelDim_Cuda(ceed, data->Weight, grid_size, Q_1d, dim >= 2 ? Q_1d : 1, 1, weight_args));
} break;
// LCOV_EXCL_START
// Evaluate the divergence to/from the quadrature points
Expand Down Expand Up @@ -123,22 +123,22 @@ int CeedBasisApplyNonTensor_Cuda(CeedBasis basis, const CeedInt num_elem, CeedTr
case CEED_EVAL_INTERP: {
void *interp_args[] = {(void *)&num_elem, (void *)&transpose, &data->d_interp, &d_u, &d_v};
if (transpose) {
CeedCallBackend(CeedRunKernelDimCuda(ceed, data->Interp, grid, num_nodes, 1, elems_per_block, interp_args));
CeedCallBackend(CeedRunKernelDim_Cuda(ceed, data->Interp, grid, num_nodes, 1, elems_per_block, interp_args));
} else {
CeedCallBackend(CeedRunKernelDimCuda(ceed, data->Interp, grid, num_qpts, 1, elems_per_block, interp_args));
CeedCallBackend(CeedRunKernelDim_Cuda(ceed, data->Interp, grid, num_qpts, 1, elems_per_block, interp_args));
}
} break;
case CEED_EVAL_GRAD: {
void *grad_args[] = {(void *)&num_elem, (void *)&transpose, &data->d_grad, &d_u, &d_v};
if (transpose) {
CeedCallBackend(CeedRunKernelDimCuda(ceed, data->Grad, grid, num_nodes, 1, elems_per_block, grad_args));
CeedCallBackend(CeedRunKernelDim_Cuda(ceed, data->Grad, grid, num_nodes, 1, elems_per_block, grad_args));
} else {
CeedCallBackend(CeedRunKernelDimCuda(ceed, data->Grad, grid, num_qpts, 1, elems_per_block, grad_args));
CeedCallBackend(CeedRunKernelDim_Cuda(ceed, data->Grad, grid, num_qpts, 1, elems_per_block, grad_args));
}
} break;
case CEED_EVAL_WEIGHT: {
void *weight_args[] = {(void *)&num_elem, (void *)&data->d_q_weight, &d_v};
CeedCallBackend(CeedRunKernelDimCuda(ceed, data->Weight, grid, num_qpts, 1, elems_per_block, weight_args));
CeedCallBackend(CeedRunKernelDim_Cuda(ceed, data->Weight, grid, num_qpts, 1, elems_per_block, weight_args));
} break;
// LCOV_EXCL_START
// Evaluate the divergence to/from the quadrature points
Expand Down Expand Up @@ -231,12 +231,12 @@ int CeedBasisCreateTensorH1_Cuda(CeedInt dim, CeedInt P_1d, CeedInt Q_1d, const
CeedDebug256(ceed, 2, "----- Loading Basis Kernel Source -----\n");
CeedCallBackend(CeedLoadSourceToBuffer(ceed, basis_kernel_path, &basis_kernel_source));
CeedDebug256(ceed, 2, "----- Loading Basis Kernel Source Complete! -----\n");
CeedCallBackend(CeedCompileCuda(ceed, basis_kernel_source, &data->module, 7, "BASIS_Q_1D", Q_1d, "BASIS_P_1D", P_1d, "BASIS_BUF_LEN",
num_comp * CeedIntPow(Q_1d > P_1d ? Q_1d : P_1d, dim), "BASIS_DIM", dim, "BASIS_NUM_COMP", num_comp,
"BASIS_NUM_NODES", CeedIntPow(P_1d, dim), "BASIS_NUM_QPTS", CeedIntPow(Q_1d, dim)));
CeedCallBackend(CeedGetKernelCuda(ceed, data->module, "Interp", &data->Interp));
CeedCallBackend(CeedGetKernelCuda(ceed, data->module, "Grad", &data->Grad));
CeedCallBackend(CeedGetKernelCuda(ceed, data->module, "Weight", &data->Weight));
CeedCallBackend(CeedCompile_Cuda(ceed, basis_kernel_source, &data->module, 7, "BASIS_Q_1D", Q_1d, "BASIS_P_1D", P_1d, "BASIS_BUF_LEN",
num_comp * CeedIntPow(Q_1d > P_1d ? Q_1d : P_1d, dim), "BASIS_DIM", dim, "BASIS_NUM_COMP", num_comp,
"BASIS_NUM_NODES", CeedIntPow(P_1d, dim), "BASIS_NUM_QPTS", CeedIntPow(Q_1d, dim)));
CeedCallBackend(CeedGetKernel_Cuda(ceed, data->module, "Interp", &data->Interp));
CeedCallBackend(CeedGetKernel_Cuda(ceed, data->module, "Grad", &data->Grad));
CeedCallBackend(CeedGetKernel_Cuda(ceed, data->module, "Weight", &data->Weight));
CeedCallBackend(CeedFree(&basis_kernel_path));
CeedCallBackend(CeedFree(&basis_kernel_source));

Expand Down Expand Up @@ -278,11 +278,11 @@ int CeedBasisCreateH1_Cuda(CeedElemTopology topo, CeedInt dim, CeedInt num_nodes
CeedDebug256(ceed, 2, "----- Loading Basis Kernel Source -----\n");
CeedCallBackend(CeedLoadSourceToBuffer(ceed, basis_kernel_path, &basis_kernel_source));
CeedDebug256(ceed, 2, "----- Loading Basis Kernel Source Complete! -----\n");
CeedCallCuda(ceed, CeedCompileCuda(ceed, basis_kernel_source, &data->module, 4, "BASIS_Q", num_qpts, "BASIS_P", num_nodes, "BASIS_DIM", dim,
"BASIS_NUM_COMP", num_comp));
CeedCallCuda(ceed, CeedGetKernelCuda(ceed, data->module, "Interp", &data->Interp));
CeedCallCuda(ceed, CeedGetKernelCuda(ceed, data->module, "Grad", &data->Grad));
CeedCallCuda(ceed, CeedGetKernelCuda(ceed, data->module, "Weight", &data->Weight));
CeedCallCuda(ceed, CeedCompile_Cuda(ceed, basis_kernel_source, &data->module, 4, "BASIS_Q", num_qpts, "BASIS_P", num_nodes, "BASIS_DIM", dim,
"BASIS_NUM_COMP", num_comp));
CeedCallCuda(ceed, CeedGetKernel_Cuda(ceed, data->module, "Interp", &data->Interp));
CeedCallCuda(ceed, CeedGetKernel_Cuda(ceed, data->module, "Grad", &data->Grad));
CeedCallCuda(ceed, CeedGetKernel_Cuda(ceed, data->module, "Weight", &data->Weight));
CeedCallBackend(CeedFree(&basis_kernel_path));
CeedCallBackend(CeedFree(&basis_kernel_source));

Expand Down
20 changes: 10 additions & 10 deletions backends/cuda-ref/ceed-cuda-ref-operator.c
Original file line number Diff line number Diff line change
Expand Up @@ -729,10 +729,10 @@ static inline int CeedOperatorAssembleDiagonalSetup_Cuda(CeedOperator op, const
CeedCallBackend(CeedBasisGetNumNodes(basisin, &nnodes));
CeedCallBackend(CeedBasisGetNumQuadraturePoints(basisin, &nqpts));
diag->nnodes = nnodes;
CeedCallCuda(ceed, CeedCompileCuda(ceed, diagonal_kernel_source, &diag->module, 5, "NUMEMODEIN", numemodein, "NUMEMODEOUT", numemodeout, "NNODES",
nnodes, "NQPTS", nqpts, "NCOMP", ncomp));
CeedCallCuda(ceed, CeedGetKernelCuda(ceed, diag->module, "linearDiagonal", &diag->linearDiagonal));
CeedCallCuda(ceed, CeedGetKernelCuda(ceed, diag->module, "linearPointBlockDiagonal", &diag->linearPointBlock));
CeedCallCuda(ceed, CeedCompile_Cuda(ceed, diagonal_kernel_source, &diag->module, 5, "NUMEMODEIN", numemodein, "NUMEMODEOUT", numemodeout, "NNODES",
nnodes, "NQPTS", nqpts, "NCOMP", ncomp));
CeedCallCuda(ceed, CeedGetKernel_Cuda(ceed, diag->module, "linearDiagonal", &diag->linearDiagonal));
CeedCallCuda(ceed, CeedGetKernel_Cuda(ceed, diag->module, "linearPointBlockDiagonal", &diag->linearPointBlock));
CeedCallBackend(CeedFree(&diagonal_kernel_path));
CeedCallBackend(CeedFree(&diagonal_kernel_source));

Expand Down Expand Up @@ -836,9 +836,9 @@ static inline int CeedOperatorAssembleDiagonalCore_Cuda(CeedOperator op, CeedVec
void *args[] = {(void *)&nelem, &diag->d_identity, &diag->d_interpin, &diag->d_gradin, &diag->d_interpout,
&diag->d_gradout, &diag->d_emodein, &diag->d_emodeout, &assembledqfarray, &elemdiagarray};
if (pointBlock) {
CeedCallBackend(CeedRunKernelDimCuda(ceed, diag->linearPointBlock, grid, diag->nnodes, 1, elemsPerBlock, args));
CeedCallBackend(CeedRunKernelDim_Cuda(ceed, diag->linearPointBlock, grid, diag->nnodes, 1, elemsPerBlock, args));
} else {
CeedCallBackend(CeedRunKernelDimCuda(ceed, diag->linearDiagonal, grid, diag->nnodes, 1, elemsPerBlock, args));
CeedCallBackend(CeedRunKernelDim_Cuda(ceed, diag->linearDiagonal, grid, diag->nnodes, 1, elemsPerBlock, args));
}

// Restore arrays
Expand Down Expand Up @@ -985,9 +985,9 @@ static int CeedSingleOperatorAssembleSetup_Cuda(CeedOperator op) {
asmb->block_size_x = esize;
asmb->block_size_y = esize;
}
CeedCallCuda(ceed, CeedCompileCuda(ceed, assembly_kernel_source, &asmb->module, 7, "NELEM", nelem, "NUMEMODEIN", num_emode_in, "NUMEMODEOUT",
num_emode_out, "NQPTS", nqpts, "NNODES", esize, "BLOCK_SIZE", block_size, "NCOMP", ncomp));
CeedCallCuda(ceed, CeedGetKernelCuda(ceed, asmb->module, fallback ? "linearAssembleFallback" : "linearAssemble", &asmb->linearAssemble));
CeedCallCuda(ceed, CeedCompile_Cuda(ceed, assembly_kernel_source, &asmb->module, 7, "NELEM", nelem, "NUMEMODEIN", num_emode_in, "NUMEMODEOUT",
num_emode_out, "NQPTS", nqpts, "NNODES", esize, "BLOCK_SIZE", block_size, "NCOMP", ncomp));
CeedCallCuda(ceed, CeedGetKernel_Cuda(ceed, asmb->module, fallback ? "linearAssembleFallback" : "linearAssemble", &asmb->linearAssemble));
CeedCallBackend(CeedFree(&assembly_kernel_path));
CeedCallBackend(CeedFree(&assembly_kernel_source));

Expand Down Expand Up @@ -1076,7 +1076,7 @@ static int CeedSingleOperatorAssemble_Cuda(CeedOperator op, CeedInt offset, Ceed
const CeedInt grid = nelem / elemsPerBlock + ((nelem / elemsPerBlock * elemsPerBlock < nelem) ? 1 : 0);
void *args[] = {&impl->asmb->d_B_in, &impl->asmb->d_B_out, &qf_array, &values_array};
CeedCallBackend(
CeedRunKernelDimCuda(ceed, impl->asmb->linearAssemble, grid, impl->asmb->block_size_x, impl->asmb->block_size_y, elemsPerBlock, args));
CeedRunKernelDim_Cuda(ceed, impl->asmb->linearAssemble, grid, impl->asmb->block_size_x, impl->asmb->block_size_y, elemsPerBlock, args));

// Restore arrays
CeedCallBackend(CeedVectorRestoreArray(values, &values_array));
Expand Down
6 changes: 3 additions & 3 deletions backends/cuda-ref/ceed-cuda-ref-qfunction-load.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
//------------------------------------------------------------------------------
// Build QFunction kernel
//------------------------------------------------------------------------------
extern "C" int CeedCudaBuildQFunction(CeedQFunction qf) {
extern "C" int CeedQFunctionBuildKernel_Cuda_ref(CeedQFunction qf) {
using std::ostringstream;
using std::string;
Ceed ceed;
Expand Down Expand Up @@ -109,8 +109,8 @@ extern "C" int CeedCudaBuildQFunction(CeedQFunction qf) {
CeedDebug(ceed, code.str().c_str());

// Compile kernel
CeedCallBackend(CeedCompileCuda(ceed, code.str().c_str(), &data->module, 0));
CeedCallBackend(CeedGetKernelCuda(ceed, data->module, kernel_name.c_str(), &data->QFunction));
CeedCallBackend(CeedCompile_Cuda(ceed, code.str().c_str(), &data->module, 0));
CeedCallBackend(CeedGetKernel_Cuda(ceed, data->module, kernel_name.c_str(), &data->QFunction));

// Cleanup
CeedCallBackend(CeedFree(&data->qfunction_source));
Expand Down
2 changes: 1 addition & 1 deletion backends/cuda-ref/ceed-cuda-ref-qfunction-load.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,6 @@
#ifndef _ceed_cuda_qfunction_load_h
#define _ceed_cuda_qfunction_load_h

CEED_INTERN int CeedCudaBuildQFunction(CeedQFunction qf);
CEED_INTERN int CeedQFunctionBuildKernel_Cuda_ref(CeedQFunction qf);

#endif // _ceed_cuda_qfunction_load_h
2 changes: 1 addition & 1 deletion backends/cuda-ref/ceed-cuda-ref-qfunction.c
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ static int CeedQFunctionApply_Cuda(CeedQFunction qf, CeedInt Q, CeedVector *U, C
CeedCallBackend(CeedQFunctionGetCeed(qf, &ceed));

// Build and compile kernel, if not done
CeedCallBackend(CeedCudaBuildQFunction(qf));
CeedCallBackend(CeedQFunctionBuildKernel_Cuda_ref(qf));

CeedQFunction_Cuda *data;
CeedCallBackend(CeedQFunctionGetData(qf, &data));
Expand Down
10 changes: 5 additions & 5 deletions backends/cuda-ref/ceed-cuda-ref.c
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ static int CeedGetPreferredMemType_Cuda(CeedMemType *mem_type) {
//------------------------------------------------------------------------------
// Get CUBLAS handle
//------------------------------------------------------------------------------
int CeedCudaGetCublasHandle(Ceed ceed, cublasHandle_t *handle) {
int CeedGetCublasHandle_Cuda(Ceed ceed, cublasHandle_t *handle) {
Ceed_Cuda *data;
CeedCallBackend(CeedGetData(ceed, &data));

Expand All @@ -38,17 +38,17 @@ int CeedCudaGetCublasHandle(Ceed ceed, cublasHandle_t *handle) {
//------------------------------------------------------------------------------
// Backend Init
//------------------------------------------------------------------------------
static int CeedInit_Cuda(const char *resource, Ceed ceed) {
static int CeedInit_Cuda_ref(const char *resource, Ceed ceed) {
char *resource_root;
CeedCallBackend(CeedCudaGetResourceRoot(ceed, resource, &resource_root));
CeedCallBackend(CeedGetResourceRoot(ceed, resource, ":", &resource_root));
CeedCheck(!strcmp(resource_root, "/gpu/cuda/ref"), ceed, CEED_ERROR_BACKEND, "Cuda backend cannot use resource: %s", resource);
CeedCallBackend(CeedFree(&resource_root));
CeedCallBackend(CeedSetDeterministic(ceed, true));

Ceed_Cuda *data;
CeedCallBackend(CeedCalloc(1, &data));
CeedCallBackend(CeedSetData(ceed, data));
CeedCallBackend(CeedCudaInit(ceed, resource));
CeedCallBackend(CeedInit_Cuda(ceed, resource));

CeedCallBackend(CeedSetBackendFunction(ceed, "Ceed", ceed, "GetPreferredMemType", CeedGetPreferredMemType_Cuda));
CeedCallBackend(CeedSetBackendFunction(ceed, "Ceed", ceed, "VectorCreate", CeedVectorCreate_Cuda));
Expand All @@ -65,6 +65,6 @@ static int CeedInit_Cuda(const char *resource, Ceed ceed) {
//------------------------------------------------------------------------------
// Backend Register
//------------------------------------------------------------------------------
CEED_INTERN int CeedRegister_Cuda(void) { return CeedRegister("/gpu/cuda/ref", CeedInit_Cuda, 40); }
CEED_INTERN int CeedRegister_Cuda(void) { return CeedRegister("/gpu/cuda/ref", CeedInit_Cuda_ref, 40); }

//------------------------------------------------------------------------------
2 changes: 1 addition & 1 deletion backends/cuda-ref/ceed-cuda-ref.h
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,7 @@ typedef struct {
CeedOperatorAssemble_Cuda *asmb;
} CeedOperator_Cuda;

CEED_INTERN int CeedCudaGetCublasHandle(Ceed ceed, cublasHandle_t *handle);
CEED_INTERN int CeedGetCublasHandle_Cuda(Ceed ceed, cublasHandle_t *handle);

CEED_INTERN int CeedVectorCreate_Cuda(CeedSize n, CeedVector vec);

Expand Down
Loading

0 comments on commit 4b35598

Please sign in to comment.