// Copyright (c) 2017-2018, Lawrence Livermore National Security, LLC.
// Produced at the Lawrence Livermore National Laboratory. LLNL-CODE-734707.
// All Rights reserved. See files LICENSE and NOTICE for details.
//
// This file is part of CEED, a collection of benchmarks, miniapps, software
// libraries and APIs for efficient high-order finite element and spectral
// element discretizations for exascale applications. For more information and
// source code availability see http://github.com/ceed.
//
// The CEED research is supported by the Exascale Computing Project 17-SC-20-SC,
// a collaborative effort of two U.S. Department of Energy organizations (Office
// of Science and the National Nuclear Security Administration) responsible for
// the planning and preparation of a capable exascale ecosystem, including
// software, applications, hardware, advanced system engineering and early
// testbed platforms, in support of the nation's exascale computing imperative.
16 17 #include <ceed/ceed.h> 18 #include <ceed/backend.h> 19 #include <stddef.h> 20 #include "ceed-cuda-gen.h" 21 #include "ceed-cuda-gen-operator-build.h" 22 #include "../cuda/ceed-cuda.h" 23 24 //------------------------------------------------------------------------------ 25 // Destroy operator 26 //------------------------------------------------------------------------------ 27 static int CeedOperatorDestroy_Cuda_gen(CeedOperator op) { 28 int ierr; 29 CeedOperator_Cuda_gen *impl; 30 ierr = CeedOperatorGetData(op, &impl); CeedChkBackend(ierr); 31 ierr = CeedFree(&impl); CeedChkBackend(ierr); 32 return CEED_ERROR_SUCCESS; 33 } 34 35 //------------------------------------------------------------------------------ 36 // Apply and add to output 37 //------------------------------------------------------------------------------ 38 static int CeedOperatorApplyAdd_Cuda_gen(CeedOperator op, CeedVector invec, 39 CeedVector outvec, CeedRequest *request) { 40 int ierr; 41 Ceed ceed; 42 ierr = CeedOperatorGetCeed(op, &ceed); CeedChkBackend(ierr); 43 CeedOperator_Cuda_gen *data; 44 ierr = CeedOperatorGetData(op, &data); CeedChkBackend(ierr); 45 CeedQFunction qf; 46 CeedQFunction_Cuda_gen *qf_data; 47 ierr = CeedOperatorGetQFunction(op, &qf); CeedChkBackend(ierr); 48 ierr = CeedQFunctionGetData(qf, &qf_data); CeedChkBackend(ierr); 49 CeedInt nelem, numinputfields, numoutputfields; 50 ierr = CeedOperatorGetNumElements(op, &nelem); CeedChkBackend(ierr); 51 ierr = CeedQFunctionGetNumArgs(qf, &numinputfields, &numoutputfields); 52 CeedChkBackend(ierr); 53 CeedOperatorField *opinputfields, *opoutputfields; 54 ierr = CeedOperatorGetFields(op, &opinputfields, &opoutputfields); 55 CeedChkBackend(ierr); 56 CeedQFunctionField *qfinputfields, *qfoutputfields; 57 ierr = CeedQFunctionGetFields(qf, &qfinputfields, &qfoutputfields); 58 CeedChkBackend(ierr); 59 CeedEvalMode emode; 60 CeedVector vec, outvecs[16] = {}; 61 62 // Creation of the operator 63 ierr = CeedCudaGenOperatorBuild(op); 
CeedChkBackend(ierr); 64 65 // Input vectors 66 for (CeedInt i = 0; i < numinputfields; i++) { 67 ierr = CeedQFunctionFieldGetEvalMode(qfinputfields[i], &emode); 68 CeedChkBackend(ierr); 69 if (emode == CEED_EVAL_WEIGHT) { // Skip 70 data->fields.in[i] = NULL; 71 } else { 72 // Get input vector 73 ierr = CeedOperatorFieldGetVector(opinputfields[i], &vec); CeedChkBackend(ierr); 74 if (vec == CEED_VECTOR_ACTIVE) vec = invec; 75 ierr = CeedVectorGetArrayRead(vec, CEED_MEM_DEVICE, &data->fields.in[i]); 76 CeedChkBackend(ierr); 77 } 78 } 79 80 // Output vectors 81 for (CeedInt i = 0; i < numoutputfields; i++) { 82 ierr = CeedQFunctionFieldGetEvalMode(qfoutputfields[i], &emode); 83 CeedChkBackend(ierr); 84 if (emode == CEED_EVAL_WEIGHT) { // Skip 85 data->fields.out[i] = NULL; 86 } else { 87 // Get output vector 88 ierr = CeedOperatorFieldGetVector(opoutputfields[i], &vec); 89 CeedChkBackend(ierr); 90 if (vec == CEED_VECTOR_ACTIVE) vec = outvec; 91 outvecs[i] = vec; 92 // Check for multiple output modes 93 CeedInt index = -1; 94 for (CeedInt j = 0; j < i; j++) { 95 if (vec == outvecs[j]) { 96 index = j; 97 break; 98 } 99 } 100 if (index == -1) { 101 ierr = CeedVectorGetArray(vec, CEED_MEM_DEVICE, &data->fields.out[i]); 102 CeedChkBackend(ierr); 103 } else { 104 data->fields.out[i] = data->fields.out[index]; 105 } 106 } 107 } 108 109 // Get context data 110 CeedQFunctionContext ctx; 111 ierr = CeedQFunctionGetInnerContext(qf, &ctx); CeedChkBackend(ierr); 112 if (ctx) { 113 ierr = CeedQFunctionContextGetData(ctx, CEED_MEM_DEVICE, &qf_data->d_c); 114 CeedChkBackend(ierr); 115 } 116 117 // Apply operator 118 void *opargs[] = {(void *) &nelem, &qf_data->d_c, &data->indices, 119 &data->fields, &data->B, &data->G, &data->W 120 }; 121 const CeedInt dim = data->dim; 122 const CeedInt Q1d = data->Q1d; 123 const CeedInt P1d = data->maxP1d; 124 const CeedInt thread1d = CeedIntMax(Q1d, P1d); 125 if (dim==1) { 126 const CeedInt elemsPerBlock = 32; 127 CeedInt grid = 
nelem/elemsPerBlock + ( (nelem/elemsPerBlock*elemsPerBlock<nelem) 128 ? 1 : 0 ); 129 CeedInt sharedMem = elemsPerBlock*thread1d*sizeof(CeedScalar); 130 ierr = CeedRunKernelDimSharedCuda(ceed, data->op, grid, thread1d, 1, 131 elemsPerBlock, sharedMem, opargs); 132 } else if (dim==2) { 133 const CeedInt elemsPerBlock = thread1d<4? 16 : 2; 134 CeedInt grid = nelem/elemsPerBlock + ( (nelem/elemsPerBlock*elemsPerBlock<nelem) 135 ? 1 : 0 ); 136 CeedInt sharedMem = elemsPerBlock*thread1d*thread1d*sizeof(CeedScalar); 137 ierr = CeedRunKernelDimSharedCuda(ceed, data->op, grid, thread1d, thread1d, 138 elemsPerBlock, sharedMem, opargs); 139 } else if (dim==3) { 140 const CeedInt elemsPerBlock = thread1d<6? 4 : (thread1d<8? 2 : 1); 141 CeedInt grid = nelem/elemsPerBlock + ( (nelem/elemsPerBlock*elemsPerBlock<nelem) 142 ? 1 : 0 ); 143 CeedInt sharedMem = elemsPerBlock*thread1d*thread1d*sizeof(CeedScalar); 144 ierr = CeedRunKernelDimSharedCuda(ceed, data->op, grid, thread1d, thread1d, 145 elemsPerBlock, sharedMem, opargs); 146 } 147 CeedChkBackend(ierr); 148 149 // Restore input arrays 150 for (CeedInt i = 0; i < numinputfields; i++) { 151 ierr = CeedQFunctionFieldGetEvalMode(qfinputfields[i], &emode); 152 CeedChkBackend(ierr); 153 if (emode == CEED_EVAL_WEIGHT) { // Skip 154 } else { 155 ierr = CeedOperatorFieldGetVector(opinputfields[i], &vec); CeedChkBackend(ierr); 156 if (vec == CEED_VECTOR_ACTIVE) vec = invec; 157 ierr = CeedVectorRestoreArrayRead(vec, &data->fields.in[i]); 158 CeedChkBackend(ierr); 159 } 160 } 161 162 // Restore output arrays 163 for (CeedInt i = 0; i < numoutputfields; i++) { 164 ierr = CeedQFunctionFieldGetEvalMode(qfoutputfields[i], &emode); 165 CeedChkBackend(ierr); 166 if (emode == CEED_EVAL_WEIGHT) { // Skip 167 } else { 168 ierr = CeedOperatorFieldGetVector(opoutputfields[i], &vec); 169 CeedChkBackend(ierr); 170 if (vec == CEED_VECTOR_ACTIVE) vec = outvec; 171 // Check for multiple output modes 172 CeedInt index = -1; 173 for (CeedInt j = 0; j < i; 
j++) { 174 if (vec == outvecs[j]) { 175 index = j; 176 break; 177 } 178 } 179 if (index == -1) { 180 ierr = CeedVectorRestoreArray(vec, &data->fields.out[i]); 181 CeedChkBackend(ierr); 182 } 183 } 184 } 185 186 // Restore context data 187 if (ctx) { 188 ierr = CeedQFunctionContextRestoreData(ctx, &qf_data->d_c); 189 CeedChkBackend(ierr); 190 } 191 return CEED_ERROR_SUCCESS; 192 } 193 194 //------------------------------------------------------------------------------ 195 // Create FDM element inverse not supported 196 //------------------------------------------------------------------------------ 197 static int CeedOperatorCreateFDMElementInverse_Cuda(CeedOperator op) { 198 // LCOV_EXCL_START 199 int ierr; 200 Ceed ceed; 201 ierr = CeedOperatorGetCeed(op, &ceed); CeedChkBackend(ierr); 202 return CeedError(ceed, CEED_ERROR_BACKEND, 203 "Backend does not implement FDM inverse creation"); 204 // LCOV_EXCL_STOP 205 } 206 207 //------------------------------------------------------------------------------ 208 // Create operator 209 //------------------------------------------------------------------------------ 210 int CeedOperatorCreate_Cuda_gen(CeedOperator op) { 211 int ierr; 212 Ceed ceed; 213 ierr = CeedOperatorGetCeed(op, &ceed); CeedChkBackend(ierr); 214 CeedOperator_Cuda_gen *impl; 215 216 ierr = CeedCalloc(1, &impl); CeedChkBackend(ierr); 217 ierr = CeedOperatorSetData(op, impl); CeedChkBackend(ierr); 218 219 ierr = CeedSetBackendFunction(ceed, "Operator", op, "CreateFDMElementInverse", 220 CeedOperatorCreateFDMElementInverse_Cuda); 221 CeedChkBackend(ierr); 222 ierr = CeedSetBackendFunction(ceed, "Operator", op, "ApplyAdd", 223 CeedOperatorApplyAdd_Cuda_gen); CeedChkBackend(ierr); 224 ierr = CeedSetBackendFunction(ceed, "Operator", op, "Destroy", 225 CeedOperatorDestroy_Cuda_gen); CeedChkBackend(ierr); 226 return CEED_ERROR_SUCCESS; 227 } 228 //------------------------------------------------------------------------------ 229