// Copyright (c) 2017-2018, Lawrence Livermore National Security, LLC.
// Produced at the Lawrence Livermore National Laboratory. LLNL-CODE-734707.
// All Rights reserved. See files LICENSE and NOTICE for details.
//
// This file is part of CEED, a collection of benchmarks, miniapps, software
// libraries and APIs for efficient high-order finite element and spectral
// element discretizations for exascale applications. For more information and
// source code availability see http://github.com/ceed.
//
// The CEED research is supported by the Exascale Computing Project 17-SC-20-SC,
// a collaborative effort of two U.S. Department of Energy organizations (Office
// of Science and the National Nuclear Security Administration) responsible for
// the planning and preparation of a capable exascale ecosystem, including
// software, applications, hardware, advanced system engineering and early
// testbed platforms, in support of the nation's exascale computing imperative.
16 17 #include "ceed-hip-gen.h" 18 #include "ceed-hip-gen-operator-build.h" 19 #include "../hip/ceed-hip-compile.h" 20 21 //------------------------------------------------------------------------------ 22 // Destroy operator 23 //------------------------------------------------------------------------------ 24 static int CeedOperatorDestroy_Hip_gen(CeedOperator op) { 25 int ierr; 26 CeedOperator_Hip_gen *impl; 27 ierr = CeedOperatorGetData(op, &impl); CeedChk(ierr); 28 ierr = CeedFree(&impl); CeedChk(ierr); 29 return 0; 30 } 31 32 //------------------------------------------------------------------------------ 33 // Apply and add to output 34 //------------------------------------------------------------------------------ 35 static int CeedOperatorApplyAdd_Hip_gen(CeedOperator op, CeedVector invec, 36 CeedVector outvec, CeedRequest *request) { 37 int ierr; 38 Ceed ceed; 39 ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr); 40 CeedOperator_Hip_gen *data; 41 ierr = CeedOperatorGetData(op, &data); CeedChk(ierr); 42 CeedQFunction qf; 43 CeedQFunction_Hip_gen *qf_data; 44 ierr = CeedOperatorGetQFunction(op, &qf); CeedChk(ierr); 45 ierr = CeedQFunctionGetData(qf, &qf_data); CeedChk(ierr); 46 CeedInt nelem, numinputfields, numoutputfields; 47 ierr = CeedOperatorGetNumElements(op, &nelem); CeedChk(ierr); 48 ierr = CeedQFunctionGetNumArgs(qf, &numinputfields, &numoutputfields); 49 CeedChk(ierr); 50 CeedOperatorField *opinputfields, *opoutputfields; 51 ierr = CeedOperatorGetFields(op, &opinputfields, &opoutputfields); 52 CeedChk(ierr); 53 CeedQFunctionField *qfinputfields, *qfoutputfields; 54 ierr = CeedQFunctionGetFields(qf, &qfinputfields, &qfoutputfields); 55 CeedChk(ierr); 56 CeedEvalMode emode; 57 CeedVector vec, outvecs[16] = {}; 58 59 //Creation of the operator 60 ierr = CeedHipGenOperatorBuild(op); CeedChk(ierr); 61 62 // Input vectors 63 for (CeedInt i = 0; i < numinputfields; i++) { 64 ierr = CeedQFunctionFieldGetEvalMode(qfinputfields[i], &emode); 65 
CeedChk(ierr); 66 if (emode == CEED_EVAL_WEIGHT) { // Skip 67 data->fields.in[i] = NULL; 68 } else { 69 // Get input vector 70 ierr = CeedOperatorFieldGetVector(opinputfields[i], &vec); CeedChk(ierr); 71 if (vec == CEED_VECTOR_ACTIVE) vec = invec; 72 ierr = CeedVectorGetArrayRead(vec, CEED_MEM_DEVICE, &data->fields.in[i]); 73 CeedChk(ierr); 74 } 75 } 76 77 // Output vectors 78 for (CeedInt i = 0; i < numoutputfields; i++) { 79 ierr = CeedQFunctionFieldGetEvalMode(qfoutputfields[i], &emode); 80 CeedChk(ierr); 81 if (emode == CEED_EVAL_WEIGHT) { // Skip 82 data->fields.out[i] = NULL; 83 } else { 84 // Get output vector 85 ierr = CeedOperatorFieldGetVector(opoutputfields[i], &vec); CeedChk(ierr); 86 if (vec == CEED_VECTOR_ACTIVE) vec = outvec; 87 outvecs[i] = vec; 88 // Check for multiple output modes 89 CeedInt index = -1; 90 for (CeedInt j = 0; j < i; j++) { 91 if (vec == outvecs[j]) { 92 index = j; 93 break; 94 } 95 } 96 if (index == -1) { 97 ierr = CeedVectorGetArray(vec, CEED_MEM_DEVICE, &data->fields.out[i]); 98 CeedChk(ierr); 99 } else { 100 data->fields.out[i] = data->fields.out[index]; 101 } 102 } 103 } 104 105 // Get context data 106 CeedQFunctionContext ctx; 107 ierr = CeedQFunctionGetInnerContext(qf, &ctx); CeedChk(ierr); 108 if (ctx) { 109 ierr = CeedQFunctionContextGetData(ctx, CEED_MEM_DEVICE, &qf_data->d_c); 110 CeedChk(ierr); 111 } 112 113 // Apply operator 114 void *opargs[] = {(void *) &nelem, &qf_data->d_c, &data->indices, 115 &data->fields, &data->B, &data->G, &data->W 116 }; 117 const CeedInt dim = data->dim; 118 const CeedInt Q1d = data->Q1d; 119 const CeedInt P1d = data->maxP1d; 120 const CeedInt thread1d = CeedIntMax(Q1d, P1d); 121 if (dim==1) { 122 CeedInt elemsPerBlock = 64*thread1d > 256? 256/thread1d : 64; 123 elemsPerBlock = elemsPerBlock>0?elemsPerBlock:1; 124 CeedInt grid = nelem/elemsPerBlock + ( (nelem/elemsPerBlock*elemsPerBlock<nelem) 125 ? 
1 : 0 ); 126 CeedInt sharedMem = elemsPerBlock*thread1d*sizeof(CeedScalar); 127 ierr = CeedRunKernelDimSharedHip(ceed, data->op, grid, thread1d, 1, 128 elemsPerBlock, sharedMem, opargs); 129 } else if (dim==2) { 130 const CeedInt elemsPerBlock = thread1d<4? 16 : 2; 131 CeedInt grid = nelem/elemsPerBlock + ( (nelem/elemsPerBlock*elemsPerBlock<nelem) 132 ? 1 : 0 ); 133 CeedInt sharedMem = elemsPerBlock*thread1d*thread1d*sizeof(CeedScalar); 134 ierr = CeedRunKernelDimSharedHip(ceed, data->op, grid, thread1d, thread1d, 135 elemsPerBlock, sharedMem, opargs); 136 } else if (dim==3) { 137 const CeedInt elemsPerBlock = thread1d<6? 4 : (thread1d<8? 2 : 1); 138 CeedInt grid = nelem/elemsPerBlock + ( (nelem/elemsPerBlock*elemsPerBlock<nelem) 139 ? 1 : 0 ); 140 CeedInt sharedMem = elemsPerBlock*thread1d*thread1d*sizeof(CeedScalar); 141 ierr = CeedRunKernelDimSharedHip(ceed, data->op, grid, thread1d, thread1d, 142 elemsPerBlock, sharedMem, opargs); 143 } 144 CeedChk(ierr); 145 146 // Restore input arrays 147 for (CeedInt i = 0; i < numinputfields; i++) { 148 ierr = CeedQFunctionFieldGetEvalMode(qfinputfields[i], &emode); 149 CeedChk(ierr); 150 if (emode == CEED_EVAL_WEIGHT) { // Skip 151 } else { 152 ierr = CeedOperatorFieldGetVector(opinputfields[i], &vec); CeedChk(ierr); 153 if (vec == CEED_VECTOR_ACTIVE) vec = invec; 154 ierr = CeedVectorRestoreArrayRead(vec, &data->fields.in[i]); 155 CeedChk(ierr); 156 } 157 } 158 159 // Restore output arrays 160 for (CeedInt i = 0; i < numoutputfields; i++) { 161 ierr = CeedQFunctionFieldGetEvalMode(qfoutputfields[i], &emode); 162 CeedChk(ierr); 163 if (emode == CEED_EVAL_WEIGHT) { // Skip 164 } else { 165 ierr = CeedOperatorFieldGetVector(opoutputfields[i], &vec); CeedChk(ierr); 166 if (vec == CEED_VECTOR_ACTIVE) vec = outvec; 167 // Check for multiple output modes 168 CeedInt index = -1; 169 for (CeedInt j = 0; j < i; j++) { 170 if (vec == outvecs[j]) { 171 index = j; 172 break; 173 } 174 } 175 if (index == -1) { 176 ierr = 
CeedVectorRestoreArray(vec, &data->fields.out[i]); 177 CeedChk(ierr); 178 } 179 } 180 } 181 182 // Restore context data 183 if (ctx) { 184 ierr = CeedQFunctionContextRestoreData(ctx, &qf_data->d_c); 185 CeedChk(ierr); 186 } 187 return 0; 188 } 189 190 //------------------------------------------------------------------------------ 191 // Create FDM element inverse not supported 192 //------------------------------------------------------------------------------ 193 static int CeedOperatorCreateFDMElementInverse_Hip(CeedOperator op) { 194 // LCOV_EXCL_START 195 int ierr; 196 Ceed ceed; 197 ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr); 198 return CeedError(ceed, 1, "Backend does not implement FDM inverse creation"); 199 // LCOV_EXCL_STOP 200 } 201 202 //------------------------------------------------------------------------------ 203 // Create operator 204 //------------------------------------------------------------------------------ 205 int CeedOperatorCreate_Hip_gen(CeedOperator op) { 206 int ierr; 207 Ceed ceed; 208 ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr); 209 CeedOperator_Hip_gen *impl; 210 211 ierr = CeedCalloc(1, &impl); CeedChk(ierr); 212 ierr = CeedOperatorSetData(op, impl); CeedChk(ierr); 213 214 ierr = CeedSetBackendFunction(ceed, "Operator", op, "CreateFDMElementInverse", 215 CeedOperatorCreateFDMElementInverse_Hip); 216 CeedChk(ierr); 217 ierr = CeedSetBackendFunction(ceed, "Operator", op, "ApplyAdd", 218 CeedOperatorApplyAdd_Hip_gen); CeedChk(ierr); 219 ierr = CeedSetBackendFunction(ceed, "Operator", op, "Destroy", 220 CeedOperatorDestroy_Hip_gen); CeedChk(ierr); 221 return 0; 222 } 223 //------------------------------------------------------------------------------ 224