// Copyright (c) 2017-2018, Lawrence Livermore National Security, LLC.
// Produced at the Lawrence Livermore National Laboratory. LLNL-CODE-734707.
// All Rights reserved. See files LICENSE and NOTICE for details.
//
// This file is part of CEED, a collection of benchmarks, miniapps, software
// libraries and APIs for efficient high-order finite element and spectral
// element discretizations for exascale applications. For more information and
// source code availability see http://github.com/ceed.
//
// The CEED research is supported by the Exascale Computing Project 17-SC-20-SC,
// a collaborative effort of two U.S. Department of Energy organizations (Office
// of Science and the National Nuclear Security Administration) responsible for
// the planning and preparation of a capable exascale ecosystem, including
// software, applications, hardware, advanced system engineering and early
// testbed platforms, in support of the nation's exascale computing imperative.
16 17 #include <ceed/ceed.h> 18 #include <ceed/backend.h> 19 #include <stddef.h> 20 #include "ceed-hip-gen.h" 21 #include "ceed-hip-gen-operator-build.h" 22 #include "../hip/ceed-hip-compile.h" 23 24 //------------------------------------------------------------------------------ 25 // Destroy operator 26 //------------------------------------------------------------------------------ 27 static int CeedOperatorDestroy_Hip_gen(CeedOperator op) { 28 int ierr; 29 CeedOperator_Hip_gen *impl; 30 ierr = CeedOperatorGetData(op, &impl); CeedChkBackend(ierr); 31 ierr = CeedFree(&impl); CeedChkBackend(ierr); 32 return CEED_ERROR_SUCCESS; 33 } 34 35 //------------------------------------------------------------------------------ 36 // Apply and add to output 37 //------------------------------------------------------------------------------ 38 static int CeedOperatorApplyAdd_Hip_gen(CeedOperator op, CeedVector invec, 39 CeedVector outvec, CeedRequest *request) { 40 int ierr; 41 Ceed ceed; 42 ierr = CeedOperatorGetCeed(op, &ceed); CeedChkBackend(ierr); 43 CeedOperator_Hip_gen *data; 44 ierr = CeedOperatorGetData(op, &data); CeedChkBackend(ierr); 45 CeedQFunction qf; 46 CeedQFunction_Hip_gen *qf_data; 47 ierr = CeedOperatorGetQFunction(op, &qf); CeedChkBackend(ierr); 48 ierr = CeedQFunctionGetData(qf, &qf_data); CeedChkBackend(ierr); 49 CeedInt nelem, numinputfields, numoutputfields; 50 ierr = CeedOperatorGetNumElements(op, &nelem); CeedChkBackend(ierr); 51 CeedOperatorField *opinputfields, *opoutputfields; 52 ierr = CeedOperatorGetFields(op, &numinputfields, &opinputfields, 53 &numoutputfields, &opoutputfields); 54 CeedChkBackend(ierr); 55 CeedQFunctionField *qfinputfields, *qfoutputfields; 56 ierr = CeedQFunctionGetFields(qf, NULL, &qfinputfields, NULL, &qfoutputfields); 57 CeedChkBackend(ierr); 58 CeedEvalMode emode; 59 CeedVector vec, outvecs[16] = {}; 60 61 //Creation of the operator 62 ierr = CeedHipGenOperatorBuild(op); CeedChkBackend(ierr); 63 64 // Input vectors 65 for 
(CeedInt i = 0; i < numinputfields; i++) { 66 ierr = CeedQFunctionFieldGetEvalMode(qfinputfields[i], &emode); 67 CeedChkBackend(ierr); 68 if (emode == CEED_EVAL_WEIGHT) { // Skip 69 data->fields.in[i] = NULL; 70 } else { 71 // Get input vector 72 ierr = CeedOperatorFieldGetVector(opinputfields[i], &vec); CeedChkBackend(ierr); 73 if (vec == CEED_VECTOR_ACTIVE) vec = invec; 74 ierr = CeedVectorGetArrayRead(vec, CEED_MEM_DEVICE, &data->fields.in[i]); 75 CeedChkBackend(ierr); 76 } 77 } 78 79 // Output vectors 80 for (CeedInt i = 0; i < numoutputfields; i++) { 81 ierr = CeedQFunctionFieldGetEvalMode(qfoutputfields[i], &emode); 82 CeedChkBackend(ierr); 83 if (emode == CEED_EVAL_WEIGHT) { // Skip 84 data->fields.out[i] = NULL; 85 } else { 86 // Get output vector 87 ierr = CeedOperatorFieldGetVector(opoutputfields[i], &vec); 88 CeedChkBackend(ierr); 89 if (vec == CEED_VECTOR_ACTIVE) vec = outvec; 90 outvecs[i] = vec; 91 // Check for multiple output modes 92 CeedInt index = -1; 93 for (CeedInt j = 0; j < i; j++) { 94 if (vec == outvecs[j]) { 95 index = j; 96 break; 97 } 98 } 99 if (index == -1) { 100 ierr = CeedVectorGetArray(vec, CEED_MEM_DEVICE, &data->fields.out[i]); 101 CeedChkBackend(ierr); 102 } else { 103 data->fields.out[i] = data->fields.out[index]; 104 } 105 } 106 } 107 108 // Get context data 109 CeedQFunctionContext ctx; 110 ierr = CeedQFunctionGetInnerContext(qf, &ctx); CeedChkBackend(ierr); 111 if (ctx) { 112 ierr = CeedQFunctionContextGetData(ctx, CEED_MEM_DEVICE, &qf_data->d_c); 113 CeedChkBackend(ierr); 114 } 115 116 // Apply operator 117 void *opargs[] = {(void *) &nelem, &qf_data->d_c, &data->indices, 118 &data->fields, &data->B, &data->G, &data->W 119 }; 120 const CeedInt dim = data->dim; 121 const CeedInt Q1d = data->Q1d; 122 const CeedInt P1d = data->maxP1d; 123 const CeedInt thread1d = CeedIntMax(Q1d, P1d); 124 CeedInt block_sizes[3]; 125 ierr = BlockGridCalculate(dim, nelem, P1d, Q1d, block_sizes); 126 CeedChkBackend(ierr); 127 if (dim==1) { 128 
CeedInt grid = nelem/block_sizes[2] + ( ( 129 nelem/block_sizes[2]*block_sizes[2]<nelem) 130 ? 1 : 0 ); 131 CeedInt sharedMem = block_sizes[2]*thread1d*sizeof(CeedScalar); 132 ierr = CeedRunKernelDimSharedHip(ceed, data->op, grid, block_sizes[0], 133 block_sizes[1], 134 block_sizes[2], sharedMem, opargs); 135 } else if (dim==2) { 136 CeedInt grid = nelem/block_sizes[2] + ( ( 137 nelem/block_sizes[2]*block_sizes[2]<nelem) 138 ? 1 : 0 ); 139 CeedInt sharedMem = block_sizes[2]*thread1d*thread1d*sizeof(CeedScalar); 140 ierr = CeedRunKernelDimSharedHip(ceed, data->op, grid, block_sizes[0], 141 block_sizes[1], 142 block_sizes[2], sharedMem, opargs); 143 } else if (dim==3) { 144 CeedInt grid = nelem/block_sizes[2] + ( ( 145 nelem/block_sizes[2]*block_sizes[2]<nelem) 146 ? 1 : 0 ); 147 CeedInt sharedMem = block_sizes[2]*thread1d*thread1d*sizeof(CeedScalar); 148 ierr = CeedRunKernelDimSharedHip(ceed, data->op, grid, block_sizes[0], 149 block_sizes[1], 150 block_sizes[2], sharedMem, opargs); 151 } 152 CeedChkBackend(ierr); 153 154 // Restore input arrays 155 for (CeedInt i = 0; i < numinputfields; i++) { 156 ierr = CeedQFunctionFieldGetEvalMode(qfinputfields[i], &emode); 157 CeedChkBackend(ierr); 158 if (emode == CEED_EVAL_WEIGHT) { // Skip 159 } else { 160 ierr = CeedOperatorFieldGetVector(opinputfields[i], &vec); CeedChkBackend(ierr); 161 if (vec == CEED_VECTOR_ACTIVE) vec = invec; 162 ierr = CeedVectorRestoreArrayRead(vec, &data->fields.in[i]); 163 CeedChkBackend(ierr); 164 } 165 } 166 167 // Restore output arrays 168 for (CeedInt i = 0; i < numoutputfields; i++) { 169 ierr = CeedQFunctionFieldGetEvalMode(qfoutputfields[i], &emode); 170 CeedChkBackend(ierr); 171 if (emode == CEED_EVAL_WEIGHT) { // Skip 172 } else { 173 ierr = CeedOperatorFieldGetVector(opoutputfields[i], &vec); 174 CeedChkBackend(ierr); 175 if (vec == CEED_VECTOR_ACTIVE) vec = outvec; 176 // Check for multiple output modes 177 CeedInt index = -1; 178 for (CeedInt j = 0; j < i; j++) { 179 if (vec == 
outvecs[j]) { 180 index = j; 181 break; 182 } 183 } 184 if (index == -1) { 185 ierr = CeedVectorRestoreArray(vec, &data->fields.out[i]); 186 CeedChkBackend(ierr); 187 } 188 } 189 } 190 191 // Restore context data 192 if (ctx) { 193 ierr = CeedQFunctionContextRestoreData(ctx, &qf_data->d_c); 194 CeedChkBackend(ierr); 195 } 196 return CEED_ERROR_SUCCESS; 197 } 198 199 //------------------------------------------------------------------------------ 200 // Create operator 201 //------------------------------------------------------------------------------ 202 int CeedOperatorCreate_Hip_gen(CeedOperator op) { 203 int ierr; 204 Ceed ceed; 205 ierr = CeedOperatorGetCeed(op, &ceed); CeedChkBackend(ierr); 206 CeedOperator_Hip_gen *impl; 207 208 ierr = CeedCalloc(1, &impl); CeedChkBackend(ierr); 209 ierr = CeedOperatorSetData(op, impl); CeedChkBackend(ierr); 210 211 ierr = CeedSetBackendFunction(ceed, "Operator", op, "ApplyAdd", 212 CeedOperatorApplyAdd_Hip_gen); CeedChkBackend(ierr); 213 ierr = CeedSetBackendFunction(ceed, "Operator", op, "Destroy", 214 CeedOperatorDestroy_Hip_gen); CeedChkBackend(ierr); 215 return CEED_ERROR_SUCCESS; 216 } 217 //------------------------------------------------------------------------------ 218