1 // Copyright (c) 2017, Lawrence Livermore National Security, LLC. Produced at 2 // the Lawrence Livermore National Laboratory. LLNL-CODE-734707. All Rights 3 // reserved. See files LICENSE and NOTICE for details. 4 // 5 // This file is part of CEED, a collection of benchmarks, miniapps, software 6 // libraries and APIs for efficient high-order finite element and spectral 7 // element discretizations for exascale applications. For more information and 8 // source code availability see http://github.com/ceed. 9 // 10 // The CEED research is supported by the Exascale Computing Project 17-SC-20-SC, 11 // a collaborative effort of two U.S. Department of Energy organizations (Office 12 // of Science and the National Nuclear Security Administration) responsible for 13 // the planning and preparation of a capable exascale ecosystem, including 14 // software, applications, hardware, advanced system engineering and early 15 // testbed platforms, in support of the nation's exascale computing imperative. 16 17 #include <ceed-impl.h> 18 #include <string.h> 19 20 typedef struct { 21 CeedScalar *array; 22 CeedScalar *array_allocated; 23 } CeedVector_Ref; 24 25 typedef struct { 26 const CeedInt *indices; 27 CeedInt *indices_allocated; 28 } CeedElemRestriction_Ref; 29 30 typedef struct { 31 CeedVector etmp; 32 CeedVector qdata; 33 } CeedOperator_Ref; 34 35 static int CeedVectorSetArray_Ref(CeedVector vec, CeedMemType mtype, 36 CeedCopyMode cmode, CeedScalar *array) { 37 CeedVector_Ref *impl = vec->data; 38 int ierr; 39 40 if (mtype != CEED_MEM_HOST) 41 return CeedError(vec->ceed, 1, "Only MemType = HOST supported"); 42 ierr = CeedFree(&impl->array_allocated); CeedChk(ierr); 43 switch (cmode) { 44 case CEED_COPY_VALUES: 45 ierr = CeedMalloc(vec->length, &impl->array_allocated); CeedChk(ierr); 46 impl->array = impl->array_allocated; 47 if (array) memcpy(impl->array, array, vec->length * sizeof(array[0])); 48 break; 49 case CEED_OWN_POINTER: 50 impl->array_allocated = array; 51 impl->array = array; 52 break; 53 case CEED_USE_POINTER: 54 impl->array = array; 55 } 56 return 0; 57 } 58 59 static int CeedVectorGetArray_Ref(CeedVector vec, CeedMemType mtype, 60 CeedScalar **array) { 61 CeedVector_Ref *impl = vec->data; 62 int ierr; 63 64 if (mtype != CEED_MEM_HOST) 65 return CeedError(vec->ceed, 1, "Can only provide to HOST memory"); 66 if (!impl->array) { // Allocate if array is not yet allocated 67 ierr = CeedVectorSetArray(vec, CEED_MEM_HOST, CEED_COPY_VALUES, NULL); 68 CeedChk(ierr); 69 } 70 *array = impl->array; 71 return 0; 72 } 73 74 static int CeedVectorGetArrayRead_Ref(CeedVector vec, CeedMemType mtype, 75 const CeedScalar **array) { 76 CeedVector_Ref *impl = vec->data; 77 int ierr; 78 79 if (mtype != CEED_MEM_HOST) 80 return CeedError(vec->ceed, 1, "Can only provide to HOST memory"); 81 if (!impl->array) { // Allocate if array is not yet allocated 82 ierr = CeedVectorSetArray(vec, CEED_MEM_HOST, CEED_COPY_VALUES, NULL); 83 CeedChk(ierr); 84 } 85 *array = impl->array; 86 return 0; 87 } 88 89 static int CeedVectorRestoreArray_Ref(CeedVector vec, CeedScalar **array) { 90 *array = NULL; 91 return 0; 92 } 93 94 static int CeedVectorRestoreArrayRead_Ref(CeedVector vec, 95 const CeedScalar **array) { 96 *array = NULL; 97 return 0; 98 } 99 100 static int CeedVectorDestroy_Ref(CeedVector vec) { 101 CeedVector_Ref *impl = vec->data; 102 int ierr; 103 104 ierr = CeedFree(&impl->array_allocated); CeedChk(ierr); 105 ierr = CeedFree(&vec->data); CeedChk(ierr); 106 return 0; 107 } 108 109 static int CeedVectorCreate_Ref(Ceed ceed, CeedInt n, CeedVector vec) { 110 CeedVector_Ref *impl; 111 int ierr; 112 113 vec->SetArray = CeedVectorSetArray_Ref; 114 vec->GetArray = CeedVectorGetArray_Ref; 115 vec->GetArrayRead = CeedVectorGetArrayRead_Ref; 116 vec->RestoreArray = CeedVectorRestoreArray_Ref; 117 vec->RestoreArrayRead = CeedVectorRestoreArrayRead_Ref; 118 vec->Destroy = CeedVectorDestroy_Ref; 119 ierr = CeedCalloc(1,&impl); CeedChk(ierr); 120 vec->data = impl; 121 return 0; 122 } 123 124 static int CeedElemRestrictionApply_Ref(CeedElemRestriction r, 125 CeedTransposeMode tmode, CeedVector u, 126 CeedVector v, CeedRequest *request) { 127 CeedElemRestriction_Ref *impl = r->data; 128 int ierr; 129 const CeedScalar *uu; 130 CeedScalar *vv; 131 132 ierr = CeedVectorGetArrayRead(u, CEED_MEM_HOST, &uu); CeedChk(ierr); 133 ierr = CeedVectorGetArray(v, CEED_MEM_HOST, &vv); CeedChk(ierr); 134 if (tmode == CEED_NOTRANSPOSE) { 135 for (CeedInt i=0; i<r->nelem*r->elemsize; i++) vv[i] = uu[impl->indices[i]]; 136 } else { 137 for (CeedInt i=0; i<r->nelem*r->elemsize; i++) vv[impl->indices[i]] += uu[i]; 138 } 139 ierr = CeedVectorRestoreArrayRead(u, &uu); CeedChk(ierr); 140 ierr = CeedVectorRestoreArray(v, &vv); CeedChk(ierr); 141 if (request != CEED_REQUEST_IMMEDIATE) *request = NULL; 142 return 0; 143 } 144 145 static int CeedElemRestrictionDestroy_Ref(CeedElemRestriction r) { 146 CeedElemRestriction_Ref *impl = r->data; 147 int ierr; 148 149 ierr = CeedFree(&impl->indices_allocated); CeedChk(ierr); 150 ierr = CeedFree(&r->data); CeedChk(ierr); 151 return 0; 152 } 153 154 static int CeedElemRestrictionCreate_Ref(CeedElemRestriction r, 155 CeedMemType mtype, 156 CeedCopyMode cmode, const CeedInt *indices) { 157 int ierr; 158 CeedElemRestriction_Ref *impl; 159 160 if (mtype != CEED_MEM_HOST) 161 return CeedError(r->ceed, 1, "Only MemType = HOST supported"); 162 ierr = CeedCalloc(1,&impl); CeedChk(ierr); 163 switch (cmode) { 164 case CEED_COPY_VALUES: 165 ierr = CeedMalloc(r->nelem*r->elemsize, &impl->indices_allocated); 166 CeedChk(ierr); 167 memcpy(impl->indices_allocated, indices, 168 r->nelem * r->elemsize * sizeof(indices[0])); 169 impl->indices = impl->indices_allocated; 170 break; 171 case CEED_OWN_POINTER: 172 impl->indices_allocated = (CeedInt *)indices; 173 impl->indices = impl->indices_allocated; 174 break; 175 case CEED_USE_POINTER: 176 impl->indices = indices; 177 } 178 r->data = impl; 179 r->Apply = CeedElemRestrictionApply_Ref; 180 r->Destroy = CeedElemRestrictionDestroy_Ref; 181 return 0; 182 } 183 184 // Contracts on the middle index 185 // NOTRANSPOSE: V_ajc = T_jb U_abc 186 // TRANSPOSE: V_ajc = T_bj U_abc 187 static int CeedTensorContract_Ref(Ceed ceed, 188 CeedInt A, CeedInt B, CeedInt C, CeedInt J, 189 const CeedScalar *t, CeedTransposeMode tmode, 190 const CeedScalar *u, CeedScalar *v) { 191 CeedInt tstride0 = B, tstride1 = 1; 192 if (tmode == CEED_TRANSPOSE) { 193 tstride0 = 1; tstride1 = J; 194 } 195 196 for (CeedInt a=0; a<A; a++) { 197 for (CeedInt j=0; j<J; j++) { 198 for (CeedInt c=0; c<C; c++) 199 v[(a*J+j)*C+c] = 0; 200 for (CeedInt b=0; b<B; b++) { 201 for (CeedInt c=0; c<C; c++) { 202 v[(a*J+j)*C+c] += t[j*tstride0 + b*tstride1] * u[(a*B+b)*C+c]; 203 } 204 } 205 } 206 } 207 return 0; 208 } 209 210 static int CeedBasisApply_Ref(CeedBasis basis, CeedTransposeMode tmode, 211 CeedEvalMode emode, 212 const CeedScalar *u, CeedScalar *v) { 213 int ierr; 214 const CeedInt dim = basis->dim; 215 const CeedInt ndof = basis->ndof; 216 217 switch (emode) { 218 case CEED_EVAL_NONE: break; 219 case CEED_EVAL_INTERP: { 220 CeedInt P = basis->P1d, Q = basis->Q1d; 221 if (tmode == CEED_TRANSPOSE) { 222 P = basis->Q1d; Q = basis->P1d; 223 } 224 CeedInt pre = ndof*CeedPowInt(P, dim-1), post = 1; 225 CeedScalar tmp[2][Q*CeedPowInt(P>Q?P:Q, dim-1)]; 226 for (CeedInt d=0; d<dim; d++) { 227 ierr = CeedTensorContract_Ref(basis->ceed, pre, P, post, Q, basis->interp1d, 228 tmode, 229 d==0?u:tmp[d%2], d==dim-1?v:tmp[(d+1)%2]); CeedChk(ierr); 230 pre /= P; 231 post *= Q; 232 } 233 } break; 234 case CEED_EVAL_WEIGHT: { 235 if (tmode == CEED_TRANSPOSE) 236 return CeedError(basis->ceed, 1, 237 "CEED_EVAL_WEIGHT incompatible with CEED_TRANSPOSE"); 238 CeedInt Q = basis->Q1d; 239 for (CeedInt d=0; d<dim; d++) { 240 CeedInt pre = CeedPowInt(Q, dim-d-1), post = CeedPowInt(Q, d); 241 for (CeedInt i=0; i<pre; i++) { 242 for (CeedInt j=0; j<Q; j++) { 243 for (CeedInt k=0; k<post; k++) { 244 v[(i*Q + j)*post + k] = basis->qweight1d[j] 245 * (d == 0 ? 1 : v[(i*Q + j)*post + k]); 246 } 247 } 248 } 249 } 250 } break; 251 default: 252 return CeedError(basis->ceed, 1, "EvalMode %d not supported", emode); 253 } 254 return 0; 255 } 256 257 static int CeedBasisDestroy_Ref(CeedBasis basis) { 258 return 0; 259 } 260 261 static int CeedBasisCreateTensorH1_Ref(Ceed ceed, CeedInt dim, CeedInt P1d, 262 CeedInt Q1d, const CeedScalar *interp1d, 263 const CeedScalar *grad1d, 264 const CeedScalar *qref1d, 265 const CeedScalar *qweight1d, 266 CeedBasis basis) { 267 basis->Apply = CeedBasisApply_Ref; 268 basis->Destroy = CeedBasisDestroy_Ref; 269 return 0; 270 } 271 272 static int CeedQFunctionApply_Ref(CeedQFunction qf, void *qdata, CeedInt Q, 273 const CeedScalar *const *u, 274 CeedScalar *const *v) { 275 int ierr; 276 ierr = qf->function(qf->ctx, qdata, Q, u, v); CeedChk(ierr); 277 return 0; 278 } 279 280 static int CeedQFunctionDestroy_Ref(CeedQFunction qf) { 281 return 0; 282 } 283 284 static int CeedQFunctionCreate_Ref(CeedQFunction qf) { 285 qf->Apply = CeedQFunctionApply_Ref; 286 qf->Destroy = CeedQFunctionDestroy_Ref; 287 return 0; 288 } 289 290 static int CeedOperatorDestroy_Ref(CeedOperator op) { 291 CeedOperator_Ref *impl = op->data; 292 int ierr; 293 294 ierr = CeedVectorDestroy(&impl->etmp); CeedChk(ierr); 295 ierr = CeedVectorDestroy(&impl->qdata); CeedChk(ierr); 296 ierr = CeedFree(&op->data); CeedChk(ierr); 297 return 0; 298 } 299 300 static int CeedOperatorApply_Ref(CeedOperator op, CeedVector qdata, 301 CeedVector ustate, 302 CeedVector residual, CeedRequest *request) { 303 CeedOperator_Ref *impl = op->data; 304 CeedVector etmp; 305 CeedInt Q; 306 CeedScalar *Eu; 307 char *qd; 308 int ierr; 309 310 if (!impl->etmp) { 311 ierr = CeedVectorCreate(op->ceed, 312 op->Erestrict->nelem * op->Erestrict->elemsize, 313 &impl->etmp); CeedChk(ierr); 314 } 315 etmp = impl->etmp; 316 if (op->qf->inmode != CEED_EVAL_NONE || op->qf->inmode != CEED_EVAL_WEIGHT) { 317 ierr = CeedElemRestrictionApply(op->Erestrict, CEED_NOTRANSPOSE, ustate, etmp, 318 CEED_REQUEST_IMMEDIATE); CeedChk(ierr); 319 } 320 ierr = CeedBasisGetNumQuadraturePoints(op->basis, &Q); CeedChk(ierr); 321 ierr = CeedVectorGetArray(etmp, CEED_MEM_HOST, &Eu); CeedChk(ierr); 322 ierr = CeedVectorGetArray(qdata, CEED_MEM_HOST, (CeedScalar **)&qd); 323 CeedChk(ierr); 324 for (CeedInt e=0; e<op->Erestrict->nelem; e++) { 325 CeedScalar BEu[Q], BEv[Q], *out[1]; 326 const CeedScalar *in[1]; 327 ierr = CeedBasisApply(op->basis, CEED_NOTRANSPOSE, op->qf->inmode, 328 &Eu[e*op->Erestrict->elemsize], BEu); CeedChk(ierr); 329 in[0] = BEu; 330 out[0] = BEv; 331 ierr = CeedQFunctionApply(op->qf, &qd[e*Q*op->qf->qdatasize], Q, in, out); 332 CeedChk(ierr); 333 ierr = CeedBasisApply(op->basis, CEED_TRANSPOSE, op->qf->outmode, BEv, 334 &Eu[e*op->Erestrict->elemsize]); CeedChk(ierr); 335 } 336 ierr = CeedVectorRestoreArray(etmp, &Eu); CeedChk(ierr); 337 if (residual) { 338 ierr = CeedElemRestrictionApply(op->Erestrict, CEED_TRANSPOSE, etmp, residual, 339 CEED_REQUEST_IMMEDIATE); CeedChk(ierr); 340 } 341 if (request != CEED_REQUEST_IMMEDIATE) *request = NULL; 342 return 0; 343 } 344 345 static int CeedOperatorGetQData_Ref(CeedOperator op, CeedVector *qdata) { 346 CeedOperator_Ref *impl = op->data; 347 int ierr; 348 349 if (!impl->qdata) { 350 CeedInt Q; 351 ierr = CeedBasisGetNumQuadraturePoints(op->basis, &Q); CeedChk(ierr); 352 ierr = CeedVectorCreate(op->ceed, 353 op->Erestrict->nelem * Q 354 * op->qf->qdatasize / sizeof(CeedScalar), 355 &impl->qdata); CeedChk(ierr); 356 } 357 *qdata = impl->qdata; 358 return 0; 359 } 360 361 static int CeedOperatorCreate_Ref(CeedOperator op) { 362 CeedOperator_Ref *impl; 363 int ierr; 364 365 ierr = CeedCalloc(1, &impl); CeedChk(ierr); 366 op->data = impl; 367 op->Destroy = CeedOperatorDestroy_Ref; 368 op->Apply = CeedOperatorApply_Ref; 369 op->GetQData = CeedOperatorGetQData_Ref; 370 return 0; 371 } 372 373 static int CeedInit_Ref(const char *resource, Ceed ceed) { 374 if (strcmp(resource, "/cpu/self") 375 && strcmp(resource, "/cpu/self/ref")) 376 return CeedError(ceed, 1, "Ref backend cannot use resource: %s", resource); 377 ceed->VecCreate = CeedVectorCreate_Ref; 378 ceed->BasisCreateTensorH1 = CeedBasisCreateTensorH1_Ref; 379 ceed->ElemRestrictionCreate = CeedElemRestrictionCreate_Ref; 380 ceed->QFunctionCreate = CeedQFunctionCreate_Ref; 381 ceed->OperatorCreate = CeedOperatorCreate_Ref; 382 return 0; 383 } 384 385 __attribute__((constructor)) 386 static void Register(void) { 387 CeedRegister("/cpu/self/ref", CeedInit_Ref); 388 } 389