1 // Copyright (c) 2017-2018, Lawrence Livermore National Security, LLC. 2 // Produced at the Lawrence Livermore National Laboratory. LLNL-CODE-734707. 3 // All Rights reserved. See files LICENSE and NOTICE for details. 4 // 5 // This file is part of CEED, a collection of benchmarks, miniapps, software 6 // libraries and APIs for efficient high-order finite element and spectral 7 // element discretizations for exascale applications. For more information and 8 // source code availability see http://github.com/ceed. 9 // 10 // The CEED research is supported by the Exascale Computing Project 17-SC-20-SC, 11 // a collaborative effort of two U.S. Department of Energy organizations (Office 12 // of Science and the National Nuclear Security Administration) responsible for 13 // the planning and preparation of a capable exascale ecosystem, including 14 // software, applications, hardware, advanced system engineering and early 15 // testbed platforms, in support of the nation's exascale computing imperative. 16 17 #include <ceed-impl.h> 18 #include <string.h> 19 20 typedef struct { 21 CeedScalar *array; 22 CeedScalar *array_allocated; 23 } CeedVector_Ref; 24 25 typedef struct { 26 const CeedInt *indices; 27 CeedInt *indices_allocated; 28 } CeedElemRestriction_Ref; 29 30 typedef struct { 31 CeedVector etmp; 32 CeedVector qdata; 33 } CeedOperator_Ref; 34 35 static int CeedVectorSetArray_Ref(CeedVector vec, CeedMemType mtype, 36 CeedCopyMode cmode, CeedScalar *array) { 37 CeedVector_Ref *impl = vec->data; 38 int ierr; 39 40 if (mtype != CEED_MEM_HOST) 41 return CeedError(vec->ceed, 1, "Only MemType = HOST supported"); 42 ierr = CeedFree(&impl->array_allocated); CeedChk(ierr); 43 switch (cmode) { 44 case CEED_COPY_VALUES: 45 ierr = CeedMalloc(vec->length, &impl->array_allocated); CeedChk(ierr); 46 impl->array = impl->array_allocated; 47 if (array) memcpy(impl->array, array, vec->length * sizeof(array[0])); 48 break; 49 case CEED_OWN_POINTER: 50 impl->array_allocated = array; 51 impl->array = array; 52 break; 53 case CEED_USE_POINTER: 54 impl->array = array; 55 } 56 return 0; 57 } 58 59 static int CeedVectorGetArray_Ref(CeedVector vec, CeedMemType mtype, 60 CeedScalar **array) { 61 CeedVector_Ref *impl = vec->data; 62 int ierr; 63 64 if (mtype != CEED_MEM_HOST) 65 return CeedError(vec->ceed, 1, "Can only provide to HOST memory"); 66 if (!impl->array) { // Allocate if array is not yet allocated 67 ierr = CeedVectorSetArray(vec, CEED_MEM_HOST, CEED_COPY_VALUES, NULL); 68 CeedChk(ierr); 69 } 70 *array = impl->array; 71 return 0; 72 } 73 74 static int CeedVectorGetArrayRead_Ref(CeedVector vec, CeedMemType mtype, 75 const CeedScalar **array) { 76 CeedVector_Ref *impl = vec->data; 77 int ierr; 78 79 if (mtype != CEED_MEM_HOST) 80 return CeedError(vec->ceed, 1, "Can only provide to HOST memory"); 81 if (!impl->array) { // Allocate if array is not yet allocated 82 ierr = CeedVectorSetArray(vec, CEED_MEM_HOST, CEED_COPY_VALUES, NULL); 83 CeedChk(ierr); 84 } 85 *array = impl->array; 86 return 0; 87 } 88 89 static int CeedVectorRestoreArray_Ref(CeedVector vec, CeedScalar **array) { 90 *array = NULL; 91 return 0; 92 } 93 94 static int CeedVectorRestoreArrayRead_Ref(CeedVector vec, 95 const CeedScalar **array) { 96 *array = NULL; 97 return 0; 98 } 99 100 static int CeedVectorDestroy_Ref(CeedVector vec) { 101 CeedVector_Ref *impl = vec->data; 102 int ierr; 103 104 ierr = CeedFree(&impl->array_allocated); CeedChk(ierr); 105 ierr = CeedFree(&vec->data); CeedChk(ierr); 106 return 0; 107 } 108 109 static int CeedVectorCreate_Ref(Ceed ceed, CeedInt n, CeedVector vec) { 110 CeedVector_Ref *impl; 111 int ierr; 112 113 vec->SetArray = CeedVectorSetArray_Ref; 114 vec->GetArray = CeedVectorGetArray_Ref; 115 vec->GetArrayRead = CeedVectorGetArrayRead_Ref; 116 vec->RestoreArray = CeedVectorRestoreArray_Ref; 117 vec->RestoreArrayRead = CeedVectorRestoreArrayRead_Ref; 118 vec->Destroy = CeedVectorDestroy_Ref; 119 ierr = CeedCalloc(1,&impl); CeedChk(ierr); 120 vec->data = impl; 121 return 0; 122 } 123 124 static int CeedElemRestrictionApply_Ref(CeedElemRestriction r, 125 CeedTransposeMode tmode, CeedInt ncomp, 126 CeedTransposeMode lmode, CeedVector u, 127 CeedVector v, CeedRequest *request) { 128 CeedElemRestriction_Ref *impl = r->data; 129 int ierr; 130 const CeedScalar *uu; 131 CeedScalar *vv; 132 CeedInt esize = r->nelem*r->elemsize; 133 134 ierr = CeedVectorGetArrayRead(u, CEED_MEM_HOST, &uu); CeedChk(ierr); 135 ierr = CeedVectorGetArray(v, CEED_MEM_HOST, &vv); CeedChk(ierr); 136 if (tmode == CEED_NOTRANSPOSE) { 137 // Perform: v = r * u 138 if (!impl->indices) { 139 for (CeedInt i=0; i<esize; i++) vv[i] = uu[i]; 140 } else if (ncomp == 1) { 141 for (CeedInt i=0; i<esize; i++) vv[i] = uu[impl->indices[i]]; 142 } else { 143 // vv is (elemsize x ncomp x nelem), column-major 144 if (lmode == CEED_NOTRANSPOSE) { // u is (ndof x ncomp), column-major 145 for (CeedInt e = 0; e < r->nelem; e++) 146 for (CeedInt d = 0; d < ncomp; d++) 147 for (CeedInt i=0; i<r->elemsize; i++) { 148 vv[i+r->elemsize*(d+ncomp*e)] = 149 uu[impl->indices[i+r->elemsize*e]+r->ndof*d]; 150 } 151 } else { // u is (ncomp x ndof), column-major 152 for (CeedInt e = 0; e < r->nelem; e++) 153 for (CeedInt d = 0; d < ncomp; d++) 154 for (CeedInt i=0; i<r->elemsize; i++) { 155 vv[i+r->elemsize*(d+ncomp*e)] = 156 uu[d+ncomp*impl->indices[i+r->elemsize*e]]; 157 } 158 } 159 } 160 } else { 161 // Note: in transpose mode, we perform: v += r^t * u 162 if (!impl->indices) { 163 for (CeedInt i=0; i<esize; i++) vv[i] += uu[i]; 164 } else if (ncomp == 1) { 165 for (CeedInt i=0; i<esize; i++) vv[impl->indices[i]] += uu[i]; 166 } else { 167 // u is (elemsize x ncomp x nelem) 168 if (lmode == CEED_NOTRANSPOSE) { // vv is (ndof x ncomp), column-major 169 for (CeedInt e = 0; e < r->nelem; e++) 170 for (CeedInt d = 0; d < ncomp; d++) 171 for (CeedInt i=0; i<r->elemsize; i++) { 172 vv[impl->indices[i+r->elemsize*e]+r->ndof*d] += 173 uu[i+r->elemsize*(d+e*ncomp)]; 174 } 175 } else { // vv is (ncomp x ndof), column-major 176 for (CeedInt e = 0; e < r->nelem; e++) 177 for (CeedInt d = 0; d < ncomp; d++) 178 for (CeedInt i=0; i<r->elemsize; i++) { 179 vv[d+ncomp*impl->indices[i+r->elemsize*e]] += 180 uu[i+r->elemsize*(d+e*ncomp)]; 181 } 182 } 183 } 184 } 185 ierr = CeedVectorRestoreArrayRead(u, &uu); CeedChk(ierr); 186 ierr = CeedVectorRestoreArray(v, &vv); CeedChk(ierr); 187 if (request != CEED_REQUEST_IMMEDIATE && request != CEED_REQUEST_ORDERED) 188 *request = NULL; 189 return 0; 190 } 191 192 static int CeedElemRestrictionDestroy_Ref(CeedElemRestriction r) { 193 CeedElemRestriction_Ref *impl = r->data; 194 int ierr; 195 196 ierr = CeedFree(&impl->indices_allocated); CeedChk(ierr); 197 ierr = CeedFree(&r->data); CeedChk(ierr); 198 return 0; 199 } 200 201 static int CeedElemRestrictionCreate_Ref(CeedElemRestriction r, 202 CeedMemType mtype, 203 CeedCopyMode cmode, const CeedInt *indices) { 204 int ierr; 205 CeedElemRestriction_Ref *impl; 206 207 if (mtype != CEED_MEM_HOST) 208 return CeedError(r->ceed, 1, "Only MemType = HOST supported"); 209 ierr = CeedCalloc(1,&impl); CeedChk(ierr); 210 switch (cmode) { 211 case CEED_COPY_VALUES: 212 ierr = CeedMalloc(r->nelem*r->elemsize, &impl->indices_allocated); 213 CeedChk(ierr); 214 memcpy(impl->indices_allocated, indices, 215 r->nelem * r->elemsize * sizeof(indices[0])); 216 impl->indices = impl->indices_allocated; 217 break; 218 case CEED_OWN_POINTER: 219 impl->indices_allocated = (CeedInt *)indices; 220 impl->indices = impl->indices_allocated; 221 break; 222 case CEED_USE_POINTER: 223 impl->indices = indices; 224 } 225 r->data = impl; 226 r->Apply = CeedElemRestrictionApply_Ref; 227 r->Destroy = CeedElemRestrictionDestroy_Ref; 228 return 0; 229 } 230 231 // Contracts on the middle index 232 // NOTRANSPOSE: V_ajc = T_jb U_abc 233 // TRANSPOSE: V_ajc = T_bj U_abc 234 // If Add != 0, "=" is replaced by "+=" 235 static int CeedTensorContract_Ref(Ceed ceed, 236 CeedInt A, CeedInt B, CeedInt C, CeedInt J, 237 const CeedScalar *t, CeedTransposeMode tmode, 238 const CeedInt Add, 239 const CeedScalar *u, CeedScalar *v) { 240 CeedInt tstride0 = B, tstride1 = 1; 241 if (tmode == CEED_TRANSPOSE) { 242 tstride0 = 1; tstride1 = J; 243 } 244 245 for (CeedInt a=0; a<A; a++) { 246 for (CeedInt j=0; j<J; j++) { 247 if (!Add) { 248 for (CeedInt c=0; c<C; c++) 249 v[(a*J+j)*C+c] = 0; 250 } 251 for (CeedInt b=0; b<B; b++) { 252 for (CeedInt c=0; c<C; c++) { 253 v[(a*J+j)*C+c] += t[j*tstride0 + b*tstride1] * u[(a*B+b)*C+c]; 254 } 255 } 256 } 257 } 258 return 0; 259 } 260 261 static int CeedBasisApply_Ref(CeedBasis basis, CeedTransposeMode tmode, 262 CeedEvalMode emode, 263 const CeedScalar *u, CeedScalar *v) { 264 int ierr; 265 const CeedInt dim = basis->dim; 266 const CeedInt ndof = basis->ndof; 267 const CeedInt nqpt = ndof*CeedPowInt(basis->Q1d, dim); 268 const CeedInt add = (tmode == CEED_TRANSPOSE); 269 270 if (tmode == CEED_TRANSPOSE) { 271 const CeedInt vsize = ndof*CeedPowInt(basis->P1d, dim); 272 for (CeedInt i = 0; i < vsize; i++) 273 v[i] = (CeedScalar) 0; 274 } 275 if (emode & CEED_EVAL_INTERP) { 276 CeedInt P = basis->P1d, Q = basis->Q1d; 277 if (tmode == CEED_TRANSPOSE) { 278 P = basis->Q1d; Q = basis->P1d; 279 } 280 CeedInt pre = ndof*CeedPowInt(P, dim-1), post = 1; 281 CeedScalar tmp[2][ndof*Q*CeedPowInt(P>Q?P:Q, dim-1)]; 282 for (CeedInt d=0; d<dim; d++) { 283 ierr = CeedTensorContract_Ref(basis->ceed, pre, P, post, Q, basis->interp1d, 284 tmode, add&&(d==dim-1), 285 d==0?u:tmp[d%2], d==dim-1?v:tmp[(d+1)%2]); 286 CeedChk(ierr); 287 pre /= P; 288 post *= Q; 289 } 290 if (tmode == CEED_NOTRANSPOSE) { 291 v += nqpt; 292 } else { 293 u += nqpt; 294 } 295 } 296 if (emode & CEED_EVAL_GRAD) { 297 CeedInt P = basis->P1d, Q = basis->Q1d; 298 // In CEED_NOTRANSPOSE mode: 299 // u is (P^dim x nc), column-major layout (nc = ndof) 300 // v is (Q^dim x nc x dim), column-major layout (nc = ndof) 301 // In CEED_TRANSPOSE mode, the sizes of u and v are switched. 302 if (tmode == CEED_TRANSPOSE) { 303 P = basis->Q1d, Q = basis->P1d; 304 } 305 CeedScalar tmp[2][ndof*Q*CeedPowInt(P>Q?P:Q, dim-1)]; 306 for (CeedInt p = 0; p < dim; p++) { 307 CeedInt pre = ndof*CeedPowInt(P, dim-1), post = 1; 308 for (CeedInt d=0; d<dim; d++) { 309 ierr = CeedTensorContract_Ref(basis->ceed, pre, P, post, Q, 310 (p==d)?basis->grad1d:basis->interp1d, 311 tmode, add&&(d==dim-1), 312 d==0?u:tmp[d%2], d==dim-1?v:tmp[(d+1)%2]); 313 CeedChk(ierr); 314 pre /= P; 315 post *= Q; 316 } 317 if (tmode == CEED_NOTRANSPOSE) { 318 v += nqpt; 319 } else { 320 u += nqpt; 321 } 322 } 323 } 324 if (emode & CEED_EVAL_WEIGHT) { 325 if (tmode == CEED_TRANSPOSE) 326 return CeedError(basis->ceed, 1, 327 "CEED_EVAL_WEIGHT incompatible with CEED_TRANSPOSE"); 328 CeedInt Q = basis->Q1d; 329 for (CeedInt d=0; d<dim; d++) { 330 CeedInt pre = CeedPowInt(Q, dim-d-1), post = CeedPowInt(Q, d); 331 for (CeedInt i=0; i<pre; i++) { 332 for (CeedInt j=0; j<Q; j++) { 333 for (CeedInt k=0; k<post; k++) { 334 v[(i*Q + j)*post + k] = basis->qweight1d[j] 335 * (d == 0 ? 1 : v[(i*Q + j)*post + k]); 336 } 337 } 338 } 339 } 340 } 341 return 0; 342 } 343 344 static int CeedBasisDestroy_Ref(CeedBasis basis) { 345 return 0; 346 } 347 348 static int CeedBasisCreateTensorH1_Ref(Ceed ceed, CeedInt dim, CeedInt P1d, 349 CeedInt Q1d, const CeedScalar *interp1d, 350 const CeedScalar *grad1d, 351 const CeedScalar *qref1d, 352 const CeedScalar *qweight1d, 353 CeedBasis basis) { 354 basis->Apply = CeedBasisApply_Ref; 355 basis->Destroy = CeedBasisDestroy_Ref; 356 return 0; 357 } 358 359 static int CeedQFunctionApply_Ref(CeedQFunction qf, void *qdata, CeedInt Q, 360 const CeedScalar *const *u, 361 CeedScalar *const *v) { 362 int ierr; 363 ierr = qf->function(qf->ctx, qdata, Q, u, v); CeedChk(ierr); 364 return 0; 365 } 366 367 static int CeedQFunctionDestroy_Ref(CeedQFunction qf) { 368 return 0; 369 } 370 371 static int CeedQFunctionCreate_Ref(CeedQFunction qf) { 372 qf->Apply = CeedQFunctionApply_Ref; 373 qf->Destroy = CeedQFunctionDestroy_Ref; 374 return 0; 375 } 376 377 static int CeedOperatorDestroy_Ref(CeedOperator op) { 378 CeedOperator_Ref *impl = op->data; 379 int ierr; 380 381 ierr = CeedVectorDestroy(&impl->etmp); CeedChk(ierr); 382 ierr = CeedVectorDestroy(&impl->qdata); CeedChk(ierr); 383 ierr = CeedFree(&op->data); CeedChk(ierr); 384 return 0; 385 } 386 387 static int CeedOperatorApply_Ref(CeedOperator op, CeedVector qdata, 388 CeedVector ustate, 389 CeedVector residual, CeedRequest *request) { 390 CeedOperator_Ref *impl = op->data; 391 CeedVector etmp; 392 CeedInt Q; 393 const CeedInt nc = op->basis->ndof, dim = op->basis->dim; 394 CeedScalar *Eu; 395 char *qd; 396 int ierr; 397 CeedTransposeMode lmode = CEED_NOTRANSPOSE; 398 399 if (!impl->etmp) { 400 ierr = CeedVectorCreate(op->ceed, 401 nc * op->Erestrict->nelem * op->Erestrict->elemsize, 402 &impl->etmp); CeedChk(ierr); 403 // etmp is allocated when CeedVectorGetArray is called below 404 } 405 etmp = impl->etmp; 406 if (op->qf->inmode & ~CEED_EVAL_WEIGHT) { 407 ierr = CeedElemRestrictionApply(op->Erestrict, CEED_NOTRANSPOSE, 408 nc, lmode, ustate, etmp, 409 CEED_REQUEST_IMMEDIATE); CeedChk(ierr); 410 } 411 ierr = CeedBasisGetNumQuadraturePoints(op->basis, &Q); CeedChk(ierr); 412 ierr = CeedVectorGetArray(etmp, CEED_MEM_HOST, &Eu); CeedChk(ierr); 413 ierr = CeedVectorGetArray(qdata, CEED_MEM_HOST, (CeedScalar**)&qd); 414 CeedChk(ierr); 415 for (CeedInt e=0; e<op->Erestrict->nelem; e++) { 416 CeedScalar BEu[Q*nc*(dim+2)], BEv[Q*nc*(dim+2)], *out[5] = {0,0,0,0,0}; 417 const CeedScalar *in[5] = {0,0,0,0,0}; 418 // TODO: quadrature weights can be computed just once 419 ierr = CeedBasisApply(op->basis, CEED_NOTRANSPOSE, op->qf->inmode, 420 &Eu[e*op->Erestrict->elemsize*nc], BEu); 421 CeedChk(ierr); 422 CeedScalar *u_ptr = BEu, *v_ptr = BEv; 423 if (op->qf->inmode & CEED_EVAL_INTERP) { in[0] = u_ptr; u_ptr += Q*nc; } 424 if (op->qf->inmode & CEED_EVAL_GRAD) { in[1] = u_ptr; u_ptr += Q*nc*dim; } 425 if (op->qf->inmode & CEED_EVAL_WEIGHT) { in[4] = u_ptr; u_ptr += Q; } 426 if (op->qf->outmode & CEED_EVAL_INTERP) { out[0] = v_ptr; v_ptr += Q*nc; } 427 if (op->qf->outmode & CEED_EVAL_GRAD) { out[1] = v_ptr; v_ptr += Q*nc*dim; } 428 ierr = CeedQFunctionApply(op->qf, &qd[e*Q*op->qf->qdatasize], Q, in, out); 429 CeedChk(ierr); 430 ierr = CeedBasisApply(op->basis, CEED_TRANSPOSE, op->qf->outmode, BEv, 431 &Eu[e*op->Erestrict->elemsize*nc]); 432 CeedChk(ierr); 433 } 434 ierr = CeedVectorRestoreArray(etmp, &Eu); CeedChk(ierr); 435 if (residual) { 436 CeedScalar *res; 437 CeedVectorGetArray(residual, CEED_MEM_HOST, &res); 438 for (int i = 0; i < residual->length; i++) 439 res[i] = (CeedScalar)0; 440 ierr = CeedElemRestrictionApply(op->Erestrict, CEED_TRANSPOSE, 441 nc, lmode, etmp, residual, 442 CEED_REQUEST_IMMEDIATE); CeedChk(ierr); 443 } 444 if (request != CEED_REQUEST_IMMEDIATE && request != CEED_REQUEST_ORDERED) 445 *request = NULL; 446 return 0; 447 } 448 449 static int CeedOperatorGetQData_Ref(CeedOperator op, CeedVector *qdata) { 450 CeedOperator_Ref *impl = op->data; 451 int ierr; 452 453 if (!impl->qdata) { 454 CeedInt Q; 455 ierr = CeedBasisGetNumQuadraturePoints(op->basis, &Q); CeedChk(ierr); 456 ierr = CeedVectorCreate(op->ceed, 457 op->Erestrict->nelem * Q 458 * op->qf->qdatasize / sizeof(CeedScalar), 459 &impl->qdata); CeedChk(ierr); 460 } 461 *qdata = impl->qdata; 462 return 0; 463 } 464 465 static int CeedOperatorCreate_Ref(CeedOperator op) { 466 CeedOperator_Ref *impl; 467 int ierr; 468 469 ierr = CeedCalloc(1, &impl); CeedChk(ierr); 470 op->data = impl; 471 op->Destroy = CeedOperatorDestroy_Ref; 472 op->Apply = CeedOperatorApply_Ref; 473 op->GetQData = CeedOperatorGetQData_Ref; 474 return 0; 475 } 476 477 static int CeedInit_Ref(const char *resource, Ceed ceed) { 478 if (strcmp(resource, "/cpu/self") 479 && strcmp(resource, "/cpu/self/ref")) 480 return CeedError(ceed, 1, "Ref backend cannot use resource: %s", resource); 481 ceed->VecCreate = CeedVectorCreate_Ref; 482 ceed->BasisCreateTensorH1 = CeedBasisCreateTensorH1_Ref; 483 ceed->ElemRestrictionCreate = CeedElemRestrictionCreate_Ref; 484 ceed->QFunctionCreate = CeedQFunctionCreate_Ref; 485 ceed->OperatorCreate = CeedOperatorCreate_Ref; 486 return 0; 487 } 488 489 __attribute__((constructor)) 490 static void Register(void) { 491 CeedRegister("/cpu/self/ref", CeedInit_Ref); 492 } 493