1 // Copyright (c) 2017, Lawrence Livermore National Security, LLC. Produced at 2 // the Lawrence Livermore National Laboratory. LLNL-CODE-734707. All Rights 3 // reserved. See files LICENSE and NOTICE for details. 4 // 5 // This file is part of CEED, a collection of benchmarks, miniapps, software 6 // libraries and APIs for efficient high-order finite element and spectral 7 // element discretizations for exascale applications. For more information and 8 // source code availability see http://github.com/ceed. 9 // 10 // The CEED research is supported by the Exascale Computing Project 17-SC-20-SC, 11 // a collaborative effort of two U.S. Department of Energy organizations (Office 12 // of Science and the National Nuclear Security Administration) responsible for 13 // the planning and preparation of a capable exascale ecosystem, including 14 // software, applications, hardware, advanced system engineering and early 15 // testbed platforms, in support of the nation's exascale computing imperative. 16 17 #include <ceed-impl.h> 18 #include <string.h> 19 #include "magma.h" 20 21 typedef struct { 22 CeedScalar *array; 23 CeedScalar *darray; 24 int own_; 25 } CeedVector_Magma; 26 27 typedef struct { 28 const CeedInt *indices; 29 CeedInt *indices_allocated; 30 } CeedElemRestriction_Magma; 31 32 typedef struct { 33 CeedVector etmp; 34 CeedVector qdata; 35 } CeedOperator_Magma; 36 37 // ***************************************************************************** 38 // * Initialize vector vec (after free mem) with values from array based on cmode 39 // * CEED_COPY_VALUES: memory is allocated in vec->array_allocated, made equal 40 // * to array, and data is copied (not store passed pointer) 41 // * CEED_OWN_POINTER: vec->data->array_allocated and vec->data->array = array 42 // * CEED_USE_POINTER: vec->data->array = array (can modify; no ownership) 43 // * mtype: CEED_MEM_HOST or CEED_MEM_DEVICE 44 // ***************************************************************************** 45 static int CeedVectorSetArray_Magma(CeedVector vec, CeedMemType mtype, 46 CeedCopyMode cmode, CeedScalar *array) { 47 CeedVector_Magma *impl = vec->data; 48 int ierr; 49 50 // If own data, free that "old" data, e.g., as it may be of different size 51 if (impl->own_){ 52 magma_free( impl->darray ); 53 magma_free_pinned( impl->array ); 54 impl->darray = NULL; 55 impl->array = NULL; 56 impl->own_ = 0; 57 } 58 59 if (mtype == CEED_MEM_HOST) { 60 // memory is on the host; own_ = 0 61 switch (cmode) { 62 case CEED_COPY_VALUES: 63 ierr = magma_malloc( (void**)&impl->darray, 64 vec->length * sizeof(CeedScalar)); CeedChk(ierr); 65 ierr = magma_malloc_pinned( (void**)&impl->array, 66 vec->length * sizeof(CeedScalar)); CeedChk(ierr); 67 impl->own_ = 1; 68 69 if (array) 70 magma_setvector(vec->length, sizeof(array[0]), 71 array, 1, impl->darray, 1); 72 break; 73 case CEED_OWN_POINTER: 74 ierr = magma_malloc( (void**)&impl->darray, 75 vec->length * sizeof(CeedScalar)); CeedChk(ierr); 76 impl->array = array; 77 impl->own_ = 1; 78 79 if (array) 80 magma_setvector(vec->length, sizeof(array[0]), 81 array, 1, impl->darray, 1); 82 break; 83 case CEED_USE_POINTER: 84 impl->darray = NULL; 85 impl->array = array; 86 } 87 } else if (mtype == CEED_MEM_DEVICE) { 88 // memory is on the device; own = 0 89 switch (cmode) { 90 case CEED_COPY_VALUES: 91 ierr = magma_malloc( (void**)&impl->darray, 92 vec->length * sizeof(CeedScalar)); CeedChk(ierr); 93 ierr = magma_malloc_pinned( (void**)&impl->array, 94 vec->length * sizeof(CeedScalar)); CeedChk(ierr); 95 impl->own_ = 1; 96 97 if (array) 98 magma_copyvector(vec->length, sizeof(array[0]), 99 array, 1, impl->darray, 1); 100 break; 101 case CEED_OWN_POINTER: 102 impl->darray = array; 103 ierr = magma_malloc_pinned( (void**)&impl->array, 104 vec->length * sizeof(CeedScalar)); CeedChk(ierr); 105 impl->own_ = 1; 106 107 break; 108 case CEED_USE_POINTER: 109 impl->darray = array; 110 impl->array = NULL; 111 } 112 113 } else 114 return CeedError(vec->ceed, 1, "Only MemType = HOST or DEVICE supported"); 115 116 return 0; 117 } 118 119 // ***************************************************************************** 120 // * Give data pointer from vector vec to array (on HOST or DEVICE) 121 // ***************************************************************************** 122 static int CeedVectorGetArray_Magma(CeedVector vec, CeedMemType mtype, 123 CeedScalar **array) { 124 CeedVector_Magma *impl = vec->data; 125 int ierr; 126 127 if (mtype == CEED_MEM_HOST) { 128 if (!impl->array){ 129 // Allocate data if array is NULL 130 ierr = CeedVectorSetArray(vec, mtype, CEED_COPY_VALUES, NULL); 131 CeedChk(ierr); 132 } else if (impl->own_) { 133 // data is owned so GPU had the most up-to-date version; copy it 134 magma_getvector(vec->length, sizeof(*array[0]), 135 impl->darray, 1, impl->array, 1); 136 } 137 *array = impl->array; 138 } else if (mtype == CEED_MEM_DEVICE) { 139 if (!impl->darray){ 140 // Allocate data if darray is NULL 141 ierr = CeedVectorSetArray(vec, mtype, CEED_COPY_VALUES, NULL); 142 CeedChk(ierr); 143 } 144 *array = impl->darray; 145 } else 146 return CeedError(vec->ceed, 1, "Can only provide to HOST or DEVICE memory"); 147 148 return 0; 149 } 150 151 // ***************************************************************************** 152 // * Give data pointer from vector vec to array (on HOST or DEVICE) to read it 153 // ***************************************************************************** 154 static int CeedVectorGetArrayRead_Magma(CeedVector vec, CeedMemType mtype, 155 const CeedScalar **array) { 156 CeedVector_Magma *impl = vec->data; 157 int ierr; 158 159 if (mtype == CEED_MEM_HOST) { 160 if (!impl->array){ 161 // Allocate data if array is NULL 162 ierr = CeedVectorSetArray(vec, mtype, CEED_COPY_VALUES, NULL); 163 CeedChk(ierr); 164 } else if (impl->own_) { 165 // data is owned so GPU had the most up-to-date version; copy it 166 magma_getvector(vec->length, sizeof(*array[0]), 167 impl->darray, 1, impl->array, 1); 168 } 169 *array = impl->array; 170 } else if (mtype == CEED_MEM_DEVICE) { 171 if (!impl->darray){ 172 // Allocate data if darray is NULL 173 ierr = CeedVectorSetArray(vec, mtype, CEED_COPY_VALUES, NULL); 174 CeedChk(ierr); 175 } 176 *array = impl->darray; 177 } else 178 return CeedError(vec->ceed, 1, "Can only provide to HOST or DEVICE memory"); 179 180 return 0; 181 } 182 183 // ***************************************************************************** 184 // * There is no mtype here for array so it is not clear if we restore from HOST 185 // * memory or from DEVICE memory. We assume that it is CPU memory because if 186 // * it was GPU memory we would not call this routine at all. 187 // * Restore vector vec with values from array, where array received its values 188 // * from vec and possibly modified them. 189 // ***************************************************************************** 190 static int CeedVectorRestoreArray_Magma(CeedVector vec, CeedScalar **array) { 191 CeedVector_Magma *impl = vec->data; 192 193 if (impl->darray!=NULL) 194 magma_setvector(vec->length, sizeof(*array[0]), 195 *array, 1, impl->darray, 1); 196 197 *array = NULL; 198 return 0; 199 } 200 201 // ***************************************************************************** 202 // * There is no mtype here for array so it is not clear if we restore from HOST 203 // * memory or from DEVICE memory. We assume that it is CPU memory because if 204 // * it was GPU memory we would not call this routine at all. 205 // * Restore vector vec with values from array, where array received its values 206 // * from vec to only read them; in this case vec may have been modified meanwhile 207 // * and needs to be restored here. 208 // ***************************************************************************** 209 static int CeedVectorRestoreArrayRead_Magma(CeedVector vec, 210 const CeedScalar **array) { 211 CeedVector_Magma *impl = vec->data; 212 213 if (impl->darray!=NULL) 214 magma_setvector(vec->length, sizeof(*array[0]), 215 *array, 1, impl->darray, 1); 216 217 *array = NULL; 218 return 0; 219 } 220 221 static int CeedVectorDestroy_Magma(CeedVector vec) { 222 CeedVector_Magma *impl = vec->data; 223 int ierr; 224 225 // Free if we own the data 226 if (impl->own_){ 227 ierr = magma_free_pinned(impl->array); CeedChk(ierr); 228 ierr = magma_free(impl->darray); CeedChk(ierr); 229 } 230 ierr = CeedFree(&vec->data); CeedChk(ierr); 231 return 0; 232 } 233 234 // ***************************************************************************** 235 // * Create vector vec of size n 236 // ***************************************************************************** 237 static int CeedVectorCreate_Magma(Ceed ceed, CeedInt n, CeedVector vec) { 238 CeedVector_Magma *impl; 239 int ierr; 240 241 vec->SetArray = CeedVectorSetArray_Magma; 242 vec->GetArray = CeedVectorGetArray_Magma; 243 vec->GetArrayRead = CeedVectorGetArrayRead_Magma; 244 vec->RestoreArray = CeedVectorRestoreArray_Magma; 245 vec->RestoreArrayRead = CeedVectorRestoreArrayRead_Magma; 246 vec->Destroy = CeedVectorDestroy_Magma; 247 ierr = CeedCalloc(1,&impl); CeedChk(ierr); 248 impl->darray = NULL; 249 impl->array = NULL; 250 impl->own_ = 0; 251 vec->data = impl; 252 return 0; 253 } 254 255 // ***************************************************************************** 256 // * Apply restriction operator r to u: v = r(rmode) u 257 // ***************************************************************************** 258 static int CeedElemRestrictionApply_Magma(CeedElemRestriction r, 259 CeedTransposeMode tmode, CeedInt ncomp, 260 CeedTransposeMode lmode, CeedVector u, 261 CeedVector v, CeedRequest *request) { 262 CeedElemRestriction_Magma *impl = r->data; 263 int ierr; 264 const CeedScalar *uu; 265 CeedScalar *vv; 266 CeedInt esize = r->nelem*r->elemsize; 267 //printf("HELLOOOOOOOOO=======================\n"); 268 269 ierr = CeedVectorGetArrayRead(u, CEED_MEM_HOST, &uu); CeedChk(ierr); 270 ierr = CeedVectorGetArray(v, CEED_MEM_HOST, &vv); CeedChk(ierr); 271 if (tmode == CEED_NOTRANSPOSE) { 272 // Perform: v = r * u 273 if (ncomp == 1) { 274 for (CeedInt i=0; i<esize; i++) vv[i] = uu[impl->indices[i]]; 275 276 /* 277 // Works - in t05 x has to be with CEED_COPY_VALUES 278 CeedVector_Magma *uimpl = u->data, *vimpl = v->data; 279 uu = uimpl->darray; 280 vv = vimpl->darray; 281 CeedInt *indices;// = (int *)impl->indices; 282 magma_malloc( (void**)&indices, esize * sizeof(CeedInt)); 283 magma_setvector(esize, sizeof(CeedInt), 284 (int *)impl->indices, 1, indices, 1); 285 magma_template<<i=0:esize>>(const CeedScalar *uu, CeedScalar *vv, CeedInt *indices) 286 { 287 vv[i] = uu[indices[i]]; 288 } 289 290 // printf("HELLOOOOOOOOO....... size = %d\n", esize); 291 // 292 293 if (request != CEED_REQUEST_IMMEDIATE && request != CEED_REQUEST_ORDERED) 294 *request = NULL; 295 return 0; 296 */ 297 298 } else { 299 //printf("HELLOOOOOOOOO-------------\n"); 300 // vv is (elemsize x ncomp x nelem), column-major 301 if (lmode == CEED_NOTRANSPOSE) { // u is (ndof x ncomp), column-major 302 for (CeedInt e = 0; e < r->nelem; e++) 303 for (CeedInt d = 0; d < ncomp; d++) 304 for (CeedInt i=0; i<r->elemsize; i++) { 305 vv[i+r->elemsize*(d+ncomp*e)] = 306 uu[impl->indices[i+r->elemsize*e]+r->ndof*d]; 307 } 308 } else { // u is (ncomp x ndof), column-major 309 for (CeedInt e = 0; e < r->nelem; e++) 310 for (CeedInt d = 0; d < ncomp; d++) 311 for (CeedInt i=0; i<r->elemsize; i++) { 312 vv[i+r->elemsize*(d+ncomp*e)] = 313 uu[d+ncomp*impl->indices[i+r->elemsize*e]]; 314 } 315 } 316 } 317 } else { 318 // Note: in transpose mode, we perform: v += r^t * u 319 if (ncomp == 1) { 320 for (CeedInt i=0; i<esize; i++) vv[impl->indices[i]] += uu[i]; 321 } else { 322 //printf("HELLOOOOOOOOO+++++++++++++\n"); 323 // u is (elemsize x ncomp x nelem) 324 if (lmode == CEED_NOTRANSPOSE) { // vv is (ndof x ncomp), column-major 325 for (CeedInt e = 0; e < r->nelem; e++) 326 for (CeedInt d = 0; d < ncomp; d++) 327 for (CeedInt i=0; i<r->elemsize; i++) { 328 vv[impl->indices[i+r->elemsize*e]+r->ndof*d] += 329 uu[i+r->elemsize*(d+e*ncomp)]; 330 } 331 } else { // vv is (ncomp x ndof), column-major 332 for (CeedInt e = 0; e < r->nelem; e++) 333 for (CeedInt d = 0; d < ncomp; d++) 334 for (CeedInt i=0; i<r->elemsize; i++) { 335 vv[d+ncomp*impl->indices[i+r->elemsize*e]] += 336 uu[i+r->elemsize*(d+e*ncomp)]; 337 } 338 } 339 } 340 } 341 ierr = CeedVectorRestoreArrayRead(u, &uu); CeedChk(ierr); 342 ierr = CeedVectorRestoreArray(v, &vv); CeedChk(ierr); 343 if (request != CEED_REQUEST_IMMEDIATE && request != CEED_REQUEST_ORDERED) 344 *request = NULL; 345 return 0; 346 } 347 348 static int CeedElemRestrictionDestroy_Magma(CeedElemRestriction r) { 349 CeedElemRestriction_Magma *impl = r->data; 350 int ierr; 351 352 ierr = CeedFree(&impl->indices_allocated); CeedChk(ierr); 353 ierr = CeedFree(&r->data); CeedChk(ierr); 354 return 0; 355 } 356 357 static int CeedElemRestrictionCreate_Magma(CeedElemRestriction r, 358 CeedMemType mtype, 359 CeedCopyMode cmode, 360 const CeedInt *indices) { 361 int ierr; 362 CeedElemRestriction_Magma *impl; 363 364 if (mtype != CEED_MEM_HOST) 365 return CeedError(r->ceed, 1, "Only MemType = HOST supported"); 366 ierr = CeedCalloc(1,&impl); CeedChk(ierr); 367 switch (cmode) { 368 case CEED_COPY_VALUES: 369 //printf("CeedElemRestriction_Magma COPY\n"); 370 ierr = CeedMalloc(r->nelem*r->elemsize, &impl->indices_allocated); 371 CeedChk(ierr); 372 memcpy(impl->indices_allocated, indices, 373 r->nelem * r->elemsize * sizeof(indices[0])); 374 impl->indices = impl->indices_allocated; 375 break; 376 case CEED_OWN_POINTER: 377 //printf("CeedElemRestriction_Magma OWN\n"); 378 impl->indices_allocated = (CeedInt *)indices; 379 impl->indices = impl->indices_allocated; 380 break; 381 case CEED_USE_POINTER: 382 //printf("CeedElemRestriction_Magma USE\n"); 383 impl->indices = indices; 384 } 385 r->data = impl; 386 r->Apply = CeedElemRestrictionApply_Magma; 387 r->Destroy = CeedElemRestrictionDestroy_Magma; 388 return 0; 389 } 390 391 // Contracts on the middle index 392 // NOTRANSPOSE: V_ajc = T_jb U_abc 393 // TRANSPOSE: V_ajc = T_bj U_abc 394 // If Add != 0, "=" is replaced by "+=" 395 static int CeedTensorContract_Magma(Ceed ceed, 396 CeedInt A, CeedInt B, CeedInt C, CeedInt J, 397 const CeedScalar *t, CeedTransposeMode tmode, 398 const CeedInt Add, 399 const CeedScalar *u, CeedScalar *v) { 400 CeedInt tstride0 = B, tstride1 = 1; 401 if (tmode == CEED_TRANSPOSE) { 402 tstride0 = 1; tstride1 = J; 403 } 404 405 for (CeedInt a=0; a<A; a++) { 406 for (CeedInt j=0; j<J; j++) { 407 if (!Add) { 408 for (CeedInt c=0; c<C; c++) 409 v[(a*J+j)*C+c] = 0; 410 } 411 for (CeedInt b=0; b<B; b++) { 412 for (CeedInt c=0; c<C; c++) { 413 v[(a*J+j)*C+c] += t[j*tstride0 + b*tstride1] * u[(a*B+b)*C+c]; 414 } 415 } 416 } 417 } 418 return 0; 419 } 420 421 static int CeedBasisApply_Magma(CeedBasis basis, CeedTransposeMode tmode, 422 CeedEvalMode emode, 423 const CeedScalar *u, CeedScalar *v) { 424 int ierr; 425 const CeedInt dim = basis->dim; 426 const CeedInt ndof = basis->ndof; 427 const CeedInt nqpt = ndof*CeedPowInt(basis->Q1d, dim); 428 const CeedInt add = (tmode == CEED_TRANSPOSE); 429 430 if (tmode == CEED_TRANSPOSE) { 431 const CeedInt vsize = ndof*CeedPowInt(basis->P1d, dim); 432 for (CeedInt i = 0; i < vsize; i++) 433 v[i] = (CeedScalar) 0; 434 } 435 if (emode & CEED_EVAL_INTERP) { 436 CeedInt P = basis->P1d, Q = basis->Q1d; 437 if (tmode == CEED_TRANSPOSE) { 438 P = basis->Q1d; Q = basis->P1d; 439 } 440 CeedInt pre = ndof*CeedPowInt(P, dim-1), post = 1; 441 CeedScalar tmp[2][ndof*Q*CeedPowInt(P>Q?P:Q, dim-1)]; 442 for (CeedInt d=0; d<dim; d++) { 443 ierr = CeedTensorContract_Magma(basis->ceed, pre, P, post, Q, basis->interp1d, 444 tmode, add&&(d==dim-1), 445 d==0?u:tmp[d%2], d==dim-1?v:tmp[(d+1)%2]); 446 CeedChk(ierr); 447 pre /= P; 448 post *= Q; 449 } 450 if (tmode == CEED_NOTRANSPOSE) { 451 v += nqpt; 452 } else { 453 u += nqpt; 454 } 455 } 456 if (emode & CEED_EVAL_GRAD) { 457 CeedInt P = basis->P1d, Q = basis->Q1d; 458 // In CEED_NOTRANSPOSE mode: 459 // u is (P^dim x nc), column-major layout (nc = ndof) 460 // v is (Q^dim x nc x dim), column-major layout (nc = ndof) 461 // In CEED_TRANSPOSE mode, the sizes of u and v are switched. 462 if (tmode == CEED_TRANSPOSE) { 463 P = basis->Q1d, Q = basis->P1d; 464 } 465 CeedScalar tmp[2][ndof*Q*CeedPowInt(P>Q?P:Q, dim-1)]; 466 for (CeedInt p = 0; p < dim; p++) { 467 CeedInt pre = ndof*CeedPowInt(P, dim-1), post = 1; 468 for (CeedInt d=0; d<dim; d++) { 469 ierr = CeedTensorContract_Magma(basis->ceed, pre, P, post, Q, 470 (p==d)?basis->grad1d:basis->interp1d, 471 tmode, add&&(d==dim-1), 472 d==0?u:tmp[d%2], d==dim-1?v:tmp[(d+1)%2]); 473 CeedChk(ierr); 474 pre /= P; 475 post *= Q; 476 } 477 if (tmode == CEED_NOTRANSPOSE) { 478 v += nqpt; 479 } else { 480 u += nqpt; 481 } 482 } 483 } 484 if (emode & CEED_EVAL_WEIGHT) { 485 if (tmode == CEED_TRANSPOSE) 486 return CeedError(basis->ceed, 1, 487 "CEED_EVAL_WEIGHT incompatible with CEED_TRANSPOSE"); 488 CeedInt Q = basis->Q1d; 489 for (CeedInt d=0; d<dim; d++) { 490 CeedInt pre = CeedPowInt(Q, dim-d-1), post = CeedPowInt(Q, d); 491 for (CeedInt i=0; i<pre; i++) { 492 for (CeedInt j=0; j<Q; j++) { 493 for (CeedInt k=0; k<post; k++) { 494 v[(i*Q + j)*post + k] = basis->qweight1d[j] 495 * (d == 0 ? 1 : v[(i*Q + j)*post + k]); 496 } 497 } 498 } 499 } 500 } 501 return 0; 502 } 503 504 static int CeedBasisDestroy_Magma(CeedBasis basis) { 505 return 0; 506 } 507 508 static int CeedBasisCreateTensorH1_Magma(Ceed ceed, CeedInt dim, CeedInt P1d, 509 CeedInt Q1d, const CeedScalar *interp1d, 510 const CeedScalar *grad1d, 511 const CeedScalar *qref1d, 512 const CeedScalar *qweight1d, 513 CeedBasis basis) { 514 basis->Apply = CeedBasisApply_Magma; 515 basis->Destroy = CeedBasisDestroy_Magma; 516 return 0; 517 } 518 519 static int CeedQFunctionApply_Magma(CeedQFunction qf, void *qdata, CeedInt Q, 520 const CeedScalar *const *u, 521 CeedScalar *const *v) { 522 int ierr; 523 ierr = qf->function(qf->ctx, qdata, Q, u, v); CeedChk(ierr); 524 return 0; 525 } 526 527 static int CeedQFunctionDestroy_Magma(CeedQFunction qf) { 528 return 0; 529 } 530 531 static int CeedQFunctionCreate_Magma(CeedQFunction qf) { 532 qf->Apply = CeedQFunctionApply_Magma; 533 qf->Destroy = CeedQFunctionDestroy_Magma; 534 return 0; 535 } 536 537 static int CeedOperatorDestroy_Magma(CeedOperator op) { 538 CeedOperator_Magma *impl = op->data; 539 int ierr; 540 541 ierr = CeedVectorDestroy(&impl->etmp); CeedChk(ierr); 542 ierr = CeedVectorDestroy(&impl->qdata); CeedChk(ierr); 543 ierr = CeedFree(&op->data); CeedChk(ierr); 544 return 0; 545 } 546 547 static int CeedOperatorApply_Magma(CeedOperator op, CeedVector qdata, 548 CeedVector ustate, 549 CeedVector residual, CeedRequest *request) { 550 CeedOperator_Magma *impl = op->data; 551 CeedVector etmp; 552 CeedInt Q; 553 const CeedInt nc = op->basis->ndof, dim = op->basis->dim; 554 CeedScalar *Eu; 555 char *qd; 556 int ierr; 557 CeedTransposeMode lmode = CEED_NOTRANSPOSE; 558 559 if (!impl->etmp) { 560 ierr = CeedVectorCreate(op->ceed, 561 nc * op->Erestrict->nelem * op->Erestrict->elemsize, 562 &impl->etmp); CeedChk(ierr); 563 // etmp is allocated when CeedVectorGetArray is called below 564 } 565 etmp = impl->etmp; 566 if (op->qf->inmode & ~CEED_EVAL_WEIGHT) { 567 ierr = CeedElemRestrictionApply(op->Erestrict, CEED_NOTRANSPOSE, 568 nc, lmode, ustate, etmp, 569 CEED_REQUEST_IMMEDIATE); CeedChk(ierr); 570 } 571 ierr = CeedBasisGetNumQuadraturePoints(op->basis, &Q); CeedChk(ierr); 572 ierr = CeedVectorGetArray(etmp, CEED_MEM_HOST, &Eu); CeedChk(ierr); 573 ierr = CeedVectorGetArray(qdata, CEED_MEM_HOST, (CeedScalar**)&qd); 574 CeedChk(ierr); 575 for (CeedInt e=0; e<op->Erestrict->nelem; e++) { 576 CeedScalar BEu[Q*nc*(dim+2)], BEv[Q*nc*(dim+2)], *out[5] = {0,0,0,0,0}; 577 const CeedScalar *in[5] = {0,0,0,0,0}; 578 // TODO: quadrature weights can be computed just once 579 ierr = CeedBasisApply(op->basis, CEED_NOTRANSPOSE, op->qf->inmode, 580 &Eu[e*op->Erestrict->elemsize*nc], BEu); 581 CeedChk(ierr); 582 CeedScalar *u_ptr = BEu, *v_ptr = BEv; 583 if (op->qf->inmode & CEED_EVAL_INTERP) { in[0] = u_ptr; u_ptr += Q*nc; } 584 if (op->qf->inmode & CEED_EVAL_GRAD) { in[1] = u_ptr; u_ptr += Q*nc*dim; } 585 if (op->qf->inmode & CEED_EVAL_WEIGHT) { in[4] = u_ptr; u_ptr += Q; } 586 if (op->qf->outmode & CEED_EVAL_INTERP) { out[0] = v_ptr; v_ptr += Q*nc; } 587 if (op->qf->outmode & CEED_EVAL_GRAD) { out[1] = v_ptr; v_ptr += Q*nc*dim; } 588 ierr = CeedQFunctionApply(op->qf, &qd[e*Q*op->qf->qdatasize], Q, in, out); 589 CeedChk(ierr); 590 ierr = CeedBasisApply(op->basis, CEED_TRANSPOSE, op->qf->outmode, BEv, 591 &Eu[e*op->Erestrict->elemsize*nc]); 592 CeedChk(ierr); 593 } 594 ierr = CeedVectorRestoreArray(etmp, &Eu); CeedChk(ierr); 595 if (residual) { 596 CeedScalar *res; 597 CeedVectorGetArray(residual, CEED_MEM_HOST, &res); 598 for (int i = 0; i < residual->length; i++) 599 res[i] = (CeedScalar)0; 600 ierr = CeedElemRestrictionApply(op->Erestrict, CEED_TRANSPOSE, 601 nc, lmode, etmp, residual, 602 CEED_REQUEST_IMMEDIATE); CeedChk(ierr); 603 } 604 if (request != CEED_REQUEST_IMMEDIATE && request != CEED_REQUEST_ORDERED) 605 *request = NULL; 606 return 0; 607 } 608 609 static int CeedOperatorGetQData_Magma(CeedOperator op, CeedVector *qdata) { 610 CeedOperator_Magma *impl = op->data; 611 int ierr; 612 613 if (!impl->qdata) { 614 CeedInt Q; 615 ierr = CeedBasisGetNumQuadraturePoints(op->basis, &Q); CeedChk(ierr); 616 ierr = CeedVectorCreate(op->ceed, 617 op->Erestrict->nelem * Q 618 * op->qf->qdatasize / sizeof(CeedScalar), 619 &impl->qdata); CeedChk(ierr); 620 } 621 *qdata = impl->qdata; 622 return 0; 623 } 624 625 static int CeedOperatorCreate_Magma(CeedOperator op) { 626 CeedOperator_Magma *impl; 627 int ierr; 628 629 ierr = CeedCalloc(1, &impl); CeedChk(ierr); 630 op->data = impl; 631 op->Destroy = CeedOperatorDestroy_Magma; 632 op->Apply = CeedOperatorApply_Magma; 633 op->GetQData = CeedOperatorGetQData_Magma; 634 return 0; 635 } 636 637 // ***************************************************************************** 638 // * INIT 639 // ***************************************************************************** 640 static int CeedInit_Magma(const char *resource, Ceed ceed) { 641 if (strcmp(resource, "/cpu/magma") 642 && strcmp(resource, "/gpu/magma")) 643 return CeedError(ceed, 1, "MAGMA backend cannot use resource: %s", resource); 644 645 magma_init(); 646 //magma_print_environment(); 647 648 ceed->VecCreate = CeedVectorCreate_Magma; 649 ceed->BasisCreateTensorH1 = CeedBasisCreateTensorH1_Magma; 650 ceed->ElemRestrictionCreate = CeedElemRestrictionCreate_Magma; 651 ceed->QFunctionCreate = CeedQFunctionCreate_Magma; 652 ceed->OperatorCreate = CeedOperatorCreate_Magma; 653 return 0; 654 } 655 656 // ***************************************************************************** 657 // * REGISTER 658 // ***************************************************************************** 659 __attribute__((constructor)) 660 static void Register(void) { 661 CeedRegister("/cpu/magma", CeedInit_Magma); 662 CeedRegister("/gpu/magma", CeedInit_Magma); 663 } 664