// Copyright (c) 2017, Lawrence Livermore National Security, LLC. Produced at
// the Lawrence Livermore National Laboratory. LLNL-CODE-734707. All Rights
// reserved. See files LICENSE and NOTICE for details.
//
// This file is part of CEED, a collection of benchmarks, miniapps, software
// libraries and APIs for efficient high-order finite element and spectral
// element discretizations for exascale applications. For more information and
// source code availability see http://github.com/ceed.
//
// The CEED research is supported by the Exascale Computing Project 17-SC-20-SC,
// a collaborative effort of two U.S. Department of Energy organizations (Office
// of Science and the National Nuclear Security Administration) responsible for
// the planning and preparation of a capable exascale ecosystem, including
// software, applications, hardware, advanced system engineering and early
// testbed platforms, in support of the nation's exascale computing imperative.
16 17 #include <ceed-impl.h> 18 #include <string.h> 19 #include "magma.h" 20 21 typedef struct { 22 CeedScalar *array; 23 CeedScalar *darray; 24 int own_; 25 int down_; 26 } CeedVector_Magma; 27 28 typedef struct { 29 CeedInt *indices; 30 CeedInt *dindices; 31 int own_; 32 int down_; // cover a case where we own Device memory 33 } CeedElemRestriction_Magma; 34 35 typedef struct { 36 CeedVector etmp; 37 CeedVector qdata; 38 } CeedOperator_Magma; 39 40 // ***************************************************************************** 41 // * Initialize vector vec (after free mem) with values from array based on cmode 42 // * CEED_COPY_VALUES: memory is allocated in vec->array_allocated, made equal 43 // * to array, and data is copied (not store passed pointer) 44 // * CEED_OWN_POINTER: vec->data->array_allocated and vec->data->array = array 45 // * CEED_USE_POINTER: vec->data->array = array (can modify; no ownership) 46 // * mtype: CEED_MEM_HOST or CEED_MEM_DEVICE 47 // ***************************************************************************** 48 static int CeedVectorSetArray_Magma(CeedVector vec, CeedMemType mtype, 49 CeedCopyMode cmode, CeedScalar *array) { 50 CeedVector_Magma *impl = vec->data; 51 int ierr; 52 53 // If own data, free the "old" data, e.g., as it may be of different size 54 if (impl->own_){ 55 magma_free( impl->darray ); 56 magma_free_pinned( impl->array ); 57 impl->darray = NULL; 58 impl->array = NULL; 59 impl->own_ = 0; 60 impl->down_= 0; 61 } 62 63 if (mtype == CEED_MEM_HOST) { 64 // memory is on the host; own_ = 0 65 switch (cmode) { 66 case CEED_COPY_VALUES: 67 ierr = magma_malloc( (void**)&impl->darray, 68 vec->length * sizeof(CeedScalar)); CeedChk(ierr); 69 ierr = magma_malloc_pinned( (void**)&impl->array, 70 vec->length * sizeof(CeedScalar)); CeedChk(ierr); 71 impl->own_ = 1; 72 73 if (array != NULL) 74 magma_setvector(vec->length, sizeof(array[0]), 75 array, 1, impl->darray, 1); 76 break; 77 case CEED_OWN_POINTER: 78 ierr = magma_malloc( 
(void**)&impl->darray, 79 vec->length * sizeof(CeedScalar)); CeedChk(ierr); 80 // TODO: possible problem here is if we are passed non-pinned memory; 81 // (as we own it, lter in destroy, we use free for pinned memory). 82 impl->array = array; 83 impl->own_ = 1; 84 85 if (array != NULL) 86 magma_setvector(vec->length, sizeof(array[0]), 87 array, 1, impl->darray, 1); 88 break; 89 case CEED_USE_POINTER: 90 ierr = magma_malloc( (void**)&impl->darray, 91 vec->length * sizeof(CeedScalar)); CeedChk(ierr); 92 magma_setvector(vec->length, sizeof(array[0]), 93 array, 1, impl->darray, 1); 94 impl->array = array; 95 } 96 } else if (mtype == CEED_MEM_DEVICE) { 97 // memory is on the device; own = 0 98 switch (cmode) { 99 case CEED_COPY_VALUES: 100 ierr = magma_malloc( (void**)&impl->darray, 101 vec->length * sizeof(CeedScalar)); CeedChk(ierr); 102 ierr = magma_malloc_pinned( (void**)&impl->array, 103 vec->length * sizeof(CeedScalar)); CeedChk(ierr); 104 impl->own_ = 1; 105 106 if (array) 107 magma_copyvector(vec->length, sizeof(array[0]), 108 array, 1, impl->darray, 1); 109 break; 110 case CEED_OWN_POINTER: 111 impl->darray = array; 112 ierr = magma_malloc_pinned( (void**)&impl->array, 113 vec->length * sizeof(CeedScalar)); CeedChk(ierr); 114 impl->own_ = 1; 115 116 break; 117 case CEED_USE_POINTER: 118 impl->darray = array; 119 impl->array = NULL; 120 } 121 122 } else 123 return CeedError(vec->ceed, 1, "Only MemType = HOST or DEVICE supported"); 124 125 return 0; 126 } 127 128 // ***************************************************************************** 129 // * Give data pointer from vector vec to array (on HOST or DEVICE) 130 // ***************************************************************************** 131 static int CeedVectorGetArray_Magma(CeedVector vec, CeedMemType mtype, 132 CeedScalar **array) { 133 CeedVector_Magma *impl = vec->data; 134 int ierr; 135 136 if (mtype == CEED_MEM_HOST) { 137 if (impl->own_) { 138 // data is owned so GPU had the most up-to-date 
version; copy it 139 magma_getvector(vec->length, sizeof(*array[0]), 140 impl->darray, 1, impl->array, 1); 141 } else if (impl->array == NULL) { 142 // Vector doesn't own the data and was set on GPU 143 if (impl->darray == NULL) { 144 // call was made just to allocate memory 145 ierr = CeedVectorSetArray(vec, mtype, CEED_COPY_VALUES, NULL); 146 CeedChk(ierr); 147 } else 148 return CeedError(vec->ceed, 1, "Can not access DEVICE vector on HOST"); 149 } 150 *array = impl->array; 151 } else if (mtype == CEED_MEM_DEVICE) { 152 if (impl->darray == NULL){ 153 // Vector doesn't own the data and was set on the CPU 154 if (impl->array == NULL) { 155 // call was made just to allocate memory 156 ierr = CeedVectorSetArray(vec, mtype, CEED_COPY_VALUES, NULL); 157 CeedChk(ierr); 158 } else 159 return CeedError(vec->ceed, 1, "Can not access HOST vector on DEVICE"); 160 } 161 *array = impl->darray; 162 } else 163 return CeedError(vec->ceed, 1, "Can only provide to HOST or DEVICE memory"); 164 165 return 0; 166 } 167 168 // ***************************************************************************** 169 // * Give data pointer from vector vec to array (on HOST or DEVICE) to read it 170 // ***************************************************************************** 171 static int CeedVectorGetArrayRead_Magma(CeedVector vec, CeedMemType mtype, 172 const CeedScalar **array) { 173 CeedVector_Magma *impl = vec->data; 174 int ierr; 175 176 if (mtype == CEED_MEM_HOST) { 177 if (impl->own_) { 178 // data is owned so GPU had the most up-to-date version; copy it 179 magma_getvector(vec->length, sizeof(*array[0]), 180 impl->darray, 1, impl->array, 1); 181 } else if (impl->array == NULL) { 182 // Vector doesn't own the data and was set on GPU 183 if (impl->darray == NULL) { 184 // call was made just to allocate memory 185 ierr = CeedVectorSetArray(vec, mtype, CEED_COPY_VALUES, NULL); 186 CeedChk(ierr); 187 } else 188 return CeedError(vec->ceed, 1, "Can not access DEVICE vector on HOST"); 189 } 
190 *array = impl->array; 191 } else if (mtype == CEED_MEM_DEVICE) { 192 if (impl->darray == NULL){ 193 // Vector doesn't own the data and was set on the CPU 194 if (impl->array == NULL) { 195 // call was made just to allocate memory 196 ierr = CeedVectorSetArray(vec, mtype, CEED_COPY_VALUES, NULL); 197 CeedChk(ierr); 198 } else 199 return CeedError(vec->ceed, 1, "Can not access HOST vector on DEVICE"); 200 } 201 *array = impl->darray; 202 } else 203 return CeedError(vec->ceed, 1, "Can only provide to HOST or DEVICE memory"); 204 205 return 0; 206 } 207 208 // ***************************************************************************** 209 // * There is no mtype here for array so it is not clear if we restore from HOST 210 // * memory or from DEVICE memory. We assume that it is CPU memory because if 211 // * it was GPU memory we would not call this routine at all. 212 // * Restore vector vec with values from array, where array received its values 213 // * from vec and possibly modified them. 214 // ***************************************************************************** 215 static int CeedVectorRestoreArray_Magma(CeedVector vec, CeedScalar **array) { 216 CeedVector_Magma *impl = vec->data; 217 218 // Check if the array is a CPU pointer 219 if (*array == impl->array) { 220 // Update device, if the device pointer is not NULL 221 if (impl->darray != NULL) { 222 magma_setvector(vec->length, sizeof(*array[0]), 223 *array, 1, impl->darray, 1); 224 } else 225 ; // nothing to do (case of CPU use pointer) 226 227 } else if (impl->down_) { 228 // nothing to do if array is on GPU, except if down_=1(case CPU use pointer) 229 magma_getvector(vec->length, sizeof(*array[0]), 230 impl->darray, 1, impl->array, 1); 231 } 232 233 *array = NULL; 234 return 0; 235 } 236 237 // ***************************************************************************** 238 // * There is no mtype here for array so it is not clear if we restore from HOST 239 // * memory or from DEVICE memory. 
We assume that it is CPU memory because if 240 // * it was GPU memory we would not call this routine at all. 241 // * Restore vector vec with values from array, where array received its values 242 // * from vec to only read them; in this case vec may have been modified meanwhile 243 // * and needs to be restored here. 244 // ***************************************************************************** 245 static int CeedVectorRestoreArrayRead_Magma(CeedVector vec, 246 const CeedScalar **array) { 247 CeedVector_Magma *impl = vec->data; 248 249 // Check if the array is a CPU pointer 250 if (*array == impl->array) { 251 // Update device, if the device pointer is not NULL 252 if (impl->darray != NULL) 253 magma_setvector(vec->length, sizeof(*array[0]), 254 *array, 1, impl->darray, 1); 255 else 256 ; // nothing to do (case of CPU use pointer) 257 258 } else if (impl->down_) { 259 // nothing to do if array is on GPU, except if down_=1(case CPU use pointer) 260 magma_getvector(vec->length, sizeof(*array[0]), 261 impl->darray, 1, impl->array, 1); 262 } 263 264 *array = NULL; 265 return 0; 266 } 267 268 static int CeedVectorDestroy_Magma(CeedVector vec) { 269 CeedVector_Magma *impl = vec->data; 270 int ierr; 271 272 // Free if we own the data 273 if (impl->own_){ 274 ierr = magma_free_pinned(impl->array); CeedChk(ierr); 275 ierr = magma_free(impl->darray); CeedChk(ierr); 276 } else if (impl->down_){ 277 ierr = magma_free(impl->darray); CeedChk(ierr); 278 } 279 ierr = CeedFree(&vec->data); CeedChk(ierr); 280 return 0; 281 } 282 283 // ***************************************************************************** 284 // * Create vector vec of size n 285 // ***************************************************************************** 286 static int CeedVectorCreate_Magma(Ceed ceed, CeedInt n, CeedVector vec) { 287 CeedVector_Magma *impl; 288 int ierr; 289 290 vec->SetArray = CeedVectorSetArray_Magma; 291 vec->GetArray = CeedVectorGetArray_Magma; 292 vec->GetArrayRead = 
CeedVectorGetArrayRead_Magma; 293 vec->RestoreArray = CeedVectorRestoreArray_Magma; 294 vec->RestoreArrayRead = CeedVectorRestoreArrayRead_Magma; 295 vec->Destroy = CeedVectorDestroy_Magma; 296 ierr = CeedCalloc(1,&impl); CeedChk(ierr); 297 impl->darray = NULL; 298 impl->array = NULL; 299 impl->own_ = 0; 300 impl->down_= 0; 301 vec->data = impl; 302 return 0; 303 } 304 305 // ***************************************************************************** 306 // * Apply restriction operator r to u: v = r(rmode) u 307 // ***************************************************************************** 308 static int CeedElemRestrictionApply_Magma(CeedElemRestriction r, 309 CeedTransposeMode tmode, CeedInt ncomp, 310 CeedTransposeMode lmode, CeedVector u, 311 CeedVector v, CeedRequest *request) { 312 CeedElemRestriction_Magma *impl = r->data; 313 int ierr; 314 const CeedScalar *uu; 315 CeedScalar *vv; 316 CeedInt nelem = r->nelem, elemsize = r->elemsize, ndof = r->ndof; 317 CeedInt esize = nelem * elemsize; 318 CeedInt *indices = impl->indices, *dindices = impl->dindices; 319 320 //ierr = CeedVectorGetArrayRead(u, CEED_MEM_HOST, &uu); CeedChk(ierr); 321 //ierr = CeedVectorGetArray(v, CEED_MEM_HOST, &vv); CeedChk(ierr); 322 323 // Get pointers on the device 324 ierr = CeedVectorGetArrayRead(u, CEED_MEM_DEVICE, &uu); CeedChk(ierr); 325 ierr = CeedVectorGetArray(v, CEED_MEM_DEVICE, &vv); CeedChk(ierr); 326 327 if (tmode == CEED_NOTRANSPOSE) { 328 // Perform: v = r * u 329 if (ncomp == 1) { 330 //for (CeedInt i=0; i<esize; i++) vv[i] = uu[indices[i]]; 331 332 magma_template<<i=0:esize>> 333 (const CeedScalar *uu, CeedScalar *vv, CeedInt *dindices){ 334 vv[i] = uu[dindices[i]]; 335 } 336 } else { 337 // vv is (elemsize x ncomp x nelem), column-major 338 if (lmode == CEED_NOTRANSPOSE) { // u is (ndof x ncomp), column-major 339 /* 340 for (CeedInt e = 0; e < nelem; e++) 341 for (CeedInt d = 0; d < ncomp; d++) 342 for (CeedInt i=0; i < elemsize; i++) { 343 vv[i + 
elemsize*(d+ncomp*e)] = 344 uu[indices[i+elemsize*e]+ndof*d]; 345 } 346 */ 347 magma_template<<e=0:nelem, d=0:ncomp, i=0:elemsize>> 348 (const CeedScalar *uu, CeedScalar *vv, CeedInt *dindices, int ndof) { 349 vv[i + iend*(d+dend*e)] = uu[dindices[i+iend*e]+ndof*d]; 350 } 351 } else { // u is (ncomp x ndof), column-major 352 /* 353 for (CeedInt e = 0; e < nelem; e++) 354 for (CeedInt d = 0; d < ncomp; d++) 355 for (CeedInt i=0; i< elemsize; i++) { 356 vv[i + elemsize*(d+ncomp*e)] = 357 uu[d+ncomp*indices[i+elemsize*e]]; 358 } 359 */ 360 magma_template<<e=0:nelem, d=0:ncomp, i=0:elemsize>> 361 (const CeedScalar *uu, CeedScalar *vv, CeedInt *dindices) { 362 vv[i + iend*(d+dend*e)] = uu[d+dend*dindices[i + iend*e]]; 363 } 364 } 365 } 366 } else { 367 // Note: in transpose mode, we perform: v += r^t * u 368 if (ncomp == 1) { 369 // fprintf(stderr,"3 ---------\n"); 370 // for (CeedInt i=0; i<esize; i++) vv[indices[i]] += uu[i]; 371 magma_template<<i=0:esize>> 372 (const CeedScalar *uu, CeedScalar *vv, CeedInt *dindices) { 373 magmablas_datomic_add( &vv[dindices[i]], uu[i]); 374 } 375 } else { // u is (elemsize x ncomp x nelem) 376 fprintf(stderr,"2 ---------\n"); 377 378 if (lmode == CEED_NOTRANSPOSE) { // vv is (ndof x ncomp), column-major 379 /* 380 for (CeedInt e = 0; e < nelem; e++) 381 for (CeedInt d = 0; d < ncomp; d++) 382 for (CeedInt i=0; i < elemsize; i++) { 383 vv[indices[i + elemsize*e]+ndof*d] += 384 uu[i + elemsize*(d+e*ncomp)]; 385 } 386 */ 387 magma_template<<e=0:nelem, d=0:ncomp, i=0:elemsize>> 388 (const CeedScalar *uu, CeedScalar *vv, CeedInt *dindices, CeedInt ndof) { 389 magmablas_datomic_add( &vv[dindices[i+iend*e]+ndof*d], 390 uu[i+iend*(d+e*dend)]); 391 } 392 } else { // vv is (ncomp x ndof), column-major 393 /* 394 for (CeedInt e = 0; e < nelem; e++) 395 for (CeedInt d = 0; d < ncomp; d++) 396 for (CeedInt i=0; i < elemsize; i++) { 397 vv[d+ncomp*indices[i + elemsize*e]] += 398 uu[i + elemsize*(d+e*ncomp)]; 399 } 400 */ 401 
magma_template<<e=0:nelem, d=0:ncomp, i=0:elemsize>> 402 (const CeedScalar *uu, CeedScalar *vv, CeedInt *dindices) { 403 magmablas_datomic_add( &vv[d+dend*dindices[i + iend*e]], 404 uu[i+iend*(d+e*dend)]); 405 } 406 } 407 } 408 } 409 ierr = CeedVectorRestoreArrayRead(u, &uu); CeedChk(ierr); 410 ierr = CeedVectorRestoreArray(v, &vv); CeedChk(ierr); 411 412 if (request != CEED_REQUEST_IMMEDIATE && request != CEED_REQUEST_ORDERED) 413 *request = NULL; 414 return 0; 415 } 416 417 static int CeedElemRestrictionDestroy_Magma(CeedElemRestriction r) { 418 CeedElemRestriction_Magma *impl = r->data; 419 int ierr; 420 421 // Free if we own the data 422 if (impl->own_){ 423 ierr = magma_free_pinned(impl->indices); CeedChk(ierr); 424 ierr = magma_free(impl->dindices); CeedChk(ierr); 425 } 426 else if (impl->down_){ 427 ierr = magma_free(impl->dindices); CeedChk(ierr); 428 } 429 ierr = CeedFree(&r->data); CeedChk(ierr); 430 return 0; 431 } 432 433 static int CeedElemRestrictionCreate_Magma(CeedElemRestriction r, 434 CeedMemType mtype, 435 CeedCopyMode cmode, 436 const CeedInt *indices) { 437 int ierr, size = r->nelem*r->elemsize; 438 CeedElemRestriction_Magma *impl; 439 440 // Allocate memory for the MAGMA Restricton and initializa pointers to NULL 441 ierr = CeedCalloc(1,&impl); CeedChk(ierr); 442 impl->dindices = NULL; 443 impl->indices = NULL; 444 impl->own_ = 0; 445 impl->down_= 0; 446 447 if (mtype == CEED_MEM_HOST) { 448 // memory is on the host; own_ = 0 449 switch (cmode) { 450 case CEED_COPY_VALUES: 451 ierr = magma_malloc( (void**)&impl->dindices, 452 size * sizeof(CeedInt)); CeedChk(ierr); 453 ierr = magma_malloc_pinned( (void**)&impl->indices, 454 size * sizeof(CeedInt)); CeedChk(ierr); 455 impl->own_ = 1; 456 457 if (indices != NULL) 458 magma_setvector(size, sizeof(CeedInt), 459 indices, 1, impl->dindices, 1); 460 break; 461 case CEED_OWN_POINTER: 462 ierr = magma_malloc( (void**)&impl->dindices, 463 size * sizeof(CeedInt)); CeedChk(ierr); 464 // TODO: possible 
problem here is if we are passed non-pinned memory; 465 // (as we own it, lter in destroy, we use free for pinned memory). 466 impl->indices = indices; 467 impl->own_ = 1; 468 469 if (indices != NULL) 470 magma_setvector(size, sizeof(CeedInt), 471 indices, 1, impl->dindices, 1); 472 break; 473 case CEED_USE_POINTER: 474 ierr = magma_malloc( (void**)&impl->dindices, 475 size * sizeof(CeedInt)); CeedChk(ierr); 476 magma_setvector(size, sizeof(CeedInt), 477 indices, 1, impl->dindices, 1); 478 impl->down_ = 1; 479 impl->indices = indices; 480 } 481 } else if (mtype == CEED_MEM_DEVICE) { 482 // memory is on the device; own = 0 483 switch (cmode) { 484 case CEED_COPY_VALUES: 485 ierr = magma_malloc( (void**)&impl->dindices, 486 size * sizeof(CeedInt)); CeedChk(ierr); 487 ierr = magma_malloc_pinned( (void**)&impl->indices, 488 size * sizeof(CeedInt)); CeedChk(ierr); 489 impl->own_ = 1; 490 491 if (indices) 492 magma_copyvector(size, sizeof(CeedInt), 493 indices, 1, impl->dindices, 1); 494 break; 495 case CEED_OWN_POINTER: 496 impl->dindices = indices; 497 ierr = magma_malloc_pinned( (void**)&impl->indices, 498 size * sizeof(CeedInt)); CeedChk(ierr); 499 impl->own_ = 1; 500 501 break; 502 case CEED_USE_POINTER: 503 impl->dindices = indices; 504 impl->indices = NULL; 505 } 506 507 } else 508 return CeedError(r->ceed, 1, "Only MemType = HOST or DEVICE supported"); 509 510 r->data = impl; 511 r->Apply = CeedElemRestrictionApply_Magma; 512 r->Destroy = CeedElemRestrictionDestroy_Magma; 513 514 return 0; 515 } 516 517 // Contracts on the middle index 518 // NOTRANSPOSE: V_ajc = T_jb U_abc 519 // TRANSPOSE: V_ajc = T_bj U_abc 520 // If Add != 0, "=" is replaced by "+=" 521 static int CeedTensorContract_Magma(Ceed ceed, 522 CeedInt A, CeedInt B, CeedInt C, CeedInt J, 523 const CeedScalar *t, CeedTransposeMode tmode, 524 const CeedInt Add, 525 const CeedScalar *u, CeedScalar *v) { 526 CeedInt tstride0 = B, tstride1 = 1; 527 if (tmode == CEED_TRANSPOSE) { 528 tstride0 = 1; tstride1 
= J; 529 } 530 531 for (CeedInt a=0; a<A; a++) { 532 for (CeedInt j=0; j<J; j++) { 533 if (!Add) { 534 for (CeedInt c=0; c<C; c++) 535 v[(a*J+j)*C+c] = 0; 536 } 537 for (CeedInt b=0; b<B; b++) { 538 for (CeedInt c=0; c<C; c++) { 539 v[(a*J+j)*C+c] += t[j*tstride0 + b*tstride1] * u[(a*B+b)*C+c]; 540 } 541 } 542 } 543 } 544 return 0; 545 } 546 547 static int CeedBasisApply_Magma(CeedBasis basis, CeedTransposeMode tmode, 548 CeedEvalMode emode, 549 const CeedScalar *u, CeedScalar *v) { 550 int ierr; 551 const CeedInt dim = basis->dim; 552 const CeedInt ndof = basis->ndof; 553 const CeedInt nqpt = ndof*CeedPowInt(basis->Q1d, dim); 554 const CeedInt add = (tmode == CEED_TRANSPOSE); 555 556 if (tmode == CEED_TRANSPOSE) { 557 const CeedInt vsize = ndof*CeedPowInt(basis->P1d, dim); 558 for (CeedInt i = 0; i < vsize; i++) 559 v[i] = (CeedScalar) 0; 560 } 561 if (emode & CEED_EVAL_INTERP) { 562 CeedInt P = basis->P1d, Q = basis->Q1d; 563 if (tmode == CEED_TRANSPOSE) { 564 P = basis->Q1d; Q = basis->P1d; 565 } 566 CeedInt pre = ndof*CeedPowInt(P, dim-1), post = 1; 567 CeedScalar tmp[2][ndof*Q*CeedPowInt(P>Q?P:Q, dim-1)]; 568 for (CeedInt d=0; d<dim; d++) { 569 ierr = CeedTensorContract_Magma(basis->ceed, pre, P, post, Q, basis->interp1d, 570 tmode, add&&(d==dim-1), 571 d==0?u:tmp[d%2], d==dim-1?v:tmp[(d+1)%2]); 572 CeedChk(ierr); 573 pre /= P; 574 post *= Q; 575 } 576 if (tmode == CEED_NOTRANSPOSE) { 577 v += nqpt; 578 } else { 579 u += nqpt; 580 } 581 } 582 if (emode & CEED_EVAL_GRAD) { 583 CeedInt P = basis->P1d, Q = basis->Q1d; 584 // In CEED_NOTRANSPOSE mode: 585 // u is (P^dim x nc), column-major layout (nc = ndof) 586 // v is (Q^dim x nc x dim), column-major layout (nc = ndof) 587 // In CEED_TRANSPOSE mode, the sizes of u and v are switched. 
588 if (tmode == CEED_TRANSPOSE) { 589 P = basis->Q1d, Q = basis->P1d; 590 } 591 CeedScalar tmp[2][ndof*Q*CeedPowInt(P>Q?P:Q, dim-1)]; 592 for (CeedInt p = 0; p < dim; p++) { 593 CeedInt pre = ndof*CeedPowInt(P, dim-1), post = 1; 594 for (CeedInt d=0; d<dim; d++) { 595 ierr = CeedTensorContract_Magma(basis->ceed, pre, P, post, Q, 596 (p==d)?basis->grad1d:basis->interp1d, 597 tmode, add&&(d==dim-1), 598 d==0?u:tmp[d%2], d==dim-1?v:tmp[(d+1)%2]); 599 CeedChk(ierr); 600 pre /= P; 601 post *= Q; 602 } 603 if (tmode == CEED_NOTRANSPOSE) { 604 v += nqpt; 605 } else { 606 u += nqpt; 607 } 608 } 609 } 610 if (emode & CEED_EVAL_WEIGHT) { 611 if (tmode == CEED_TRANSPOSE) 612 return CeedError(basis->ceed, 1, 613 "CEED_EVAL_WEIGHT incompatible with CEED_TRANSPOSE"); 614 CeedInt Q = basis->Q1d; 615 for (CeedInt d=0; d<dim; d++) { 616 CeedInt pre = CeedPowInt(Q, dim-d-1), post = CeedPowInt(Q, d); 617 for (CeedInt i=0; i<pre; i++) { 618 for (CeedInt j=0; j<Q; j++) { 619 for (CeedInt k=0; k<post; k++) { 620 v[(i*Q + j)*post + k] = basis->qweight1d[j] 621 * (d == 0 ? 
1 : v[(i*Q + j)*post + k]); 622 } 623 } 624 } 625 } 626 } 627 return 0; 628 } 629 630 static int CeedBasisDestroy_Magma(CeedBasis basis) { 631 return 0; 632 } 633 634 static int CeedBasisCreateTensorH1_Magma(Ceed ceed, CeedInt dim, CeedInt P1d, 635 CeedInt Q1d, const CeedScalar *interp1d, 636 const CeedScalar *grad1d, 637 const CeedScalar *qref1d, 638 const CeedScalar *qweight1d, 639 CeedBasis basis) { 640 basis->Apply = CeedBasisApply_Magma; 641 basis->Destroy = CeedBasisDestroy_Magma; 642 return 0; 643 } 644 645 static int CeedQFunctionApply_Magma(CeedQFunction qf, void *qdata, CeedInt Q, 646 const CeedScalar *const *u, 647 CeedScalar *const *v) { 648 int ierr; 649 ierr = qf->function(qf->ctx, qdata, Q, u, v); CeedChk(ierr); 650 return 0; 651 } 652 653 static int CeedQFunctionDestroy_Magma(CeedQFunction qf) { 654 return 0; 655 } 656 657 static int CeedQFunctionCreate_Magma(CeedQFunction qf) { 658 qf->Apply = CeedQFunctionApply_Magma; 659 qf->Destroy = CeedQFunctionDestroy_Magma; 660 return 0; 661 } 662 663 static int CeedOperatorDestroy_Magma(CeedOperator op) { 664 CeedOperator_Magma *impl = op->data; 665 int ierr; 666 667 ierr = CeedVectorDestroy(&impl->etmp); CeedChk(ierr); 668 ierr = CeedVectorDestroy(&impl->qdata); CeedChk(ierr); 669 ierr = CeedFree(&op->data); CeedChk(ierr); 670 return 0; 671 } 672 673 static int CeedOperatorApply_Magma(CeedOperator op, CeedVector qdata, 674 CeedVector ustate, 675 CeedVector residual, CeedRequest *request) { 676 CeedOperator_Magma *impl = op->data; 677 CeedVector etmp; 678 CeedInt Q; 679 const CeedInt nc = op->basis->ndof, dim = op->basis->dim; 680 CeedScalar *Eu; 681 char *qd; 682 int ierr; 683 CeedTransposeMode lmode = CEED_NOTRANSPOSE; 684 685 if (!impl->etmp) { 686 ierr = CeedVectorCreate(op->ceed, 687 nc * op->Erestrict->nelem * op->Erestrict->elemsize, 688 &impl->etmp); CeedChk(ierr); 689 // etmp is allocated when CeedVectorGetArray is called below 690 } 691 etmp = impl->etmp; 692 if (op->qf->inmode & 
~CEED_EVAL_WEIGHT) { 693 ierr = CeedElemRestrictionApply(op->Erestrict, CEED_NOTRANSPOSE, 694 nc, lmode, ustate, etmp, 695 CEED_REQUEST_IMMEDIATE); CeedChk(ierr); 696 } 697 ierr = CeedBasisGetNumQuadraturePoints(op->basis, &Q); CeedChk(ierr); 698 ierr = CeedVectorGetArray(etmp, CEED_MEM_HOST, &Eu); CeedChk(ierr); 699 ierr = CeedVectorGetArray(qdata, CEED_MEM_HOST, (CeedScalar**)&qd); 700 CeedChk(ierr); 701 for (CeedInt e=0; e<op->Erestrict->nelem; e++) { 702 CeedScalar BEu[Q*nc*(dim+2)], BEv[Q*nc*(dim+2)], *out[5] = {0,0,0,0,0}; 703 const CeedScalar *in[5] = {0,0,0,0,0}; 704 // TODO: quadrature weights can be computed just once 705 ierr = CeedBasisApply(op->basis, CEED_NOTRANSPOSE, op->qf->inmode, 706 &Eu[e*op->Erestrict->elemsize*nc], BEu); 707 CeedChk(ierr); 708 CeedScalar *u_ptr = BEu, *v_ptr = BEv; 709 if (op->qf->inmode & CEED_EVAL_INTERP) { in[0] = u_ptr; u_ptr += Q*nc; } 710 if (op->qf->inmode & CEED_EVAL_GRAD) { in[1] = u_ptr; u_ptr += Q*nc*dim; } 711 if (op->qf->inmode & CEED_EVAL_WEIGHT) { in[4] = u_ptr; u_ptr += Q; } 712 if (op->qf->outmode & CEED_EVAL_INTERP) { out[0] = v_ptr; v_ptr += Q*nc; } 713 if (op->qf->outmode & CEED_EVAL_GRAD) { out[1] = v_ptr; v_ptr += Q*nc*dim; } 714 ierr = CeedQFunctionApply(op->qf, &qd[e*Q*op->qf->qdatasize], Q, in, out); 715 CeedChk(ierr); 716 ierr = CeedBasisApply(op->basis, CEED_TRANSPOSE, op->qf->outmode, BEv, 717 &Eu[e*op->Erestrict->elemsize*nc]); 718 CeedChk(ierr); 719 } 720 ierr = CeedVectorRestoreArray(etmp, &Eu); CeedChk(ierr); 721 if (residual) { 722 CeedScalar *res; 723 CeedVectorGetArray(residual, CEED_MEM_HOST, &res); 724 for (int i = 0; i < residual->length; i++) 725 res[i] = (CeedScalar)0; 726 ierr = CeedElemRestrictionApply(op->Erestrict, CEED_TRANSPOSE, 727 nc, lmode, etmp, residual, 728 CEED_REQUEST_IMMEDIATE); CeedChk(ierr); 729 } 730 if (request != CEED_REQUEST_IMMEDIATE && request != CEED_REQUEST_ORDERED) 731 *request = NULL; 732 return 0; 733 } 734 735 static int CeedOperatorGetQData_Magma(CeedOperator 
op, CeedVector *qdata) { 736 CeedOperator_Magma *impl = op->data; 737 int ierr; 738 739 if (!impl->qdata) { 740 CeedInt Q; 741 ierr = CeedBasisGetNumQuadraturePoints(op->basis, &Q); CeedChk(ierr); 742 ierr = CeedVectorCreate(op->ceed, 743 op->Erestrict->nelem * Q 744 * op->qf->qdatasize / sizeof(CeedScalar), 745 &impl->qdata); CeedChk(ierr); 746 } 747 *qdata = impl->qdata; 748 return 0; 749 } 750 751 static int CeedOperatorCreate_Magma(CeedOperator op) { 752 CeedOperator_Magma *impl; 753 int ierr; 754 755 ierr = CeedCalloc(1, &impl); CeedChk(ierr); 756 op->data = impl; 757 op->Destroy = CeedOperatorDestroy_Magma; 758 op->Apply = CeedOperatorApply_Magma; 759 op->GetQData = CeedOperatorGetQData_Magma; 760 return 0; 761 } 762 763 // ***************************************************************************** 764 // * INIT 765 // ***************************************************************************** 766 static int CeedInit_Magma(const char *resource, Ceed ceed) { 767 if (strcmp(resource, "/cpu/magma") 768 && strcmp(resource, "/gpu/magma")) 769 return CeedError(ceed, 1, "MAGMA backend cannot use resource: %s", resource); 770 771 magma_init(); 772 //magma_print_environment(); 773 774 ceed->VecCreate = CeedVectorCreate_Magma; 775 ceed->BasisCreateTensorH1 = CeedBasisCreateTensorH1_Magma; 776 ceed->ElemRestrictionCreate = CeedElemRestrictionCreate_Magma; 777 ceed->QFunctionCreate = CeedQFunctionCreate_Magma; 778 ceed->OperatorCreate = CeedOperatorCreate_Magma; 779 return 0; 780 } 781 782 // ***************************************************************************** 783 // * REGISTER 784 // ***************************************************************************** 785 __attribute__((constructor)) 786 static void Register(void) { 787 CeedRegister("/cpu/magma", CeedInit_Magma); 788 CeedRegister("/gpu/magma", CeedInit_Magma); 789 } 790