1 // Copyright (c) 2017, Lawrence Livermore National Security, LLC. Produced at 2 // the Lawrence Livermore National Laboratory. LLNL-CODE-734707. All Rights 3 // reserved. See files LICENSE and NOTICE for details. 4 // 5 // This file is part of CEED, a collection of benchmarks, miniapps, software 6 // libraries and APIs for efficient high-order finite element and spectral 7 // element discretizations for exascale applications. For more information and 8 // source code availability see http://github.com/ceed. 9 // 10 // The CEED research is supported by the Exascale Computing Project 17-SC-20-SC, 11 // a collaborative effort of two U.S. Department of Energy organizations (Office 12 // of Science and the National Nuclear Security Administration) responsible for 13 // the planning and preparation of a capable exascale ecosystem, including 14 // software, applications, hardware, advanced system engineering and early 15 // testbed platforms, in support of the nation's exascale computing imperative. 
16 17 #include <ceed-impl.h> 18 #include <string.h> 19 #include "magma.h" 20 #include "ceed-magma.h" 21 22 typedef struct { 23 CeedScalar *array; 24 CeedScalar *darray; 25 int own_; 26 int down_; 27 } CeedVector_Magma; 28 29 typedef struct { 30 CeedInt *indices; 31 CeedInt *dindices; 32 int own_; 33 int down_; // cover a case where we own Device memory 34 } CeedElemRestriction_Magma; 35 36 typedef struct { 37 CeedVector etmp; 38 CeedVector qdata; 39 } CeedOperator_Magma; 40 41 // ***************************************************************************** 42 // * Initialize vector vec (after free mem) with values from array based on cmode 43 // * CEED_COPY_VALUES: memory is allocated in vec->array_allocated, made equal 44 // * to array, and data is copied (not store passed pointer) 45 // * CEED_OWN_POINTER: vec->data->array_allocated and vec->data->array = array 46 // * CEED_USE_POINTER: vec->data->array = array (can modify; no ownership) 47 // * mtype: CEED_MEM_HOST or CEED_MEM_DEVICE 48 // ***************************************************************************** 49 static int CeedVectorSetArray_Magma(CeedVector vec, CeedMemType mtype, 50 CeedCopyMode cmode, CeedScalar *array) { 51 CeedVector_Magma *impl = vec->data; 52 int ierr; 53 54 // If own data, free the "old" data, e.g., as it may be of different size 55 if (impl->own_) { 56 magma_free( impl->darray ); 57 magma_free_pinned( impl->array ); 58 impl->darray = NULL; 59 impl->array = NULL; 60 impl->own_ = 0; 61 impl->down_= 0; 62 } 63 64 if (mtype == CEED_MEM_HOST) { 65 // memory is on the host; own_ = 0 66 switch (cmode) { 67 case CEED_COPY_VALUES: 68 ierr = magma_malloc( (void**)&impl->darray, 69 vec->length * sizeof(CeedScalar)); CeedChk(ierr); 70 ierr = magma_malloc_pinned( (void**)&impl->array, 71 vec->length * sizeof(CeedScalar)); CeedChk(ierr); 72 impl->own_ = 1; 73 74 if (array != NULL) 75 magma_setvector(vec->length, sizeof(array[0]), 76 array, 1, impl->darray, 1); 77 break; 78 case 
CEED_OWN_POINTER: 79 ierr = magma_malloc( (void**)&impl->darray, 80 vec->length * sizeof(CeedScalar)); CeedChk(ierr); 81 // TODO: possible problem here is if we are passed non-pinned memory; 82 // (as we own it, lter in destroy, we use free for pinned memory). 83 impl->array = array; 84 impl->own_ = 1; 85 86 if (array != NULL) 87 magma_setvector(vec->length, sizeof(array[0]), 88 array, 1, impl->darray, 1); 89 break; 90 case CEED_USE_POINTER: 91 ierr = magma_malloc( (void**)&impl->darray, 92 vec->length * sizeof(CeedScalar)); CeedChk(ierr); 93 magma_setvector(vec->length, sizeof(array[0]), 94 array, 1, impl->darray, 1); 95 impl->array = array; 96 } 97 } else if (mtype == CEED_MEM_DEVICE) { 98 // memory is on the device; own = 0 99 switch (cmode) { 100 case CEED_COPY_VALUES: 101 ierr = magma_malloc( (void**)&impl->darray, 102 vec->length * sizeof(CeedScalar)); CeedChk(ierr); 103 ierr = magma_malloc_pinned( (void**)&impl->array, 104 vec->length * sizeof(CeedScalar)); CeedChk(ierr); 105 impl->own_ = 1; 106 107 if (array) 108 magma_copyvector(vec->length, sizeof(array[0]), 109 array, 1, impl->darray, 1); 110 break; 111 case CEED_OWN_POINTER: 112 impl->darray = array; 113 ierr = magma_malloc_pinned( (void**)&impl->array, 114 vec->length * sizeof(CeedScalar)); CeedChk(ierr); 115 impl->own_ = 1; 116 117 break; 118 case CEED_USE_POINTER: 119 impl->darray = array; 120 impl->array = NULL; 121 } 122 123 } else 124 return CeedError(vec->ceed, 1, "Only MemType = HOST or DEVICE supported"); 125 126 return 0; 127 } 128 129 // ***************************************************************************** 130 // * Give data pointer from vector vec to array (on HOST or DEVICE) 131 // ***************************************************************************** 132 static int CeedVectorGetArray_Magma(CeedVector vec, CeedMemType mtype, 133 CeedScalar **array) { 134 CeedVector_Magma *impl = vec->data; 135 int ierr; 136 137 if (mtype == CEED_MEM_HOST) { 138 if (impl->own_) { 139 // data 
is owned so GPU had the most up-to-date version; copy it 140 magma_getvector(vec->length, sizeof(*array[0]), 141 impl->darray, 1, impl->array, 1); 142 } else if (impl->array == NULL) { 143 // Vector doesn't own the data and was set on GPU 144 if (impl->darray == NULL) { 145 // call was made just to allocate memory 146 ierr = CeedVectorSetArray(vec, mtype, CEED_COPY_VALUES, NULL); 147 CeedChk(ierr); 148 } else 149 return CeedError(vec->ceed, 1, "Can not access DEVICE vector on HOST"); 150 } 151 *array = impl->array; 152 } else if (mtype == CEED_MEM_DEVICE) { 153 if (impl->darray == NULL) { 154 // Vector doesn't own the data and was set on the CPU 155 if (impl->array == NULL) { 156 // call was made just to allocate memory 157 ierr = CeedVectorSetArray(vec, mtype, CEED_COPY_VALUES, NULL); 158 CeedChk(ierr); 159 } else 160 return CeedError(vec->ceed, 1, "Can not access HOST vector on DEVICE"); 161 } 162 *array = impl->darray; 163 } else 164 return CeedError(vec->ceed, 1, "Can only provide to HOST or DEVICE memory"); 165 166 return 0; 167 } 168 169 // ***************************************************************************** 170 // * Give data pointer from vector vec to array (on HOST or DEVICE) to read it 171 // ***************************************************************************** 172 static int CeedVectorGetArrayRead_Magma(CeedVector vec, CeedMemType mtype, 173 const CeedScalar **array) { 174 CeedVector_Magma *impl = vec->data; 175 int ierr; 176 177 if (mtype == CEED_MEM_HOST) { 178 if (impl->own_) { 179 // data is owned so GPU had the most up-to-date version; copy it 180 magma_getvector(vec->length, sizeof(*array[0]), 181 impl->darray, 1, impl->array, 1); 182 } else if (impl->array == NULL) { 183 // Vector doesn't own the data and was set on GPU 184 if (impl->darray == NULL) { 185 // call was made just to allocate memory 186 ierr = CeedVectorSetArray(vec, mtype, CEED_COPY_VALUES, NULL); 187 CeedChk(ierr); 188 } else 189 return CeedError(vec->ceed, 1, "Can 
not access DEVICE vector on HOST"); 190 } 191 *array = impl->array; 192 } else if (mtype == CEED_MEM_DEVICE) { 193 if (impl->darray == NULL) { 194 // Vector doesn't own the data and was set on the CPU 195 if (impl->array == NULL) { 196 // call was made just to allocate memory 197 ierr = CeedVectorSetArray(vec, mtype, CEED_COPY_VALUES, NULL); 198 CeedChk(ierr); 199 } else 200 return CeedError(vec->ceed, 1, "Can not access HOST vector on DEVICE"); 201 } 202 *array = impl->darray; 203 } else 204 return CeedError(vec->ceed, 1, "Can only provide to HOST or DEVICE memory"); 205 206 return 0; 207 } 208 209 // ***************************************************************************** 210 // * There is no mtype here for array so it is not clear if we restore from HOST 211 // * memory or from DEVICE memory. We assume that it is CPU memory because if 212 // * it was GPU memory we would not call this routine at all. 213 // * Restore vector vec with values from array, where array received its values 214 // * from vec and possibly modified them. 
215 // ***************************************************************************** 216 static int CeedVectorRestoreArray_Magma(CeedVector vec, CeedScalar **array) { 217 CeedVector_Magma *impl = vec->data; 218 219 // Check if the array is a CPU pointer 220 if (*array == impl->array) { 221 // Update device, if the device pointer is not NULL 222 if (impl->darray != NULL) { 223 magma_setvector(vec->length, sizeof(*array[0]), 224 *array, 1, impl->darray, 1); 225 } else 226 ; // nothing to do (case of CPU use pointer) 227 228 } else if (impl->down_) { 229 // nothing to do if array is on GPU, except if down_=1(case CPU use pointer) 230 magma_getvector(vec->length, sizeof(*array[0]), 231 impl->darray, 1, impl->array, 1); 232 } 233 234 *array = NULL; 235 return 0; 236 } 237 238 // ***************************************************************************** 239 // * There is no mtype here for array so it is not clear if we restore from HOST 240 // * memory or from DEVICE memory. We assume that it is CPU memory because if 241 // * it was GPU memory we would not call this routine at all. 242 // * Restore vector vec with values from array, where array received its values 243 // * from vec to only read them; in this case vec may have been modified meanwhile 244 // * and needs to be restored here. 
// *****************************************************************************
// * Read-only counterpart of CeedVectorRestoreArray_Magma: return the handle
// * obtained from CeedVectorGetArrayRead_Magma and resynchronize the mirrors.
// *****************************************************************************
static int CeedVectorRestoreArrayRead_Magma(CeedVector vec,
                                            const CeedScalar **array) {
  CeedVector_Magma *impl = vec->data;

  // Check if the array is a CPU pointer
  if (*array == impl->array) {
    // Update device, if the device pointer is not NULL
    if (impl->darray != NULL)
      magma_setvector(vec->length, sizeof(*array[0]),
                      *array, 1, impl->darray, 1);
    else
      ; // nothing to do (case of CPU use pointer)

  } else if (impl->down_) {
    // nothing to do if array is on GPU, except if down_=1 (case CPU use pointer)
    magma_getvector(vec->length, sizeof(*array[0]),
                    impl->darray, 1, impl->array, 1);
  }

  *array = NULL;
  return 0;
}

// Destroy the backend data of vec, freeing only the mirrors this backend owns.
static int CeedVectorDestroy_Magma(CeedVector vec) {
  CeedVector_Magma *impl = vec->data;
  int ierr;

  // Free if we own the data
  if (impl->own_) {
    // own_ == 1: both mirrors were allocated here (host was pinned)
    ierr = magma_free_pinned(impl->array); CeedChk(ierr);
    ierr = magma_free(impl->darray); CeedChk(ierr);
  } else if (impl->down_) {
    // down_ == 1: only the device mirror is ours
    ierr = magma_free(impl->darray); CeedChk(ierr);
  }
  ierr = CeedFree(&vec->data); CeedChk(ierr);
  return 0;
}

// *****************************************************************************
// * Create vector vec of size n: install the backend function table and a
// * zero-initialized CeedVector_Magma (no memory is allocated until
// * Set/GetArray is called).
// *****************************************************************************
static int CeedVectorCreate_Magma(Ceed ceed, CeedInt n, CeedVector vec) {
  CeedVector_Magma *impl;
  int ierr;

  vec->SetArray = CeedVectorSetArray_Magma;
  vec->GetArray = CeedVectorGetArray_Magma;
  vec->GetArrayRead = CeedVectorGetArrayRead_Magma;
  vec->RestoreArray = CeedVectorRestoreArray_Magma;
  vec->RestoreArrayRead = CeedVectorRestoreArrayRead_Magma;
  vec->Destroy = CeedVectorDestroy_Magma;
  ierr = CeedCalloc(1,&impl); CeedChk(ierr);
  impl->darray = NULL;
  impl->array = NULL;
  impl->own_ = 0;
  impl->down_= 0;
  vec->data = impl;
  return 0;
}

// *****************************************************************************
// * Apply restriction operator r to u: v = r(rmode) u
// * NOTE(review): the magma_template<<...>> blocks below are not standard C;
// * they appear to be expanded into GPU kernels by a MAGMA code-generation
// * step in the build — confirm against the build system before editing them.
// *****************************************************************************
static int CeedElemRestrictionApply_Magma(CeedElemRestriction r,
                                          CeedTransposeMode tmode, CeedInt ncomp,
                                          CeedTransposeMode lmode, CeedVector u,
                                          CeedVector v, CeedRequest *request) {
  CeedElemRestriction_Magma *impl = r->data;
  int ierr;
  const CeedScalar *uu;
  CeedScalar *vv;
  CeedInt nelem = r->nelem, elemsize = r->elemsize, ndof = r->ndof;
  CeedInt esize = nelem * elemsize;
  // CeedInt *indices = impl->indices;
  CeedInt *dindices = impl->dindices;

  //ierr = CeedVectorGetArrayRead(u, CEED_MEM_HOST, &uu); CeedChk(ierr);
  //ierr = CeedVectorGetArray(v, CEED_MEM_HOST, &vv); CeedChk(ierr);

  // Get pointers on the device
  ierr = CeedVectorGetArrayRead(u, CEED_MEM_DEVICE, &uu); CeedChk(ierr);
  ierr = CeedVectorGetArray(v, CEED_MEM_DEVICE, &vv); CeedChk(ierr);

  if (tmode == CEED_NOTRANSPOSE) {
    // Perform: v = r * u
    if (ncomp == 1) {
      //for (CeedInt i=0; i<esize; i++) vv[i] = uu[indices[i]];

      magma_template<<i=0:esize>>
      (const CeedScalar *uu, CeedScalar *vv, CeedInt *dindices) {
        vv[i] = uu[dindices[i]];
      }
    } else {
      // vv is (elemsize x ncomp x nelem), column-major
      if (lmode == CEED_NOTRANSPOSE) { // u is (ndof x ncomp), column-major
        // Equivalent CPU reference loop:
        /*
        for (CeedInt e = 0; e < nelem; e++)
          for (CeedInt d = 0; d < ncomp; d++)
            for (CeedInt i=0; i < elemsize; i++) {
              vv[i + elemsize*(d+ncomp*e)] =
                uu[indices[i+elemsize*e]+ndof*d];
            }
        */
        magma_template<<e=0:nelem, d=0:ncomp, i=0:elemsize>>
        (const CeedScalar *uu, CeedScalar *vv, CeedInt *dindices, int ndof) {
          vv[i + iend*(d+dend*e)] = uu[dindices[i+iend*e]+ndof*d];
        }
      } else { // u is (ncomp x ndof), column-major
        // Equivalent CPU reference loop:
        /*
        for (CeedInt e = 0; e < nelem; e++)
          for (CeedInt d = 0; d < ncomp; d++)
            for (CeedInt i=0; i< elemsize; i++) {
              vv[i + elemsize*(d+ncomp*e)] =
                uu[d+ncomp*indices[i+elemsize*e]];
            }
        */
        magma_template<<e=0:nelem, d=0:ncomp, i=0:elemsize>>
        (const CeedScalar *uu, CeedScalar *vv, CeedInt *dindices) {
          vv[i + iend*(d+dend*e)] = uu[d+dend*dindices[i + iend*e]];
        }
      }
    }
  } else {
    // Note: in transpose mode, we perform: v += r^t * u
    // (atomic adds are used because different elements may scatter to the
    // same dof)
    if (ncomp == 1) {
      // fprintf(stderr,"3 ---------\n");
      // for (CeedInt i=0; i<esize; i++) vv[indices[i]] += uu[i];
      magma_template<<i=0:esize>>
      (const CeedScalar *uu, CeedScalar *vv, CeedInt *dindices) {
        magmablas_datomic_add( &vv[dindices[i]], uu[i]);
      }
    } else { // u is (elemsize x ncomp x nelem)
      fprintf(stderr,"2 ---------\n");

      if (lmode == CEED_NOTRANSPOSE) { // vv is (ndof x ncomp), column-major
        // Equivalent CPU reference loop:
        /*
        for (CeedInt e = 0; e < nelem; e++)
          for (CeedInt d = 0; d < ncomp; d++)
            for (CeedInt i=0; i < elemsize; i++) {
              vv[indices[i + elemsize*e]+ndof*d] +=
                uu[i + elemsize*(d+e*ncomp)];
            }
        */
        magma_template<<e=0:nelem, d=0:ncomp, i=0:elemsize>>
        (const CeedScalar *uu, CeedScalar *vv, CeedInt *dindices, CeedInt ndof) {
          magmablas_datomic_add( &vv[dindices[i+iend*e]+ndof*d],
                                 uu[i+iend*(d+e*dend)]);
        }
      } else { // vv is (ncomp x ndof), column-major
        // Equivalent CPU reference loop:
        /*
        for (CeedInt e = 0; e < nelem; e++)
          for (CeedInt d = 0; d < ncomp; d++)
            for (CeedInt i=0; i < elemsize; i++) {
              vv[d+ncomp*indices[i + elemsize*e]] +=
                uu[i + elemsize*(d+e*ncomp)];
            }
        */
        magma_template<<e=0:nelem, d=0:ncomp, i=0:elemsize>>
        (const CeedScalar *uu, CeedScalar *vv, CeedInt *dindices) {
          magmablas_datomic_add( &vv[d+dend*dindices[i + iend*e]],
                                 uu[i+iend*(d+e*dend)]);
        }
      }
    }
  }
  ierr = CeedVectorRestoreArrayRead(u, &uu); CeedChk(ierr);
  ierr = CeedVectorRestoreArray(v, &vv); CeedChk(ierr);

  if (request != CEED_REQUEST_IMMEDIATE && request != CEED_REQUEST_ORDERED)
    *request = NULL;
  return 0;
}

// Destroy the backend data of r, freeing only the index mirrors we own.
static int CeedElemRestrictionDestroy_Magma(CeedElemRestriction r) {
  CeedElemRestriction_Magma *impl = r->data;
  int ierr;

  // Free if we own the data
  if (impl->own_) {
    ierr = magma_free_pinned(impl->indices); CeedChk(ierr);
    ierr = magma_free(impl->dindices); CeedChk(ierr);
  } else if (impl->down_) {
    ierr = magma_free(impl->dindices); CeedChk(ierr);
  }
  ierr = CeedFree(&r->data); CeedChk(ierr);
  return 0;
}

// Create the element restriction: mirror the index array on host and device
// according to mtype/cmode (same ownership conventions as CeedVectorSetArray).
static int CeedElemRestrictionCreate_Magma(CeedElemRestriction r,
                                           CeedMemType mtype,
                                           CeedCopyMode cmode,
                                           const CeedInt *indices) {
  int ierr, size = r->nelem*r->elemsize;
  CeedElemRestriction_Magma *impl;

  // Allocate memory for the MAGMA Restriction and initialize pointers to NULL
  ierr = CeedCalloc(1,&impl); CeedChk(ierr);
  impl->dindices = NULL;
  impl->indices = NULL;
  impl->own_ = 0;
  impl->down_= 0;

  if (mtype == CEED_MEM_HOST) {
    // memory is on the host
    switch (cmode) {
    case CEED_COPY_VALUES:
      ierr = magma_malloc( (void**)&impl->dindices,
                           size * sizeof(CeedInt)); CeedChk(ierr);
      ierr = magma_malloc_pinned( (void**)&impl->indices,
                                  size * sizeof(CeedInt)); CeedChk(ierr);
      impl->own_ = 1;

      if (indices != NULL)
        magma_setvector(size, sizeof(CeedInt),
                        indices, 1, impl->dindices, 1);
      break;
    case CEED_OWN_POINTER:
      ierr = magma_malloc( (void**)&impl->dindices,
                           size * sizeof(CeedInt)); CeedChk(ierr);
      // TODO: possible problem here is if we are passed non-pinned memory;
      // (as we own it, later in destroy, we use free for pinned memory).
      impl->indices = (CeedInt *)indices;
      impl->own_ = 1;

      if (indices != NULL)
        magma_setvector(size, sizeof(CeedInt),
                        indices, 1, impl->dindices, 1);
      break;
    case CEED_USE_POINTER:
      // We only own the device copy we allocate here (down_ = 1)
      ierr = magma_malloc( (void**)&impl->dindices,
                           size * sizeof(CeedInt)); CeedChk(ierr);
      magma_setvector(size, sizeof(CeedInt),
                      indices, 1, impl->dindices, 1);
      impl->down_ = 1;
      impl->indices = (CeedInt *)indices;
    }
  } else if (mtype == CEED_MEM_DEVICE) {
    // memory is on the device
    switch (cmode) {
    case CEED_COPY_VALUES:
      ierr = magma_malloc( (void**)&impl->dindices,
                           size * sizeof(CeedInt)); CeedChk(ierr);
      ierr = magma_malloc_pinned( (void**)&impl->indices,
                                  size * sizeof(CeedInt)); CeedChk(ierr);
      impl->own_ = 1;

      if (indices)
        magma_copyvector(size, sizeof(CeedInt),
                         indices, 1, impl->dindices, 1);
      break;
    case CEED_OWN_POINTER:
      impl->dindices = (CeedInt *)indices;
      ierr = magma_malloc_pinned( (void**)&impl->indices,
                                  size * sizeof(CeedInt)); CeedChk(ierr);
      impl->own_ = 1;

      break;
    case CEED_USE_POINTER:
      impl->dindices = (CeedInt *)indices;
      impl->indices = NULL;
    }

  } else
    return CeedError(r->ceed, 1, "Only MemType = HOST or DEVICE supported");

  r->data    = impl;
  r->Apply   = CeedElemRestrictionApply_Magma;
  r->Destroy = CeedElemRestrictionDestroy_Magma;

  return 0;
}

// Contracts on the middle index
// NOTRANSPOSE: V_ajc = T_jb U_abc
// TRANSPOSE:   V_ajc = T_bj U_abc
// If Add != 0, "=" is replaced by "+="
static int CeedTensorContract_Magma(Ceed ceed,
                                    CeedInt A, CeedInt B, CeedInt C, CeedInt J,
                                    const CeedScalar *t, CeedTransposeMode tmode,
                                    const CeedInt Add,
                                    const CeedScalar *u, CeedScalar *v) {
#ifdef USE_MAGMA_BATCH
  // Batched GPU implementation provided by MAGMA
  magma_dtensor_contract(ceed, A, B, C, J, t, tmode, Add, u, v);
#else
  // CPU fallback: triple loop over (a, j, c) accumulating over b
  CeedInt tstride0 = B, tstride1 = 1;
  if (tmode == CEED_TRANSPOSE) {
    tstride0 = 1; tstride1 = J;
  }
  CeedDebug("\033[31m[CeedTensorContract] A=%d, J=%d, C=%d, B=%d: %d %d %d",
            A,J,C,B,A*J*B*C, C*J*A, C*B*A);
  for (CeedInt a=0; a<A; a++) {
    for (CeedInt j=0; j<J; j++) {
      if (!Add) {
        for (CeedInt c=0; c<C; c++)
          v[(a*J+j)*C+c] = 0;
      }
      for (CeedInt b=0; b<B; b++) {
        for (CeedInt c=0; c<C; c++) {
          v[(a*J+j)*C+c] += t[j*tstride0 + b*tstride1] * u[(a*B+b)*C+c];
        }
      }
    }
  }
#endif
  return 0;
}

// Apply the tensor-product basis (interp/grad/weight as selected by emode)
// via repeated 1D tensor contractions, one per dimension.
static int CeedBasisApply_Magma(CeedBasis basis, CeedTransposeMode tmode,
                                CeedEvalMode emode,
                                const CeedScalar *u, CeedScalar *v) {
  int ierr;
  const CeedInt dim = basis->dim;
  const CeedInt ndof = basis->ndof;
  const CeedInt nqpt = ndof*CeedPowInt(basis->Q1d, dim);
  const CeedInt add = (tmode == CEED_TRANSPOSE);

  CeedDebug("\033[01m[CeedBasisApply_Magma] vsize=%d",
            ndof*CeedPowInt(basis->P1d, dim));

  if (tmode == CEED_TRANSPOSE) {
    // In transpose mode contributions are accumulated, so clear v first
    const CeedInt vsize = ndof*CeedPowInt(basis->P1d, dim);
    for (CeedInt i = 0; i < vsize; i++)
      v[i] = (CeedScalar) 0;
  }
  if (emode & CEED_EVAL_INTERP) {
    CeedInt P = basis->P1d, Q = basis->Q1d;
    if (tmode == CEED_TRANSPOSE) {
      P = basis->Q1d; Q = basis->P1d;
    }
    CeedInt pre = ndof*CeedPowInt(P, dim-1), post = 1;
    // Double buffer for intermediate contraction results (VLA)
    CeedScalar tmp[2][ndof*Q*CeedPowInt(P>Q?P:Q, dim-1)];
    CeedDebug("\033[01m[CeedBasisApply_Magma] tmpsize = %d",
              ndof*Q*CeedPowInt(P>Q?P:Q, dim-1));
    for (CeedInt d=0; d<dim; d++) {
      ierr = CeedTensorContract_Magma(basis->ceed, pre, P, post, Q, basis->interp1d,
                                      tmode, add&&(d==dim-1),
                                      d==0?u:tmp[d%2], d==dim-1?v:tmp[(d+1)%2]);
      CeedChk(ierr);
      pre /= P;
      post *= Q;
    }
    if (tmode == CEED_NOTRANSPOSE) {
      v += nqpt;
    } else {
      u += nqpt;
    }
  }
  if (emode & CEED_EVAL_GRAD) {
    CeedInt P = basis->P1d, Q = basis->Q1d;
    // In CEED_NOTRANSPOSE mode:
    //   u is (P^dim x nc), column-major layout (nc = ndof)
    //   v is (Q^dim x nc x dim), column-major layout (nc = ndof)
    // In CEED_TRANSPOSE mode, the sizes of u and v are switched.
    if (tmode == CEED_TRANSPOSE) {
      P = basis->Q1d, Q = basis->P1d;
    }
    CeedScalar tmp[2][ndof*Q*CeedPowInt(P>Q?P:Q, dim-1)];
    CeedDebug("\033[01m[CeedBasisApply_Magma] tmpsize = %d",
              ndof*Q*CeedPowInt(P>Q?P:Q, dim-1));
    for (CeedInt p = 0; p < dim; p++) {
      // Differentiate along dimension p; interpolate along the others
      CeedInt pre = ndof*CeedPowInt(P, dim-1), post = 1;
      for (CeedInt d=0; d<dim; d++) {
        ierr = CeedTensorContract_Magma(basis->ceed, pre, P, post, Q,
                                        (p==d)?basis->grad1d:basis->interp1d,
                                        tmode, add&&(d==dim-1),
                                        d==0?u:tmp[d%2], d==dim-1?v:tmp[(d+1)%2]);
        CeedChk(ierr);
        pre /= P;
        post *= Q;
      }
      if (tmode == CEED_NOTRANSPOSE) {
        v += nqpt;
      } else {
        u += nqpt;
      }
    }
  }
  if (emode & CEED_EVAL_WEIGHT) {
    if (tmode == CEED_TRANSPOSE)
      return CeedError(basis->ceed, 1,
                       "CEED_EVAL_WEIGHT incompatible with CEED_TRANSPOSE");
    CeedInt Q = basis->Q1d;
    // Tensor-product quadrature weights: multiply the 1D weights dimension
    // by dimension (d == 0 initializes, later dimensions scale)
    for (CeedInt d=0; d<dim; d++) {
      CeedInt pre = CeedPowInt(Q, dim-d-1), post = CeedPowInt(Q, d);
      for (CeedInt i=0; i<pre; i++) {
        for (CeedInt j=0; j<Q; j++) {
          for (CeedInt k=0; k<post; k++) {
            v[(i*Q + j)*post + k] = basis->qweight1d[j]
                                    * (d == 0 ? 1 : v[(i*Q + j)*post + k]);
          }
        }
      }
    }
  }
  return 0;
}

// No backend-owned basis data to release.
static int CeedBasisDestroy_Magma(CeedBasis basis) {
  return 0;
}

// Install the basis function table; the 1D matrices are kept by the core.
static int CeedBasisCreateTensorH1_Magma(Ceed ceed, CeedInt dim, CeedInt P1d,
                                         CeedInt Q1d, const CeedScalar *interp1d,
                                         const CeedScalar *grad1d,
                                         const CeedScalar *qref1d,
                                         const CeedScalar *qweight1d,
                                         CeedBasis basis) {
  basis->Apply = CeedBasisApply_Magma;
  basis->Destroy = CeedBasisDestroy_Magma;
  return 0;
}

// Invoke the user-supplied Q-function on Q quadrature points.
static int CeedQFunctionApply_Magma(CeedQFunction qf, void *qdata, CeedInt Q,
                                    const CeedScalar *const *u,
                                    CeedScalar *const *v) {
  int ierr;
  ierr = qf->function(qf->ctx, qdata, Q, u, v); CeedChk(ierr);
  return 0;
}

// No backend-owned Q-function data to release.
static int CeedQFunctionDestroy_Magma(CeedQFunction qf) {
  return 0;
}

// Install the Q-function function table.
static int CeedQFunctionCreate_Magma(CeedQFunction qf) {
  qf->Apply = CeedQFunctionApply_Magma;
  qf->Destroy = CeedQFunctionDestroy_Magma;
  return 0;
}

// Destroy the operator's work vectors and backend data.
static int CeedOperatorDestroy_Magma(CeedOperator op) {
  CeedOperator_Magma *impl = op->data;
  int ierr;

  ierr = CeedVectorDestroy(&impl->etmp); CeedChk(ierr);
  ierr = CeedVectorDestroy(&impl->qdata); CeedChk(ierr);
  ierr = CeedFree(&op->data); CeedChk(ierr);
  return 0;
}

// Apply the operator: restrict ustate to elements, apply the basis and the
// Q-function per element, then (optionally) assemble into residual.
static int CeedOperatorApply_Magma(CeedOperator op, CeedVector qdata,
                                   CeedVector ustate,
                                   CeedVector residual, CeedRequest *request) {
  CeedOperator_Magma *impl = op->data;
  CeedVector etmp;
  CeedInt Q;
  const CeedInt nc = op->basis->ndof, dim = op->basis->dim;
  CeedScalar *Eu;
  char *qd;
  int ierr;
  CeedTransposeMode lmode = CEED_NOTRANSPOSE;

  if (!impl->etmp) {
    // Lazily create the element work vector sized for all elements
    ierr = CeedVectorCreate(op->ceed,
                            nc * op->Erestrict->nelem * op->Erestrict->elemsize,
                            &impl->etmp); CeedChk(ierr);
    // etmp is allocated when CeedVectorGetArray is called below
  }
  etmp = impl->etmp;
  if (op->qf->inmode & ~CEED_EVAL_WEIGHT) {
    // Gather ustate into element ordering (skipped if only weights are needed)
    ierr = CeedElemRestrictionApply(op->Erestrict, CEED_NOTRANSPOSE,
                                    nc, lmode, ustate, etmp,
                                    CEED_REQUEST_IMMEDIATE); CeedChk(ierr);
  }
  ierr = CeedBasisGetNumQuadraturePoints(op->basis, &Q); CeedChk(ierr);
  ierr = CeedVectorGetArray(etmp, CEED_MEM_HOST, &Eu); CeedChk(ierr);
  ierr = CeedVectorGetArray(qdata, CEED_MEM_HOST, (CeedScalar**)&qd);
  CeedChk(ierr);

  for (CeedInt e=0; e<op->Erestrict->nelem; e++) {
    // Per-element buffers for basis outputs/inputs (VLAs); in[4] is reserved
    // for quadrature weights
    CeedScalar BEu[Q*nc*(dim+2)], BEv[Q*nc*(dim+2)], *out[5] = {0,0,0,0,0};
    const CeedScalar *in[5] = {0,0,0,0,0};
    // TODO: quadrature weights can be computed just once
    CeedDebug("\033[11m[CeedOperatorApply_Magma] e=%d: Eu+%d, %d",
              e, e*op->Erestrict->elemsize*nc, Q*nc*(dim+2));
    ierr = CeedBasisApply(op->basis, CEED_NOTRANSPOSE, op->qf->inmode,
                          &Eu[e*op->Erestrict->elemsize*nc], BEu);
    CeedChk(ierr);
    // Slice BEu/BEv into the in/out pointer arrays expected by the Q-function
    CeedScalar *u_ptr = BEu, *v_ptr = BEv;
    if (op->qf->inmode & CEED_EVAL_INTERP) { in[0] = u_ptr; u_ptr += Q*nc; }
    if (op->qf->inmode & CEED_EVAL_GRAD) { in[1] = u_ptr; u_ptr += Q*nc*dim; }
    if (op->qf->inmode & CEED_EVAL_WEIGHT) { in[4] = u_ptr; u_ptr += Q; }
    if (op->qf->outmode & CEED_EVAL_INTERP) { out[0] = v_ptr; v_ptr += Q*nc; }
    if (op->qf->outmode & CEED_EVAL_GRAD) { out[1] = v_ptr; v_ptr += Q*nc*dim; }
    ierr = CeedQFunctionApply(op->qf, &qd[e*Q*op->qf->qdatasize], Q, in, out);
    CeedChk(ierr);
    CeedDebug("\033[31m[CeedOperatorApply_Magma] e=%d: ",e);
    // Apply the transposed basis, writing back into the element buffer
    ierr = CeedBasisApply(op->basis, CEED_TRANSPOSE, op->qf->outmode, BEv,
                          &Eu[e*op->Erestrict->elemsize*nc]);
    CeedChk(ierr);
  }
  ierr = CeedVectorRestoreArray(etmp, &Eu); CeedChk(ierr);
  if (residual) {
    // Zero the residual, then scatter-add the element contributions into it
    CeedScalar *res;
    CeedVectorGetArray(residual, CEED_MEM_HOST, &res);
    for (int i = 0; i < residual->length; i++)
      res[i] = (CeedScalar)0;
    ierr = CeedElemRestrictionApply(op->Erestrict, CEED_TRANSPOSE,
                                    nc, lmode, etmp, residual,
                                    CEED_REQUEST_IMMEDIATE); CeedChk(ierr);
  }
  if (request != CEED_REQUEST_IMMEDIATE && request != CEED_REQUEST_ORDERED)
    *request = NULL;
  return 0;
}

// Lazily create and return the operator's quadrature-data vector.
static int CeedOperatorGetQData_Magma(CeedOperator op, CeedVector *qdata) {
  CeedOperator_Magma *impl = op->data;
  int ierr;

  if (!impl->qdata) {
    CeedInt Q;
    ierr = CeedBasisGetNumQuadraturePoints(op->basis, &Q); CeedChk(ierr);
    // qdatasize is in bytes; the vector length is in CeedScalars
    ierr = CeedVectorCreate(op->ceed,
                            op->Erestrict->nelem * Q
                            * op->qf->qdatasize / sizeof(CeedScalar),
                            &impl->qdata); CeedChk(ierr);
  }
  *qdata = impl->qdata;
  return 0;
}

// Install the operator function table.
static int CeedOperatorCreate_Magma(CeedOperator op) {
  CeedOperator_Magma *impl;
  int ierr;

  ierr = CeedCalloc(1, &impl); CeedChk(ierr);
  op->data = impl;
  op->Destroy = CeedOperatorDestroy_Magma;
  op->Apply = CeedOperatorApply_Magma;
  op->GetQData = CeedOperatorGetQData_Magma;
  return 0;
}

// *****************************************************************************
// * INIT: validate the resource string, initialize MAGMA, and install the
// * backend's create functions on the Ceed object.
// *****************************************************************************
static int CeedInit_Magma(const char *resource, Ceed ceed) {
  if (strcmp(resource, "/cpu/magma")
      && strcmp(resource, "/gpu/magma"))
    return CeedError(ceed, 1, "MAGMA backend cannot use resource: %s", resource);

  magma_init();
  //magma_print_environment();

  ceed->VecCreate = CeedVectorCreate_Magma;
  ceed->BasisCreateTensorH1 = CeedBasisCreateTensorH1_Magma;
  ceed->ElemRestrictionCreate = CeedElemRestrictionCreate_Magma;
  ceed->QFunctionCreate = CeedQFunctionCreate_Magma;
  ceed->OperatorCreate = CeedOperatorCreate_Magma;
  return 0;
}

// *****************************************************************************
// * REGISTER: run at load time (GCC/Clang constructor attribute) to register
// * this backend under both resource strings.
// *****************************************************************************
__attribute__((constructor))
static void Register(void) {
  CeedRegister("/cpu/magma", CeedInit_Magma);
  CeedRegister("/gpu/magma", CeedInit_Magma);
}