1 // Copyright (c) 2017, Lawrence Livermore National Security, LLC. Produced at 2 // the Lawrence Livermore National Laboratory. LLNL-CODE-734707. All Rights 3 // reserved. See files LICENSE and NOTICE for details. 4 // 5 // This file is part of CEED, a collection of benchmarks, miniapps, software 6 // libraries and APIs for efficient high-order finite element and spectral 7 // element discretizations for exascale applications. For more information and 8 // source code availability see http://github.com/ceed. 9 // 10 // The CEED research is supported by the Exascale Computing Project 17-SC-20-SC, 11 // a collaborative effort of two U.S. Department of Energy organizations (Office 12 // of Science and the National Nuclear Security Administration) responsible for 13 // the planning and preparation of a capable exascale ecosystem, including 14 // software, applications, hardware, advanced system engineering and early 15 // testbed platforms, in support of the nation's exascale computing imperative. 
#include "ceed-magma.h"
#include <string.h>

// Backend data attached to a CeedVector: a host array, a device array, and
// ownership flags that decide what Destroy frees.
typedef struct {
  CeedScalar *array;    // host copy (pinned when allocated by this backend)
  CeedScalar *darray;   // device copy
  int own_;             // nonzero: we own both array and darray
  int down_;            // nonzero: we own only darray (host USE_POINTER case)
} CeedVector_Magma;

// Backend data attached to a CeedElemRestriction; same ownership scheme as
// CeedVector_Magma, but for the index arrays.
typedef struct {
  CeedInt *indices;     // host copy of the restriction indices
  CeedInt *dindices;    // device copy of the restriction indices
  int own_;             // nonzero: we own both indices and dindices
  int down_; // cover a case where we own Device memory
} CeedElemRestriction_Magma;

// Scratch pointer arrays handed to the user Q-function on Apply.
typedef struct {
  const CeedScalar **inputs;
  CeedScalar **outputs;
} CeedQFunction_Magma;

// Work vectors cached by an operator between applications.
typedef struct {
  CeedVector *Evecs; /// E-vectors needed to apply operator (in followed by out)
  CeedScalar **Edata;
  CeedVector *evecsin; /// Input E-vectors needed to apply operator
  CeedVector *evecsout; /// Output E-vectors needed to apply operator
  CeedVector *qvecsin; /// Input Q-vectors needed to apply operator
  CeedVector *qvecsout; /// Output Q-vectors needed to apply operator
  CeedInt numein;
  CeedInt numeout;
} CeedOperator_Magma;

// *****************************************************************************
// * Initialize vector vec (after free mem) with values from array based on cmode
// * CEED_COPY_VALUES: memory is allocated in vec->array_allocated, made equal
// *                   to array, and data is copied (not store passed pointer)
// * CEED_OWN_POINTER: vec->data->array_allocated and vec->data->array = array
// * CEED_USE_POINTER: vec->data->array = array (can modify; no ownership)
// * mtype: CEED_MEM_HOST or CEED_MEM_DEVICE
// *****************************************************************************
static int CeedVectorSetArray_Magma(CeedVector vec, CeedMemType mtype,
                                    CeedCopyMode cmode, CeedScalar *array) {
  CeedVector_Magma *impl = vec->data;
  int ierr;

  // If own data, free the "old" data, e.g., as it may be of different size
  if (impl->own_) {
    magma_free( impl->darray );
    magma_free_pinned( impl->array );
    impl->darray = NULL;
    impl->array = NULL;
    impl->own_ = 0;
    impl->down_= 0;
  }

  if (mtype == CEED_MEM_HOST) {
    // memory is on the host; own_ = 0
    switch (cmode) {
    case CEED_COPY_VALUES:
      // Allocate both mirrors; incoming data (if any) is pushed to the device
      // only -- the host copy is synced back on demand in GetArray.
      ierr = magma_malloc( (void **)&impl->darray,
                           vec->length * sizeof(CeedScalar)); CeedChk(ierr);
      ierr = magma_malloc_pinned( (void **)&impl->array,
                                  vec->length * sizeof(CeedScalar)); CeedChk(ierr);
      impl->own_ = 1;

      if (array != NULL)
        magma_setvector(vec->length, sizeof(array[0]),
                        array, 1, impl->darray, 1);
      break;
    case CEED_OWN_POINTER:
      ierr = magma_malloc( (void **)&impl->darray,
                           vec->length * sizeof(CeedScalar)); CeedChk(ierr);
      // TODO: possible problem here is if we are passed non-pinned memory;
      // (as we own it, later in destroy, we use free for pinned memory).
      impl->array = array;
      impl->own_ = 1;

      if (array != NULL)
        magma_setvector(vec->length, sizeof(array[0]),
                        array, 1, impl->darray, 1);
      break;
    case CEED_USE_POINTER:
      // down_ = 1: we own only the device mirror of the user's host array.
      // NOTE(review): array is dereferenced unconditionally here -- assumes
      // USE_POINTER callers never pass NULL; confirm against callers.
      ierr = magma_malloc( (void **)&impl->darray,
                           vec->length * sizeof(CeedScalar)); CeedChk(ierr);
      magma_setvector(vec->length, sizeof(array[0]),
                      array, 1, impl->darray, 1);

      impl->down_ = 1;
      impl->array = array;
    }
  } else if (mtype == CEED_MEM_DEVICE) {
    // memory is on the device; own = 0
    switch (cmode) {
    case CEED_COPY_VALUES:
      ierr = magma_malloc( (void **)&impl->darray,
                           vec->length * sizeof(CeedScalar)); CeedChk(ierr);
      ierr = magma_malloc_pinned( (void **)&impl->array,
                                  vec->length * sizeof(CeedScalar)); CeedChk(ierr);
      impl->own_ = 1;

      if (array)
        // device-to-device copy of the caller's data
        magma_copyvector(vec->length, sizeof(array[0]),
                         array, 1, impl->darray, 1);
      else
        // t30 assumes allocation initializes with 0s
        // NOTE(review): this uploads the freshly allocated (uninitialized)
        // pinned host buffer to the device -- relies on the allocator zeroing
        // the memory; confirm.
        magma_setvector(vec->length, sizeof(array[0]),
                        impl->array, 1, impl->darray, 1);
      break;
    case CEED_OWN_POINTER:
      // Adopt the caller's device pointer; allocate a host mirror for syncs.
      impl->darray = array;
      ierr = magma_malloc_pinned( (void **)&impl->array,
                                  vec->length * sizeof(CeedScalar)); CeedChk(ierr);
      impl->own_ = 1;

      break;
    case CEED_USE_POINTER:
      // No ownership at all: just alias the user's device pointer.
      impl->darray = array;
      impl->array = NULL;
    }

  } else
    return CeedError(vec->ceed, 1, "Only MemType = HOST or DEVICE supported");

  return 0;
}

// *****************************************************************************
// * Give data pointer from vector vec to array (on HOST or DEVICE)
// *****************************************************************************
static int CeedVectorGetArray_Magma(CeedVector vec, CeedMemType mtype,
                                    CeedScalar **array) {
  CeedVector_Magma *impl = vec->data;
  int ierr;

  if (mtype == CEED_MEM_HOST) {
    if (impl->own_) {
      // data is owned so GPU had the most up-to-date version; copy it
      magma_getvector(vec->length, sizeof(*array[0]),
                      impl->darray, 1, impl->array, 1);
      CeedDebug("\033[31m[CeedVectorGetArray_Magma]");
    } else if (impl->array == NULL) {
      // Vector doesn't own the data and was set on GPU
      if (impl->darray == NULL) {
        // call was made just to allocate memory
        ierr = CeedVectorSetArray(vec, mtype, CEED_COPY_VALUES, NULL);
        CeedChk(ierr);
      } else
        return CeedError(vec->ceed, 1, "Can not access DEVICE vector on HOST");
    }
    *array = impl->array;
  } else if (mtype == CEED_MEM_DEVICE) {
    if (impl->darray == NULL) {
      // Vector doesn't own the data and was set on the CPU
      if (impl->array == NULL) {
        // call was made just to allocate memory
        ierr = CeedVectorSetArray(vec, mtype, CEED_COPY_VALUES, NULL);
        CeedChk(ierr);
      } else
        return CeedError(vec->ceed, 1, "Can not access HOST vector on DEVICE");
    }
    *array = impl->darray;
  } else
    return CeedError(vec->ceed, 1, "Can only provide to HOST or DEVICE memory");

  return 0;
}

// *****************************************************************************
// * Give data pointer from vector vec to array (on HOST or DEVICE) to read it
189 // ***************************************************************************** 190 static int CeedVectorGetArrayRead_Magma(CeedVector vec, CeedMemType mtype, 191 const CeedScalar **array) { 192 CeedVector_Magma *impl = vec->data; 193 int ierr; 194 195 if (mtype == CEED_MEM_HOST) { 196 if (impl->own_) { 197 // data is owned so GPU had the most up-to-date version; copy it 198 magma_getvector(vec->length, sizeof(*array[0]), 199 impl->darray, 1, impl->array, 1); 200 } else if (impl->array == NULL) { 201 // Vector doesn't own the data and was set on GPU 202 if (impl->darray == NULL) { 203 // call was made just to allocate memory 204 ierr = CeedVectorSetArray(vec, mtype, CEED_COPY_VALUES, NULL); 205 CeedChk(ierr); 206 } else 207 return CeedError(vec->ceed, 1, "Can not access DEVICE vector on HOST"); 208 } 209 *array = impl->array; 210 } else if (mtype == CEED_MEM_DEVICE) { 211 if (impl->darray == NULL) { 212 // Vector doesn't own the data and was set on the CPU 213 if (impl->array == NULL) { 214 // call was made just to allocate memory 215 ierr = CeedVectorSetArray(vec, mtype, CEED_COPY_VALUES, NULL); 216 CeedChk(ierr); 217 } else 218 return CeedError(vec->ceed, 1, "Can not access HOST vector on DEVICE"); 219 } 220 *array = impl->darray; 221 } else 222 return CeedError(vec->ceed, 1, "Can only provide to HOST or DEVICE memory"); 223 224 return 0; 225 } 226 227 // ***************************************************************************** 228 // * There is no mtype here for array so it is not clear if we restore from HOST 229 // * memory or from DEVICE memory. We assume that it is CPU memory because if 230 // * it was GPU memory we would not call this routine at all. 231 // * Restore vector vec with values from array, where array received its values 232 // * from vec and possibly modified them. 
233 // ***************************************************************************** 234 static int CeedVectorRestoreArray_Magma(CeedVector vec, CeedScalar **array) { 235 CeedVector_Magma *impl = vec->data; 236 237 // Check if the array is a CPU pointer 238 if (*array == impl->array) { 239 // Update device, if the device pointer is not NULL 240 if (impl->darray != NULL) { 241 magma_setvector(vec->length, sizeof(*array[0]), 242 *array, 1, impl->darray, 1); 243 } else { 244 // nothing to do (case of CPU use pointer) 245 } 246 247 } else if (impl->down_) { 248 // nothing to do if array is on GPU, except if down_=1(case CPU use pointer) 249 magma_getvector(vec->length, sizeof(*array[0]), 250 impl->darray, 1, impl->array, 1); 251 } 252 253 *array = NULL; 254 return 0; 255 } 256 257 // ***************************************************************************** 258 // * There is no mtype here for array so it is not clear if we restore from HOST 259 // * memory or from DEVICE memory. We assume that it is CPU memory because if 260 // * it was GPU memory we would not call this routine at all. 261 // * Restore vector vec with values from array, where array received its values 262 // * from vec to only read them; in this case vec may have been modified meanwhile 263 // * and needs to be restored here. 
// *****************************************************************************
static int CeedVectorRestoreArrayRead_Magma(CeedVector vec,
                                            const CeedScalar **array) {
  CeedVector_Magma *impl = vec->data;

  // Check if the array is a CPU pointer
  if (*array == impl->array) {
    // Update device, if the device pointer is not NULL
    // NOTE(review): the pointer was handed out read-only, so this
    // host-to-device copy looks redundant; kept as-is -- confirm intent.
    if (impl->darray != NULL) {
      magma_setvector(vec->length, sizeof(*array[0]),
                      *array, 1, impl->darray, 1);
    } else {
      // nothing to do (case of CPU use pointer)
    }

  } else if (impl->down_) {
    // A device pointer was handed out while a user host array is attached
    // (down_ == 1, CPU use-pointer case): refresh the host copy
    magma_getvector(vec->length, sizeof(*array[0]),
                    impl->darray, 1, impl->array, 1);
  }

  *array = NULL;
  return 0;
}

// Release the vector's backend data; buffers are freed only when owned.
static int CeedVectorDestroy_Magma(CeedVector vec) {
  CeedVector_Magma *impl = vec->data;
  int ierr;

  // Free if we own the data
  if (impl->own_) {
    ierr = magma_free_pinned(impl->array); CeedChk(ierr);
    ierr = magma_free(impl->darray); CeedChk(ierr);
  } else if (impl->down_) {
    // down_ == 1: only the device mirror belongs to us
    ierr = magma_free(impl->darray); CeedChk(ierr);
  }
  ierr = CeedFree(&vec->data); CeedChk(ierr);
  return 0;
}

// *****************************************************************************
// * Create vector vec of size n
// *****************************************************************************
static int CeedVectorCreate_Magma(CeedInt n, CeedVector vec) {
  CeedVector_Magma *impl;
  int ierr;

  // Install the MAGMA implementations of the CeedVector interface
  vec->SetArray = CeedVectorSetArray_Magma;
  vec->GetArray = CeedVectorGetArray_Magma;
  vec->GetArrayRead = CeedVectorGetArrayRead_Magma;
  vec->RestoreArray = CeedVectorRestoreArray_Magma;
  vec->RestoreArrayRead = CeedVectorRestoreArrayRead_Magma;
  vec->Destroy = CeedVectorDestroy_Magma;
  // Storage is allocated lazily, on the first SetArray/GetArray call
  ierr = CeedCalloc(1,&impl); CeedChk(ierr);
  impl->darray = NULL;
  impl->array = NULL;
  impl->own_ = 0;
  impl->down_= 0;
  vec->data = impl;
  return 0;
}


// *****************************************************************************
// * Apply restriction operator r to u: v = r(rmode) u
// *****************************************************************************
static int CeedElemRestrictionApply_Magma(CeedElemRestriction r,
                                          CeedTransposeMode tmode,
                                          CeedTransposeMode lmode, CeedVector u,
                                          CeedVector v, CeedRequest *request) {
  CeedElemRestriction_Magma *impl = r->data;
  int ierr;
  const CeedScalar *uu;
  CeedScalar *vv;
  CeedInt nelem = r->nelem, elemsize = r->elemsize, ndof = r->ndof,
          ncomp=r->ncomp;
  CeedInt esize = nelem * elemsize;

#ifdef USE_MAGMA_BATCH2
  CeedInt *dindices = impl->dindices;
  // Get pointers on the device
  ierr = CeedVectorGetArrayRead(u, CEED_MEM_DEVICE, &uu); CeedChk(ierr);
  ierr = CeedVectorGetArray(v, CEED_MEM_DEVICE, &vv); CeedChk(ierr);
#else
  CeedInt *indices = impl->indices;
  ierr = CeedVectorGetArrayRead(u, CEED_MEM_HOST, &uu); CeedChk(ierr);
  ierr = CeedVectorGetArray(v, CEED_MEM_HOST, &vv); CeedChk(ierr);
#endif

  if (tmode == CEED_NOTRANSPOSE) {
    // Perform: v = r * u
    if (!impl->indices) {
      // Identity restriction: plain copy
      for (CeedInt i=0; i<esize*ncomp; i++) vv[i] = uu[i];
    } else if (ncomp == 1) {
#ifdef USE_MAGMA_BATCH2
      magma_template<<i=0:esize>>
      (const CeedScalar *uu, CeedScalar *vv, CeedInt *dindices) {
        vv[i] = uu[dindices[i]];
      }
#else
      for (CeedInt i=0; i<esize; i++) vv[i] = uu[indices[i]];
#endif
    } else {
      // vv is (elemsize x ncomp x nelem), column-major
      if (lmode == CEED_NOTRANSPOSE) { // u is (ndof x ncomp), column-major
#ifdef USE_MAGMA_BATCH2
        magma_template<<e=0:nelem, d=0:ncomp, i=0:elemsize>>
        (const CeedScalar *uu, CeedScalar *vv, CeedInt *dindices, int ndof) {
          vv[i + iend*(d+dend*e)] = uu[dindices[i+iend*e]+ndof*d];
        }
#else
        for (CeedInt e = 0; e < nelem; e++)
          for (CeedInt d = 0; d < ncomp; d++)
            for (CeedInt i=0; i < elemsize; i++) {
              vv[i + elemsize*(d+ncomp*e)] =
                uu[indices[i+elemsize*e]+ndof*d];
            }
#endif
      } else { // u is (ncomp x ndof), column-major
#ifdef USE_MAGMA_BATCH2
        magma_template<<e=0:nelem, d=0:ncomp, i=0:elemsize>>
        (const CeedScalar *uu, CeedScalar *vv, CeedInt *dindices) {
          vv[i + iend*(d+dend*e)] = uu[d+dend*dindices[i + iend*e]];
        }
#else
        for (CeedInt e = 0; e < nelem; e++)
          for (CeedInt d = 0; d < ncomp; d++)
            for (CeedInt i=0; i< elemsize; i++) {
              vv[i + elemsize*(d+ncomp*e)] =
                uu[d+ncomp*indices[i+elemsize*e]];
            }
#endif
      }
    }
  } else {
    // Note: in transpose mode, we perform: v += r^t * u
    if (!impl->indices) {
      // NOTE(review): the NOTRANSPOSE identity branch runs over esize*ncomp,
      // this one over esize only -- confirm whether ncomp > 1 can reach here.
      for (CeedInt i=0; i<esize; i++) vv[i] += uu[i];
    } else if (ncomp == 1) {
#ifdef USE_MAGMA_BATCH2
      magma_template<<i=0:esize>>
      (const CeedScalar *uu, CeedScalar *vv, CeedInt *dindices) {
        magmablas_datomic_add( &vv[dindices[i]], uu[i]);
      }
#else
      for (CeedInt i=0; i<esize; i++) vv[indices[i]] += uu[i];
#endif
    } else { // u is (elemsize x ncomp x nelem)
      // NOTE(review): stray debug print -- candidate for removal
      fprintf(stderr,"2 ---------\n");

      if (lmode == CEED_NOTRANSPOSE) { // vv is (ndof x ncomp), column-major
#ifdef USE_MAGMA_BATCH2
        magma_template<<e=0:nelem, d=0:ncomp, i=0:elemsize>>
        (const CeedScalar *uu, CeedScalar *vv, CeedInt *dindices, CeedInt ndof) {
          magmablas_datomic_add( &vv[dindices[i+iend*e]+ndof*d],
                                 uu[i+iend*(d+e*dend)]);
        }
#else
        for (CeedInt e = 0; e < nelem; e++)
          for (CeedInt d = 0; d < ncomp; d++)
            for (CeedInt i=0; i < elemsize; i++) {
              vv[indices[i + elemsize*e]+ndof*d] +=
                uu[i + elemsize*(d+e*ncomp)];
            }
#endif
      } else { // vv is (ncomp x ndof), column-major
#ifdef USE_MAGMA_BATCH2
        magma_template<<e=0:nelem, d=0:ncomp, i=0:elemsize>>
        (const CeedScalar *uu, CeedScalar *vv, CeedInt *dindices) {
          magmablas_datomic_add( &vv[d+dend*dindices[i + iend*e]],
                                 uu[i+iend*(d+e*dend)]);
        }
#else
        for (CeedInt e = 0; e < nelem; e++)
          for (CeedInt d = 0; d < ncomp; d++)
            for (CeedInt i=0; i < elemsize; i++) {
              vv[d+ncomp*indices[i + elemsize*e]] +=
                uu[i + elemsize*(d+e*ncomp)];
            }
#endif
      }
    }
  }

  ierr = CeedVectorRestoreArrayRead(u, &uu); CeedChk(ierr);
  ierr = CeedVectorRestoreArray(v, &vv); CeedChk(ierr);

  if (request != CEED_REQUEST_IMMEDIATE && request != CEED_REQUEST_ORDERED)
    *request = NULL;
  return 0;
}

// Release the restriction's backend data; index buffers freed only if owned.
static int CeedElemRestrictionDestroy_Magma(CeedElemRestriction r) {
  CeedElemRestriction_Magma *impl = r->data;
  int ierr;

  // Free if we own the data
  if (impl->own_) {
    ierr = magma_free_pinned(impl->indices); CeedChk(ierr);
    ierr = magma_free(impl->dindices); CeedChk(ierr);
  } else if (impl->down_) {
    ierr = magma_free(impl->dindices); CeedChk(ierr);
  }
  ierr = CeedFree(&r->data); CeedChk(ierr);
  return 0;
}

// Create an element restriction from the given indices; mirrors the
// host/device ownership logic of CeedVectorSetArray_Magma.
static int CeedElemRestrictionCreate_Magma(CeedMemType mtype,
                                           CeedCopyMode cmode,
                                           const CeedInt *indices,
                                           CeedElemRestriction r) {
  int ierr, size = r->nelem*r->elemsize;
  CeedElemRestriction_Magma *impl;

  // Allocate memory for the MAGMA Restriction and initialize pointers to NULL
  ierr = CeedCalloc(1,&impl); CeedChk(ierr);
  impl->dindices = NULL;
  impl->indices = NULL;
  impl->own_ = 0;
  impl->down_= 0;

  if (mtype == CEED_MEM_HOST) {
    // memory is on the host; own_ = 0
    switch (cmode) {
    case CEED_COPY_VALUES:
      ierr = magma_malloc( (void **)&impl->dindices,
                           size * sizeof(CeedInt)); CeedChk(ierr);
      ierr = magma_malloc_pinned( (void **)&impl->indices,
                                  size * sizeof(CeedInt)); CeedChk(ierr);
      impl->own_ = 1;

      if (indices != NULL) {
        // keep a host copy and upload it to the device mirror
        memcpy(impl->indices, indices, size * sizeof(indices[0]));
        magma_setvector(size, sizeof(CeedInt),
                        impl->indices, 1, impl->dindices, 1);
      }
      break;
    case CEED_OWN_POINTER:
      ierr = magma_malloc( (void **)&impl->dindices,
                           size * sizeof(CeedInt)); CeedChk(ierr);
      // TODO: possible problem here is if we are passed non-pinned memory;
      // (as we own it, later in destroy, we use free for pinned memory).
      impl->indices = (CeedInt *)indices;
      impl->own_ = 1;

      if (indices != NULL)
        magma_setvector(size, sizeof(CeedInt),
                        indices, 1, impl->dindices, 1);
      break;
    case CEED_USE_POINTER:
      // NOTE(review): indices is uploaded unconditionally -- assumes
      // USE_POINTER callers never pass NULL; confirm.
      ierr = magma_malloc( (void **)&impl->dindices,
                           size * sizeof(CeedInt)); CeedChk(ierr);
      magma_setvector(size, sizeof(CeedInt),
                      indices, 1, impl->dindices, 1);
      impl->down_ = 1;
      impl->indices = (CeedInt *)indices;
    }
  } else if (mtype == CEED_MEM_DEVICE) {
    // memory is on the device; own = 0
    switch (cmode) {
    case CEED_COPY_VALUES:
      ierr = magma_malloc( (void **)&impl->dindices,
                           size * sizeof(CeedInt)); CeedChk(ierr);
      ierr = magma_malloc_pinned( (void **)&impl->indices,
                                  size * sizeof(CeedInt)); CeedChk(ierr);
      impl->own_ = 1;

      if (indices)
        magma_copyvector(size, sizeof(CeedInt),
                         indices, 1, impl->dindices, 1);
      break;
    case CEED_OWN_POINTER:
      // Adopt the caller's device pointer; allocate a host mirror
      impl->dindices = (CeedInt *)indices;
      ierr = magma_malloc_pinned( (void **)&impl->indices,
                                  size * sizeof(CeedInt)); CeedChk(ierr);
      impl->own_ = 1;

      break;
    case CEED_USE_POINTER:
      // No ownership: alias the user's device pointer only
      impl->dindices = (CeedInt *)indices;
      impl->indices = NULL;
    }

  } else
    return CeedError(r->ceed, 1, "Only MemType = HOST or DEVICE supported");

  r->data = impl;
  r->Apply = CeedElemRestrictionApply_Magma;
  r->Destroy = CeedElemRestrictionDestroy_Magma;

  return 0;
}

// Blocked restrictions are not provided by this backend.
static int CeedElemRestrictionCreateBlocked_Magma(CeedMemType mtype,
                                                  CeedCopyMode cmode,
                                                  const CeedInt *indices,
                                                  CeedElemRestriction r) {
  return CeedError(r->ceed, 1, "Backend does not implement blocked restrictions");
}
562 563 // Contracts on the middle index 564 // NOTRANSPOSE: V_ajc = T_jb U_abc 565 // TRANSPOSE: V_ajc = T_bj U_abc 566 // If Add != 0, "=" is replaced by "+=" 567 static int CeedTensorContract_Magma(Ceed ceed, 568 CeedInt A, CeedInt B, CeedInt C, CeedInt J, 569 const CeedScalar *t, CeedTransposeMode tmode, 570 const CeedInt Add, 571 const CeedScalar *u, CeedScalar *v) { 572 #ifdef USE_MAGMA_BATCH 573 magma_dtensor_contract(ceed, A, B, C, J, t, tmode, Add, u, v); 574 #else 575 CeedInt tstride0 = B, tstride1 = 1; 576 if (tmode == CEED_TRANSPOSE) { 577 tstride0 = 1; tstride1 = J; 578 } 579 CeedDebug("\033[31m[CeedTensorContract] A=%d, J=%d, C=%d, B=%d: %d %d %d", 580 A,J,C,B,A*J*B*C, C*J*A, C*B*A); 581 for (CeedInt a=0; a<A; a++) { 582 for (CeedInt j=0; j<J; j++) { 583 if (!Add) { 584 for (CeedInt c=0; c<C; c++) 585 v[(a*J+j)*C+c] = 0; 586 } 587 for (CeedInt b=0; b<B; b++) { 588 for (CeedInt c=0; c<C; c++) { 589 v[(a*J+j)*C+c] += t[j*tstride0 + b*tstride1] * u[(a*B+b)*C+c]; 590 } 591 } 592 } 593 } 594 #endif 595 return 0; 596 } 597 598 static int CeedBasisApply_Magma(CeedBasis basis, CeedInt nelem, 599 CeedTransposeMode tmode, CeedEvalMode emode, 600 CeedVector U, CeedVector V) { 601 int ierr; 602 const CeedInt dim = basis->dim; 603 const CeedInt ncomp = basis->ncomp; 604 const CeedInt nqpt = ncomp*CeedPowInt(basis->Q1d, dim); 605 const CeedInt add = (tmode == CEED_TRANSPOSE); 606 const CeedScalar *u; 607 CeedScalar *v; 608 if (U) { 609 ierr = CeedVectorGetArrayRead(U, CEED_MEM_HOST, &u); CeedChk(ierr); 610 } else if (emode != CEED_EVAL_WEIGHT) { 611 return CeedError(ceed, 1, 612 "An input vector is required for this CeedEvalMode"); 613 } 614 ierr = CeedVectorGetArray(V, CEED_MEM_HOST, &v); CeedChk(ierr); 615 616 if (nelem != 1) 617 return CeedError(basis->ceed, 1, 618 "This backend does not support BasisApply for multiple elements"); 619 620 CeedDebug("\033[01m[CeedBasisApply_Magma] vsize=%d", 621 ncomp*CeedPowInt(basis->P1d, dim)); 622 623 if (tmode == 
CEED_TRANSPOSE) { 624 const CeedInt vsize = ncomp*CeedPowInt(basis->P1d, dim); 625 for (CeedInt i = 0; i < vsize; i++) 626 v[i] = (CeedScalar) 0; 627 } 628 if (emode & CEED_EVAL_INTERP) { 629 CeedInt P = basis->P1d, Q = basis->Q1d; 630 if (tmode == CEED_TRANSPOSE) { 631 P = basis->Q1d; Q = basis->P1d; 632 } 633 CeedInt pre = ncomp*CeedPowInt(P, dim-1), post = 1; 634 CeedScalar tmp[2][ncomp*Q*CeedPowInt(P>Q?P:Q, dim-1)]; 635 CeedDebug("\033[01m[CeedBasisApply_Magma] tmpsize = %d", 636 ncomp*Q*CeedPowInt(P>Q?P:Q, dim-1)); 637 for (CeedInt d=0; d<dim; d++) { 638 ierr = CeedTensorContract_Magma(basis->ceed, pre, P, post, Q, basis->interp1d, 639 tmode, add&&(d==dim-1), 640 d==0?u:tmp[d%2], d==dim-1?v:tmp[(d+1)%2]); 641 CeedChk(ierr); 642 pre /= P; 643 post *= Q; 644 } 645 if (tmode == CEED_NOTRANSPOSE) { 646 v += nqpt; 647 } else { 648 u += nqpt; 649 } 650 } 651 if (emode & CEED_EVAL_GRAD) { 652 CeedInt P = basis->P1d, Q = basis->Q1d; 653 // In CEED_NOTRANSPOSE mode: 654 // u is (P^dim x nc), column-major layout (nc = ncomp) 655 // v is (Q^dim x nc x dim), column-major layout (nc = ncomp) 656 // In CEED_TRANSPOSE mode, the sizes of u and v are switched. 
657 if (tmode == CEED_TRANSPOSE) { 658 P = basis->Q1d, Q = basis->P1d; 659 } 660 CeedScalar tmp[2][ncomp*Q*CeedPowInt(P>Q?P:Q, dim-1)]; 661 CeedDebug("\033[01m[CeedBasisApply_Magma] tmpsize = %d", 662 ncomp*Q*CeedPowInt(P>Q?P:Q, dim-1)); 663 for (CeedInt p = 0; p < dim; p++) { 664 CeedInt pre = ncomp*CeedPowInt(P, dim-1), post = 1; 665 for (CeedInt d=0; d<dim; d++) { 666 ierr = CeedTensorContract_Magma(basis->ceed, pre, P, post, Q, 667 (p==d)?basis->grad1d:basis->interp1d, 668 tmode, add&&(d==dim-1), 669 d==0?u:tmp[d%2], d==dim-1?v:tmp[(d+1)%2]); 670 CeedChk(ierr); 671 pre /= P; 672 post *= Q; 673 } 674 if (tmode == CEED_NOTRANSPOSE) { 675 v += nqpt; 676 } else { 677 u += nqpt; 678 } 679 } 680 } 681 if (emode & CEED_EVAL_WEIGHT) { 682 if (tmode == CEED_TRANSPOSE) 683 return CeedError(basis->ceed, 1, 684 "CEED_EVAL_WEIGHT incompatible with CEED_TRANSPOSE"); 685 CeedInt Q = basis->Q1d; 686 for (CeedInt d=0; d<dim; d++) { 687 CeedInt pre = CeedPowInt(Q, dim-d-1), post = CeedPowInt(Q, d); 688 for (CeedInt i=0; i<pre; i++) { 689 for (CeedInt j=0; j<Q; j++) { 690 for (CeedInt k=0; k<post; k++) { 691 v[(i*Q + j)*post + k] = basis->qweight1d[j] 692 * (d == 0 ? 
1 : v[(i*Q + j)*post + k]); 693 } 694 } 695 } 696 } 697 } 698 if (U) { 699 ierr = CeedVectorRestoreArrayRead(U, &u); CeedChk(ierr); 700 } 701 ierr = CeedVectorRestoreArray(V, &v); CeedChk(ierr); 702 return 0; 703 } 704 705 static int CeedBasisDestroy_Magma(CeedBasis basis) { 706 return 0; 707 } 708 709 static int CeedBasisCreateTensorH1_Magma(CeedInt dim, CeedInt P1d, 710 CeedInt Q1d, const CeedScalar *interp1d, 711 const CeedScalar *grad1d, 712 const CeedScalar *qref1d, 713 const CeedScalar *qweight1d, 714 CeedBasis basis) { 715 basis->Apply = CeedBasisApply_Magma; 716 basis->Destroy = CeedBasisDestroy_Magma; 717 return 0; 718 } 719 720 static int CeedBasisCreateH1_Magma(CeedElemTopology topo, CeedInt dim, 721 CeedInt ndof, CeedInt nqpts, 722 const CeedScalar *interp, 723 const CeedScalar *grad, 724 const CeedScalar *qref, 725 const CeedScalar *qweight, 726 CeedBasis basis) { 727 return CeedError(basis->ceed, 1, "Backend does not implement non-tensor bases"); 728 } 729 730 static int CeedQFunctionApply_Magma(CeedQFunction qf, CeedInt Q, 731 CeedVector *U, CeedVector *V) { 732 int ierr; 733 CeedQFunction_Ref *impl; 734 ierr = CeedQFunctionGetData(qf, (void *)&impl); CeedChk(ierr); 735 736 void *ctx; 737 ierr = CeedQFunctionGetContext(qf, &ctx); CeedChk(ierr); 738 739 int (*f)() = NULL; 740 ierr = CeedQFunctionGetUserFunction(qf, (int (* *)())&f); CeedChk(ierr); 741 742 CeedInt nIn, nOut; 743 ierr = CeedQFunctionGetNumArgs(qf, &nIn, &nOut); CeedChk(ierr); 744 745 for (int i = 0; i<nIn; i++) { 746 if (U[i]) { 747 ierr = CeedVectorGetArrayRead(U[i], CEED_MEM_HOST, &impl->inputs[i]); 748 CeedChk(ierr); 749 } 750 } 751 for (int i = 0; i<nOut; i++) { 752 if (U[i]) { 753 ierr = CeedVectorGetArray(V[i], CEED_MEM_HOST, &impl->outputs[i]); 754 CeedChk(ierr); 755 } 756 } 757 758 ierr = f(ctx, Q, impl->inputs, impl->outputs); CeedChk(ierr); 759 760 for (int i = 0; i<nIn; i++) { 761 if (U[i]) { 762 ierr = CeedVectorRestoreArrayRead(U[i], &impl->inputs[i]); CeedChk(ierr); 763 } 
764 } 765 for (int i = 0; i<nOut; i++) { 766 if (U[i]) { 767 ierr = CeedVectorRestoreArray(V[i], &impl->outputs[i]); CeedChk(ierr); 768 } 769 } 770 return 0; 771 } 772 773 static int CeedQFunctionDestroy_Magma(CeedQFunction qf) { 774 int ierr; 775 CeedQFunction_Magma *impl; 776 ierr = CeedQFunctionGetData(qf, (void *)&impl); CeedChk(ierr); 777 778 ierr = CeedFree(&impl->inputs); CeedChk(ierr); 779 ierr = CeedFree(&impl->outputs); CeedChk(ierr); 780 ierr = CeedFree(&impl); CeedChk(ierr); 781 782 return 0; 783 } 784 785 static int CeedQFunctionCreate_Magma(CeedQFunction qf) { 786 int ierr; 787 Ceed ceed; 788 ierr = CeedQFunctionGetCeed(qf, &ceed); CeedChk(ierr); 789 790 CeedQFunction_Magma *impl; 791 ierr = CeedCalloc(1, &impl); CeedChk(ierr); 792 ierr = CeedCalloc(16, &impl->inputs); CeedChk(ierr); 793 ierr = CeedCalloc(16, &impl->outputs); CeedChk(ierr); 794 ierr = CeedQFunctionSetData(qf, (void *)&impl); CeedChk(ierr); 795 796 qf->Apply = CeedQFunctionApply_Magma; 797 qf->Destroy = CeedQFunctionDestroy_Magma; 798 return 0; 799 } 800 801 static int CeedOperatorDestroy_Magma(CeedOperator op) { 802 int ierr; 803 CeedOperator_Magma *impl; 804 ierr = CeedOperatorGetData(op, (void *)&impl); CeedChk(ierr); 805 806 for (CeedInt i=0; i<impl->numein+impl->numeout; i++) { 807 if (impl->Evecs[i]) { 808 ierr = CeedVectorDestroy(&impl->Evecs[i]); CeedChk(ierr); 809 } 810 } 811 ierr = CeedFree(&impl->Evecs); CeedChk(ierr); 812 ierr = CeedFree(&impl->Edata); CeedChk(ierr); 813 814 for (CeedInt i=0; i<impl->numein; i++) { 815 ierr = CeedVectorDestroy(&impl->evecsin[i]); CeedChk(ierr); 816 ierr = CeedVectorDestroy(&impl->qvecsin[i]); CeedChk(ierr); 817 } 818 ierr = CeedFree(&impl->evecsin); CeedChk(ierr); 819 ierr = CeedFree(&impl->qvecsin); CeedChk(ierr); 820 821 for (CeedInt i=0; i<impl->numeout; i++) { 822 ierr = CeedVectorDestroy(&impl->evecsout[i]); CeedChk(ierr); 823 ierr = CeedVectorDestroy(&impl->qvecsout[i]); CeedChk(ierr); 824 } 825 ierr = CeedFree(&impl->evecsout); 
CeedChk(ierr); 826 ierr = CeedFree(&impl->qvecsout); CeedChk(ierr); 827 828 829 ierr = CeedFree(&impl); CeedChk(ierr); 830 return 0; 831 } 832 833 834 /* 835 Setup infields or outfields 836 */ 837 static int CeedOperatorSetupFields_Magma(CeedQFunction qf, CeedOperator op, 838 bool inOrOut, 839 CeedVector *fullevecs, CeedVector *evecs, 840 CeedVector *qvecs, CeedInt starte, 841 CeedInt numfields, CeedInt Q) { 842 CeedInt dim = 1, ierr, ncomp; 843 Ceed ceed; 844 ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr); 845 CeedQFunction_Magma *qf_data; 846 ierr = CeedQFunctionGetData(qf, (void *)&qf_data); CeedChk(ierr); 847 CeedBasis basis; 848 CeedElemRestriction Erestrict; 849 CeedOperatorField *opfields; 850 CeedQFunctionField *qffields; 851 if (inOrOut) { 852 ierr = CeedOperatorGetFields(op, NULL, &opfields); 853 CeedChk(ierr); 854 ierr = CeedQFunctionGetFields(qf, NULL, &qffields); 855 CeedChk(ierr); 856 } else { 857 ierr = CeedOperatorGetFields(op, &opfields, NULL); 858 CeedChk(ierr); 859 ierr = CeedQFunctionGetFields(qf, &qffields, NULL); 860 CeedChk(ierr); 861 } 862 863 // Loop over fields 864 for (CeedInt i=0; i<numfields; i++) { 865 CeedEvalMode emode; 866 ierr = CeedQFunctionFieldGetEvalMode(qffields[i], &emode); CeedChk(ierr); 867 if (emode != CEED_EVAL_WEIGHT) { 868 ierr = CeedOperatorFieldGetElemRestriction(opfields[i], &Erestrict); 869 CeedChk(ierr); 870 ierr = CeedElemRestrictionCreateVector(Erestrict, NULL, &fullevecs[i+starte]); 871 CeedChk(ierr); 872 } else { 873 } 874 switch(emode) { 875 case CEED_EVAL_NONE: 876 ierr = CeedQFunctionFieldGetNumComponents(qffields[i], &ncomp); 877 CeedChk(ierr); 878 ierr = CeedVectorCreate(ceed, Q*ncomp, &evecs[i]); CeedChk(ierr); 879 ierr = CeedVectorCreate(ceed, Q*ncomp, &qvecs[i]); CeedChk(ierr); 880 break; // No action 881 case CEED_EVAL_INTERP: 882 ierr = CeedOperatorFieldGetBasis(opfields[i], &basis); CeedChk(ierr); 883 ierr = CeedQFunctionFieldGetNumComponents(qffields[i], &ncomp); 884 CeedChk(ierr); 885 ierr = 
CeedVectorCreate(ceed, Q*ncomp, &evecs[i]); CeedChk(ierr); 886 ierr = CeedVectorCreate(ceed, Q*ncomp, &qvecs[i]); CeedChk(ierr); 887 break; 888 case CEED_EVAL_GRAD: 889 ierr = CeedOperatorFieldGetBasis(opfields[i], &basis); CeedChk(ierr); 890 ierr = CeedQFunctionFieldGetNumComponents(qffields[i], &ncomp); 891 ierr = CeedBasisGetDimension(basis, &dim); CeedChk(ierr); 892 ierr = CeedQFunctionFieldGetNumComponents(qffields[i], &ncomp); 893 CeedChk(ierr); 894 ierr = CeedVectorCreate(ceed, Q*ncomp, &evecs[i]); CeedChk(ierr); 895 ierr = CeedVectorCreate(ceed, Q*ncomp*dim, &qvecs[i]); CeedChk(ierr); 896 break; 897 case CEED_EVAL_WEIGHT: // Only on input fields 898 ierr = CeedOperatorFieldGetBasis(opfields[i], &basis); CeedChk(ierr); 899 ierr = CeedBasisGetDimension(basis, &dim); CeedChk(ierr); 900 ierr = CeedVectorCreate(ceed, Q, &qvecs[i]); CeedChk(ierr); 901 ierr = CeedBasisApply(basis, 1, CEED_NOTRANSPOSE, CEED_EVAL_WEIGHT, 902 NULL, qvecs[i]); CeedChk(ierr); 903 assert(starte==0); 904 break; 905 case CEED_EVAL_DIV: break; // Not implemented 906 case CEED_EVAL_CURL: break; // Not implemented 907 } 908 } 909 return 0; 910 } 911 912 /* 913 CeedOperator needs to connect all the named fields (be they active or passive) 914 to the named inputs and outputs of its CeedQFunction. 
915 */ 916 static int CeedOperatorSetup_Magma(CeedOperator op) { 917 int ierr; 918 bool setupdone; 919 ierr = CeedOperatorGetSetupStatus(op, &setupdone); CeedChk(ierr); 920 if (setupdone) return 0; 921 Ceed ceed; 922 ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr); 923 CeedOperator_Magma *data; 924 ierr = CeedOperatorGetData(op, (void *)&data); CeedChk(ierr); 925 CeedQFunction qf; 926 ierr = CeedOperatorGetQFunction(op, &qf); CeedChk(ierr); 927 CeedInt Q, numinputfields, numoutputfields; 928 ierr = CeedOperatorGetNumQuadraturePoints(op, &Q); CeedChk(ierr); 929 ierr = CeedQFunctionGetNumArgs(qf, &numinputfields, &numoutputfields); 930 CeedOperatorField *opinputfields, *opoutputfields; 931 ierr = CeedOperatorGetFields(op, &opinputfields, &opoutputfields); 932 CeedChk(ierr); 933 CeedQFunctionField *qfinputfields, *qfoutputfields; 934 ierr = CeedQFunctionGetFields(qf, &qfinputfields, &qfoutputfields); 935 CeedChk(ierr); 936 937 data->numein = numinputfields; 938 data->numeout = numoutputfields; 939 940 // Allocate 941 const CeedInt numIO = numinputfields + numoutputfields; 942 943 ierr = CeedCalloc(numinputfields + numoutputfields, &data->Evecs); 944 CeedChk(ierr); 945 ierr = CeedCalloc(numinputfields + numoutputfields, &data->Edata); 946 CeedChk(ierr); 947 948 ierr = CeedCalloc(16, &data->evecsin); CeedChk(ierr); 949 ierr = CeedCalloc(16, &data->evecsout); CeedChk(ierr); 950 ierr = CeedCalloc(16, &data->qvecsin); CeedChk(ierr); 951 ierr = CeedCalloc(16, &data->qvecsout); CeedChk(ierr); 952 953 // Set up infield and outfield pointer arrays 954 // Infields 955 ierr = CeedOperatorSetupFields_Magma(qf, op, 0, data->Evecs, 956 data->evecsin, data->qvecsin, 0, 957 numinputfields, Q); 958 CeedChk(ierr); 959 // Outfields 960 ierr = CeedOperatorSetupFields_Magma(qf, op, 1, data->Evecs, 961 data->evecsout, data->qvecsout, 962 numinputfields, numoutputfields, Q); 963 CeedChk(ierr); 964 ierr = CeedOperatorSetSetupDone(op); CeedChk(ierr); 965 return 0; 966 } 967 968 static 
// Apply the operator: restrict the input L-vectors to E-vectors, apply the
// input bases element-by-element, call the QFunction at quadrature points,
// then apply the transpose bases and transpose restrictions to assemble the
// output L-vectors (the active output is accumulated into outvec).
int CeedOperatorApply_Magma(CeedOperator op, CeedVector invec,
                            CeedVector outvec, CeedRequest *request) {
  int ierr;
  Ceed ceed;
  ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr);
  CeedOperator_Magma *data;
  ierr = CeedOperatorGetData(op, (void *)&data); CeedChk(ierr);
  CeedInt Q, elemsize, numelements, numinputfields, numoutputfields, ncomp;
  ierr = CeedOperatorGetNumQuadraturePoints(op, &Q); CeedChk(ierr);
  ierr = CeedOperatorGetNumElements(op, &numelements); CeedChk(ierr);
  CeedQFunction qf;
  ierr = CeedOperatorGetQFunction(op, &qf); CeedChk(ierr);
  CeedTransposeMode lmode;
  CeedOperatorField *opinputfields, *opoutputfields;
  ierr = CeedOperatorGetFields(op, &opinputfields, &opoutputfields);
  CeedChk(ierr);
  CeedQFunctionField *qfinputfields, *qfoutputfields;
  ierr = CeedQFunctionGetFields(qf, &qfinputfields, &qfoutputfields);
  CeedChk(ierr);
  CeedEvalMode emode;
  CeedVector vec;
  CeedBasis basis;
  CeedElemRestriction Erestrict;

  // Lazy setup of E-/Q-vectors (no-op if already done)
  ierr = CeedOperatorSetup_Magma(op); CeedChk(ierr);
  ierr = CeedQFunctionGetNumArgs(qf, &numinputfields, &numoutputfields);
  CeedChk(ierr);

  // Input Evecs and Restriction
  for (CeedInt i=0; i<numinputfields; i++) {
    ierr = CeedQFunctionFieldGetEvalMode(qfinputfields[i], &emode);
    CeedChk(ierr);
    if (emode & CEED_EVAL_WEIGHT) continue; // weight fields have no L-vector
    // Get input vector; the active field reads from invec
    ierr = CeedOperatorFieldGetVector(opinputfields[i], &vec); CeedChk(ierr);
    if (vec == CEED_VECTOR_ACTIVE)
      vec = invec;
    // Restrict L-vector to E-vector
    ierr = CeedOperatorFieldGetElemRestriction(opinputfields[i], &Erestrict);
    CeedChk(ierr);
    ierr = CeedOperatorFieldGetLMode(opinputfields[i], &lmode); CeedChk(ierr);
    ierr = CeedElemRestrictionApply(Erestrict, CEED_NOTRANSPOSE,
                                   lmode, vec, data->Evecs[i],
                                   request); CeedChk(ierr);
    // Get read-only access to the E-vector data
    ierr = CeedVectorGetArrayRead(data->Evecs[i], CEED_MEM_HOST,
                                  (const CeedScalar **) &data->Edata[i]);
    CeedChk(ierr);
  }

  // Output Evecs
  for (CeedInt i=0; i<numoutputfields; i++) {
    ierr = CeedVectorGetArray(data->Evecs[i+data->numein], CEED_MEM_HOST,
                              &data->Edata[i + numinputfields]); CeedChk(ierr);
  }

  // Loop through elements
  for (CeedInt e=0; e<numelements; e++) {
    // Input basis apply if needed
    for (CeedInt i=0; i<numinputfields; i++) {
      // Get elemsize, emode, ncomp
      ierr = CeedOperatorFieldGetElemRestriction(opinputfields[i], &Erestrict);
      CeedChk(ierr);
      ierr = CeedElemRestrictionGetElementSize(Erestrict, &elemsize);
      CeedChk(ierr);
      ierr = CeedQFunctionFieldGetEvalMode(qfinputfields[i], &emode);
      CeedChk(ierr);
      ierr = CeedQFunctionFieldGetNumComponents(qfinputfields[i], &ncomp);
      CeedChk(ierr);
      // Basis action
      switch(emode) {
      case CEED_EVAL_NONE:
        // No basis action: point the Q-vector at this element's E-data
        ierr = CeedVectorSetArray(data->qvecsin[i], CEED_MEM_HOST,
                                  CEED_USE_POINTER,
                                  &data->Edata[i][e*Q*ncomp]); CeedChk(ierr);
        break;
      case CEED_EVAL_INTERP:
        ierr = CeedOperatorFieldGetBasis(opinputfields[i], &basis);
        CeedChk(ierr);
        ierr = CeedVectorSetArray(data->evecsin[i], CEED_MEM_HOST,
                                  CEED_USE_POINTER,
                                  &data->Edata[i][e*elemsize*ncomp]);
        CeedChk(ierr);
        ierr = CeedBasisApply(basis, 1, CEED_NOTRANSPOSE,
                              CEED_EVAL_INTERP, data->evecsin[i],
                              data->qvecsin[i]); CeedChk(ierr);
        break;
      case CEED_EVAL_GRAD:
        ierr = CeedOperatorFieldGetBasis(opinputfields[i], &basis);
        CeedChk(ierr);
        ierr = CeedVectorSetArray(data->evecsin[i], CEED_MEM_HOST,
                                  CEED_USE_POINTER,
                                  &data->Edata[i][e*elemsize*ncomp]);
        CeedChk(ierr);
        ierr = CeedBasisApply(basis, 1, CEED_NOTRANSPOSE,
                              CEED_EVAL_GRAD, data->evecsin[i],
                              data->qvecsin[i]); CeedChk(ierr);
        break;
      case CEED_EVAL_WEIGHT:
        break; // No action: weight qvec was filled once during setup
      case CEED_EVAL_DIV:
        break; // Not implemented
      case CEED_EVAL_CURL:
        break; // Not implemented
      }
    }

    // Output pointers: only CEED_EVAL_NONE needs its Q-vector aimed at the
    // E-data before the QFunction runs; INTERP/GRAD write to qvecsout and
    // are mapped back by the transpose basis apply below
    for (CeedInt i=0; i<numoutputfields; i++) {
      ierr = CeedQFunctionFieldGetEvalMode(qfoutputfields[i], &emode);
      CeedChk(ierr);
      if (emode == CEED_EVAL_NONE) {
        ierr = CeedQFunctionFieldGetNumComponents(qfoutputfields[i], &ncomp);
        CeedChk(ierr);
        ierr = CeedVectorSetArray(data->qvecsout[i], CEED_MEM_HOST,
                                  CEED_USE_POINTER,
                                  &data->Edata[i + numinputfields][e*Q*ncomp]);
        CeedChk(ierr);
      }
    }

    // Q function
    ierr = CeedQFunctionApply(qf, Q, data->qvecsin, data->qvecsout);
    CeedChk(ierr);

    // Output basis apply if needed
    for (CeedInt i=0; i<numoutputfields; i++) {
      // Get elemsize, emode, ncomp
      ierr = CeedOperatorFieldGetElemRestriction(opoutputfields[i], &Erestrict);
      CeedChk(ierr);
      ierr = CeedElemRestrictionGetElementSize(Erestrict, &elemsize);
      CeedChk(ierr);
      ierr = CeedQFunctionFieldGetEvalMode(qfoutputfields[i], &emode);
      CeedChk(ierr);
      ierr = CeedQFunctionFieldGetNumComponents(qfoutputfields[i], &ncomp);
      CeedChk(ierr);
      // Basis action
      switch(emode) {
      case CEED_EVAL_NONE:
        break; // No action
      case CEED_EVAL_INTERP:
        ierr = CeedOperatorFieldGetBasis(opoutputfields[i], &basis);
        CeedChk(ierr);
        ierr = CeedVectorSetArray(data->evecsout[i], CEED_MEM_HOST,
                                  CEED_USE_POINTER,
                                  &data->Edata[i + numinputfields][e*elemsize*ncomp]);
        CeedChk(ierr); // was previously unchecked
        ierr = CeedBasisApply(basis, 1, CEED_TRANSPOSE,
                              CEED_EVAL_INTERP, data->qvecsout[i],
                              data->evecsout[i]); CeedChk(ierr);
        break;
      case CEED_EVAL_GRAD:
        ierr = CeedOperatorFieldGetBasis(opoutputfields[i], &basis);
        CeedChk(ierr);
        ierr = CeedVectorSetArray(data->evecsout[i], CEED_MEM_HOST,
                                  CEED_USE_POINTER,
                                  &data->Edata[i + numinputfields][e*elemsize*ncomp]);
        CeedChk(ierr); // was previously unchecked
        ierr = CeedBasisApply(basis, 1, CEED_TRANSPOSE,
                              CEED_EVAL_GRAD, data->qvecsout[i],
                              data->evecsout[i]); CeedChk(ierr);
        break;
      case CEED_EVAL_WEIGHT:
        break; // Should not occur
      case CEED_EVAL_DIV:
        break; // Not implemented
      case CEED_EVAL_CURL:
        break; // Not implemented
      }
    }
  } // numelements

  // Zero lvecs before the accumulating transpose restriction below
  for (CeedInt i=0; i<numoutputfields; i++) {
    ierr = CeedOperatorFieldGetVector(opoutputfields[i], &vec); CeedChk(ierr);
    if (vec == CEED_VECTOR_ACTIVE)
      vec = outvec;
    ierr = CeedVectorSetValue(vec, 0.0); CeedChk(ierr);
  }

  // Output restriction
  for (CeedInt i=0; i<numoutputfields; i++) {
    // Restore evec
    ierr = CeedVectorRestoreArray(data->Evecs[i+data->numein],
                                  &data->Edata[i + numinputfields]);
    CeedChk(ierr);
    // Get output vector; the active field writes to outvec
    ierr = CeedOperatorFieldGetVector(opoutputfields[i], &vec); CeedChk(ierr);
    if (vec == CEED_VECTOR_ACTIVE)
      vec = outvec;
    // Restrict E-vector back to L-vector (transpose accumulates)
    ierr = CeedOperatorFieldGetElemRestriction(opoutputfields[i], &Erestrict);
    CeedChk(ierr);
    ierr = CeedOperatorFieldGetLMode(opoutputfields[i], &lmode); CeedChk(ierr);
    ierr = CeedElemRestrictionApply(Erestrict, CEED_TRANSPOSE,
                                   lmode, data->Evecs[i+data->numein], vec,
                                   request); CeedChk(ierr);
  }

  // Restore input arrays
  for (CeedInt i=0; i<numinputfields; i++) {
    ierr = CeedQFunctionFieldGetEvalMode(qfinputfields[i], &emode);
    CeedChk(ierr);
    if (emode & CEED_EVAL_WEIGHT) continue; // no array was acquired above
    ierr = CeedVectorRestoreArrayRead(data->Evecs[i],
                                      (const CeedScalar **) &data->Edata[i]);
    CeedChk(ierr);
  }
  return 0;
}

// *****************************************************************************
// * Create a MAGMA operator: allocate backend data, register Apply/Destroy
// *****************************************************************************
static int CeedOperatorCreate_Magma(CeedOperator op) {
  int ierr;
  CeedOperator_Magma *impl;
  Ceed ceed;
  ierr =
CeedOperatorGetCeed(op, &ceed); CeedChk(ierr); 1188 1189 ierr = CeedCalloc(1, &impl); CeedChk(ierr); 1190 ierr = CeedOperatorSetData(op, (void *)&impl); 1191 1192 ierr = CeedSetBackendFunction(ceed, "Operator", op, "Apply", 1193 CeedOperatorApply_Magma); CeedChk(ierr); 1194 ierr = CeedSetBackendFunction(ceed, "Operator", op, "Destroy", 1195 CeedOperatorDestroy_Magma); CeedChk(ierr); 1196 return 0; 1197 } 1198 1199 int CeedCompositeOperatorCreate_Magma(CeedOperator op) { 1200 int ierr; 1201 Ceed ceed; 1202 ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr); 1203 return CeedError(ceed, 1, "Backend does not support composite operators"); 1204 } 1205 1206 // ***************************************************************************** 1207 // * INIT 1208 // ***************************************************************************** 1209 static int CeedInit_Magma(const char *resource, Ceed ceed) { 1210 int ierr; 1211 if (strcmp(resource, "/gpu/magma")) 1212 return CeedError(ceed, 1, "MAGMA backend cannot use resource: %s", resource); 1213 1214 ierr = magma_init(); 1215 if (ierr) return CeedError(ceed, 1, "error in magma_init(): %d\n", ierr); 1216 //magma_print_environment(); 1217 1218 ceed->VecCreate = CeedVectorCreate_Magma; 1219 ceed->BasisCreateTensorH1 = CeedBasisCreateTensorH1_Magma; 1220 ceed->BasisCreateH1 = CeedBasisCreateH1_Magma; 1221 ceed->ElemRestrictionCreate = CeedElemRestrictionCreate_Magma; 1222 ceed->ElemRestrictionCreateBlocked = CeedElemRestrictionCreateBlocked_Magma; 1223 ceed->QFunctionCreate = CeedQFunctionCreate_Magma; 1224 ceed->OperatorCreate = CeedOperatorCreate_Magma; 1225 ceed->CompositeOperatorCreate = CeedCompositeOperatorCreate_Magma; 1226 return 0; 1227 } 1228 1229 // ***************************************************************************** 1230 // * REGISTER 1231 // ***************************************************************************** 1232 __attribute__((constructor)) 1233 static void Register(void) { 1234 
CeedRegister("/gpu/magma", CeedInit_Magma, 20); 1235 } 1236