1 // Copyright (c) 2017, Lawrence Livermore National Security, LLC. Produced at 2 // the Lawrence Livermore National Laboratory. LLNL-CODE-734707. All Rights 3 // reserved. See files LICENSE and NOTICE for details. 4 // 5 // This file is part of CEED, a collection of benchmarks, miniapps, software 6 // libraries and APIs for efficient high-order finite element and spectral 7 // element discretizations for exascale applications. For more information and 8 // source code availability see http://github.com/ceed. 9 // 10 // The CEED research is supported by the Exascale Computing Project 17-SC-20-SC, 11 // a collaborative effort of two U.S. Department of Energy organizations (Office 12 // of Science and the National Nuclear Security Administration) responsible for 13 // the planning and preparation of a capable exascale ecosystem, including 14 // software, applications, hardware, advanced system engineering and early 15 // testbed platforms, in support of the nation's exascale computing imperative. 
16 17 #include "ceed-magma.h" 18 #include <string.h> 19 20 typedef struct { 21 CeedScalar *array; 22 CeedScalar *darray; 23 int own_; 24 int down_; 25 } CeedVector_Magma; 26 27 typedef struct { 28 CeedInt *indices; 29 CeedInt *dindices; 30 int own_; 31 int down_; // cover a case where we own Device memory 32 } CeedElemRestriction_Magma; 33 34 typedef struct { 35 const CeedScalar **inputs; 36 CeedScalar **outputs; 37 } CeedQFunction_Magma; 38 39 typedef struct { 40 CeedVector *Evecs; /// E-vectors needed to apply operator (in followed by out) 41 CeedScalar **Edata; 42 CeedVector *evecsin; /// Input E-vectors needed to apply operator 43 CeedVector *evecsout; /// Output E-vectors needed to apply operator 44 CeedVector *qvecsin; /// Input Q-vectors needed to apply operator 45 CeedVector *qvecsout; /// Output Q-vectors needed to apply operator 46 CeedInt numein; 47 CeedInt numeout; 48 } CeedOperator_Magma; 49 50 // ***************************************************************************** 51 // * Initialize vector vec (after free mem) with values from array based on cmode 52 // * CEED_COPY_VALUES: memory is allocated in vec->array_allocated, made equal 53 // * to array, and data is copied (not store passed pointer) 54 // * CEED_OWN_POINTER: vec->data->array_allocated and vec->data->array = array 55 // * CEED_USE_POINTER: vec->data->array = array (can modify; no ownership) 56 // * mtype: CEED_MEM_HOST or CEED_MEM_DEVICE 57 // ***************************************************************************** 58 static int CeedVectorSetArray_Magma(CeedVector vec, CeedMemType mtype, 59 CeedCopyMode cmode, CeedScalar *array) { 60 CeedVector_Magma *impl = vec->data; 61 int ierr; 62 63 // If own data, free the "old" data, e.g., as it may be of different size 64 if (impl->own_) { 65 magma_free( impl->darray ); 66 magma_free_pinned( impl->array ); 67 impl->darray = NULL; 68 impl->array = NULL; 69 impl->own_ = 0; 70 impl->down_= 0; 71 } 72 73 if (mtype == CEED_MEM_HOST) { 74 // 
memory is on the host; own_ = 0 75 switch (cmode) { 76 case CEED_COPY_VALUES: 77 ierr = magma_malloc( (void **)&impl->darray, 78 vec->length * sizeof(CeedScalar)); CeedChk(ierr); 79 ierr = magma_malloc_pinned( (void **)&impl->array, 80 vec->length * sizeof(CeedScalar)); CeedChk(ierr); 81 impl->own_ = 1; 82 83 if (array != NULL) 84 magma_setvector(vec->length, sizeof(array[0]), 85 array, 1, impl->darray, 1); 86 break; 87 case CEED_OWN_POINTER: 88 ierr = magma_malloc( (void **)&impl->darray, 89 vec->length * sizeof(CeedScalar)); CeedChk(ierr); 90 // TODO: possible problem here is if we are passed non-pinned memory; 91 // (as we own it, lter in destroy, we use free for pinned memory). 92 impl->array = array; 93 impl->own_ = 1; 94 95 if (array != NULL) 96 magma_setvector(vec->length, sizeof(array[0]), 97 array, 1, impl->darray, 1); 98 break; 99 case CEED_USE_POINTER: 100 ierr = magma_malloc( (void **)&impl->darray, 101 vec->length * sizeof(CeedScalar)); CeedChk(ierr); 102 magma_setvector(vec->length, sizeof(array[0]), 103 array, 1, impl->darray, 1); 104 105 impl->down_ = 1; 106 impl->array = array; 107 } 108 } else if (mtype == CEED_MEM_DEVICE) { 109 // memory is on the device; own = 0 110 switch (cmode) { 111 case CEED_COPY_VALUES: 112 ierr = magma_malloc( (void **)&impl->darray, 113 vec->length * sizeof(CeedScalar)); CeedChk(ierr); 114 ierr = magma_malloc_pinned( (void **)&impl->array, 115 vec->length * sizeof(CeedScalar)); CeedChk(ierr); 116 impl->own_ = 1; 117 118 if (array) 119 magma_copyvector(vec->length, sizeof(array[0]), 120 array, 1, impl->darray, 1); 121 else 122 // t30 assumes allocation initializes with 0s 123 magma_setvector(vec->length, sizeof(array[0]), 124 impl->array, 1, impl->darray, 1); 125 break; 126 case CEED_OWN_POINTER: 127 impl->darray = array; 128 ierr = magma_malloc_pinned( (void **)&impl->array, 129 vec->length * sizeof(CeedScalar)); CeedChk(ierr); 130 impl->own_ = 1; 131 132 break; 133 case CEED_USE_POINTER: 134 impl->darray = array; 135 
impl->array = NULL; 136 } 137 138 } else 139 return CeedError(vec->ceed, 1, "Only MemType = HOST or DEVICE supported"); 140 141 return 0; 142 } 143 144 // ***************************************************************************** 145 // * Give data pointer from vector vec to array (on HOST or DEVICE) 146 // ***************************************************************************** 147 static int CeedVectorGetArray_Magma(CeedVector vec, CeedMemType mtype, 148 CeedScalar **array) { 149 CeedVector_Magma *impl = vec->data; 150 int ierr; 151 152 if (mtype == CEED_MEM_HOST) { 153 if (impl->own_) { 154 // data is owned so GPU had the most up-to-date version; copy it 155 // TTT - apparantly it doesn't have most up to date data 156 magma_getvector(vec->length, sizeof(*array[0]), 157 impl->darray, 1, impl->array, 1); 158 CeedDebug("\033[31m[CeedVectorGetArray_Magma]"); 159 //fprintf(stderr,"rrrrrrrrrrrrrrr\n"); 160 } else if (impl->array == NULL) { 161 // Vector doesn't own the data and was set on GPU 162 if (impl->darray == NULL) { 163 // call was made just to allocate memory 164 ierr = CeedVectorSetArray(vec, mtype, CEED_COPY_VALUES, NULL); 165 CeedChk(ierr); 166 } else 167 return CeedError(vec->ceed, 1, "Can not access DEVICE vector on HOST"); 168 } 169 *array = impl->array; 170 } else if (mtype == CEED_MEM_DEVICE) { 171 if (impl->darray == NULL) { 172 // Vector doesn't own the data and was set on the CPU 173 if (impl->array == NULL) { 174 // call was made just to allocate memory 175 ierr = CeedVectorSetArray(vec, mtype, CEED_COPY_VALUES, NULL); 176 CeedChk(ierr); 177 } else 178 return CeedError(vec->ceed, 1, "Can not access HOST vector on DEVICE"); 179 } 180 *array = impl->darray; 181 } else 182 return CeedError(vec->ceed, 1, "Can only provide to HOST or DEVICE memory"); 183 184 return 0; 185 } 186 187 // ***************************************************************************** 188 // * Give data pointer from vector vec to array (on HOST or DEVICE) to read it 
189 // ***************************************************************************** 190 static int CeedVectorGetArrayRead_Magma(CeedVector vec, CeedMemType mtype, 191 const CeedScalar **array) { 192 CeedVector_Magma *impl = vec->data; 193 int ierr; 194 195 if (mtype == CEED_MEM_HOST) { 196 if (impl->own_) { 197 // data is owned so GPU had the most up-to-date version; copy it 198 magma_getvector(vec->length, sizeof(*array[0]), 199 impl->darray, 1, impl->array, 1); 200 } else if (impl->array == NULL) { 201 // Vector doesn't own the data and was set on GPU 202 if (impl->darray == NULL) { 203 // call was made just to allocate memory 204 ierr = CeedVectorSetArray(vec, mtype, CEED_COPY_VALUES, NULL); 205 CeedChk(ierr); 206 } else 207 return CeedError(vec->ceed, 1, "Can not access DEVICE vector on HOST"); 208 } 209 *array = impl->array; 210 } else if (mtype == CEED_MEM_DEVICE) { 211 if (impl->darray == NULL) { 212 // Vector doesn't own the data and was set on the CPU 213 if (impl->array == NULL) { 214 // call was made just to allocate memory 215 ierr = CeedVectorSetArray(vec, mtype, CEED_COPY_VALUES, NULL); 216 CeedChk(ierr); 217 } else 218 return CeedError(vec->ceed, 1, "Can not access HOST vector on DEVICE"); 219 } 220 *array = impl->darray; 221 } else 222 return CeedError(vec->ceed, 1, "Can only provide to HOST or DEVICE memory"); 223 224 return 0; 225 } 226 227 // ***************************************************************************** 228 // * There is no mtype here for array so it is not clear if we restore from HOST 229 // * memory or from DEVICE memory. We assume that it is CPU memory because if 230 // * it was GPU memory we would not call this routine at all. 231 // * Restore vector vec with values from array, where array received its values 232 // * from vec and possibly modified them. 
233 // ***************************************************************************** 234 static int CeedVectorRestoreArray_Magma(CeedVector vec) { 235 CeedVector_Magma *impl = vec->data; 236 237 if (impl->down_) { 238 // nothing to do if array is on GPU, except if down_=1(case CPU use pointer) 239 magma_getvector(vec->length, sizeof(*array[0]), 240 impl->darray, 1, impl->array, 1); 241 } 242 return 0; 243 } 244 245 // ***************************************************************************** 246 // * There is no mtype here for array so it is not clear if we restore from HOST 247 // * memory or from DEVICE memory. We assume that it is CPU memory because if 248 // * it was GPU memory we would not call this routine at all. 249 // * Restore vector vec with values from array, where array received its values 250 // * from vec to only read them; in this case vec may have been modified meanwhile 251 // * and needs to be restored here. 252 // ***************************************************************************** 253 static int CeedVectorRestoreArrayRead_Magma(CeedVector vec) { 254 CeedVector_Magma *impl = vec->data; 255 256 if (impl->down_) { 257 // nothing to do if array is on GPU, except if down_=1(case CPU use pointer) 258 magma_getvector(vec->length, sizeof(*array[0]), 259 impl->darray, 1, impl->array, 1); 260 } 261 return 0; 262 } 263 264 static int CeedVectorDestroy_Magma(CeedVector vec) { 265 CeedVector_Magma *impl = vec->data; 266 int ierr; 267 268 // Free if we own the data 269 if (impl->own_) { 270 ierr = magma_free_pinned(impl->array); CeedChk(ierr); 271 ierr = magma_free(impl->darray); CeedChk(ierr); 272 } else if (impl->down_) { 273 ierr = magma_free(impl->darray); CeedChk(ierr); 274 } 275 ierr = CeedFree(&vec->data); CeedChk(ierr); 276 return 0; 277 } 278 279 // ***************************************************************************** 280 // * Create vector vec of size n 281 // 
***************************************************************************** 282 static int CeedVectorCreate_Magma(CeedInt n, CeedVector vec) { 283 CeedVector_Magma *impl; 284 int ierr; 285 286 vec->SetArray = CeedVectorSetArray_Magma; 287 vec->GetArray = CeedVectorGetArray_Magma; 288 vec->GetArrayRead = CeedVectorGetArrayRead_Magma; 289 vec->RestoreArray = CeedVectorRestoreArray_Magma; 290 vec->RestoreArrayRead = CeedVectorRestoreArrayRead_Magma; 291 vec->Destroy = CeedVectorDestroy_Magma; 292 ierr = CeedCalloc(1,&impl); CeedChk(ierr); 293 impl->darray = NULL; 294 impl->array = NULL; 295 impl->own_ = 0; 296 impl->down_= 0; 297 vec->data = impl; 298 return 0; 299 } 300 301 302 // ***************************************************************************** 303 // * Apply restriction operator r to u: v = r(rmode) u 304 // ***************************************************************************** 305 static int CeedElemRestrictionApply_Magma(CeedElemRestriction r, 306 CeedTransposeMode tmode, 307 CeedTransposeMode lmode, CeedVector u, 308 CeedVector v, CeedRequest *request) { 309 CeedElemRestriction_Magma *impl = r->data; 310 int ierr; 311 const CeedScalar *uu; 312 CeedScalar *vv; 313 CeedInt nelem = r->nelem, elemsize = r->elemsize, ndof = r->ndof, 314 ncomp=r->ncomp; 315 CeedInt esize = nelem * elemsize; 316 317 #ifdef USE_MAGMA_BATCH2 318 CeedInt *dindices = impl->dindices; 319 // Get pointers on the device 320 ierr = CeedVectorGetArrayRead(u, CEED_MEM_DEVICE, &uu); CeedChk(ierr); 321 ierr = CeedVectorGetArray(v, CEED_MEM_DEVICE, &vv); CeedChk(ierr); 322 #else 323 CeedInt *indices = impl->indices; 324 ierr = CeedVectorGetArrayRead(u, CEED_MEM_HOST, &uu); CeedChk(ierr); 325 ierr = CeedVectorGetArray(v, CEED_MEM_HOST, &vv); CeedChk(ierr); 326 #endif 327 328 if (tmode == CEED_NOTRANSPOSE) { 329 // Perform: v = r * u 330 if (!impl->indices) { 331 for (CeedInt i=0; i<esize*ncomp; i++) vv[i] = uu[i]; 332 } else if (ncomp == 1) { 333 #ifdef USE_MAGMA_BATCH2 334 
magma_template<<i=0:esize>> 335 (const CeedScalar *uu, CeedScalar *vv, CeedInt *dindices) { 336 vv[i] = uu[dindices[i]]; 337 } 338 #else 339 for (CeedInt i=0; i<esize; i++) vv[i] = uu[indices[i]]; 340 #endif 341 } else { 342 // vv is (elemsize x ncomp x nelem), column-major 343 if (lmode == CEED_NOTRANSPOSE) { // u is (ndof x ncomp), column-major 344 #ifdef USE_MAGMA_BATCH2 345 magma_template<<e=0:nelem, d=0:ncomp, i=0:elemsize>> 346 (const CeedScalar *uu, CeedScalar *vv, CeedInt *dindices, int ndof) { 347 vv[i + iend*(d+dend*e)] = uu[dindices[i+iend*e]+ndof*d]; 348 } 349 #else 350 for (CeedInt e = 0; e < nelem; e++) 351 for (CeedInt d = 0; d < ncomp; d++) 352 for (CeedInt i=0; i < elemsize; i++) { 353 vv[i + elemsize*(d+ncomp*e)] = 354 uu[indices[i+elemsize*e]+ndof*d]; 355 } 356 #endif 357 } else { // u is (ncomp x ndof), column-major 358 #ifdef USE_MAGMA_BATCH2 359 magma_template<<e=0:nelem, d=0:ncomp, i=0:elemsize>> 360 (const CeedScalar *uu, CeedScalar *vv, CeedInt *dindices) { 361 vv[i + iend*(d+dend*e)] = uu[d+dend*dindices[i + iend*e]]; 362 } 363 #else 364 for (CeedInt e = 0; e < nelem; e++) 365 for (CeedInt d = 0; d < ncomp; d++) 366 for (CeedInt i=0; i< elemsize; i++) { 367 vv[i + elemsize*(d+ncomp*e)] = 368 uu[d+ncomp*indices[i+elemsize*e]]; 369 } 370 #endif 371 } 372 } 373 } else { 374 // Note: in transpose mode, we perform: v += r^t * u 375 if (!impl->indices) { 376 for (CeedInt i=0; i<esize; i++) vv[i] += uu[i]; 377 } else if (ncomp == 1) { 378 // fprintf(stderr,"3 ---------\n"); 379 #ifdef USE_MAGMA_BATCH2 380 magma_template<<i=0:esize>> 381 (const CeedScalar *uu, CeedScalar *vv, CeedInt *dindices) { 382 magmablas_datomic_add( &vv[dindices[i]], uu[i]); 383 } 384 #else 385 for (CeedInt i=0; i<esize; i++) vv[indices[i]] += uu[i]; 386 #endif 387 } else { // u is (elemsize x ncomp x nelem) 388 fprintf(stderr,"2 ---------\n"); 389 390 if (lmode == CEED_NOTRANSPOSE) { // vv is (ndof x ncomp), column-major 391 #ifdef USE_MAGMA_BATCH2 392 
magma_template<<e=0:nelem, d=0:ncomp, i=0:elemsize>> 393 (const CeedScalar *uu, CeedScalar *vv, CeedInt *dindices, CeedInt ndof) { 394 magmablas_datomic_add( &vv[dindices[i+iend*e]+ndof*d], 395 uu[i+iend*(d+e*dend)]); 396 } 397 #else 398 for (CeedInt e = 0; e < nelem; e++) 399 for (CeedInt d = 0; d < ncomp; d++) 400 for (CeedInt i=0; i < elemsize; i++) { 401 vv[indices[i + elemsize*e]+ndof*d] += 402 uu[i + elemsize*(d+e*ncomp)]; 403 } 404 #endif 405 } else { // vv is (ncomp x ndof), column-major 406 #ifdef USE_MAGMA_BATCH2 407 magma_template<<e=0:nelem, d=0:ncomp, i=0:elemsize>> 408 (const CeedScalar *uu, CeedScalar *vv, CeedInt *dindices) { 409 magmablas_datomic_add( &vv[d+dend*dindices[i + iend*e]], 410 uu[i+iend*(d+e*dend)]); 411 } 412 #else 413 for (CeedInt e = 0; e < nelem; e++) 414 for (CeedInt d = 0; d < ncomp; d++) 415 for (CeedInt i=0; i < elemsize; i++) { 416 vv[d+ncomp*indices[i + elemsize*e]] += 417 uu[i + elemsize*(d+e*ncomp)]; 418 } 419 #endif 420 } 421 } 422 } 423 424 ierr = CeedVectorRestoreArrayRead(u, &uu); CeedChk(ierr); 425 ierr = CeedVectorRestoreArray(v, &vv); CeedChk(ierr); 426 427 if (request != CEED_REQUEST_IMMEDIATE && request != CEED_REQUEST_ORDERED) 428 *request = NULL; 429 return 0; 430 } 431 432 int CeedElemRestrictionApplyBlock_Magma(CeedElemRestriction r, 433 CeedInt block, CeedTransposeMode tmode, 434 CeedTransposeMode lmode, CeedVector u, 435 CeedVector v, CeedRequest *request) { 436 int ierr; 437 Ceed ceed; 438 ierr = CeedElemRestrictionGetCeed(r, &ceed); CeedChk(ierr); 439 return CeedError(ceed, 1, "Backend does not implement blocked restrictions"); 440 } 441 442 static int CeedElemRestrictionDestroy_Magma(CeedElemRestriction r) { 443 CeedElemRestriction_Magma *impl = r->data; 444 int ierr; 445 446 // Free if we own the data 447 if (impl->own_) { 448 ierr = magma_free_pinned(impl->indices); CeedChk(ierr); 449 ierr = magma_free(impl->dindices); CeedChk(ierr); 450 } else if (impl->down_) { 451 ierr = magma_free(impl->dindices); 
CeedChk(ierr); 452 } 453 ierr = CeedFree(&r->data); CeedChk(ierr); 454 return 0; 455 } 456 457 static int CeedElemRestrictionCreate_Magma(CeedMemType mtype, 458 CeedCopyMode cmode, 459 const CeedInt *indices, CeedElemRestriction r) { 460 int ierr, size = r->nelem*r->elemsize; 461 CeedElemRestriction_Magma *impl; 462 463 // Allocate memory for the MAGMA Restricton and initializa pointers to NULL 464 ierr = CeedCalloc(1,&impl); CeedChk(ierr); 465 impl->dindices = NULL; 466 impl->indices = NULL; 467 impl->own_ = 0; 468 impl->down_= 0; 469 470 if (mtype == CEED_MEM_HOST) { 471 // memory is on the host; own_ = 0 472 switch (cmode) { 473 case CEED_COPY_VALUES: 474 ierr = magma_malloc( (void **)&impl->dindices, 475 size * sizeof(CeedInt)); CeedChk(ierr); 476 ierr = magma_malloc_pinned( (void **)&impl->indices, 477 size * sizeof(CeedInt)); CeedChk(ierr); 478 impl->own_ = 1; 479 480 if (indices != NULL) { 481 memcpy(impl->indices, indices, size * sizeof(indices[0])); 482 magma_setvector(size, sizeof(CeedInt), 483 impl->indices, 1, impl->dindices, 1); 484 } 485 break; 486 case CEED_OWN_POINTER: 487 ierr = magma_malloc( (void **)&impl->dindices, 488 size * sizeof(CeedInt)); CeedChk(ierr); 489 // TODO: possible problem here is if we are passed non-pinned memory; 490 // (as we own it, lter in destroy, we use free for pinned memory). 
491 impl->indices = (CeedInt *)indices; 492 impl->own_ = 1; 493 494 if (indices != NULL) 495 magma_setvector(size, sizeof(CeedInt), 496 indices, 1, impl->dindices, 1); 497 break; 498 case CEED_USE_POINTER: 499 ierr = magma_malloc( (void **)&impl->dindices, 500 size * sizeof(CeedInt)); CeedChk(ierr); 501 magma_setvector(size, sizeof(CeedInt), 502 indices, 1, impl->dindices, 1); 503 impl->down_ = 1; 504 impl->indices = (CeedInt *)indices; 505 } 506 } else if (mtype == CEED_MEM_DEVICE) { 507 // memory is on the device; own = 0 508 switch (cmode) { 509 case CEED_COPY_VALUES: 510 ierr = magma_malloc( (void **)&impl->dindices, 511 size * sizeof(CeedInt)); CeedChk(ierr); 512 ierr = magma_malloc_pinned( (void **)&impl->indices, 513 size * sizeof(CeedInt)); CeedChk(ierr); 514 impl->own_ = 1; 515 516 if (indices) 517 magma_copyvector(size, sizeof(CeedInt), 518 indices, 1, impl->dindices, 1); 519 break; 520 case CEED_OWN_POINTER: 521 impl->dindices = (CeedInt *)indices; 522 ierr = magma_malloc_pinned( (void **)&impl->indices, 523 size * sizeof(CeedInt)); CeedChk(ierr); 524 impl->own_ = 1; 525 526 break; 527 case CEED_USE_POINTER: 528 impl->dindices = (CeedInt *)indices; 529 impl->indices = NULL; 530 } 531 532 } else 533 return CeedError(r->ceed, 1, "Only MemType = HOST or DEVICE supported"); 534 535 r->data = impl; 536 r->Apply = CeedElemRestrictionApply_Magma; 537 r->ApplyBlock = CeedElemRestrictionApplyBlock_Magma; 538 r->Destroy = CeedElemRestrictionDestroy_Magma; 539 540 return 0; 541 } 542 543 static int CeedElemRestrictionCreateBlocked_Magma(CeedMemType mtype, 544 CeedCopyMode cmode, 545 const CeedInt *indices, CeedElemRestriction r) { 546 return CeedError(r->ceed, 1, "Backend does not implement blocked restrictions"); 547 } 548 549 // Contracts on the middle index 550 // NOTRANSPOSE: V_ajc = T_jb U_abc 551 // TRANSPOSE: V_ajc = T_bj U_abc 552 // If Add != 0, "=" is replaced by "+=" 553 static int CeedTensorContract_Magma(Ceed ceed, 554 CeedInt A, CeedInt B, CeedInt C, 
CeedInt J, 555 const CeedScalar *t, CeedTransposeMode tmode, 556 const CeedInt Add, 557 const CeedScalar *u, CeedScalar *v) { 558 #ifdef USE_MAGMA_BATCH 559 magma_dtensor_contract(ceed, A, B, C, J, t, tmode, Add, u, v); 560 #else 561 CeedInt tstride0 = B, tstride1 = 1; 562 if (tmode == CEED_TRANSPOSE) { 563 tstride0 = 1; tstride1 = J; 564 } 565 CeedDebug("\033[31m[CeedTensorContract] A=%d, J=%d, C=%d, B=%d: %d %d %d", 566 A,J,C,B,A*J*B*C, C*J*A, C*B*A); 567 for (CeedInt a=0; a<A; a++) { 568 for (CeedInt j=0; j<J; j++) { 569 if (!Add) { 570 for (CeedInt c=0; c<C; c++) 571 v[(a*J+j)*C+c] = 0; 572 } 573 for (CeedInt b=0; b<B; b++) { 574 for (CeedInt c=0; c<C; c++) { 575 v[(a*J+j)*C+c] += t[j*tstride0 + b*tstride1] * u[(a*B+b)*C+c]; 576 } 577 } 578 } 579 } 580 #endif 581 return 0; 582 } 583 584 static int CeedBasisApply_Magma(CeedBasis basis, CeedInt nelem, 585 CeedTransposeMode tmode, CeedEvalMode emode, 586 CeedVector U, CeedVector V) { 587 int ierr; 588 const CeedInt dim = basis->dim; 589 const CeedInt ncomp = basis->ncomp; 590 const CeedInt nqpt = ncomp*CeedPowInt(basis->Q1d, dim); 591 const CeedInt add = (tmode == CEED_TRANSPOSE); 592 const CeedScalar *u; 593 CeedScalar *v; 594 if (U) { 595 ierr = CeedVectorGetArrayRead(U, CEED_MEM_HOST, &u); CeedChk(ierr); 596 } else if (emode != CEED_EVAL_WEIGHT) { 597 return CeedError(ceed, 1, 598 "An input vector is required for this CeedEvalMode"); 599 } 600 ierr = CeedVectorGetArray(V, CEED_MEM_HOST, &v); CeedChk(ierr); 601 602 if (nelem != 1) 603 return CeedError(basis->ceed, 1, 604 "This backend does not support BasisApply for multiple elements"); 605 606 CeedDebug("\033[01m[CeedBasisApply_Magma] vsize=%d", 607 ncomp*CeedPowInt(basis->P1d, dim)); 608 609 if (tmode == CEED_TRANSPOSE) { 610 const CeedInt vsize = ncomp*CeedPowInt(basis->P1d, dim); 611 for (CeedInt i = 0; i < vsize; i++) 612 v[i] = (CeedScalar) 0; 613 } 614 if (emode & CEED_EVAL_INTERP) { 615 CeedInt P = basis->P1d, Q = basis->Q1d; 616 if (tmode == 
CEED_TRANSPOSE) { 617 P = basis->Q1d; Q = basis->P1d; 618 } 619 CeedInt pre = ncomp*CeedPowInt(P, dim-1), post = 1; 620 CeedScalar tmp[2][ncomp*Q*CeedPowInt(P>Q?P:Q, dim-1)]; 621 CeedDebug("\033[01m[CeedBasisApply_Magma] tmpsize = %d", 622 ncomp*Q*CeedPowInt(P>Q?P:Q, dim-1)); 623 for (CeedInt d=0; d<dim; d++) { 624 ierr = CeedTensorContract_Magma(basis->ceed, pre, P, post, Q, basis->interp1d, 625 tmode, add&&(d==dim-1), 626 d==0?u:tmp[d%2], d==dim-1?v:tmp[(d+1)%2]); 627 CeedChk(ierr); 628 pre /= P; 629 post *= Q; 630 } 631 if (tmode == CEED_NOTRANSPOSE) { 632 v += nqpt; 633 } else { 634 u += nqpt; 635 } 636 } 637 if (emode & CEED_EVAL_GRAD) { 638 CeedInt P = basis->P1d, Q = basis->Q1d; 639 // In CEED_NOTRANSPOSE mode: 640 // u is (P^dim x nc), column-major layout (nc = ncomp) 641 // v is (Q^dim x nc x dim), column-major layout (nc = ncomp) 642 // In CEED_TRANSPOSE mode, the sizes of u and v are switched. 643 if (tmode == CEED_TRANSPOSE) { 644 P = basis->Q1d, Q = basis->P1d; 645 } 646 CeedScalar tmp[2][ncomp*Q*CeedPowInt(P>Q?P:Q, dim-1)]; 647 CeedDebug("\033[01m[CeedBasisApply_Magma] tmpsize = %d", 648 ncomp*Q*CeedPowInt(P>Q?P:Q, dim-1)); 649 for (CeedInt p = 0; p < dim; p++) { 650 CeedInt pre = ncomp*CeedPowInt(P, dim-1), post = 1; 651 for (CeedInt d=0; d<dim; d++) { 652 ierr = CeedTensorContract_Magma(basis->ceed, pre, P, post, Q, 653 (p==d)?basis->grad1d:basis->interp1d, 654 tmode, add&&(d==dim-1), 655 d==0?u:tmp[d%2], d==dim-1?v:tmp[(d+1)%2]); 656 CeedChk(ierr); 657 pre /= P; 658 post *= Q; 659 } 660 if (tmode == CEED_NOTRANSPOSE) { 661 v += nqpt; 662 } else { 663 u += nqpt; 664 } 665 } 666 } 667 if (emode & CEED_EVAL_WEIGHT) { 668 if (tmode == CEED_TRANSPOSE) 669 return CeedError(basis->ceed, 1, 670 "CEED_EVAL_WEIGHT incompatible with CEED_TRANSPOSE"); 671 CeedInt Q = basis->Q1d; 672 for (CeedInt d=0; d<dim; d++) { 673 CeedInt pre = CeedPowInt(Q, dim-d-1), post = CeedPowInt(Q, d); 674 for (CeedInt i=0; i<pre; i++) { 675 for (CeedInt j=0; j<Q; j++) { 676 for 
(CeedInt k=0; k<post; k++) { 677 v[(i*Q + j)*post + k] = basis->qweight1d[j] 678 * (d == 0 ? 1 : v[(i*Q + j)*post + k]); 679 } 680 } 681 } 682 } 683 } 684 if (U) { 685 ierr = CeedVectorRestoreArrayRead(U, &u); CeedChk(ierr); 686 } 687 ierr = CeedVectorRestoreArray(V, &v); CeedChk(ierr); 688 return 0; 689 } 690 691 static int CeedBasisDestroy_Magma(CeedBasis basis) { 692 return 0; 693 } 694 695 static int CeedBasisCreateTensorH1_Magma(CeedInt dim, CeedInt P1d, 696 CeedInt Q1d, const CeedScalar *interp1d, 697 const CeedScalar *grad1d, 698 const CeedScalar *qref1d, 699 const CeedScalar *qweight1d, 700 CeedBasis basis) { 701 basis->Apply = CeedBasisApply_Magma; 702 basis->Destroy = CeedBasisDestroy_Magma; 703 return 0; 704 } 705 706 static int CeedBasisCreateH1_Magma(CeedElemTopology topo, CeedInt dim, 707 CeedInt ndof, CeedInt nqpts, 708 const CeedScalar *interp, 709 const CeedScalar *grad, 710 const CeedScalar *qref, 711 const CeedScalar *qweight, 712 CeedBasis basis) { 713 return CeedError(basis->ceed, 1, "Backend does not implement non-tensor bases"); 714 } 715 716 static int CeedQFunctionApply_Magma(CeedQFunction qf, CeedInt Q, 717 CeedVector *U, CeedVector *V) { 718 int ierr; 719 CeedQFunction_Ref *impl; 720 ierr = CeedQFunctionGetData(qf, (void *)&impl); CeedChk(ierr); 721 722 void *ctx; 723 ierr = CeedQFunctionGetContext(qf, &ctx); CeedChk(ierr); 724 725 int (*f)() = NULL; 726 ierr = CeedQFunctionGetUserFunction(qf, (int (* *)())&f); CeedChk(ierr); 727 728 CeedInt nIn, nOut; 729 ierr = CeedQFunctionGetNumArgs(qf, &nIn, &nOut); CeedChk(ierr); 730 731 for (int i = 0; i<nIn; i++) { 732 if (U[i]) { 733 ierr = CeedVectorGetArrayRead(U[i], CEED_MEM_HOST, &impl->inputs[i]); 734 CeedChk(ierr); 735 } 736 } 737 for (int i = 0; i<nOut; i++) { 738 if (U[i]) { 739 ierr = CeedVectorGetArray(V[i], CEED_MEM_HOST, &impl->outputs[i]); 740 CeedChk(ierr); 741 } 742 } 743 744 ierr = f(ctx, Q, impl->inputs, impl->outputs); CeedChk(ierr); 745 746 for (int i = 0; i<nIn; i++) { 747 if 
(U[i]) { 748 ierr = CeedVectorRestoreArrayRead(U[i], &impl->inputs[i]); CeedChk(ierr); 749 } 750 } 751 for (int i = 0; i<nOut; i++) { 752 if (U[i]) { 753 ierr = CeedVectorRestoreArray(V[i], &impl->outputs[i]); CeedChk(ierr); 754 } 755 } 756 return 0; 757 } 758 759 static int CeedQFunctionDestroy_Magma(CeedQFunction qf) { 760 int ierr; 761 CeedQFunction_Magma *impl; 762 ierr = CeedQFunctionGetData(qf, (void *)&impl); CeedChk(ierr); 763 764 ierr = CeedFree(&impl->inputs); CeedChk(ierr); 765 ierr = CeedFree(&impl->outputs); CeedChk(ierr); 766 ierr = CeedFree(&impl); CeedChk(ierr); 767 768 return 0; 769 } 770 771 static int CeedQFunctionCreate_Magma(CeedQFunction qf) { 772 int ierr; 773 Ceed ceed; 774 ierr = CeedQFunctionGetCeed(qf, &ceed); CeedChk(ierr); 775 776 CeedQFunction_Magma *impl; 777 ierr = CeedCalloc(1, &impl); CeedChk(ierr); 778 ierr = CeedCalloc(16, &impl->inputs); CeedChk(ierr); 779 ierr = CeedCalloc(16, &impl->outputs); CeedChk(ierr); 780 ierr = CeedQFunctionSetData(qf, (void *)&impl); CeedChk(ierr); 781 782 qf->Apply = CeedQFunctionApply_Magma; 783 qf->Destroy = CeedQFunctionDestroy_Magma; 784 return 0; 785 } 786 787 static int CeedOperatorDestroy_Magma(CeedOperator op) { 788 int ierr; 789 CeedOperator_Magma *impl; 790 ierr = CeedOperatorGetData(op, (void *)&impl); CeedChk(ierr); 791 792 for (CeedInt i=0; i<impl->numein+impl->numeout; i++) { 793 if (impl->Evecs[i]) { 794 ierr = CeedVectorDestroy(&impl->Evecs[i]); CeedChk(ierr); 795 } 796 } 797 ierr = CeedFree(&impl->Evecs); CeedChk(ierr); 798 ierr = CeedFree(&impl->Edata); CeedChk(ierr); 799 800 for (CeedInt i=0; i<impl->numein; i++) { 801 ierr = CeedVectorDestroy(&impl->evecsin[i]); CeedChk(ierr); 802 ierr = CeedVectorDestroy(&impl->qvecsin[i]); CeedChk(ierr); 803 } 804 ierr = CeedFree(&impl->evecsin); CeedChk(ierr); 805 ierr = CeedFree(&impl->qvecsin); CeedChk(ierr); 806 807 for (CeedInt i=0; i<impl->numeout; i++) { 808 ierr = CeedVectorDestroy(&impl->evecsout[i]); CeedChk(ierr); 809 ierr = 
CeedVectorDestroy(&impl->qvecsout[i]); CeedChk(ierr); 810 } 811 ierr = CeedFree(&impl->evecsout); CeedChk(ierr); 812 ierr = CeedFree(&impl->qvecsout); CeedChk(ierr); 813 814 815 ierr = CeedFree(&impl); CeedChk(ierr); 816 return 0; 817 } 818 819 820 /* 821 Setup infields or outfields 822 */ 823 static int CeedOperatorSetupFields_Magma(CeedQFunction qf, CeedOperator op, 824 bool inOrOut, 825 CeedVector *fullevecs, CeedVector *evecs, 826 CeedVector *qvecs, CeedInt starte, 827 CeedInt numfields, CeedInt Q) { 828 CeedInt dim = 1, ierr, ncomp; 829 Ceed ceed; 830 ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr); 831 CeedQFunction_Magma *qf_data; 832 ierr = CeedQFunctionGetData(qf, (void *)&qf_data); CeedChk(ierr); 833 CeedBasis basis; 834 CeedElemRestriction Erestrict; 835 CeedOperatorField *opfields; 836 CeedQFunctionField *qffields; 837 if (inOrOut) { 838 ierr = CeedOperatorGetFields(op, NULL, &opfields); 839 CeedChk(ierr); 840 ierr = CeedQFunctionGetFields(qf, NULL, &qffields); 841 CeedChk(ierr); 842 } else { 843 ierr = CeedOperatorGetFields(op, &opfields, NULL); 844 CeedChk(ierr); 845 ierr = CeedQFunctionGetFields(qf, &qffields, NULL); 846 CeedChk(ierr); 847 } 848 849 // Loop over fields 850 for (CeedInt i=0; i<numfields; i++) { 851 CeedEvalMode emode; 852 ierr = CeedQFunctionFieldGetEvalMode(qffields[i], &emode); CeedChk(ierr); 853 if (emode != CEED_EVAL_WEIGHT) { 854 ierr = CeedOperatorFieldGetElemRestriction(opfields[i], &Erestrict); 855 CeedChk(ierr); 856 ierr = CeedElemRestrictionCreateVector(Erestrict, NULL, &fullevecs[i+starte]); 857 CeedChk(ierr); 858 } else { 859 } 860 switch(emode) { 861 case CEED_EVAL_NONE: 862 ierr = CeedQFunctionFieldGetNumComponents(qffields[i], &ncomp); 863 CeedChk(ierr); 864 ierr = CeedVectorCreate(ceed, Q*ncomp, &evecs[i]); CeedChk(ierr); 865 ierr = CeedVectorCreate(ceed, Q*ncomp, &qvecs[i]); CeedChk(ierr); 866 break; // No action 867 case CEED_EVAL_INTERP: 868 ierr = CeedOperatorFieldGetBasis(opfields[i], &basis); CeedChk(ierr); 
869 ierr = CeedQFunctionFieldGetNumComponents(qffields[i], &ncomp); 870 CeedChk(ierr); 871 ierr = CeedVectorCreate(ceed, Q*ncomp, &evecs[i]); CeedChk(ierr); 872 ierr = CeedVectorCreate(ceed, Q*ncomp, &qvecs[i]); CeedChk(ierr); 873 break; 874 case CEED_EVAL_GRAD: 875 ierr = CeedOperatorFieldGetBasis(opfields[i], &basis); CeedChk(ierr); 876 ierr = CeedQFunctionFieldGetNumComponents(qffields[i], &ncomp); 877 ierr = CeedBasisGetDimension(basis, &dim); CeedChk(ierr); 878 ierr = CeedQFunctionFieldGetNumComponents(qffields[i], &ncomp); 879 CeedChk(ierr); 880 ierr = CeedVectorCreate(ceed, Q*ncomp, &evecs[i]); CeedChk(ierr); 881 ierr = CeedVectorCreate(ceed, Q*ncomp*dim, &qvecs[i]); CeedChk(ierr); 882 break; 883 case CEED_EVAL_WEIGHT: // Only on input fields 884 ierr = CeedOperatorFieldGetBasis(opfields[i], &basis); CeedChk(ierr); 885 ierr = CeedBasisGetDimension(basis, &dim); CeedChk(ierr); 886 ierr = CeedVectorCreate(ceed, Q, &qvecs[i]); CeedChk(ierr); 887 ierr = CeedBasisApply(basis, 1, CEED_NOTRANSPOSE, CEED_EVAL_WEIGHT, 888 NULL, qvecs[i]); CeedChk(ierr); 889 assert(starte==0); 890 break; 891 case CEED_EVAL_DIV: break; // Not implemented 892 case CEED_EVAL_CURL: break; // Not implemented 893 } 894 } 895 return 0; 896 } 897 898 /* 899 CeedOperator needs to connect all the named fields (be they active or passive) 900 to the named inputs and outputs of its CeedQFunction. 
901 */ 902 static int CeedOperatorSetup_Magma(CeedOperator op) { 903 int ierr; 904 bool setupdone; 905 ierr = CeedOperatorGetSetupStatus(op, &setupdone); CeedChk(ierr); 906 if (setupdone) return 0; 907 Ceed ceed; 908 ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr); 909 CeedOperator_Magma *data; 910 ierr = CeedOperatorGetData(op, (void *)&data); CeedChk(ierr); 911 CeedQFunction qf; 912 ierr = CeedOperatorGetQFunction(op, &qf); CeedChk(ierr); 913 CeedInt Q, numinputfields, numoutputfields; 914 ierr = CeedOperatorGetNumQuadraturePoints(op, &Q); CeedChk(ierr); 915 ierr = CeedQFunctionGetNumArgs(qf, &numinputfields, &numoutputfields); 916 CeedOperatorField *opinputfields, *opoutputfields; 917 ierr = CeedOperatorGetFields(op, &opinputfields, &opoutputfields); 918 CeedChk(ierr); 919 CeedQFunctionField *qfinputfields, *qfoutputfields; 920 ierr = CeedQFunctionGetFields(qf, &qfinputfields, &qfoutputfields); 921 CeedChk(ierr); 922 923 data->numein = numinputfields; 924 data->numeout = numoutputfields; 925 926 // Allocate 927 const CeedInt numIO = numinputfields + numoutputfields; 928 929 ierr = CeedCalloc(numinputfields + numoutputfields, &data->Evecs); 930 CeedChk(ierr); 931 ierr = CeedCalloc(numinputfields + numoutputfields, &data->Edata); 932 CeedChk(ierr); 933 934 ierr = CeedCalloc(16, &data->evecsin); CeedChk(ierr); 935 ierr = CeedCalloc(16, &data->evecsout); CeedChk(ierr); 936 ierr = CeedCalloc(16, &data->qvecsin); CeedChk(ierr); 937 ierr = CeedCalloc(16, &data->qvecsout); CeedChk(ierr); 938 939 // Set up infield and outfield pointer arrays 940 // Infields 941 ierr = CeedOperatorSetupFields_Magma(qf, op, 0, data->Evecs, 942 data->evecsin, data->qvecsin, 0, 943 numinputfields, Q); 944 CeedChk(ierr); 945 // Outfields 946 ierr = CeedOperatorSetupFields_Magma(qf, op, 1, data->Evecs, 947 data->evecsout, data->qvecsout, 948 numinputfields, numoutputfields, Q); 949 CeedChk(ierr); 950 ierr = CeedOperatorSetSetupDone(op); CeedChk(ierr); 951 return 0; 952 } 953 954 static 
// Apply the operator: restrict each input to its E-vector, apply the input
// bases and the QFunction element by element, apply the transposed output
// bases, then accumulate into the output L-vectors through the transposed
// restrictions.  All array access here is on CEED_MEM_HOST.
// Returns 0 on success, or a CEED error code.
int CeedOperatorApply_Magma(CeedOperator op, CeedVector invec,
                            CeedVector outvec, CeedRequest *request) {
  int ierr;
  CeedOperator_Magma *data;
  ierr = CeedOperatorGetData(op, (void *)&data); CeedChk(ierr);
  CeedInt Q, elemsize, numelements, numinputfields, numoutputfields, ncomp;
  ierr = CeedOperatorGetNumQuadraturePoints(op, &Q); CeedChk(ierr);
  ierr = CeedOperatorGetNumElements(op, &numelements); CeedChk(ierr);
  CeedQFunction qf;
  ierr = CeedOperatorGetQFunction(op, &qf); CeedChk(ierr);
  CeedTransposeMode lmode;
  CeedOperatorField *opinputfields, *opoutputfields;
  ierr = CeedOperatorGetFields(op, &opinputfields, &opoutputfields);
  CeedChk(ierr);
  CeedQFunctionField *qfinputfields, *qfoutputfields;
  ierr = CeedQFunctionGetFields(qf, &qfinputfields, &qfoutputfields);
  CeedChk(ierr);
  CeedEvalMode emode;
  CeedVector vec;
  CeedBasis basis;
  CeedElemRestriction Erestrict;

  // Lazy setup of E-/Q-vectors (no-op if already done)
  ierr = CeedOperatorSetup_Magma(op); CeedChk(ierr);
  ierr = CeedQFunctionGetNumArgs(qf, &numinputfields, &numoutputfields);
  CeedChk(ierr);

  // Input Evecs and Restriction
  for (CeedInt i = 0; i < numinputfields; i++) {
    ierr = CeedQFunctionFieldGetEvalMode(qfinputfields[i], &emode);
    CeedChk(ierr);
    if (emode & CEED_EVAL_WEIGHT) {
      // Weight fields have no L-vector to restrict
    } else { // Restriction
      // Get input vector; the active one is the caller-supplied invec
      ierr = CeedOperatorFieldGetVector(opinputfields[i], &vec); CeedChk(ierr);
      if (vec == CEED_VECTOR_ACTIVE)
        vec = invec;
      // Restrict
      ierr = CeedOperatorFieldGetElemRestriction(opinputfields[i], &Erestrict);
      CeedChk(ierr);
      ierr = CeedOperatorFieldGetLMode(opinputfields[i], &lmode); CeedChk(ierr);
      ierr = CeedElemRestrictionApply(Erestrict, CEED_NOTRANSPOSE, lmode, vec,
                                      data->Evecs[i], request); CeedChk(ierr);
      // Get read-only host access to the E-vector data
      ierr = CeedVectorGetArrayRead(data->Evecs[i], CEED_MEM_HOST,
                                    (const CeedScalar **)&data->Edata[i]);
      CeedChk(ierr);
    }
  }

  // Output Evecs
  for (CeedInt i = 0; i < numoutputfields; i++) {
    ierr = CeedVectorGetArray(data->Evecs[i + data->numein], CEED_MEM_HOST,
                              &data->Edata[i + numinputfields]); CeedChk(ierr);
  }

  // Loop through elements
  for (CeedInt e = 0; e < numelements; e++) {
    // Input basis apply if needed
    for (CeedInt i = 0; i < numinputfields; i++) {
      // Get elemsize, emode, ncomp
      ierr = CeedOperatorFieldGetElemRestriction(opinputfields[i], &Erestrict);
      CeedChk(ierr);
      ierr = CeedElemRestrictionGetElementSize(Erestrict, &elemsize);
      CeedChk(ierr);
      ierr = CeedQFunctionFieldGetEvalMode(qfinputfields[i], &emode);
      CeedChk(ierr);
      ierr = CeedQFunctionFieldGetNumComponents(qfinputfields[i], &ncomp);
      CeedChk(ierr);
      // Basis action
      switch (emode) {
      case CEED_EVAL_NONE:
        // No basis: point the Q-vector directly at this element's E-data
        ierr = CeedVectorSetArray(data->qvecsin[i], CEED_MEM_HOST,
                                  CEED_USE_POINTER,
                                  &data->Edata[i][e*Q*ncomp]); CeedChk(ierr);
        break;
      case CEED_EVAL_INTERP:
        ierr = CeedOperatorFieldGetBasis(opinputfields[i], &basis);
        CeedChk(ierr);
        ierr = CeedVectorSetArray(data->evecsin[i], CEED_MEM_HOST,
                                  CEED_USE_POINTER,
                                  &data->Edata[i][e*elemsize*ncomp]);
        CeedChk(ierr);
        ierr = CeedBasisApply(basis, 1, CEED_NOTRANSPOSE, CEED_EVAL_INTERP,
                              data->evecsin[i], data->qvecsin[i]);
        CeedChk(ierr);
        break;
      case CEED_EVAL_GRAD:
        ierr = CeedOperatorFieldGetBasis(opinputfields[i], &basis);
        CeedChk(ierr);
        ierr = CeedVectorSetArray(data->evecsin[i], CEED_MEM_HOST,
                                  CEED_USE_POINTER,
                                  &data->Edata[i][e*elemsize*ncomp]);
        CeedChk(ierr);
        ierr = CeedBasisApply(basis, 1, CEED_NOTRANSPOSE, CEED_EVAL_GRAD,
                              data->evecsin[i], data->qvecsin[i]);
        CeedChk(ierr);
        break;
      case CEED_EVAL_WEIGHT:
        break; // No action: qvecsin[i] was filled once during field setup
      case CEED_EVAL_DIV:
        break; // Not implemented
      case CEED_EVAL_CURL:
        break; // Not implemented
      }
    }
    // Output pointers: CEED_EVAL_NONE outputs write straight into E-data;
    // INTERP/GRAD outputs are produced by the transposed basis apply below
    for (CeedInt i = 0; i < numoutputfields; i++) {
      ierr = CeedQFunctionFieldGetEvalMode(qfoutputfields[i], &emode);
      CeedChk(ierr);
      if (emode == CEED_EVAL_NONE) {
        ierr = CeedQFunctionFieldGetNumComponents(qfoutputfields[i], &ncomp);
        CeedChk(ierr);
        ierr = CeedVectorSetArray(data->qvecsout[i], CEED_MEM_HOST,
                                  CEED_USE_POINTER,
                                  &data->Edata[i + numinputfields][e*Q*ncomp]);
        CeedChk(ierr);
      }
    }

    // Q function
    ierr = CeedQFunctionApply(qf, Q, data->qvecsin, data->qvecsout);
    CeedChk(ierr);

    // Output basis apply if needed
    for (CeedInt i = 0; i < numoutputfields; i++) {
      // Get elemsize, emode, ncomp
      ierr = CeedOperatorFieldGetElemRestriction(opoutputfields[i], &Erestrict);
      CeedChk(ierr);
      ierr = CeedElemRestrictionGetElementSize(Erestrict, &elemsize);
      CeedChk(ierr);
      ierr = CeedQFunctionFieldGetEvalMode(qfoutputfields[i], &emode);
      CeedChk(ierr);
      ierr = CeedQFunctionFieldGetNumComponents(qfoutputfields[i], &ncomp);
      CeedChk(ierr);
      // Basis action
      switch (emode) {
      case CEED_EVAL_NONE:
        break; // No action: the QFunction already wrote into E-data
      case CEED_EVAL_INTERP:
        ierr = CeedOperatorFieldGetBasis(opoutputfields[i], &basis);
        CeedChk(ierr);
        ierr = CeedVectorSetArray(data->evecsout[i], CEED_MEM_HOST,
                                  CEED_USE_POINTER,
                                  &data->Edata[i + numinputfields]
                                  [e*elemsize*ncomp]);
        CeedChk(ierr); // was missing: SetArray result went unchecked
        ierr = CeedBasisApply(basis, 1, CEED_TRANSPOSE, CEED_EVAL_INTERP,
                              data->qvecsout[i], data->evecsout[i]);
        CeedChk(ierr);
        break;
      case CEED_EVAL_GRAD:
        ierr = CeedOperatorFieldGetBasis(opoutputfields[i], &basis);
        CeedChk(ierr);
        ierr = CeedVectorSetArray(data->evecsout[i], CEED_MEM_HOST,
                                  CEED_USE_POINTER,
                                  &data->Edata[i + numinputfields]
                                  [e*elemsize*ncomp]);
        CeedChk(ierr); // was missing: SetArray result went unchecked
        ierr = CeedBasisApply(basis, 1, CEED_TRANSPOSE, CEED_EVAL_GRAD,
                              data->qvecsout[i], data->evecsout[i]);
        CeedChk(ierr);
        break;
      case CEED_EVAL_WEIGHT:
        break; // Should not occur on an output field
      case CEED_EVAL_DIV:
        break; // Not implemented
      case CEED_EVAL_CURL:
        break; // Not implemented
      }
    }
  } // numelements

  // Zero the output L-vectors before the transposed restrictions accumulate
  for (CeedInt i = 0; i < numoutputfields; i++) {
    ierr = CeedOperatorFieldGetVector(opoutputfields[i], &vec); CeedChk(ierr);
    if (vec == CEED_VECTOR_ACTIVE)
      vec = outvec;
    ierr = CeedVectorSetValue(vec, 0.0); CeedChk(ierr);
  }

  // Output restriction
  for (CeedInt i = 0; i < numoutputfields; i++) {
    // Restore evec
    ierr = CeedVectorRestoreArray(data->Evecs[i + data->numein],
                                  &data->Edata[i + numinputfields]);
    CeedChk(ierr);
    // Get output vector; the active one is the caller-supplied outvec
    ierr = CeedOperatorFieldGetVector(opoutputfields[i], &vec); CeedChk(ierr);
    if (vec == CEED_VECTOR_ACTIVE)
      vec = outvec;
    // Restrict (transposed: accumulate element contributions into vec)
    ierr = CeedOperatorFieldGetElemRestriction(opoutputfields[i], &Erestrict);
    CeedChk(ierr);
    ierr = CeedOperatorFieldGetLMode(opoutputfields[i], &lmode); CeedChk(ierr);
    ierr = CeedElemRestrictionApply(Erestrict, CEED_TRANSPOSE, lmode,
                                    data->Evecs[i + data->numein], vec,
                                    request); CeedChk(ierr);
  }

  // Restore input arrays
  for (CeedInt i = 0; i < numinputfields; i++) {
    ierr = CeedQFunctionFieldGetEvalMode(qfinputfields[i], &emode);
    CeedChk(ierr);
    if (emode & CEED_EVAL_WEIGHT) {
      // Nothing was borrowed for weight fields
    } else {
      // Restriction
      ierr = CeedVectorRestoreArrayRead(data->Evecs[i],
                                        (const CeedScalar **)&data->Edata[i]);
      CeedChk(ierr);
    }
  }
  return 0;
}

static int CeedOperatorCreate_Magma(CeedOperator op) {
  int ierr;
  CeedOperator_Magma *impl;
  Ceed ceed;
  ierr =
CeedOperatorGetCeed(op, &ceed); CeedChk(ierr); 1174 1175 ierr = CeedCalloc(1, &impl); CeedChk(ierr); 1176 ierr = CeedOperatorSetData(op, (void *)&impl); 1177 1178 ierr = CeedSetBackendFunction(ceed, "Operator", op, "Apply", 1179 CeedOperatorApply_Magma); CeedChk(ierr); 1180 ierr = CeedSetBackendFunction(ceed, "Operator", op, "Destroy", 1181 CeedOperatorDestroy_Magma); CeedChk(ierr); 1182 return 0; 1183 } 1184 1185 int CeedCompositeOperatorCreate_Magma(CeedOperator op) { 1186 int ierr; 1187 Ceed ceed; 1188 ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr); 1189 return CeedError(ceed, 1, "Backend does not support composite operators"); 1190 } 1191 1192 // ***************************************************************************** 1193 // * INIT 1194 // ***************************************************************************** 1195 static int CeedInit_Magma(const char *resource, Ceed ceed) { 1196 int ierr; 1197 if (strcmp(resource, "/gpu/magma")) 1198 return CeedError(ceed, 1, "MAGMA backend cannot use resource: %s", resource); 1199 1200 ierr = magma_init(); 1201 if (ierr) return CeedError(ceed, 1, "error in magma_init(): %d\n", ierr); 1202 //magma_print_environment(); 1203 1204 ceed->VectorCreate = CeedVectorCreate_Magma; 1205 ceed->BasisCreateTensorH1 = CeedBasisCreateTensorH1_Magma; 1206 ceed->BasisCreateH1 = CeedBasisCreateH1_Magma; 1207 ceed->ElemRestrictionCreate = CeedElemRestrictionCreate_Magma; 1208 ceed->ElemRestrictionCreateBlocked = CeedElemRestrictionCreateBlocked_Magma; 1209 ceed->QFunctionCreate = CeedQFunctionCreate_Magma; 1210 ceed->OperatorCreate = CeedOperatorCreate_Magma; 1211 ceed->CompositeOperatorCreate = CeedCompositeOperatorCreate_Magma; 1212 return 0; 1213 } 1214 1215 // ***************************************************************************** 1216 // * REGISTER 1217 // ***************************************************************************** 1218 __attribute__((constructor)) 1219 static void Register(void) { 1220 
  // Register this backend under the "/gpu/magma" resource with priority 20
  CeedRegister("/gpu/magma", CeedInit_Magma, 20);
}