182b77998SStan Tomov // Copyright (c) 2017, Lawrence Livermore National Security, LLC. Produced at 282b77998SStan Tomov // the Lawrence Livermore National Laboratory. LLNL-CODE-734707. All Rights 382b77998SStan Tomov // reserved. See files LICENSE and NOTICE for details. 482b77998SStan Tomov // 582b77998SStan Tomov // This file is part of CEED, a collection of benchmarks, miniapps, software 682b77998SStan Tomov // libraries and APIs for efficient high-order finite element and spectral 782b77998SStan Tomov // element discretizations for exascale applications. For more information and 882b77998SStan Tomov // source code availability see http://github.com/ceed. 982b77998SStan Tomov // 1082b77998SStan Tomov // The CEED research is supported by the Exascale Computing Project 17-SC-20-SC, 1182b77998SStan Tomov // a collaborative effort of two U.S. Department of Energy organizations (Office 1282b77998SStan Tomov // of Science and the National Nuclear Security Administration) responsible for 1382b77998SStan Tomov // the planning and preparation of a capable exascale ecosystem, including 1482b77998SStan Tomov // software, applications, hardware, advanced system engineering and early 1582b77998SStan Tomov // testbed platforms, in support of the nation's exascale computing imperative. 1682b77998SStan Tomov 1738612b08SStan Tomov #include "ceed-magma.h" 185a9ca9adSVeselin Dobrev #include <string.h> 1982b77998SStan Tomov 2082b77998SStan Tomov typedef struct { 2182b77998SStan Tomov CeedScalar *array; 2293fbe329SStan Tomov CeedScalar *darray; 23e2900954SStan Tomov int own_; 24c119dad6SStan Tomov int down_; 2582b77998SStan Tomov } CeedVector_Magma; 2682b77998SStan Tomov 2782b77998SStan Tomov typedef struct { 281751b0a9SStan Tomov CeedInt *indices; 291751b0a9SStan Tomov CeedInt *dindices; 301751b0a9SStan Tomov int own_; 31c119dad6SStan Tomov int down_; // cover a case where we own Device memory 3282b77998SStan Tomov } CeedElemRestriction_Magma; 3382b77998SStan Tomov 3482b77998SStan Tomov typedef struct { 3516383ffcSjeremylt const CeedScalar **inputs; 3616383ffcSjeremylt CeedScalar **outputs; 3716383ffcSjeremylt } CeedQFunction_Magma; 3816383ffcSjeremylt 3916383ffcSjeremylt typedef struct { 4016383ffcSjeremylt CeedVector *Evecs; /// E-vectors needed to apply operator (in followed by out) 4116383ffcSjeremylt CeedScalar **Edata; 4216383ffcSjeremylt CeedVector *evecsin; /// Input E-vectors needed to apply operator 4316383ffcSjeremylt CeedVector *evecsout; /// Output E-vectors needed to apply operator 4416383ffcSjeremylt CeedVector *qvecsin; /// Input Q-vectors needed to apply operator 4516383ffcSjeremylt CeedVector *qvecsout; /// Output Q-vectors needed to apply operator 463bfcb0deSjeremylt CeedInt numein; 473bfcb0deSjeremylt CeedInt numeout; 4882b77998SStan Tomov } CeedOperator_Magma; 4982b77998SStan Tomov 5093fbe329SStan Tomov // ***************************************************************************** 5193fbe329SStan Tomov // * Initialize vector vec (after free mem) with values from array based on cmode 5293fbe329SStan Tomov // * CEED_COPY_VALUES: memory is allocated in vec->array_allocated, made equal 5393fbe329SStan Tomov // * to array, and data is copied (not store passed pointer) 5493fbe329SStan Tomov // * CEED_OWN_POINTER: vec->data->array_allocated and vec->data->array = array 5593fbe329SStan Tomov // * CEED_USE_POINTER: vec->data->array = array (can modify; no ownership) 5693fbe329SStan Tomov // * mtype: CEED_MEM_HOST or CEED_MEM_DEVICE 5793fbe329SStan Tomov // ***************************************************************************** 5882b77998SStan Tomov static int CeedVectorSetArray_Magma(CeedVector vec, CeedMemType mtype, 5982b77998SStan Tomov CeedCopyMode cmode, CeedScalar *array) { 6082b77998SStan Tomov CeedVector_Magma *impl = vec->data; 6182b77998SStan Tomov int ierr; 6282b77998SStan Tomov 63c119dad6SStan Tomov // If own data, free the "old" data, e.g., as it may be of different size 64e2900954SStan Tomov if (impl->own_) { 65e2900954SStan Tomov magma_free( impl->darray ); 66e2900954SStan Tomov magma_free_pinned( impl->array ); 67e2900954SStan Tomov impl->darray = NULL; 68e2900954SStan Tomov impl->array = NULL; 69e2900954SStan Tomov impl->own_ = 0; 70c119dad6SStan Tomov impl->down_= 0; 71e2900954SStan Tomov } 7293fbe329SStan Tomov 73e2900954SStan Tomov if (mtype == CEED_MEM_HOST) { 74e2900954SStan Tomov // memory is on the host; own_ = 0 7582b77998SStan Tomov switch (cmode) { 7682b77998SStan Tomov case CEED_COPY_VALUES: 7793fbe329SStan Tomov ierr = magma_malloc( (void **)&impl->darray, 78e2900954SStan Tomov vec->length * sizeof(CeedScalar)); CeedChk(ierr); 79e2900954SStan Tomov ierr = magma_malloc_pinned( (void **)&impl->array, 80e2900954SStan Tomov vec->length * sizeof(CeedScalar)); CeedChk(ierr); 81e2900954SStan Tomov impl->own_ = 1; 82e2900954SStan Tomov 83c119dad6SStan Tomov if (array != NULL) 8493fbe329SStan Tomov magma_setvector(vec->length, sizeof(array[0]), 8593fbe329SStan Tomov array, 1, impl->darray, 1); 8682b77998SStan Tomov break; 8782b77998SStan Tomov case CEED_OWN_POINTER: 8893fbe329SStan Tomov ierr = magma_malloc( (void **)&impl->darray, 89e2900954SStan Tomov vec->length * sizeof(CeedScalar)); CeedChk(ierr); 901751b0a9SStan Tomov // TODO: possible problem here is if we are passed non-pinned memory; 911751b0a9SStan Tomov // (as we own it, lter in destroy, we use free for pinned memory). 92e2900954SStan Tomov impl->array = array; 93e2900954SStan Tomov impl->own_ = 1; 94e2900954SStan Tomov 95c119dad6SStan Tomov if (array != NULL) 9693fbe329SStan Tomov magma_setvector(vec->length, sizeof(array[0]), 9793fbe329SStan Tomov array, 1, impl->darray, 1); 9882b77998SStan Tomov break; 9982b77998SStan Tomov case CEED_USE_POINTER: 100c119dad6SStan Tomov ierr = magma_malloc( (void **)&impl->darray, 101c119dad6SStan Tomov vec->length * sizeof(CeedScalar)); CeedChk(ierr); 102c119dad6SStan Tomov magma_setvector(vec->length, sizeof(array[0]), 103c119dad6SStan Tomov array, 1, impl->darray, 1); 10406b636dbSStan Tomov 10506b636dbSStan Tomov impl->down_ = 1; 10682b77998SStan Tomov impl->array = array; 10782b77998SStan Tomov } 108e2900954SStan Tomov } else if (mtype == CEED_MEM_DEVICE) { 109e2900954SStan Tomov // memory is on the device; own = 0 110e2900954SStan Tomov switch (cmode) { 111e2900954SStan Tomov case CEED_COPY_VALUES: 112e2900954SStan Tomov ierr = magma_malloc( (void **)&impl->darray, 113e2900954SStan Tomov vec->length * sizeof(CeedScalar)); CeedChk(ierr); 114e2900954SStan Tomov ierr = magma_malloc_pinned( (void **)&impl->array, 115e2900954SStan Tomov vec->length * sizeof(CeedScalar)); CeedChk(ierr); 116e2900954SStan Tomov impl->own_ = 1; 117e2900954SStan Tomov 118e2900954SStan Tomov if (array) 119e2900954SStan Tomov magma_copyvector(vec->length, sizeof(array[0]), 120e2900954SStan Tomov array, 1, impl->darray, 1); 12106b636dbSStan Tomov else 12206b636dbSStan Tomov // t30 assumes allocation initializes with 0s 12306b636dbSStan Tomov magma_setvector(vec->length, sizeof(array[0]), 12406b636dbSStan Tomov impl->array, 1, impl->darray, 1); 125e2900954SStan Tomov break; 126e2900954SStan Tomov case CEED_OWN_POINTER: 127e2900954SStan Tomov impl->darray = array; 128e2900954SStan Tomov ierr = magma_malloc_pinned( (void **)&impl->array, 129e2900954SStan Tomov vec->length * sizeof(CeedScalar)); CeedChk(ierr); 130e2900954SStan Tomov impl->own_ = 1; 131e2900954SStan Tomov 132e2900954SStan Tomov break; 133e2900954SStan Tomov case CEED_USE_POINTER: 134e2900954SStan Tomov impl->darray = array; 135e2900954SStan Tomov impl->array = NULL; 136e2900954SStan Tomov } 137e2900954SStan Tomov 138e2900954SStan Tomov } else 139e2900954SStan Tomov return CeedError(vec->ceed, 1, "Only MemType = HOST or DEVICE supported"); 140e2900954SStan Tomov 14182b77998SStan Tomov return 0; 14282b77998SStan Tomov } 14382b77998SStan Tomov 14493fbe329SStan Tomov // ***************************************************************************** 145e2900954SStan Tomov // * Give data pointer from vector vec to array (on HOST or DEVICE) 14693fbe329SStan Tomov // ***************************************************************************** 14782b77998SStan Tomov static int CeedVectorGetArray_Magma(CeedVector vec, CeedMemType mtype, 14882b77998SStan Tomov CeedScalar **array) { 14982b77998SStan Tomov CeedVector_Magma *impl = vec->data; 15082b77998SStan Tomov int ierr; 15182b77998SStan Tomov 152e2900954SStan Tomov if (mtype == CEED_MEM_HOST) { 153c119dad6SStan Tomov if (impl->own_) { 154e2900954SStan Tomov // data is owned so GPU had the most up-to-date version; copy it 15506b636dbSStan Tomov // TTT - apparantly it doesn't have most up to date data 156e2900954SStan Tomov magma_getvector(vec->length, sizeof(*array[0]), 157e2900954SStan Tomov impl->darray, 1, impl->array, 1); 15806b636dbSStan Tomov CeedDebug("\033[31m[CeedVectorGetArray_Magma]"); 15906b636dbSStan Tomov //fprintf(stderr,"rrrrrrrrrrrrrrr\n"); 160c119dad6SStan Tomov } else if (impl->array == NULL) { 161c119dad6SStan Tomov // Vector doesn't own the data and was set on GPU 162c119dad6SStan Tomov if (impl->darray == NULL) { 163c119dad6SStan Tomov // call was made just to allocate memory 164c119dad6SStan Tomov ierr = CeedVectorSetArray(vec, mtype, CEED_COPY_VALUES, NULL); 165c119dad6SStan Tomov CeedChk(ierr); 166c119dad6SStan Tomov } else 167c119dad6SStan Tomov return CeedError(vec->ceed, 1, "Can not access DEVICE vector on HOST"); 16882b77998SStan Tomov } 16982b77998SStan Tomov *array = impl->array; 170e2900954SStan Tomov } else if (mtype == CEED_MEM_DEVICE) { 171c119dad6SStan Tomov if (impl->darray == NULL) { 172c119dad6SStan Tomov // Vector doesn't own the data and was set on the CPU 173c119dad6SStan Tomov if (impl->array == NULL) { 174c119dad6SStan Tomov // call was made just to allocate memory 175e2900954SStan Tomov ierr = CeedVectorSetArray(vec, mtype, CEED_COPY_VALUES, NULL); 176e2900954SStan Tomov CeedChk(ierr); 177c119dad6SStan Tomov } else 178c119dad6SStan Tomov return CeedError(vec->ceed, 1, "Can not access HOST vector on DEVICE"); 179e2900954SStan Tomov } 180e2900954SStan Tomov *array = impl->darray; 181e2900954SStan Tomov } else 182e2900954SStan Tomov return CeedError(vec->ceed, 1, "Can only provide to HOST or DEVICE memory"); 183e2900954SStan Tomov 18482b77998SStan Tomov return 0; 18582b77998SStan Tomov } 18682b77998SStan Tomov 18793fbe329SStan Tomov // ***************************************************************************** 188e2900954SStan Tomov // * Give data pointer from vector vec to array (on HOST or DEVICE) to read it 18993fbe329SStan Tomov // ***************************************************************************** 19082b77998SStan Tomov static int CeedVectorGetArrayRead_Magma(CeedVector vec, CeedMemType mtype, 19182b77998SStan Tomov const CeedScalar **array) { 19282b77998SStan Tomov CeedVector_Magma *impl = vec->data; 19382b77998SStan Tomov int ierr; 19482b77998SStan Tomov 195e2900954SStan Tomov if (mtype == CEED_MEM_HOST) { 196c119dad6SStan Tomov if (impl->own_) { 197e2900954SStan Tomov // data is owned so GPU had the most up-to-date version; copy it 198e2900954SStan Tomov magma_getvector(vec->length, sizeof(*array[0]), 199e2900954SStan Tomov impl->darray, 1, impl->array, 1); 200c119dad6SStan Tomov } else if (impl->array == NULL) { 201c119dad6SStan Tomov // Vector doesn't own the data and was set on GPU 202c119dad6SStan Tomov if (impl->darray == NULL) { 203c119dad6SStan Tomov // call was made just to allocate memory 204c119dad6SStan Tomov ierr = CeedVectorSetArray(vec, mtype, CEED_COPY_VALUES, NULL); 205c119dad6SStan Tomov CeedChk(ierr); 206c119dad6SStan Tomov } else 207c119dad6SStan Tomov return CeedError(vec->ceed, 1, "Can not access DEVICE vector on HOST"); 20882b77998SStan Tomov } 20982b77998SStan Tomov *array = impl->array; 210e2900954SStan Tomov } else if (mtype == CEED_MEM_DEVICE) { 211c119dad6SStan Tomov if (impl->darray == NULL) { 212c119dad6SStan Tomov // Vector doesn't own the data and was set on the CPU 213c119dad6SStan Tomov if (impl->array == NULL) { 214c119dad6SStan Tomov // call was made just to allocate memory 215e2900954SStan Tomov ierr = CeedVectorSetArray(vec, mtype, CEED_COPY_VALUES, NULL); 216e2900954SStan Tomov CeedChk(ierr); 217c119dad6SStan Tomov } else 218c119dad6SStan Tomov return CeedError(vec->ceed, 1, "Can not access HOST vector on DEVICE"); 219e2900954SStan Tomov } 220e2900954SStan Tomov *array = impl->darray; 221e2900954SStan Tomov } else 222e2900954SStan Tomov return CeedError(vec->ceed, 1, "Can only provide to HOST or DEVICE memory"); 223e2900954SStan Tomov 22482b77998SStan Tomov return 0; 22582b77998SStan Tomov } 22682b77998SStan Tomov 22793fbe329SStan Tomov // ***************************************************************************** 228e2900954SStan Tomov // * There is no mtype here for array so it is not clear if we restore from HOST 229e2900954SStan Tomov // * memory or from DEVICE memory. We assume that it is CPU memory because if 230e2900954SStan Tomov // * it was GPU memory we would not call this routine at all. 23193fbe329SStan Tomov // * Restore vector vec with values from array, where array received its values 23293fbe329SStan Tomov // * from vec and possibly modified them. 23393fbe329SStan Tomov // ***************************************************************************** 23482b77998SStan Tomov static int CeedVectorRestoreArray_Magma(CeedVector vec, CeedScalar **array) { 23593fbe329SStan Tomov CeedVector_Magma *impl = vec->data; 23693fbe329SStan Tomov 237c119dad6SStan Tomov // Check if the array is a CPU pointer 238c119dad6SStan Tomov if (*array == impl->array) { 239c119dad6SStan Tomov // Update device, if the device pointer is not NULL 240c119dad6SStan Tomov if (impl->darray != NULL) { 24193fbe329SStan Tomov magma_setvector(vec->length, sizeof(*array[0]), 24293fbe329SStan Tomov *array, 1, impl->darray, 1); 2435a9ca9adSVeselin Dobrev } else { 2445a9ca9adSVeselin Dobrev // nothing to do (case of CPU use pointer) 2455a9ca9adSVeselin Dobrev } 246c119dad6SStan Tomov 247c119dad6SStan Tomov } else if (impl->down_) { 248c119dad6SStan Tomov // nothing to do if array is on GPU, except if down_=1(case CPU use pointer) 249c119dad6SStan Tomov magma_getvector(vec->length, sizeof(*array[0]), 250c119dad6SStan Tomov impl->darray, 1, impl->array, 1); 251c119dad6SStan Tomov } 252e2900954SStan Tomov 25382b77998SStan Tomov *array = NULL; 25482b77998SStan Tomov return 0; 25582b77998SStan Tomov } 25682b77998SStan Tomov 25793fbe329SStan Tomov // ***************************************************************************** 258e2900954SStan Tomov // * There is no mtype here for array so it is not clear if we restore from HOST 259e2900954SStan Tomov // * memory or from DEVICE memory. We assume that it is CPU memory because if 260e2900954SStan Tomov // * it was GPU memory we would not call this routine at all. 26193fbe329SStan Tomov // * Restore vector vec with values from array, where array received its values 26293fbe329SStan Tomov // * from vec to only read them; in this case vec may have been modified meanwhile 26393fbe329SStan Tomov // * and needs to be restored here. 26493fbe329SStan Tomov // ***************************************************************************** 26582b77998SStan Tomov static int CeedVectorRestoreArrayRead_Magma(CeedVector vec, 26682b77998SStan Tomov const CeedScalar **array) { 26793fbe329SStan Tomov CeedVector_Magma *impl = vec->data; 26893fbe329SStan Tomov 269c119dad6SStan Tomov // Check if the array is a CPU pointer 270c119dad6SStan Tomov if (*array == impl->array) { 271c119dad6SStan Tomov // Update device, if the device pointer is not NULL 2725a9ca9adSVeselin Dobrev if (impl->darray != NULL) { 27393fbe329SStan Tomov magma_setvector(vec->length, sizeof(*array[0]), 27493fbe329SStan Tomov *array, 1, impl->darray, 1); 2755a9ca9adSVeselin Dobrev } else { 2765a9ca9adSVeselin Dobrev // nothing to do (case of CPU use pointer) 2775a9ca9adSVeselin Dobrev } 278c119dad6SStan Tomov 279c119dad6SStan Tomov } else if (impl->down_) { 280c119dad6SStan Tomov // nothing to do if array is on GPU, except if down_=1(case CPU use pointer) 281c119dad6SStan Tomov magma_getvector(vec->length, sizeof(*array[0]), 282c119dad6SStan Tomov impl->darray, 1, impl->array, 1); 283c119dad6SStan Tomov } 284e2900954SStan Tomov 28582b77998SStan Tomov *array = NULL; 28682b77998SStan Tomov return 0; 28782b77998SStan Tomov } 28882b77998SStan Tomov 28982b77998SStan Tomov static int CeedVectorDestroy_Magma(CeedVector vec) { 29082b77998SStan Tomov CeedVector_Magma *impl = vec->data; 29182b77998SStan Tomov int ierr; 29282b77998SStan Tomov 293e2900954SStan Tomov // Free if we own the data 294e2900954SStan Tomov if (impl->own_) { 295e2900954SStan Tomov ierr = magma_free_pinned(impl->array); CeedChk(ierr); 29693fbe329SStan Tomov ierr = magma_free(impl->darray); CeedChk(ierr); 297c119dad6SStan Tomov } else if (impl->down_) { 298c119dad6SStan Tomov ierr = magma_free(impl->darray); CeedChk(ierr); 299e2900954SStan Tomov } 30082b77998SStan Tomov ierr = CeedFree(&vec->data); CeedChk(ierr); 30182b77998SStan Tomov return 0; 30282b77998SStan Tomov } 30382b77998SStan Tomov 30493fbe329SStan Tomov // ***************************************************************************** 30593fbe329SStan Tomov // * Create vector vec of size n 30693fbe329SStan Tomov // ***************************************************************************** 307667bc5fcSjeremylt static int CeedVectorCreate_Magma(CeedInt n, CeedVector vec) { 30882b77998SStan Tomov CeedVector_Magma *impl; 30982b77998SStan Tomov int ierr; 31082b77998SStan Tomov 31182b77998SStan Tomov vec->SetArray = CeedVectorSetArray_Magma; 31282b77998SStan Tomov vec->GetArray = CeedVectorGetArray_Magma; 31382b77998SStan Tomov vec->GetArrayRead = CeedVectorGetArrayRead_Magma; 31482b77998SStan Tomov vec->RestoreArray = CeedVectorRestoreArray_Magma; 31582b77998SStan Tomov vec->RestoreArrayRead = CeedVectorRestoreArrayRead_Magma; 31682b77998SStan Tomov vec->Destroy = CeedVectorDestroy_Magma; 31782b77998SStan Tomov ierr = CeedCalloc(1,&impl); CeedChk(ierr); 31893fbe329SStan Tomov impl->darray = NULL; 319e2900954SStan Tomov impl->array = NULL; 320e2900954SStan Tomov impl->own_ = 0; 321c119dad6SStan Tomov impl->down_= 0; 32282b77998SStan Tomov vec->data = impl; 32382b77998SStan Tomov return 0; 32482b77998SStan Tomov } 32582b77998SStan Tomov 3263bfcb0deSjeremylt 32793fbe329SStan Tomov // ***************************************************************************** 32893fbe329SStan Tomov // * Apply restriction operator r to u: v = r(rmode) u 32993fbe329SStan Tomov // ***************************************************************************** 33082b77998SStan Tomov static int CeedElemRestrictionApply_Magma(CeedElemRestriction r, 3313bfcb0deSjeremylt CeedTransposeMode tmode, 33282b77998SStan Tomov CeedTransposeMode lmode, CeedVector u, 33382b77998SStan Tomov CeedVector v, CeedRequest *request) { 33482b77998SStan Tomov CeedElemRestriction_Magma *impl = r->data; 33582b77998SStan Tomov int ierr; 33682b77998SStan Tomov const CeedScalar *uu; 33782b77998SStan Tomov CeedScalar *vv; 3383bfcb0deSjeremylt CeedInt nelem = r->nelem, elemsize = r->elemsize, ndof = r->ndof, 3393bfcb0deSjeremylt ncomp=r->ncomp; 340c119dad6SStan Tomov CeedInt esize = nelem * elemsize; 34106b636dbSStan Tomov 34206b636dbSStan Tomov #ifdef USE_MAGMA_BATCH2 343137a0714SStan Tomov CeedInt *dindices = impl->dindices; 344c119dad6SStan Tomov // Get pointers on the device 345c119dad6SStan Tomov ierr = CeedVectorGetArrayRead(u, CEED_MEM_DEVICE, &uu); CeedChk(ierr); 346c119dad6SStan Tomov ierr = CeedVectorGetArray(v, CEED_MEM_DEVICE, &vv); CeedChk(ierr); 34706b636dbSStan Tomov #else 34806b636dbSStan Tomov CeedInt *indices = impl->indices; 34906b636dbSStan Tomov ierr = CeedVectorGetArrayRead(u, CEED_MEM_HOST, &uu); CeedChk(ierr); 35006b636dbSStan Tomov ierr = CeedVectorGetArray(v, CEED_MEM_HOST, &vv); CeedChk(ierr); 35106b636dbSStan Tomov #endif 352c119dad6SStan Tomov 35382b77998SStan Tomov if (tmode == CEED_NOTRANSPOSE) { 35482b77998SStan Tomov // Perform: v = r * u 355135a076eSjeremylt if (!impl->indices) { 356135a076eSjeremylt for (CeedInt i=0; i<esize*ncomp; i++) vv[i] = uu[i]; 357135a076eSjeremylt } else if (ncomp == 1) { 35806b636dbSStan Tomov #ifdef USE_MAGMA_BATCH2 359c119dad6SStan Tomov magma_template<<i=0:esize>> 360c119dad6SStan Tomov (const CeedScalar *uu, CeedScalar *vv, CeedInt *dindices) { 361c119dad6SStan Tomov vv[i] = uu[dindices[i]]; 362e2900954SStan Tomov } 36306b636dbSStan Tomov #else 36406b636dbSStan Tomov for (CeedInt i=0; i<esize; i++) vv[i] = uu[indices[i]]; 36506b636dbSStan Tomov #endif 36682b77998SStan Tomov } else { 36782b77998SStan Tomov // vv is (elemsize x ncomp x nelem), column-major 36882b77998SStan Tomov if (lmode == CEED_NOTRANSPOSE) { // u is (ndof x ncomp), column-major 36906b636dbSStan Tomov #ifdef USE_MAGMA_BATCH2 37006b636dbSStan Tomov magma_template<<e=0:nelem, d=0:ncomp, i=0:elemsize>> 37106b636dbSStan Tomov (const CeedScalar *uu, CeedScalar *vv, CeedInt *dindices, int ndof) { 37206b636dbSStan Tomov vv[i + iend*(d+dend*e)] = uu[dindices[i+iend*e]+ndof*d]; 37306b636dbSStan Tomov } 37406b636dbSStan Tomov #else 375c119dad6SStan Tomov for (CeedInt e = 0; e < nelem; e++) 37682b77998SStan Tomov for (CeedInt d = 0; d < ncomp; d++) 377c119dad6SStan Tomov for (CeedInt i=0; i < elemsize; i++) { 378c119dad6SStan Tomov vv[i + elemsize*(d+ncomp*e)] = 379c119dad6SStan Tomov uu[indices[i+elemsize*e]+ndof*d]; 380c119dad6SStan Tomov } 38106b636dbSStan Tomov #endif 38282b77998SStan Tomov } else { // u is (ncomp x ndof), column-major 38306b636dbSStan Tomov #ifdef USE_MAGMA_BATCH2 38406b636dbSStan Tomov magma_template<<e=0:nelem, d=0:ncomp, i=0:elemsize>> 38506b636dbSStan Tomov (const CeedScalar *uu, CeedScalar *vv, CeedInt *dindices) { 38606b636dbSStan Tomov vv[i + iend*(d+dend*e)] = uu[d+dend*dindices[i + iend*e]]; 38706b636dbSStan Tomov } 38806b636dbSStan Tomov #else 389c119dad6SStan Tomov for (CeedInt e = 0; e < nelem; e++) 39082b77998SStan Tomov for (CeedInt d = 0; d < ncomp; d++) 391c119dad6SStan Tomov for (CeedInt i=0; i< elemsize; i++) { 392c119dad6SStan Tomov vv[i + elemsize*(d+ncomp*e)] = 393c119dad6SStan Tomov uu[d+ncomp*indices[i+elemsize*e]]; 394c119dad6SStan Tomov } 39506b636dbSStan Tomov #endif 39682b77998SStan Tomov } 39782b77998SStan Tomov } 39882b77998SStan Tomov } else { 39982b77998SStan Tomov // Note: in transpose mode, we perform: v += r^t * u 400135a076eSjeremylt if (!impl->indices) { 401135a076eSjeremylt for (CeedInt i=0; i<esize; i++) vv[i] += uu[i]; 402135a076eSjeremylt } else if (ncomp == 1) { 403c119dad6SStan Tomov // fprintf(stderr,"3 ---------\n"); 40406b636dbSStan Tomov #ifdef USE_MAGMA_BATCH2 405c119dad6SStan Tomov magma_template<<i=0:esize>> 406c119dad6SStan Tomov (const CeedScalar *uu, CeedScalar *vv, CeedInt *dindices) { 407c119dad6SStan Tomov magmablas_datomic_add( &vv[dindices[i]], uu[i]); 408c119dad6SStan Tomov } 40906b636dbSStan Tomov #else 41006b636dbSStan Tomov for (CeedInt i=0; i<esize; i++) vv[indices[i]] += uu[i]; 41106b636dbSStan Tomov #endif 412c119dad6SStan Tomov } else { // u is (elemsize x ncomp x nelem) 413c119dad6SStan Tomov fprintf(stderr,"2 ---------\n"); 414c119dad6SStan Tomov 41582b77998SStan Tomov if (lmode == CEED_NOTRANSPOSE) { // vv is (ndof x ncomp), column-major 41606b636dbSStan Tomov #ifdef USE_MAGMA_BATCH2 41706b636dbSStan Tomov magma_template<<e=0:nelem, d=0:ncomp, i=0:elemsize>> 41806b636dbSStan Tomov (const CeedScalar *uu, CeedScalar *vv, CeedInt *dindices, CeedInt ndof) { 41906b636dbSStan Tomov magmablas_datomic_add( &vv[dindices[i+iend*e]+ndof*d], 42006b636dbSStan Tomov uu[i+iend*(d+e*dend)]); 42106b636dbSStan Tomov } 42206b636dbSStan Tomov #else 423c119dad6SStan Tomov for (CeedInt e = 0; e < nelem; e++) 42482b77998SStan Tomov for (CeedInt d = 0; d < ncomp; d++) 425c119dad6SStan Tomov for (CeedInt i=0; i < elemsize; i++) { 426c119dad6SStan Tomov vv[indices[i + elemsize*e]+ndof*d] += 427c119dad6SStan Tomov uu[i + elemsize*(d+e*ncomp)]; 428c119dad6SStan Tomov } 42906b636dbSStan Tomov #endif 43006b636dbSStan Tomov } else { // vv is (ncomp x ndof), column-major 43106b636dbSStan Tomov #ifdef USE_MAGMA_BATCH2 432c119dad6SStan Tomov magma_template<<e=0:nelem, d=0:ncomp, i=0:elemsize>> 43306b636dbSStan Tomov (const CeedScalar *uu, CeedScalar *vv, CeedInt *dindices) { 43406b636dbSStan Tomov magmablas_datomic_add( &vv[d+dend*dindices[i + iend*e]], 435c119dad6SStan Tomov uu[i+iend*(d+e*dend)]); 43682b77998SStan Tomov } 43706b636dbSStan Tomov #else 438c119dad6SStan Tomov for (CeedInt e = 0; e < nelem; e++) 43982b77998SStan Tomov for (CeedInt d = 0; d < ncomp; d++) 440c119dad6SStan Tomov for (CeedInt i=0; i < elemsize; i++) { 441c119dad6SStan Tomov vv[d+ncomp*indices[i + elemsize*e]] += 442c119dad6SStan Tomov uu[i + elemsize*(d+e*ncomp)]; 443c119dad6SStan Tomov } 44406b636dbSStan Tomov #endif 44582b77998SStan Tomov } 44682b77998SStan Tomov } 44782b77998SStan Tomov } 44806b636dbSStan Tomov 44982b77998SStan Tomov ierr = CeedVectorRestoreArrayRead(u, &uu); CeedChk(ierr); 45082b77998SStan Tomov ierr = CeedVectorRestoreArray(v, &vv); CeedChk(ierr); 451c119dad6SStan Tomov 45282b77998SStan Tomov if (request != CEED_REQUEST_IMMEDIATE && request != CEED_REQUEST_ORDERED) 45382b77998SStan Tomov *request = NULL; 45482b77998SStan Tomov return 0; 45582b77998SStan Tomov } 45682b77998SStan Tomov 45782b77998SStan Tomov static int CeedElemRestrictionDestroy_Magma(CeedElemRestriction r) { 45882b77998SStan Tomov CeedElemRestriction_Magma *impl = r->data; 45982b77998SStan Tomov int ierr; 46082b77998SStan Tomov 4611751b0a9SStan Tomov // Free if we own the data 4621751b0a9SStan Tomov if (impl->own_) { 4631751b0a9SStan Tomov ierr = magma_free_pinned(impl->indices); CeedChk(ierr); 4641751b0a9SStan Tomov ierr = magma_free(impl->dindices); CeedChk(ierr); 4651981b6e4STzanio } else if (impl->down_) { 466c119dad6SStan Tomov ierr = magma_free(impl->dindices); CeedChk(ierr); 467c119dad6SStan Tomov } 46882b77998SStan Tomov ierr = CeedFree(&r->data); CeedChk(ierr); 46982b77998SStan Tomov return 0; 47082b77998SStan Tomov } 47182b77998SStan Tomov 472667bc5fcSjeremylt static int CeedElemRestrictionCreate_Magma(CeedMemType mtype, 47393fbe329SStan Tomov CeedCopyMode cmode, 474667bc5fcSjeremylt const CeedInt *indices, CeedElemRestriction r) { 4751751b0a9SStan Tomov int ierr, size = r->nelem*r->elemsize; 47682b77998SStan Tomov CeedElemRestriction_Magma *impl; 47782b77998SStan Tomov 4781751b0a9SStan Tomov // Allocate memory for the MAGMA Restricton and initializa pointers to NULL 47982b77998SStan Tomov ierr = CeedCalloc(1,&impl); CeedChk(ierr); 4801751b0a9SStan Tomov impl->dindices = NULL; 4811751b0a9SStan Tomov impl->indices = NULL; 4821751b0a9SStan Tomov impl->own_ = 0; 483c119dad6SStan Tomov impl->down_= 0; 4841751b0a9SStan Tomov 4851751b0a9SStan Tomov if (mtype == CEED_MEM_HOST) { 4861751b0a9SStan Tomov // memory is on the host; own_ = 0 48782b77998SStan Tomov switch (cmode) { 48882b77998SStan Tomov case CEED_COPY_VALUES: 4891751b0a9SStan Tomov ierr = magma_malloc( (void **)&impl->dindices, 4901751b0a9SStan Tomov size * sizeof(CeedInt)); CeedChk(ierr); 4911751b0a9SStan Tomov ierr = magma_malloc_pinned( (void **)&impl->indices, 4921751b0a9SStan Tomov size * sizeof(CeedInt)); CeedChk(ierr); 4931751b0a9SStan Tomov impl->own_ = 1; 4941751b0a9SStan Tomov 49506b636dbSStan Tomov if (indices != NULL) { 49606b636dbSStan Tomov memcpy(impl->indices, indices, size * sizeof(indices[0])); 4971751b0a9SStan Tomov magma_setvector(size, sizeof(CeedInt), 49806b636dbSStan Tomov impl->indices, 1, impl->dindices, 1); 49906b636dbSStan Tomov } 50082b77998SStan Tomov break; 50182b77998SStan Tomov case CEED_OWN_POINTER: 5021751b0a9SStan Tomov ierr = magma_malloc( (void **)&impl->dindices, 5031751b0a9SStan Tomov size * sizeof(CeedInt)); CeedChk(ierr); 5041751b0a9SStan Tomov // TODO: possible problem here is if we are passed non-pinned memory; 5051751b0a9SStan Tomov // (as we own it, lter in destroy, we use free for pinned memory). 506137a0714SStan Tomov impl->indices = (CeedInt *)indices; 5071751b0a9SStan Tomov impl->own_ = 1; 5081751b0a9SStan Tomov 509c119dad6SStan Tomov if (indices != NULL) 5101751b0a9SStan Tomov magma_setvector(size, sizeof(CeedInt), 5111751b0a9SStan Tomov indices, 1, impl->dindices, 1); 51282b77998SStan Tomov break; 51382b77998SStan Tomov case CEED_USE_POINTER: 514c119dad6SStan Tomov ierr = magma_malloc( (void **)&impl->dindices, 515c119dad6SStan Tomov size * sizeof(CeedInt)); CeedChk(ierr); 516c119dad6SStan Tomov magma_setvector(size, sizeof(CeedInt), 517c119dad6SStan Tomov indices, 1, impl->dindices, 1); 518c119dad6SStan Tomov impl->down_ = 1; 519137a0714SStan Tomov impl->indices = (CeedInt *)indices; 52082b77998SStan Tomov } 5211751b0a9SStan Tomov } else if (mtype == CEED_MEM_DEVICE) { 5221751b0a9SStan Tomov // memory is on the device; own = 0 5231751b0a9SStan Tomov switch (cmode) { 5241751b0a9SStan Tomov case CEED_COPY_VALUES: 5251751b0a9SStan Tomov ierr = magma_malloc( (void **)&impl->dindices, 5261751b0a9SStan Tomov size * sizeof(CeedInt)); CeedChk(ierr); 5271751b0a9SStan Tomov ierr = magma_malloc_pinned( (void **)&impl->indices, 5281751b0a9SStan Tomov size * sizeof(CeedInt)); CeedChk(ierr); 5291751b0a9SStan Tomov impl->own_ = 1; 5301751b0a9SStan Tomov 5311751b0a9SStan Tomov if (indices) 5321751b0a9SStan Tomov magma_copyvector(size, sizeof(CeedInt), 5331751b0a9SStan Tomov indices, 1, impl->dindices, 1); 5341751b0a9SStan Tomov break; 5351751b0a9SStan Tomov case CEED_OWN_POINTER: 536137a0714SStan Tomov impl->dindices = (CeedInt *)indices; 5371751b0a9SStan Tomov ierr = magma_malloc_pinned( (void **)&impl->indices, 5381751b0a9SStan Tomov size * sizeof(CeedInt)); CeedChk(ierr); 5391751b0a9SStan Tomov impl->own_ = 1; 5401751b0a9SStan Tomov 5411751b0a9SStan Tomov break; 5421751b0a9SStan Tomov case CEED_USE_POINTER: 543137a0714SStan Tomov impl->dindices = (CeedInt *)indices; 5441751b0a9SStan Tomov impl->indices = NULL; 5451751b0a9SStan Tomov } 5461751b0a9SStan Tomov 5471751b0a9SStan Tomov } else 5481751b0a9SStan Tomov return CeedError(r->ceed, 1, "Only MemType = HOST or DEVICE supported"); 5491751b0a9SStan Tomov 55082b77998SStan Tomov r->data = impl; 55182b77998SStan Tomov r->Apply = CeedElemRestrictionApply_Magma; 55282b77998SStan Tomov r->Destroy = CeedElemRestrictionDestroy_Magma; 5531751b0a9SStan Tomov 55482b77998SStan Tomov return 0; 55582b77998SStan Tomov } 55682b77998SStan Tomov 557667bc5fcSjeremylt static int CeedElemRestrictionCreateBlocked_Magma(CeedMemType mtype, 5584cc6aec3Sjeremylt CeedCopyMode cmode, 559667bc5fcSjeremylt const CeedInt *indices, CeedElemRestriction r) { 5604cc6aec3Sjeremylt return CeedError(r->ceed, 1, "Backend does not implement blocked restrictions"); 5614cc6aec3Sjeremylt } 5624cc6aec3Sjeremylt 56382b77998SStan Tomov // Contracts on the middle index 56482b77998SStan Tomov // NOTRANSPOSE: V_ajc = T_jb U_abc 56582b77998SStan Tomov // TRANSPOSE: V_ajc = T_bj U_abc 56682b77998SStan Tomov // If Add != 0, "=" is replaced by "+=" 56782b77998SStan Tomov static int CeedTensorContract_Magma(Ceed ceed, 56882b77998SStan Tomov CeedInt A, CeedInt B, CeedInt C, CeedInt J, 56982b77998SStan Tomov const CeedScalar *t, CeedTransposeMode tmode, 57082b77998SStan Tomov const CeedInt Add, 57182b77998SStan Tomov const CeedScalar *u, CeedScalar *v) { 57238612b08SStan Tomov #ifdef USE_MAGMA_BATCH 57338612b08SStan Tomov magma_dtensor_contract(ceed, A, B, C, J, t, tmode, Add, u, v); 57438612b08SStan Tomov #else 57582b77998SStan Tomov CeedInt tstride0 = B, tstride1 = 1; 57682b77998SStan Tomov if (tmode == CEED_TRANSPOSE) { 57782b77998SStan Tomov tstride0 = 1; tstride1 = J; 57882b77998SStan Tomov } 57938612b08SStan Tomov CeedDebug("\033[31m[CeedTensorContract] A=%d, J=%d, C=%d, B=%d: %d %d %d", 58038612b08SStan Tomov A,J,C,B,A*J*B*C, C*J*A, C*B*A); 58182b77998SStan Tomov for (CeedInt a=0; a<A; a++) { 58282b77998SStan Tomov for (CeedInt j=0; j<J; j++) { 58382b77998SStan Tomov if (!Add) { 58482b77998SStan Tomov for (CeedInt c=0; c<C; c++) 58582b77998SStan Tomov v[(a*J+j)*C+c] = 0; 58682b77998SStan Tomov } 58782b77998SStan Tomov for (CeedInt b=0; b<B; b++) { 58882b77998SStan Tomov for (CeedInt c=0; c<C; c++) { 58982b77998SStan Tomov v[(a*J+j)*C+c] += t[j*tstride0 + b*tstride1] * u[(a*B+b)*C+c]; 59082b77998SStan Tomov } 59182b77998SStan Tomov } 59282b77998SStan Tomov } 59382b77998SStan Tomov } 59438612b08SStan Tomov #endif 59582b77998SStan Tomov return 0; 59682b77998SStan Tomov } 59782b77998SStan Tomov 598d3181881Sjeremylt static int CeedBasisApply_Magma(CeedBasis basis, CeedInt nelem, 599d3181881Sjeremylt CeedTransposeMode tmode, CeedEvalMode emode, 60016383ffcSjeremylt CeedVector U, CeedVector V) { 60182b77998SStan Tomov int ierr; 60282b77998SStan Tomov const CeedInt dim = basis->dim; 6030f5de9e9Sjeremylt const CeedInt ncomp = basis->ncomp; 6040f5de9e9Sjeremylt const CeedInt nqpt = ncomp*CeedPowInt(basis->Q1d, dim); 60582b77998SStan Tomov const CeedInt add = (tmode == CEED_TRANSPOSE); 60616383ffcSjeremylt const CeedScalar *u; 60716383ffcSjeremylt CeedScalar *v; 60816383ffcSjeremylt if (U) { 60916383ffcSjeremylt ierr = CeedVectorGetArrayRead(U, CEED_MEM_HOST, &u); CeedChk(ierr); 61016383ffcSjeremylt } else if (emode != CEED_EVAL_WEIGHT) { 61116383ffcSjeremylt return CeedError(ceed, 1, 61216383ffcSjeremylt "An input vector is required for this CeedEvalMode"); 61316383ffcSjeremylt } 61416383ffcSjeremylt ierr = CeedVectorGetArray(V, CEED_MEM_HOST, &v); CeedChk(ierr); 61582b77998SStan Tomov 616c44a85f4Sjeremylt if (nelem != 1) 617c44a85f4Sjeremylt return CeedError(basis->ceed, 1, 618c44a85f4Sjeremylt "This backend does not support BasisApply for multiple elements"); 619c44a85f4Sjeremylt 62038612b08SStan Tomov CeedDebug("\033[01m[CeedBasisApply_Magma] vsize=%d", 6210f5de9e9Sjeremylt ncomp*CeedPowInt(basis->P1d, dim)); 62238612b08SStan Tomov 62382b77998SStan Tomov if (tmode == CEED_TRANSPOSE) { 6240f5de9e9Sjeremylt const CeedInt vsize = ncomp*CeedPowInt(basis->P1d, dim); 62582b77998SStan Tomov for (CeedInt i = 0; i < vsize; i++) 62682b77998SStan Tomov v[i] = (CeedScalar) 0; 62782b77998SStan Tomov } 62882b77998SStan Tomov if (emode & CEED_EVAL_INTERP) { 62982b77998SStan Tomov CeedInt P = basis->P1d, Q = basis->Q1d; 63082b77998SStan Tomov if (tmode == CEED_TRANSPOSE) { 63182b77998SStan Tomov P = basis->Q1d; Q = basis->P1d; 63282b77998SStan Tomov } 6330f5de9e9Sjeremylt CeedInt pre = ncomp*CeedPowInt(P, dim-1), post = 1; 6340f5de9e9Sjeremylt CeedScalar tmp[2][ncomp*Q*CeedPowInt(P>Q?P:Q, dim-1)]; 63538612b08SStan Tomov CeedDebug("\033[01m[CeedBasisApply_Magma] tmpsize = %d", 6360f5de9e9Sjeremylt ncomp*Q*CeedPowInt(P>Q?P:Q, dim-1)); 63782b77998SStan Tomov for (CeedInt d=0; d<dim; d++) { 63882b77998SStan Tomov ierr = CeedTensorContract_Magma(basis->ceed, pre, P, post, Q, basis->interp1d, 63982b77998SStan Tomov tmode, add&&(d==dim-1), 64082b77998SStan Tomov d==0?u:tmp[d%2], d==dim-1?v:tmp[(d+1)%2]); 64182b77998SStan Tomov CeedChk(ierr); 64282b77998SStan Tomov pre /= P; 64382b77998SStan Tomov post *= Q; 64482b77998SStan Tomov } 64582b77998SStan Tomov if (tmode == CEED_NOTRANSPOSE) { 64682b77998SStan Tomov v += nqpt; 64782b77998SStan Tomov } else { 64882b77998SStan Tomov u += nqpt; 64982b77998SStan Tomov } 65082b77998SStan Tomov } 65182b77998SStan Tomov if (emode & CEED_EVAL_GRAD) { 65282b77998SStan Tomov CeedInt P = basis->P1d, Q = basis->Q1d; 65382b77998SStan Tomov // In CEED_NOTRANSPOSE mode: 6540f5de9e9Sjeremylt // u is (P^dim x nc), column-major layout (nc = ncomp) 6550f5de9e9Sjeremylt // v is (Q^dim x nc x dim), column-major layout (nc = ncomp) 65682b77998SStan Tomov // In CEED_TRANSPOSE mode, the sizes of u and v are switched. 65782b77998SStan Tomov if (tmode == CEED_TRANSPOSE) { 65882b77998SStan Tomov P = basis->Q1d, Q = basis->P1d; 65982b77998SStan Tomov } 6600f5de9e9Sjeremylt CeedScalar tmp[2][ncomp*Q*CeedPowInt(P>Q?P:Q, dim-1)]; 66138612b08SStan Tomov CeedDebug("\033[01m[CeedBasisApply_Magma] tmpsize = %d", 6620f5de9e9Sjeremylt ncomp*Q*CeedPowInt(P>Q?P:Q, dim-1)); 66382b77998SStan Tomov for (CeedInt p = 0; p < dim; p++) { 6640f5de9e9Sjeremylt CeedInt pre = ncomp*CeedPowInt(P, dim-1), post = 1; 66582b77998SStan Tomov for (CeedInt d=0; d<dim; d++) { 66682b77998SStan Tomov ierr = CeedTensorContract_Magma(basis->ceed, pre, P, post, Q, 66782b77998SStan Tomov (p==d)?basis->grad1d:basis->interp1d, 66882b77998SStan Tomov tmode, add&&(d==dim-1), 66982b77998SStan Tomov d==0?u:tmp[d%2], d==dim-1?v:tmp[(d+1)%2]); 67082b77998SStan Tomov CeedChk(ierr); 67182b77998SStan Tomov pre /= P; 67282b77998SStan Tomov post *= Q; 67382b77998SStan Tomov } 67482b77998SStan Tomov if (tmode == CEED_NOTRANSPOSE) { 67582b77998SStan Tomov v += nqpt; 67682b77998SStan Tomov } else { 67782b77998SStan Tomov u += nqpt; 67882b77998SStan Tomov } 67982b77998SStan Tomov } 68082b77998SStan Tomov } 68182b77998SStan Tomov if (emode & CEED_EVAL_WEIGHT) { 68282b77998SStan Tomov if (tmode == CEED_TRANSPOSE) 68382b77998SStan Tomov return CeedError(basis->ceed, 1, 68482b77998SStan Tomov "CEED_EVAL_WEIGHT incompatible with CEED_TRANSPOSE"); 68582b77998SStan Tomov CeedInt Q = basis->Q1d; 68682b77998SStan Tomov for (CeedInt d=0; d<dim; d++) { 68782b77998SStan Tomov CeedInt pre = CeedPowInt(Q, dim-d-1), post = CeedPowInt(Q, d); 68882b77998SStan Tomov for (CeedInt i=0; i<pre; i++) { 68982b77998SStan Tomov for (CeedInt j=0; j<Q; j++) { 69082b77998SStan Tomov for (CeedInt k=0; k<post; k++) { 69182b77998SStan Tomov v[(i*Q + j)*post + k] = basis->qweight1d[j] 69282b77998SStan Tomov * (d == 0 ? 1 : v[(i*Q + j)*post + k]); 69382b77998SStan Tomov } 69482b77998SStan Tomov } 69582b77998SStan Tomov } 69682b77998SStan Tomov } 69782b77998SStan Tomov } 69816383ffcSjeremylt if (U) { 69916383ffcSjeremylt ierr = CeedVectorRestoreArrayRead(U, &u); CeedChk(ierr); 70016383ffcSjeremylt } 70116383ffcSjeremylt ierr = CeedVectorRestoreArray(V, &v); CeedChk(ierr); 70282b77998SStan Tomov return 0; 70382b77998SStan Tomov } 70482b77998SStan Tomov 70582b77998SStan Tomov static int CeedBasisDestroy_Magma(CeedBasis basis) { 70682b77998SStan Tomov return 0; 70782b77998SStan Tomov } 70882b77998SStan Tomov 709667bc5fcSjeremylt static int CeedBasisCreateTensorH1_Magma(CeedInt dim, CeedInt P1d, 71082b77998SStan Tomov CeedInt Q1d, const CeedScalar *interp1d, 71182b77998SStan Tomov const CeedScalar *grad1d, 71282b77998SStan Tomov const CeedScalar *qref1d, 71382b77998SStan Tomov const CeedScalar *qweight1d, 71482b77998SStan Tomov CeedBasis basis) { 71582b77998SStan Tomov basis->Apply = CeedBasisApply_Magma; 71682b77998SStan Tomov basis->Destroy = CeedBasisDestroy_Magma; 71782b77998SStan Tomov return 0; 71882b77998SStan Tomov } 71982b77998SStan Tomov 720667bc5fcSjeremylt static int CeedBasisCreateH1_Magma(CeedElemTopology topo, CeedInt dim, 721a8de75f0Sjeremylt CeedInt ndof, CeedInt nqpts, 722a8de75f0Sjeremylt const CeedScalar *interp, 723a8de75f0Sjeremylt const CeedScalar *grad, 724a8de75f0Sjeremylt const CeedScalar *qref, 725a8de75f0Sjeremylt const CeedScalar *qweight, 726a8de75f0Sjeremylt CeedBasis basis) { 727a8de75f0Sjeremylt return CeedError(basis->ceed, 1, "Backend does not implement non-tensor bases"); 728a8de75f0Sjeremylt } 729a8de75f0Sjeremylt 7303bfcb0deSjeremylt static int CeedQFunctionApply_Magma(CeedQFunction qf, CeedInt Q, 73116383ffcSjeremylt CeedVector *U, CeedVector *V) { 73282b77998SStan Tomov int ierr; 73316383ffcSjeremylt CeedQFunction_Ref *impl; 73416383ffcSjeremylt ierr = CeedQFunctionGetData(qf, (void *)&impl); CeedChk(ierr); 73516383ffcSjeremylt 73616383ffcSjeremylt void *ctx; 73716383ffcSjeremylt ierr = CeedQFunctionGetContext(qf, &ctx); CeedChk(ierr); 73816383ffcSjeremylt 73916383ffcSjeremylt int (*f)() = NULL; 74016383ffcSjeremylt ierr = CeedQFunctionGetUserFunction(qf, (int (* *)())&f); CeedChk(ierr); 74116383ffcSjeremylt 74216383ffcSjeremylt CeedInt nIn, nOut; 74316383ffcSjeremylt ierr = CeedQFunctionGetNumArgs(qf, &nIn, &nOut); CeedChk(ierr); 74416383ffcSjeremylt 74516383ffcSjeremylt for (int i = 0; i<nIn; i++) { 74616383ffcSjeremylt if (U[i]) { 74716383ffcSjeremylt ierr = CeedVectorGetArrayRead(U[i], CEED_MEM_HOST, &impl->inputs[i]); 74816383ffcSjeremylt CeedChk(ierr); 74916383ffcSjeremylt } 75016383ffcSjeremylt } 75116383ffcSjeremylt for (int i = 0; i<nOut; i++) { 75216383ffcSjeremylt if (U[i]) { 75316383ffcSjeremylt ierr = CeedVectorGetArray(V[i], CEED_MEM_HOST, &impl->outputs[i]); 75416383ffcSjeremylt CeedChk(ierr); 75516383ffcSjeremylt } 75616383ffcSjeremylt } 75716383ffcSjeremylt 75816383ffcSjeremylt ierr = f(ctx, Q, impl->inputs, impl->outputs); CeedChk(ierr); 75916383ffcSjeremylt 76016383ffcSjeremylt for (int i = 0; i<nIn; i++) { 76116383ffcSjeremylt if (U[i]) { 76216383ffcSjeremylt ierr = CeedVectorRestoreArrayRead(U[i], &impl->inputs[i]); CeedChk(ierr); 76316383ffcSjeremylt } 76416383ffcSjeremylt } 76516383ffcSjeremylt for (int i = 0; i<nOut; i++) { 76616383ffcSjeremylt if (U[i]) { 76716383ffcSjeremylt ierr = CeedVectorRestoreArray(V[i], &impl->outputs[i]); CeedChk(ierr); 76816383ffcSjeremylt } 76916383ffcSjeremylt } 77082b77998SStan Tomov return 0; 77182b77998SStan Tomov } 77282b77998SStan Tomov 77382b77998SStan Tomov static int CeedQFunctionDestroy_Magma(CeedQFunction qf) { 77416383ffcSjeremylt int ierr; 77516383ffcSjeremylt CeedQFunction_Magma *impl; 77616383ffcSjeremylt ierr = CeedQFunctionGetData(qf, (void *)&impl); CeedChk(ierr); 77716383ffcSjeremylt 77816383ffcSjeremylt ierr = CeedFree(&impl->inputs); CeedChk(ierr); 77916383ffcSjeremylt ierr = CeedFree(&impl->outputs); CeedChk(ierr); 78016383ffcSjeremylt ierr = CeedFree(&impl); CeedChk(ierr); 78116383ffcSjeremylt 78282b77998SStan Tomov return 0; 78382b77998SStan Tomov } 78482b77998SStan Tomov 78582b77998SStan Tomov static int CeedQFunctionCreate_Magma(CeedQFunction qf) { 78616383ffcSjeremylt int ierr; 78716383ffcSjeremylt Ceed ceed; 78816383ffcSjeremylt ierr = CeedQFunctionGetCeed(qf, &ceed); CeedChk(ierr); 78916383ffcSjeremylt 79016383ffcSjeremylt CeedQFunction_Magma *impl; 79116383ffcSjeremylt ierr = CeedCalloc(1, &impl); CeedChk(ierr); 79216383ffcSjeremylt ierr = CeedCalloc(16, &impl->inputs); CeedChk(ierr); 79316383ffcSjeremylt ierr = CeedCalloc(16, &impl->outputs); CeedChk(ierr); 79416383ffcSjeremylt ierr = CeedQFunctionSetData(qf, (void *)&impl); CeedChk(ierr); 79516383ffcSjeremylt 79682b77998SStan Tomov qf->Apply = CeedQFunctionApply_Magma; 79782b77998SStan Tomov qf->Destroy = CeedQFunctionDestroy_Magma; 79882b77998SStan Tomov return 0; 79982b77998SStan Tomov } 80082b77998SStan Tomov 80182b77998SStan Tomov static int CeedOperatorDestroy_Magma(CeedOperator op) { 80282b77998SStan Tomov int ierr; 80316383ffcSjeremylt CeedOperator_Magma *impl; 80416383ffcSjeremylt ierr = CeedOperatorGetData(op, (void *)&impl); CeedChk(ierr); 80582b77998SStan Tomov 8063bfcb0deSjeremylt for (CeedInt i=0; i<impl->numein+impl->numeout; i++) { 80716383ffcSjeremylt if (impl->Evecs[i]) { 80816383ffcSjeremylt ierr = CeedVectorDestroy(&impl->Evecs[i]); CeedChk(ierr); 8093bfcb0deSjeremylt } 8103bfcb0deSjeremylt } 81116383ffcSjeremylt ierr = CeedFree(&impl->Evecs); CeedChk(ierr); 81216383ffcSjeremylt ierr = CeedFree(&impl->Edata); CeedChk(ierr); 81339dea11bSjeremylt 81416383ffcSjeremylt for (CeedInt i=0; i<impl->numein; i++) { 81516383ffcSjeremylt ierr = CeedVectorDestroy(&impl->evecsin[i]); CeedChk(ierr); 81616383ffcSjeremylt ierr = CeedVectorDestroy(&impl->qvecsin[i]); CeedChk(ierr); 81716383ffcSjeremylt } 81816383ffcSjeremylt ierr = CeedFree(&impl->evecsin); CeedChk(ierr); 81916383ffcSjeremylt ierr = CeedFree(&impl->qvecsin); CeedChk(ierr); 8203bfcb0deSjeremylt 82116383ffcSjeremylt for (CeedInt i=0; i<impl->numeout; i++) { 82216383ffcSjeremylt ierr = CeedVectorDestroy(&impl->evecsout[i]); CeedChk(ierr); 82316383ffcSjeremylt ierr = CeedVectorDestroy(&impl->qvecsout[i]); CeedChk(ierr); 82416383ffcSjeremylt } 82516383ffcSjeremylt ierr = CeedFree(&impl->evecsout); CeedChk(ierr); 82616383ffcSjeremylt ierr = CeedFree(&impl->qvecsout); CeedChk(ierr); 8273bfcb0deSjeremylt 82816383ffcSjeremylt 82916383ffcSjeremylt ierr = CeedFree(&impl); CeedChk(ierr); 83082b77998SStan Tomov return 0; 83182b77998SStan Tomov } 83282b77998SStan Tomov 83308ed4d02Sjeremylt 8343bfcb0deSjeremylt /* 8353bfcb0deSjeremylt Setup infields or outfields 8363bfcb0deSjeremylt */ 83716383ffcSjeremylt static int CeedOperatorSetupFields_Magma(CeedQFunction qf, CeedOperator op, 83816383ffcSjeremylt bool inOrOut, 83916383ffcSjeremylt CeedVector *fullevecs, CeedVector *evecs, 84016383ffcSjeremylt CeedVector *qvecs, CeedInt starte, 841135a076eSjeremylt CeedInt numfields, CeedInt Q) { 84216383ffcSjeremylt CeedInt dim = 1, ierr, ncomp; 84316383ffcSjeremylt Ceed ceed; 84416383ffcSjeremylt ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr); 84516383ffcSjeremylt CeedQFunction_Magma *qf_data; 84616383ffcSjeremylt ierr = CeedQFunctionGetData(qf, (void *)&qf_data); CeedChk(ierr); 84716383ffcSjeremylt CeedBasis basis; 84816383ffcSjeremylt CeedElemRestriction Erestrict; 84916383ffcSjeremylt CeedOperatorField *opfields; 85016383ffcSjeremylt CeedQFunctionField *qffields; 85116383ffcSjeremylt if (inOrOut) { 85216383ffcSjeremylt ierr = CeedOperatorGetFields(op, NULL, &opfields); 85316383ffcSjeremylt CeedChk(ierr); 85416383ffcSjeremylt ierr = CeedQFunctionGetFields(qf, NULL, &qffields); 85516383ffcSjeremylt CeedChk(ierr); 85616383ffcSjeremylt } else { 85716383ffcSjeremylt ierr = CeedOperatorGetFields(op, &opfields, NULL); 85816383ffcSjeremylt CeedChk(ierr); 85916383ffcSjeremylt ierr = CeedQFunctionGetFields(qf, &qffields, NULL); 86016383ffcSjeremylt CeedChk(ierr); 86116383ffcSjeremylt } 86282b77998SStan Tomov 8633bfcb0deSjeremylt // Loop over fields 8643bfcb0deSjeremylt for (CeedInt i=0; i<numfields; i++) { 86516383ffcSjeremylt CeedEvalMode emode; 86616383ffcSjeremylt ierr = CeedQFunctionFieldGetEvalMode(qffields[i], &emode); CeedChk(ierr); 867135a076eSjeremylt if (emode != CEED_EVAL_WEIGHT) { 86816383ffcSjeremylt ierr = CeedOperatorFieldGetElemRestriction(opfields[i], &Erestrict); 869135a076eSjeremylt CeedChk(ierr); 87016383ffcSjeremylt ierr = CeedElemRestrictionCreateVector(Erestrict, NULL, &fullevecs[i+starte]); 87116383ffcSjeremylt CeedChk(ierr); 87216383ffcSjeremylt } else { 873135a076eSjeremylt } 8743bfcb0deSjeremylt switch(emode) { 8753bfcb0deSjeremylt case CEED_EVAL_NONE: 87616383ffcSjeremylt ierr = CeedQFunctionFieldGetNumComponents(qffields[i], &ncomp); 87716383ffcSjeremylt CeedChk(ierr); 87816383ffcSjeremylt ierr = CeedVectorCreate(ceed, Q*ncomp, &evecs[i]); CeedChk(ierr); 87916383ffcSjeremylt ierr = CeedVectorCreate(ceed, Q*ncomp, &qvecs[i]); CeedChk(ierr); 8803bfcb0deSjeremylt break; // No action 8813bfcb0deSjeremylt case CEED_EVAL_INTERP: 88216383ffcSjeremylt ierr = CeedOperatorFieldGetBasis(opfields[i], &basis); CeedChk(ierr); 88316383ffcSjeremylt ierr = CeedQFunctionFieldGetNumComponents(qffields[i], &ncomp); 88416383ffcSjeremylt CeedChk(ierr); 88516383ffcSjeremylt ierr = CeedVectorCreate(ceed, Q*ncomp, &evecs[i]); CeedChk(ierr); 88616383ffcSjeremylt ierr = CeedVectorCreate(ceed, Q*ncomp, &qvecs[i]); CeedChk(ierr); 8873bfcb0deSjeremylt break; 8883bfcb0deSjeremylt case CEED_EVAL_GRAD: 88916383ffcSjeremylt ierr = CeedOperatorFieldGetBasis(opfields[i], &basis); CeedChk(ierr); 89016383ffcSjeremylt ierr = CeedQFunctionFieldGetNumComponents(qffields[i], &ncomp); 89116383ffcSjeremylt ierr = CeedBasisGetDimension(basis, &dim); CeedChk(ierr); 89216383ffcSjeremylt ierr = CeedQFunctionFieldGetNumComponents(qffields[i], &ncomp); 89316383ffcSjeremylt CeedChk(ierr); 89416383ffcSjeremylt ierr = CeedVectorCreate(ceed, Q*ncomp, &evecs[i]); CeedChk(ierr); 89516383ffcSjeremylt ierr = CeedVectorCreate(ceed, Q*ncomp*dim, &qvecs[i]); CeedChk(ierr); 8963bfcb0deSjeremylt break; 8973bfcb0deSjeremylt case CEED_EVAL_WEIGHT: // Only on input fields 89816383ffcSjeremylt ierr = CeedOperatorFieldGetBasis(opfields[i], &basis); CeedChk(ierr); 89916383ffcSjeremylt ierr = CeedBasisGetDimension(basis, &dim); CeedChk(ierr); 90016383ffcSjeremylt ierr = CeedVectorCreate(ceed, Q, &qvecs[i]); CeedChk(ierr); 90116383ffcSjeremylt ierr = CeedBasisApply(basis, 1, CEED_NOTRANSPOSE, CEED_EVAL_WEIGHT, 90216383ffcSjeremylt NULL, qvecs[i]); CeedChk(ierr); 90316383ffcSjeremylt assert(starte==0); 9043bfcb0deSjeremylt break; 90516383ffcSjeremylt case CEED_EVAL_DIV: break; // Not implemented 90616383ffcSjeremylt case CEED_EVAL_CURL: break; // Not implemented 90782b77998SStan Tomov } 9083bfcb0deSjeremylt } 90982b77998SStan Tomov return 0; 91082b77998SStan Tomov } 91182b77998SStan Tomov 9123bfcb0deSjeremylt /* 9133bfcb0deSjeremylt CeedOperator needs to connect all the named fields (be they active or passive) 9143bfcb0deSjeremylt to the named inputs and outputs of its CeedQFunction. 9153bfcb0deSjeremylt */ 9163bfcb0deSjeremylt static int CeedOperatorSetup_Magma(CeedOperator op) { 91782b77998SStan Tomov int ierr; 91816383ffcSjeremylt bool setupdone; 91916383ffcSjeremylt ierr = CeedOperatorGetSetupStatus(op, &setupdone); CeedChk(ierr); 92016383ffcSjeremylt if (setupdone) return 0; 92116383ffcSjeremylt Ceed ceed; 92216383ffcSjeremylt ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr); 92316383ffcSjeremylt CeedOperator_Magma *data; 92416383ffcSjeremylt ierr = CeedOperatorGetData(op, (void *)&data); CeedChk(ierr); 92516383ffcSjeremylt CeedQFunction qf; 92616383ffcSjeremylt ierr = CeedOperatorGetQFunction(op, &qf); CeedChk(ierr); 92716383ffcSjeremylt CeedInt Q, numinputfields, numoutputfields; 92816383ffcSjeremylt ierr = CeedOperatorGetNumQuadraturePoints(op, &Q); CeedChk(ierr); 92916383ffcSjeremylt ierr = CeedQFunctionGetNumArgs(qf, &numinputfields, &numoutputfields); 93016383ffcSjeremylt CeedOperatorField *opinputfields, *opoutputfields; 93116383ffcSjeremylt ierr = CeedOperatorGetFields(op, &opinputfields, &opoutputfields); 93216383ffcSjeremylt CeedChk(ierr); 93316383ffcSjeremylt CeedQFunctionField *qfinputfields, *qfoutputfields; 93416383ffcSjeremylt ierr = CeedQFunctionGetFields(qf, &qfinputfields, &qfoutputfields); 93516383ffcSjeremylt CeedChk(ierr); 93682b77998SStan Tomov 93716383ffcSjeremylt data->numein = numinputfields; 93816383ffcSjeremylt data->numeout = numoutputfields; 9393bfcb0deSjeremylt 9403bfcb0deSjeremylt // Allocate 94116383ffcSjeremylt const CeedInt numIO = numinputfields + numoutputfields; 94216383ffcSjeremylt 94316383ffcSjeremylt ierr = CeedCalloc(numinputfields + numoutputfields, &data->Evecs); 94416383ffcSjeremylt CeedChk(ierr); 94516383ffcSjeremylt ierr = CeedCalloc(numinputfields + numoutputfields, &data->Edata); 9463bfcb0deSjeremylt CeedChk(ierr); 9473bfcb0deSjeremylt 94816383ffcSjeremylt ierr = CeedCalloc(16, &data->evecsin); CeedChk(ierr); 94916383ffcSjeremylt ierr = CeedCalloc(16, &data->evecsout); CeedChk(ierr); 95016383ffcSjeremylt ierr = CeedCalloc(16, &data->qvecsin); CeedChk(ierr); 95116383ffcSjeremylt ierr = CeedCalloc(16, &data->qvecsout); CeedChk(ierr); 9523bfcb0deSjeremylt 9533bfcb0deSjeremylt // Set up infield and outfield pointer arrays 9543bfcb0deSjeremylt // Infields 95516383ffcSjeremylt ierr = CeedOperatorSetupFields_Magma(qf, op, 0, data->Evecs, 95616383ffcSjeremylt data->evecsin, data->qvecsin, 0, 95716383ffcSjeremylt numinputfields, Q); 95816383ffcSjeremylt CeedChk(ierr); 9593bfcb0deSjeremylt // Outfields 96016383ffcSjeremylt ierr = CeedOperatorSetupFields_Magma(qf, op, 1, data->Evecs, 96116383ffcSjeremylt data->evecsout, data->qvecsout, 96216383ffcSjeremylt numinputfields, numoutputfields, Q); 96316383ffcSjeremylt CeedChk(ierr); 96416383ffcSjeremylt ierr = CeedOperatorSetSetupDone(op); CeedChk(ierr); 9653bfcb0deSjeremylt return 0; 9663bfcb0deSjeremylt } 9673bfcb0deSjeremylt 96868e7ed8aSjeremylt static int CeedOperatorApply_Magma(CeedOperator op, CeedVector invec, 9693bfcb0deSjeremylt CeedVector outvec, CeedRequest *request) { 9703bfcb0deSjeremylt int ierr; 97116383ffcSjeremylt Ceed ceed; 97216383ffcSjeremylt ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr); 97316383ffcSjeremylt CeedOperator_Magma *data; 97416383ffcSjeremylt ierr = CeedOperatorGetData(op, (void *)&data); CeedChk(ierr); 97516383ffcSjeremylt //CeedVector *E = data->Evecs, *D = data->D, outvec; 97616383ffcSjeremylt CeedInt Q, elemsize, numelements, numinputfields, numoutputfields, ncomp; 97716383ffcSjeremylt ierr = CeedOperatorGetNumQuadraturePoints(op, &Q); CeedChk(ierr); 97816383ffcSjeremylt ierr = CeedOperatorGetNumElements(op, &numelements); CeedChk(ierr); 97916383ffcSjeremylt CeedQFunction qf; 98016383ffcSjeremylt ierr = CeedOperatorGetQFunction(op, &qf); CeedChk(ierr); 98116383ffcSjeremylt CeedTransposeMode lmode; 98216383ffcSjeremylt CeedOperatorField *opinputfields, *opoutputfields; 98316383ffcSjeremylt ierr = CeedOperatorGetFields(op, &opinputfields, &opoutputfields); 98416383ffcSjeremylt CeedChk(ierr); 98516383ffcSjeremylt CeedQFunctionField *qfinputfields, *qfoutputfields; 98616383ffcSjeremylt ierr = CeedQFunctionGetFields(qf, &qfinputfields, &qfoutputfields); 98716383ffcSjeremylt CeedChk(ierr); 98816383ffcSjeremylt CeedEvalMode emode; 98916383ffcSjeremylt CeedVector vec; 99016383ffcSjeremylt CeedBasis basis; 99116383ffcSjeremylt CeedElemRestriction Erestrict; 9923bfcb0deSjeremylt 99368e7ed8aSjeremylt ierr = CeedOperatorSetup_Magma(op); CeedChk(ierr); 9949f0427d9SYohann ierr= CeedQFunctionGetNumArgs(qf, &numinputfields, &numoutputfields); 9959f0427d9SYohann CeedChk(ierr); 9963bfcb0deSjeremylt 9973bfcb0deSjeremylt // Input Evecs and Restriction 99816383ffcSjeremylt for (CeedInt i=0; i<numinputfields; i++) { 99916383ffcSjeremylt ierr = CeedQFunctionFieldGetEvalMode(qfinputfields[i], &emode); 10000f5de9e9Sjeremylt CeedChk(ierr); 100116383ffcSjeremylt if (emode & CEED_EVAL_WEIGHT) { 100216383ffcSjeremylt } else { // Restriction 100316383ffcSjeremylt // Get input vector 100416383ffcSjeremylt ierr = CeedOperatorFieldGetVector(opinputfields[i], &vec); CeedChk(ierr); 100516383ffcSjeremylt if (vec == CEED_VECTOR_ACTIVE) 100616383ffcSjeremylt vec = invec; 1007668048e2SJed Brown // Restrict 100816383ffcSjeremylt ierr = CeedOperatorFieldGetElemRestriction(opinputfields[i], &Erestrict); 100916383ffcSjeremylt CeedChk(ierr); 101016383ffcSjeremylt ierr = CeedOperatorFieldGetLMode(opinputfields[i], &lmode); CeedChk(ierr); 101116383ffcSjeremylt ierr = CeedElemRestrictionApply(Erestrict, CEED_NOTRANSPOSE, 101216383ffcSjeremylt lmode, vec, data->Evecs[i], 1013668048e2SJed Brown request); CeedChk(ierr); 1014668048e2SJed Brown // Get evec 101516383ffcSjeremylt ierr = CeedVectorGetArrayRead(data->Evecs[i], CEED_MEM_HOST, 101616383ffcSjeremylt (const CeedScalar **) &data->Edata[i]); 101716383ffcSjeremylt CeedChk(ierr); 10183bfcb0deSjeremylt } 10193bfcb0deSjeremylt } 10203bfcb0deSjeremylt 10213bfcb0deSjeremylt // Output Evecs 102216383ffcSjeremylt for (CeedInt i=0; i<numoutputfields; i++) { 102316383ffcSjeremylt ierr = CeedVectorGetArray(data->Evecs[i+data->numein], CEED_MEM_HOST, 102416383ffcSjeremylt &data->Edata[i + numinputfields]); CeedChk(ierr); 10253bfcb0deSjeremylt } 10263bfcb0deSjeremylt 10273bfcb0deSjeremylt // Loop through elements 102816383ffcSjeremylt for (CeedInt e=0; e<numelements; e++) { 10293bfcb0deSjeremylt // Input basis apply if needed 103016383ffcSjeremylt for (CeedInt i=0; i<numinputfields; i++) { 1031135a076eSjeremylt // Get elemsize, emode, ncomp 103216383ffcSjeremylt ierr = CeedOperatorFieldGetElemRestriction(opinputfields[i], &Erestrict); 103316383ffcSjeremylt CeedChk(ierr); 103416383ffcSjeremylt ierr = CeedElemRestrictionGetElementSize(Erestrict, &elemsize); 103516383ffcSjeremylt CeedChk(ierr); 103616383ffcSjeremylt ierr = CeedQFunctionFieldGetEvalMode(qfinputfields[i], &emode); 103716383ffcSjeremylt CeedChk(ierr); 103816383ffcSjeremylt ierr = CeedQFunctionFieldGetNumComponents(qfinputfields[i], &ncomp); 103916383ffcSjeremylt CeedChk(ierr); 10403bfcb0deSjeremylt // Basis action 10413bfcb0deSjeremylt switch(emode) { 10423bfcb0deSjeremylt case CEED_EVAL_NONE: 104316383ffcSjeremylt ierr = CeedVectorSetArray(data->qvecsin[i], CEED_MEM_HOST, 104416383ffcSjeremylt CEED_USE_POINTER, 104516383ffcSjeremylt &data->Edata[i][e*Q*ncomp]); CeedChk(ierr); 10463bfcb0deSjeremylt break; 10473bfcb0deSjeremylt case CEED_EVAL_INTERP: 104816383ffcSjeremylt ierr = CeedOperatorFieldGetBasis(opinputfields[i], &basis); CeedChk(ierr); 104916383ffcSjeremylt ierr = CeedVectorSetArray(data->evecsin[i], CEED_MEM_HOST, 105016383ffcSjeremylt CEED_USE_POINTER, 105116383ffcSjeremylt &data->Edata[i][e*elemsize*ncomp]); 10523bfcb0deSjeremylt CeedChk(ierr); 105316383ffcSjeremylt ierr = CeedBasisApply(basis, 1, CEED_NOTRANSPOSE, 105416383ffcSjeremylt CEED_EVAL_INTERP, data->evecsin[i], 105516383ffcSjeremylt data->qvecsin[i]); CeedChk(ierr); 10563bfcb0deSjeremylt break; 10573bfcb0deSjeremylt case CEED_EVAL_GRAD: 105816383ffcSjeremylt ierr = CeedOperatorFieldGetBasis(opinputfields[i], &basis); CeedChk(ierr); 105916383ffcSjeremylt ierr = CeedVectorSetArray(data->evecsin[i], CEED_MEM_HOST, 106016383ffcSjeremylt CEED_USE_POINTER, 106116383ffcSjeremylt &data->Edata[i][e*elemsize*ncomp]); 10623bfcb0deSjeremylt CeedChk(ierr); 106316383ffcSjeremylt ierr = CeedBasisApply(basis, 1, CEED_NOTRANSPOSE, 106416383ffcSjeremylt CEED_EVAL_GRAD, data->evecsin[i], 106516383ffcSjeremylt data->qvecsin[i]); CeedChk(ierr); 10663bfcb0deSjeremylt break; 10673bfcb0deSjeremylt case CEED_EVAL_WEIGHT: 10683bfcb0deSjeremylt break; // No action 10693bfcb0deSjeremylt case CEED_EVAL_DIV: 107016383ffcSjeremylt break; // Not implemented 10713bfcb0deSjeremylt case CEED_EVAL_CURL: 107216383ffcSjeremylt break; // Not implemented 10733bfcb0deSjeremylt } 10743bfcb0deSjeremylt } 10753bfcb0deSjeremylt // Output pointers 107616383ffcSjeremylt for (CeedInt i=0; i<numoutputfields; i++) { 107716383ffcSjeremylt ierr = CeedQFunctionFieldGetEvalMode(qfoutputfields[i], &emode); 107816383ffcSjeremylt CeedChk(ierr); 10793bfcb0deSjeremylt if (emode == CEED_EVAL_NONE) { 108016383ffcSjeremylt ierr = CeedQFunctionFieldGetNumComponents(qfoutputfields[i], &ncomp); 108116383ffcSjeremylt CeedChk(ierr); 108216383ffcSjeremylt ierr = CeedVectorSetArray(data->qvecsout[i], CEED_MEM_HOST, 108316383ffcSjeremylt CEED_USE_POINTER, 108416383ffcSjeremylt &data->Edata[i + numinputfields][e*Q*ncomp]); 108516383ffcSjeremylt CeedChk(ierr); 108616383ffcSjeremylt } 108716383ffcSjeremylt if (emode == CEED_EVAL_INTERP) { 108816383ffcSjeremylt } 108916383ffcSjeremylt if (emode == CEED_EVAL_GRAD) { 109016383ffcSjeremylt } 109116383ffcSjeremylt if (emode == CEED_EVAL_WEIGHT) { 10923bfcb0deSjeremylt } 10933bfcb0deSjeremylt } 109416383ffcSjeremylt 10953bfcb0deSjeremylt // Q function 109616383ffcSjeremylt ierr = CeedQFunctionApply(qf, Q, data->qvecsin, data->qvecsout); CeedChk(ierr); 10973bfcb0deSjeremylt 10983bfcb0deSjeremylt // Output basis apply if needed 109916383ffcSjeremylt for (CeedInt i=0; i<numoutputfields; i++) { 1100135a076eSjeremylt // Get elemsize, emode, ncomp 110116383ffcSjeremylt ierr = CeedOperatorFieldGetElemRestriction(opoutputfields[i], &Erestrict); 110216383ffcSjeremylt CeedChk(ierr); 110316383ffcSjeremylt ierr = CeedElemRestrictionGetElementSize(Erestrict, &elemsize); 110416383ffcSjeremylt CeedChk(ierr); 110516383ffcSjeremylt ierr = CeedQFunctionFieldGetEvalMode(qfoutputfields[i], &emode); 110616383ffcSjeremylt CeedChk(ierr); 110716383ffcSjeremylt ierr = CeedQFunctionFieldGetNumComponents(qfoutputfields[i], &ncomp); 110816383ffcSjeremylt CeedChk(ierr); 11093bfcb0deSjeremylt // Basis action 11103bfcb0deSjeremylt switch(emode) { 11113bfcb0deSjeremylt case CEED_EVAL_NONE: 11123bfcb0deSjeremylt break; // No action 11133bfcb0deSjeremylt case CEED_EVAL_INTERP: 111416383ffcSjeremylt ierr = CeedOperatorFieldGetBasis(opoutputfields[i], &basis); 111516383ffcSjeremylt CeedChk(ierr); 111616383ffcSjeremylt ierr = CeedVectorSetArray(data->evecsout[i], CEED_MEM_HOST, 111716383ffcSjeremylt CEED_USE_POINTER, 111816383ffcSjeremylt &data->Edata[i + numinputfields][e*elemsize*ncomp]); 111916383ffcSjeremylt ierr = CeedBasisApply(basis, 1, CEED_TRANSPOSE, 112016383ffcSjeremylt CEED_EVAL_INTERP, data->qvecsout[i], 112116383ffcSjeremylt data->evecsout[i]); CeedChk(ierr); 11223bfcb0deSjeremylt break; 11233bfcb0deSjeremylt case CEED_EVAL_GRAD: 112416383ffcSjeremylt ierr = CeedOperatorFieldGetBasis(opoutputfields[i], &basis); 11253bfcb0deSjeremylt CeedChk(ierr); 112616383ffcSjeremylt ierr = CeedVectorSetArray(data->evecsout[i], CEED_MEM_HOST, 112716383ffcSjeremylt CEED_USE_POINTER, 112816383ffcSjeremylt &data->Edata[i + numinputfields][e*elemsize*ncomp]); 112916383ffcSjeremylt ierr = CeedBasisApply(basis, 1, CEED_TRANSPOSE, 113016383ffcSjeremylt CEED_EVAL_GRAD, data->qvecsout[i], 113116383ffcSjeremylt data->evecsout[i]); CeedChk(ierr); 11323bfcb0deSjeremylt break; 113316383ffcSjeremylt case CEED_EVAL_WEIGHT: break; // Should not occur 113416383ffcSjeremylt case CEED_EVAL_DIV: break; // Not implemented 113516383ffcSjeremylt case CEED_EVAL_CURL: break; // Not implemented 11363bfcb0deSjeremylt } 11373bfcb0deSjeremylt } 113816383ffcSjeremylt } // numelements 113916383ffcSjeremylt 114016383ffcSjeremylt // Zero lvecs 114116383ffcSjeremylt for (CeedInt i=0; i<numoutputfields; i++) { 114216383ffcSjeremylt ierr = CeedOperatorFieldGetVector(opoutputfields[i], &vec); CeedChk(ierr); 114316383ffcSjeremylt if (vec == CEED_VECTOR_ACTIVE) 114416383ffcSjeremylt vec = outvec; 114516383ffcSjeremylt ierr = CeedVectorSetValue(vec, 0.0); CeedChk(ierr); 11463bfcb0deSjeremylt } 11473bfcb0deSjeremylt 11483bfcb0deSjeremylt // Output restriction 114916383ffcSjeremylt for (CeedInt i=0; i<numoutputfields; i++) { 115016383ffcSjeremylt // Restore evec 115116383ffcSjeremylt ierr = CeedVectorRestoreArray(data->Evecs[i+data->numein], 115216383ffcSjeremylt &data->Edata[i + numinputfields]); 115316383ffcSjeremylt CeedChk(ierr); 115416383ffcSjeremylt // Get output vector 115516383ffcSjeremylt ierr = CeedOperatorFieldGetVector(opoutputfields[i], &vec); CeedChk(ierr); 1156668048e2SJed Brown // Active 115716383ffcSjeremylt if (vec == CEED_VECTOR_ACTIVE) 115816383ffcSjeremylt vec = outvec; 11596a35ea15Sjeremylt // Restrict 116016383ffcSjeremylt ierr = CeedOperatorFieldGetElemRestriction(opoutputfields[i], &Erestrict); 11613bfcb0deSjeremylt CeedChk(ierr); 116216383ffcSjeremylt ierr = CeedOperatorFieldGetLMode(opoutputfields[i], &lmode); CeedChk(ierr); 116316383ffcSjeremylt ierr = CeedElemRestrictionApply(Erestrict, CEED_TRANSPOSE, 116416383ffcSjeremylt lmode, data->Evecs[i+data->numein], vec, 1165668048e2SJed Brown request); CeedChk(ierr); 11663bfcb0deSjeremylt } 11673bfcb0deSjeremylt 11686a35ea15Sjeremylt // Restore input arrays 116916383ffcSjeremylt for (CeedInt i=0; i<numinputfields; i++) { 117016383ffcSjeremylt ierr = CeedQFunctionFieldGetEvalMode(qfinputfields[i], &emode); 117116383ffcSjeremylt CeedChk(ierr); 11726a35ea15Sjeremylt if (emode & CEED_EVAL_WEIGHT) { 11736a35ea15Sjeremylt } else { 117416383ffcSjeremylt // Restriction 117516383ffcSjeremylt ierr = CeedVectorRestoreArrayRead(data->Evecs[i], 117616383ffcSjeremylt (const CeedScalar **) &data->Edata[i]); 117716383ffcSjeremylt CeedChk(ierr); 11786a35ea15Sjeremylt } 11796a35ea15Sjeremylt } 118082b77998SStan Tomov return 0; 118182b77998SStan Tomov } 118282b77998SStan Tomov 118382b77998SStan Tomov static int CeedOperatorCreate_Magma(CeedOperator op) { 118482b77998SStan Tomov int ierr; 118516383ffcSjeremylt CeedOperator_Magma *impl; 118616383ffcSjeremylt Ceed ceed; 118716383ffcSjeremylt ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr); 118882b77998SStan Tomov 118982b77998SStan Tomov ierr = CeedCalloc(1, &impl); CeedChk(ierr); 119016383ffcSjeremylt ierr = CeedOperatorSetData(op, (void *)&impl); 119116383ffcSjeremylt 119216383ffcSjeremylt ierr = CeedSetBackendFunction(ceed, "Operator", op, "Apply", 119316383ffcSjeremylt CeedOperatorApply_Magma); CeedChk(ierr); 119416383ffcSjeremylt ierr = CeedSetBackendFunction(ceed, "Operator", op, "Destroy", 119516383ffcSjeremylt CeedOperatorDestroy_Magma); CeedChk(ierr); 119682b77998SStan Tomov return 0; 119782b77998SStan Tomov } 119882b77998SStan Tomov 1199*52d6035fSJeremy L Thompson int CeedCompositeOperatorCreate_Magma(CeedOperator op) { 1200*52d6035fSJeremy L Thompson int ierr; 1201*52d6035fSJeremy L Thompson Ceed ceed; 1202*52d6035fSJeremy L Thompson ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr); 1203*52d6035fSJeremy L Thompson return CeedError(ceed, 1, "Backend does not support composite operators"); 1204*52d6035fSJeremy L Thompson } 1205*52d6035fSJeremy L Thompson 120682b77998SStan Tomov // ***************************************************************************** 120782b77998SStan Tomov // * INIT 120882b77998SStan Tomov // ***************************************************************************** 120982b77998SStan Tomov static int CeedInit_Magma(const char *resource, Ceed ceed) { 12101dc2661bSVeselin Dobrev int ierr; 12112a847359SStan Tomov if (strcmp(resource, "/gpu/magma")) 121282b77998SStan Tomov return CeedError(ceed, 1, "MAGMA backend cannot use resource: %s", resource); 121393fbe329SStan Tomov 12141dc2661bSVeselin Dobrev ierr = magma_init(); 12151dc2661bSVeselin Dobrev if (ierr) return CeedError(ceed, 1, "error in magma_init(): %d\n", ierr); 121693fbe329SStan Tomov //magma_print_environment(); 121793fbe329SStan Tomov 121882b77998SStan Tomov ceed->VecCreate = CeedVectorCreate_Magma; 121982b77998SStan Tomov ceed->BasisCreateTensorH1 = CeedBasisCreateTensorH1_Magma; 1220a8de75f0Sjeremylt ceed->BasisCreateH1 = CeedBasisCreateH1_Magma; 122182b77998SStan Tomov ceed->ElemRestrictionCreate = CeedElemRestrictionCreate_Magma; 12224cc6aec3Sjeremylt ceed->ElemRestrictionCreateBlocked = CeedElemRestrictionCreateBlocked_Magma; 122382b77998SStan Tomov ceed->QFunctionCreate = CeedQFunctionCreate_Magma; 122482b77998SStan Tomov ceed->OperatorCreate = CeedOperatorCreate_Magma; 1225*52d6035fSJeremy L Thompson ceed->CompositeOperatorCreate = CeedCompositeOperatorCreate_Magma; 122682b77998SStan Tomov return 0; 122782b77998SStan Tomov } 122882b77998SStan Tomov 122982b77998SStan Tomov // ***************************************************************************** 123082b77998SStan Tomov // * REGISTER 123182b77998SStan Tomov // ***************************************************************************** 123282b77998SStan Tomov __attribute__((constructor)) 123382b77998SStan Tomov static void Register(void) { 123444951fc6Sjeremylt CeedRegister("/gpu/magma", CeedInit_Magma, 20); 123582b77998SStan Tomov } 1236