182b77998SStan Tomov // Copyright (c) 2017, Lawrence Livermore National Security, LLC. Produced at 282b77998SStan Tomov // the Lawrence Livermore National Laboratory. LLNL-CODE-734707. All Rights 382b77998SStan Tomov // reserved. See files LICENSE and NOTICE for details. 482b77998SStan Tomov // 582b77998SStan Tomov // This file is part of CEED, a collection of benchmarks, miniapps, software 682b77998SStan Tomov // libraries and APIs for efficient high-order finite element and spectral 782b77998SStan Tomov // element discretizations for exascale applications. For more information and 882b77998SStan Tomov // source code availability see http://github.com/ceed. 982b77998SStan Tomov // 1082b77998SStan Tomov // The CEED research is supported by the Exascale Computing Project 17-SC-20-SC, 1182b77998SStan Tomov // a collaborative effort of two U.S. Department of Energy organizations (Office 1282b77998SStan Tomov // of Science and the National Nuclear Security Administration) responsible for 1382b77998SStan Tomov // the planning and preparation of a capable exascale ecosystem, including 1482b77998SStan Tomov // software, applications, hardware, advanced system engineering and early 1582b77998SStan Tomov // testbed platforms, in support of the nation's exascale computing imperative. 1682b77998SStan Tomov 1782b77998SStan Tomov #include <ceed-impl.h> 1882b77998SStan Tomov #include <string.h> 19*93fbe329SStan Tomov #include "magma.h" 2082b77998SStan Tomov 2182b77998SStan Tomov typedef struct { 2282b77998SStan Tomov CeedScalar *array; 23*93fbe329SStan Tomov CeedScalar *darray; 2482b77998SStan Tomov } CeedVector_Magma; 2582b77998SStan Tomov 2682b77998SStan Tomov typedef struct { 2782b77998SStan Tomov const CeedInt *indices; 2882b77998SStan Tomov CeedInt *indices_allocated; 2982b77998SStan Tomov } CeedElemRestriction_Magma; 3082b77998SStan Tomov 3182b77998SStan Tomov typedef struct { 3282b77998SStan Tomov CeedVector etmp; 3382b77998SStan Tomov CeedVector qdata; 3482b77998SStan Tomov } CeedOperator_Magma; 3582b77998SStan Tomov 36*93fbe329SStan Tomov // ***************************************************************************** 37*93fbe329SStan Tomov // * Initialize vector vec (after free mem) with values from array based on cmode 38*93fbe329SStan Tomov // * CEED_COPY_VALUES: memory is allocated in vec->array_allocated, made equal 39*93fbe329SStan Tomov // * to array, and data is copied (not store passed pointer) 40*93fbe329SStan Tomov // * CEED_OWN_POINTER: vec->data->array_allocated and vec->data->array = array 41*93fbe329SStan Tomov // * CEED_USE_POINTER: vec->data->array = array (can modify; no ownership) 42*93fbe329SStan Tomov // * mtype: CEED_MEM_HOST or CEED_MEM_DEVICE 43*93fbe329SStan Tomov // ***************************************************************************** 4482b77998SStan Tomov static int CeedVectorSetArray_Magma(CeedVector vec, CeedMemType mtype, 4582b77998SStan Tomov CeedCopyMode cmode, CeedScalar *array) { 4682b77998SStan Tomov CeedVector_Magma *impl = vec->data; 4782b77998SStan Tomov int ierr; 4882b77998SStan Tomov 4982b77998SStan Tomov if (mtype != CEED_MEM_HOST) 5082b77998SStan Tomov return CeedError(vec->ceed, 1, "Only MemType = HOST supported"); 51*93fbe329SStan Tomov 5282b77998SStan Tomov switch (cmode) { 5382b77998SStan Tomov case CEED_COPY_VALUES: 54*93fbe329SStan Tomov if (impl->darray == NULL) { 55*93fbe329SStan Tomov ierr = magma_malloc( (void**)&impl->darray, 56*93fbe329SStan Tomov vec->length * sizeof(CeedScalar)); 57*93fbe329SStan Tomov CeedChk(ierr); 58*93fbe329SStan Tomov } 59*93fbe329SStan Tomov if (array) 60*93fbe329SStan Tomov magma_setvector(vec->length, sizeof(array[0]), 61*93fbe329SStan Tomov array, 1, impl->darray, 1); 6282b77998SStan Tomov break; 6382b77998SStan Tomov case CEED_OWN_POINTER: 64*93fbe329SStan Tomov if (impl->darray == NULL){ 65*93fbe329SStan Tomov ierr = magma_malloc( (void**)&impl->darray, 66*93fbe329SStan Tomov vec->length * sizeof(CeedScalar)); 67*93fbe329SStan Tomov CeedChk(ierr); 68*93fbe329SStan Tomov } 69*93fbe329SStan Tomov if (array) 70*93fbe329SStan Tomov magma_setvector(vec->length, sizeof(array[0]), 71*93fbe329SStan Tomov array, 1, impl->darray, 1); 72*93fbe329SStan Tomov 73*93fbe329SStan Tomov ierr = CeedFree(array); CeedChk(ierr); 7482b77998SStan Tomov break; 7582b77998SStan Tomov case CEED_USE_POINTER: 76*93fbe329SStan Tomov if (impl->darray != NULL){ 77*93fbe329SStan Tomov ierr = magma_free(impl->darray); 78*93fbe329SStan Tomov CeedChk(ierr); 79*93fbe329SStan Tomov impl->darray = NULL; 80*93fbe329SStan Tomov } 8182b77998SStan Tomov impl->array = array; 8282b77998SStan Tomov } 8382b77998SStan Tomov return 0; 8482b77998SStan Tomov } 8582b77998SStan Tomov 86*93fbe329SStan Tomov // ***************************************************************************** 87*93fbe329SStan Tomov // * Get data from Device vector vec into CeedScalar array on the CPU 88*93fbe329SStan Tomov // ***************************************************************************** 8982b77998SStan Tomov static int CeedVectorGetArray_Magma(CeedVector vec, CeedMemType mtype, 9082b77998SStan Tomov CeedScalar **array) { 9182b77998SStan Tomov CeedVector_Magma *impl = vec->data; 9282b77998SStan Tomov int ierr; 9382b77998SStan Tomov 9482b77998SStan Tomov if (mtype != CEED_MEM_HOST) 9582b77998SStan Tomov return CeedError(vec->ceed, 1, "Can only provide to HOST memory"); 96*93fbe329SStan Tomov 9782b77998SStan Tomov if (!impl->array) { // Allocate if array is not yet allocated 98*93fbe329SStan Tomov ierr = CeedMalloc(vec->length, &impl->array); CeedChk(ierr); 99*93fbe329SStan Tomov } 100*93fbe329SStan Tomov if (!impl->darray){ // Get data from the Device if owned (darray!=NULL) 101*93fbe329SStan Tomov magma_getvector(vec->length, sizeof(array[0]), 102*93fbe329SStan Tomov impl->darray, 1, array, 1); 10382b77998SStan Tomov } 10482b77998SStan Tomov *array = impl->array; 10582b77998SStan Tomov return 0; 10682b77998SStan Tomov } 10782b77998SStan Tomov 108*93fbe329SStan Tomov // ***************************************************************************** 109*93fbe329SStan Tomov // * Get data from Device vector vec into CeedScalar array on the CPU for reading 110*93fbe329SStan Tomov // ***************************************************************************** 11182b77998SStan Tomov static int CeedVectorGetArrayRead_Magma(CeedVector vec, CeedMemType mtype, 11282b77998SStan Tomov const CeedScalar **array) { 11382b77998SStan Tomov CeedVector_Magma *impl = vec->data; 11482b77998SStan Tomov int ierr; 11582b77998SStan Tomov 11682b77998SStan Tomov if (mtype != CEED_MEM_HOST) 11782b77998SStan Tomov return CeedError(vec->ceed, 1, "Can only provide to HOST memory"); 118*93fbe329SStan Tomov 11982b77998SStan Tomov if (!impl->array) { // Allocate if array is not yet allocated 120*93fbe329SStan Tomov ierr = CeedMalloc(vec->length, &impl->array); CeedChk(ierr); 12182b77998SStan Tomov } 122*93fbe329SStan Tomov if (!impl->darray){ // Get data from the Device if owned (darray!=NULL) 123*93fbe329SStan Tomov magma_getvector(vec->length, sizeof(array[0]), 124*93fbe329SStan Tomov impl->darray, 1, array, 1); 125*93fbe329SStan Tomov } 126*93fbe329SStan Tomov 12782b77998SStan Tomov *array = impl->array; 12882b77998SStan Tomov return 0; 12982b77998SStan Tomov } 13082b77998SStan Tomov 131*93fbe329SStan Tomov // ***************************************************************************** 132*93fbe329SStan Tomov // * Restore vector vec with values from array, where array received its values 133*93fbe329SStan Tomov // * from vec and possibly modified them. 134*93fbe329SStan Tomov // ***************************************************************************** 13582b77998SStan Tomov static int CeedVectorRestoreArray_Magma(CeedVector vec, CeedScalar **array) { 136*93fbe329SStan Tomov CeedVector_Magma *impl = vec->data; 137*93fbe329SStan Tomov 138*93fbe329SStan Tomov if (!impl->darray) {// vec owns the data, so it is on GPU 139*93fbe329SStan Tomov magma_setvector(vec->length, sizeof(*array[0]), 140*93fbe329SStan Tomov *array, 1, impl->darray, 1); 141*93fbe329SStan Tomov } 14282b77998SStan Tomov *array = NULL; 14382b77998SStan Tomov return 0; 14482b77998SStan Tomov } 14582b77998SStan Tomov 146*93fbe329SStan Tomov // ***************************************************************************** 147*93fbe329SStan Tomov // * Restore vector vec with values from array, where array received its values 148*93fbe329SStan Tomov // * from vec to only read them; in this case vec may have been modified meanwhile 149*93fbe329SStan Tomov // * and needs to be restored here. 150*93fbe329SStan Tomov // ***************************************************************************** 15182b77998SStan Tomov static int CeedVectorRestoreArrayRead_Magma(CeedVector vec, 15282b77998SStan Tomov const CeedScalar **array) { 153*93fbe329SStan Tomov CeedVector_Magma *impl = vec->data; 154*93fbe329SStan Tomov 155*93fbe329SStan Tomov if (!impl->darray) {// vec owns the data, so it is on GPU 156*93fbe329SStan Tomov magma_setvector(vec->length, sizeof(*array[0]), 157*93fbe329SStan Tomov *array, 1, impl->darray, 1); 158*93fbe329SStan Tomov } 15982b77998SStan Tomov *array = NULL; 16082b77998SStan Tomov return 0; 16182b77998SStan Tomov } 16282b77998SStan Tomov 16382b77998SStan Tomov static int CeedVectorDestroy_Magma(CeedVector vec) { 16482b77998SStan Tomov CeedVector_Magma *impl = vec->data; 16582b77998SStan Tomov int ierr; 16682b77998SStan Tomov 167*93fbe329SStan Tomov if (!impl->array) {ierr = CeedFree(&impl->array); CeedChk(ierr);} 168*93fbe329SStan Tomov ierr = magma_free(impl->darray); CeedChk(ierr); 16982b77998SStan Tomov ierr = CeedFree(&vec->data); CeedChk(ierr); 17082b77998SStan Tomov return 0; 17182b77998SStan Tomov } 17282b77998SStan Tomov 173*93fbe329SStan Tomov // ***************************************************************************** 174*93fbe329SStan Tomov // * Create vector vec of size n 175*93fbe329SStan Tomov // ***************************************************************************** 17682b77998SStan Tomov static int CeedVectorCreate_Magma(Ceed ceed, CeedInt n, CeedVector vec) { 17782b77998SStan Tomov CeedVector_Magma *impl; 17882b77998SStan Tomov int ierr; 17982b77998SStan Tomov 18082b77998SStan Tomov vec->SetArray = CeedVectorSetArray_Magma; 18182b77998SStan Tomov vec->GetArray = CeedVectorGetArray_Magma; 18282b77998SStan Tomov vec->GetArrayRead = CeedVectorGetArrayRead_Magma; 18382b77998SStan Tomov vec->RestoreArray = CeedVectorRestoreArray_Magma; 18482b77998SStan Tomov vec->RestoreArrayRead = CeedVectorRestoreArrayRead_Magma; 18582b77998SStan Tomov vec->Destroy = CeedVectorDestroy_Magma; 18682b77998SStan Tomov ierr = CeedCalloc(1,&impl); CeedChk(ierr); 187*93fbe329SStan Tomov impl->darray = NULL; 18882b77998SStan Tomov vec->data = impl; 18982b77998SStan Tomov return 0; 19082b77998SStan Tomov } 19182b77998SStan Tomov 192*93fbe329SStan Tomov // ***************************************************************************** 193*93fbe329SStan Tomov // * Apply restriction operator r to u: v = r(rmode) u 194*93fbe329SStan Tomov // ***************************************************************************** 19582b77998SStan Tomov static int CeedElemRestrictionApply_Magma(CeedElemRestriction r, 19682b77998SStan Tomov CeedTransposeMode tmode, CeedInt ncomp, 19782b77998SStan Tomov CeedTransposeMode lmode, CeedVector u, 19882b77998SStan Tomov CeedVector v, CeedRequest *request) { 19982b77998SStan Tomov CeedElemRestriction_Magma *impl = r->data; 20082b77998SStan Tomov int ierr; 20182b77998SStan Tomov const CeedScalar *uu; 20282b77998SStan Tomov CeedScalar *vv; 20382b77998SStan Tomov CeedInt esize = r->nelem*r->elemsize; 20482b77998SStan Tomov 20582b77998SStan Tomov ierr = CeedVectorGetArrayRead(u, CEED_MEM_HOST, &uu); CeedChk(ierr); 20682b77998SStan Tomov ierr = CeedVectorGetArray(v, CEED_MEM_HOST, &vv); CeedChk(ierr); 20782b77998SStan Tomov if (tmode == CEED_NOTRANSPOSE) { 20882b77998SStan Tomov // Perform: v = r * u 20982b77998SStan Tomov if (ncomp == 1) { 21082b77998SStan Tomov for (CeedInt i=0; i<esize; i++) vv[i] = uu[impl->indices[i]]; 21182b77998SStan Tomov } else { 21282b77998SStan Tomov // vv is (elemsize x ncomp x nelem), column-major 21382b77998SStan Tomov if (lmode == CEED_NOTRANSPOSE) { // u is (ndof x ncomp), column-major 21482b77998SStan Tomov for (CeedInt e = 0; e < r->nelem; e++) 21582b77998SStan Tomov for (CeedInt d = 0; d < ncomp; d++) 21682b77998SStan Tomov for (CeedInt i=0; i<r->elemsize; i++) { 21782b77998SStan Tomov vv[i+r->elemsize*(d+ncomp*e)] = 21882b77998SStan Tomov uu[impl->indices[i+r->elemsize*e]+r->ndof*d]; 21982b77998SStan Tomov } 22082b77998SStan Tomov } else { // u is (ncomp x ndof), column-major 22182b77998SStan Tomov for (CeedInt e = 0; e < r->nelem; e++) 22282b77998SStan Tomov for (CeedInt d = 0; d < ncomp; d++) 22382b77998SStan Tomov for (CeedInt i=0; i<r->elemsize; i++) { 22482b77998SStan Tomov vv[i+r->elemsize*(d+ncomp*e)] = 22582b77998SStan Tomov uu[d+ncomp*impl->indices[i+r->elemsize*e]]; 22682b77998SStan Tomov } 22782b77998SStan Tomov } 22882b77998SStan Tomov } 22982b77998SStan Tomov } else { 23082b77998SStan Tomov // Note: in transpose mode, we perform: v += r^t * u 23182b77998SStan Tomov if (ncomp == 1) { 23282b77998SStan Tomov for (CeedInt i=0; i<esize; i++) vv[impl->indices[i]] += uu[i]; 23382b77998SStan Tomov } else { 23482b77998SStan Tomov // u is (elemsize x ncomp x nelem) 23582b77998SStan Tomov if (lmode == CEED_NOTRANSPOSE) { // vv is (ndof x ncomp), column-major 23682b77998SStan Tomov for (CeedInt e = 0; e < r->nelem; e++) 23782b77998SStan Tomov for (CeedInt d = 0; d < ncomp; d++) 23882b77998SStan Tomov for (CeedInt i=0; i<r->elemsize; i++) { 23982b77998SStan Tomov vv[impl->indices[i+r->elemsize*e]+r->ndof*d] += 24082b77998SStan Tomov uu[i+r->elemsize*(d+e*ncomp)]; 24182b77998SStan Tomov } 24282b77998SStan Tomov } else { // vv is (ncomp x ndof), column-major 24382b77998SStan Tomov for (CeedInt e = 0; e < r->nelem; e++) 24482b77998SStan Tomov for (CeedInt d = 0; d < ncomp; d++) 24582b77998SStan Tomov for (CeedInt i=0; i<r->elemsize; i++) { 24682b77998SStan Tomov vv[d+ncomp*impl->indices[i+r->elemsize*e]] += 24782b77998SStan Tomov uu[i+r->elemsize*(d+e*ncomp)]; 24882b77998SStan Tomov } 24982b77998SStan Tomov } 25082b77998SStan Tomov } 25182b77998SStan Tomov } 25282b77998SStan Tomov ierr = CeedVectorRestoreArrayRead(u, &uu); CeedChk(ierr); 25382b77998SStan Tomov ierr = CeedVectorRestoreArray(v, &vv); CeedChk(ierr); 25482b77998SStan Tomov if (request != CEED_REQUEST_IMMEDIATE && request != CEED_REQUEST_ORDERED) 25582b77998SStan Tomov *request = NULL; 25682b77998SStan Tomov return 0; 25782b77998SStan Tomov } 25882b77998SStan Tomov 25982b77998SStan Tomov static int CeedElemRestrictionDestroy_Magma(CeedElemRestriction r) { 26082b77998SStan Tomov CeedElemRestriction_Magma *impl = r->data; 26182b77998SStan Tomov int ierr; 26282b77998SStan Tomov 26382b77998SStan Tomov ierr = CeedFree(&impl->indices_allocated); CeedChk(ierr); 26482b77998SStan Tomov ierr = CeedFree(&r->data); CeedChk(ierr); 26582b77998SStan Tomov return 0; 26682b77998SStan Tomov } 26782b77998SStan Tomov 26882b77998SStan Tomov static int CeedElemRestrictionCreate_Magma(CeedElemRestriction r, 26982b77998SStan Tomov CeedMemType mtype, 270*93fbe329SStan Tomov CeedCopyMode cmode, 271*93fbe329SStan Tomov const CeedInt *indices) { 27282b77998SStan Tomov int ierr; 27382b77998SStan Tomov CeedElemRestriction_Magma *impl; 27482b77998SStan Tomov 27582b77998SStan Tomov if (mtype != CEED_MEM_HOST) 27682b77998SStan Tomov return CeedError(r->ceed, 1, "Only MemType = HOST supported"); 27782b77998SStan Tomov ierr = CeedCalloc(1,&impl); CeedChk(ierr); 27882b77998SStan Tomov switch (cmode) { 27982b77998SStan Tomov case CEED_COPY_VALUES: 28082b77998SStan Tomov ierr = CeedMalloc(r->nelem*r->elemsize, &impl->indices_allocated); 28182b77998SStan Tomov CeedChk(ierr); 28282b77998SStan Tomov memcpy(impl->indices_allocated, indices, 28382b77998SStan Tomov r->nelem * r->elemsize * sizeof(indices[0])); 28482b77998SStan Tomov impl->indices = impl->indices_allocated; 28582b77998SStan Tomov break; 28682b77998SStan Tomov case CEED_OWN_POINTER: 28782b77998SStan Tomov impl->indices_allocated = (CeedInt *)indices; 28882b77998SStan Tomov impl->indices = impl->indices_allocated; 28982b77998SStan Tomov break; 29082b77998SStan Tomov case CEED_USE_POINTER: 29182b77998SStan Tomov impl->indices = indices; 29282b77998SStan Tomov } 29382b77998SStan Tomov r->data = impl; 29482b77998SStan Tomov r->Apply = CeedElemRestrictionApply_Magma; 29582b77998SStan Tomov r->Destroy = CeedElemRestrictionDestroy_Magma; 29682b77998SStan Tomov return 0; 29782b77998SStan Tomov } 29882b77998SStan Tomov 29982b77998SStan Tomov // Contracts on the middle index 30082b77998SStan Tomov // NOTRANSPOSE: V_ajc = T_jb U_abc 30182b77998SStan Tomov // TRANSPOSE: V_ajc = T_bj U_abc 30282b77998SStan Tomov // If Add != 0, "=" is replaced by "+=" 30382b77998SStan Tomov static int CeedTensorContract_Magma(Ceed ceed, 30482b77998SStan Tomov CeedInt A, CeedInt B, CeedInt C, CeedInt J, 30582b77998SStan Tomov const CeedScalar *t, CeedTransposeMode tmode, 30682b77998SStan Tomov const CeedInt Add, 30782b77998SStan Tomov const CeedScalar *u, CeedScalar *v) { 30882b77998SStan Tomov CeedInt tstride0 = B, tstride1 = 1; 30982b77998SStan Tomov if (tmode == CEED_TRANSPOSE) { 31082b77998SStan Tomov tstride0 = 1; tstride1 = J; 31182b77998SStan Tomov } 31282b77998SStan Tomov 31382b77998SStan Tomov for (CeedInt a=0; a<A; a++) { 31482b77998SStan Tomov for (CeedInt j=0; j<J; j++) { 31582b77998SStan Tomov if (!Add) { 31682b77998SStan Tomov for (CeedInt c=0; c<C; c++) 31782b77998SStan Tomov v[(a*J+j)*C+c] = 0; 31882b77998SStan Tomov } 31982b77998SStan Tomov for (CeedInt b=0; b<B; b++) { 32082b77998SStan Tomov for (CeedInt c=0; c<C; c++) { 32182b77998SStan Tomov v[(a*J+j)*C+c] += t[j*tstride0 + b*tstride1] * u[(a*B+b)*C+c]; 32282b77998SStan Tomov } 32382b77998SStan Tomov } 32482b77998SStan Tomov } 32582b77998SStan Tomov } 32682b77998SStan Tomov return 0; 32782b77998SStan Tomov } 32882b77998SStan Tomov 32982b77998SStan Tomov static int CeedBasisApply_Magma(CeedBasis basis, CeedTransposeMode tmode, 33082b77998SStan Tomov CeedEvalMode emode, 33182b77998SStan Tomov const CeedScalar *u, CeedScalar *v) { 33282b77998SStan Tomov int ierr; 33382b77998SStan Tomov const CeedInt dim = basis->dim; 33482b77998SStan Tomov const CeedInt ndof = basis->ndof; 33582b77998SStan Tomov const CeedInt nqpt = ndof*CeedPowInt(basis->Q1d, dim); 33682b77998SStan Tomov const CeedInt add = (tmode == CEED_TRANSPOSE); 33782b77998SStan Tomov 33882b77998SStan Tomov if (tmode == CEED_TRANSPOSE) { 33982b77998SStan Tomov const CeedInt vsize = ndof*CeedPowInt(basis->P1d, dim); 34082b77998SStan Tomov for (CeedInt i = 0; i < vsize; i++) 34182b77998SStan Tomov v[i] = (CeedScalar) 0; 34282b77998SStan Tomov } 34382b77998SStan Tomov if (emode & CEED_EVAL_INTERP) { 34482b77998SStan Tomov CeedInt P = basis->P1d, Q = basis->Q1d; 34582b77998SStan Tomov if (tmode == CEED_TRANSPOSE) { 34682b77998SStan Tomov P = basis->Q1d; Q = basis->P1d; 34782b77998SStan Tomov } 34882b77998SStan Tomov CeedInt pre = ndof*CeedPowInt(P, dim-1), post = 1; 34982b77998SStan Tomov CeedScalar tmp[2][ndof*Q*CeedPowInt(P>Q?P:Q, dim-1)]; 35082b77998SStan Tomov for (CeedInt d=0; d<dim; d++) { 35182b77998SStan Tomov ierr = CeedTensorContract_Magma(basis->ceed, pre, P, post, Q, basis->interp1d, 35282b77998SStan Tomov tmode, add&&(d==dim-1), 35382b77998SStan Tomov d==0?u:tmp[d%2], d==dim-1?v:tmp[(d+1)%2]); 35482b77998SStan Tomov CeedChk(ierr); 35582b77998SStan Tomov pre /= P; 35682b77998SStan Tomov post *= Q; 35782b77998SStan Tomov } 35882b77998SStan Tomov if (tmode == CEED_NOTRANSPOSE) { 35982b77998SStan Tomov v += nqpt; 36082b77998SStan Tomov } else { 36182b77998SStan Tomov u += nqpt; 36282b77998SStan Tomov } 36382b77998SStan Tomov } 36482b77998SStan Tomov if (emode & CEED_EVAL_GRAD) { 36582b77998SStan Tomov CeedInt P = basis->P1d, Q = basis->Q1d; 36682b77998SStan Tomov // In CEED_NOTRANSPOSE mode: 36782b77998SStan Tomov // u is (P^dim x nc), column-major layout (nc = ndof) 36882b77998SStan Tomov // v is (Q^dim x nc x dim), column-major layout (nc = ndof) 36982b77998SStan Tomov // In CEED_TRANSPOSE mode, the sizes of u and v are switched. 37082b77998SStan Tomov if (tmode == CEED_TRANSPOSE) { 37182b77998SStan Tomov P = basis->Q1d, Q = basis->P1d; 37282b77998SStan Tomov } 37382b77998SStan Tomov CeedScalar tmp[2][ndof*Q*CeedPowInt(P>Q?P:Q, dim-1)]; 37482b77998SStan Tomov for (CeedInt p = 0; p < dim; p++) { 37582b77998SStan Tomov CeedInt pre = ndof*CeedPowInt(P, dim-1), post = 1; 37682b77998SStan Tomov for (CeedInt d=0; d<dim; d++) { 37782b77998SStan Tomov ierr = CeedTensorContract_Magma(basis->ceed, pre, P, post, Q, 37882b77998SStan Tomov (p==d)?basis->grad1d:basis->interp1d, 37982b77998SStan Tomov tmode, add&&(d==dim-1), 38082b77998SStan Tomov d==0?u:tmp[d%2], d==dim-1?v:tmp[(d+1)%2]); 38182b77998SStan Tomov CeedChk(ierr); 38282b77998SStan Tomov pre /= P; 38382b77998SStan Tomov post *= Q; 38482b77998SStan Tomov } 38582b77998SStan Tomov if (tmode == CEED_NOTRANSPOSE) { 38682b77998SStan Tomov v += nqpt; 38782b77998SStan Tomov } else { 38882b77998SStan Tomov u += nqpt; 38982b77998SStan Tomov } 39082b77998SStan Tomov } 39182b77998SStan Tomov } 39282b77998SStan Tomov if (emode & CEED_EVAL_WEIGHT) { 39382b77998SStan Tomov if (tmode == CEED_TRANSPOSE) 39482b77998SStan Tomov return CeedError(basis->ceed, 1, 39582b77998SStan Tomov "CEED_EVAL_WEIGHT incompatible with CEED_TRANSPOSE"); 39682b77998SStan Tomov CeedInt Q = basis->Q1d; 39782b77998SStan Tomov for (CeedInt d=0; d<dim; d++) { 39882b77998SStan Tomov CeedInt pre = CeedPowInt(Q, dim-d-1), post = CeedPowInt(Q, d); 39982b77998SStan Tomov for (CeedInt i=0; i<pre; i++) { 40082b77998SStan Tomov for (CeedInt j=0; j<Q; j++) { 40182b77998SStan Tomov for (CeedInt k=0; k<post; k++) { 40282b77998SStan Tomov v[(i*Q + j)*post + k] = basis->qweight1d[j] 40382b77998SStan Tomov * (d == 0 ? 1 : v[(i*Q + j)*post + k]); 40482b77998SStan Tomov } 40582b77998SStan Tomov } 40682b77998SStan Tomov } 40782b77998SStan Tomov } 40882b77998SStan Tomov } 40982b77998SStan Tomov return 0; 41082b77998SStan Tomov } 41182b77998SStan Tomov 41282b77998SStan Tomov static int CeedBasisDestroy_Magma(CeedBasis basis) { 41382b77998SStan Tomov return 0; 41482b77998SStan Tomov } 41582b77998SStan Tomov 41682b77998SStan Tomov static int CeedBasisCreateTensorH1_Magma(Ceed ceed, CeedInt dim, CeedInt P1d, 41782b77998SStan Tomov CeedInt Q1d, const CeedScalar *interp1d, 41882b77998SStan Tomov const CeedScalar *grad1d, 41982b77998SStan Tomov const CeedScalar *qref1d, 42082b77998SStan Tomov const CeedScalar *qweight1d, 42182b77998SStan Tomov CeedBasis basis) { 42282b77998SStan Tomov basis->Apply = CeedBasisApply_Magma; 42382b77998SStan Tomov basis->Destroy = CeedBasisDestroy_Magma; 42482b77998SStan Tomov return 0; 42582b77998SStan Tomov } 42682b77998SStan Tomov 42782b77998SStan Tomov static int CeedQFunctionApply_Magma(CeedQFunction qf, void *qdata, CeedInt Q, 42882b77998SStan Tomov const CeedScalar *const *u, 42982b77998SStan Tomov CeedScalar *const *v) { 43082b77998SStan Tomov int ierr; 43182b77998SStan Tomov ierr = qf->function(qf->ctx, qdata, Q, u, v); CeedChk(ierr); 43282b77998SStan Tomov return 0; 43382b77998SStan Tomov } 43482b77998SStan Tomov 43582b77998SStan Tomov static int CeedQFunctionDestroy_Magma(CeedQFunction qf) { 43682b77998SStan Tomov return 0; 43782b77998SStan Tomov } 43882b77998SStan Tomov 43982b77998SStan Tomov static int CeedQFunctionCreate_Magma(CeedQFunction qf) { 44082b77998SStan Tomov qf->Apply = CeedQFunctionApply_Magma; 44182b77998SStan Tomov qf->Destroy = CeedQFunctionDestroy_Magma; 44282b77998SStan Tomov return 0; 44382b77998SStan Tomov } 44482b77998SStan Tomov 44582b77998SStan Tomov static int CeedOperatorDestroy_Magma(CeedOperator op) { 44682b77998SStan Tomov CeedOperator_Magma *impl = op->data; 44782b77998SStan Tomov int ierr; 44882b77998SStan Tomov 44982b77998SStan Tomov ierr = CeedVectorDestroy(&impl->etmp); CeedChk(ierr); 45082b77998SStan Tomov ierr = CeedVectorDestroy(&impl->qdata); CeedChk(ierr); 45182b77998SStan Tomov ierr = CeedFree(&op->data); CeedChk(ierr); 45282b77998SStan Tomov return 0; 45382b77998SStan Tomov } 45482b77998SStan Tomov 45582b77998SStan Tomov static int CeedOperatorApply_Magma(CeedOperator op, CeedVector qdata, 45682b77998SStan Tomov CeedVector ustate, 45782b77998SStan Tomov CeedVector residual, CeedRequest *request) { 45882b77998SStan Tomov CeedOperator_Magma *impl = op->data; 45982b77998SStan Tomov CeedVector etmp; 46082b77998SStan Tomov CeedInt Q; 46182b77998SStan Tomov const CeedInt nc = op->basis->ndof, dim = op->basis->dim; 46282b77998SStan Tomov CeedScalar *Eu; 46382b77998SStan Tomov char *qd; 46482b77998SStan Tomov int ierr; 46582b77998SStan Tomov CeedTransposeMode lmode = CEED_NOTRANSPOSE; 46682b77998SStan Tomov 46782b77998SStan Tomov if (!impl->etmp) { 46882b77998SStan Tomov ierr = CeedVectorCreate(op->ceed, 46982b77998SStan Tomov nc * op->Erestrict->nelem * op->Erestrict->elemsize, 47082b77998SStan Tomov &impl->etmp); CeedChk(ierr); 47182b77998SStan Tomov // etmp is allocated when CeedVectorGetArray is called below 47282b77998SStan Tomov } 47382b77998SStan Tomov etmp = impl->etmp; 47482b77998SStan Tomov if (op->qf->inmode & ~CEED_EVAL_WEIGHT) { 47582b77998SStan Tomov ierr = CeedElemRestrictionApply(op->Erestrict, CEED_NOTRANSPOSE, 47682b77998SStan Tomov nc, lmode, ustate, etmp, 47782b77998SStan Tomov CEED_REQUEST_IMMEDIATE); CeedChk(ierr); 47882b77998SStan Tomov } 47982b77998SStan Tomov ierr = CeedBasisGetNumQuadraturePoints(op->basis, &Q); CeedChk(ierr); 48082b77998SStan Tomov ierr = CeedVectorGetArray(etmp, CEED_MEM_HOST, &Eu); CeedChk(ierr); 48182b77998SStan Tomov ierr = CeedVectorGetArray(qdata, CEED_MEM_HOST, (CeedScalar**)&qd); 48282b77998SStan Tomov CeedChk(ierr); 48382b77998SStan Tomov for (CeedInt e=0; e<op->Erestrict->nelem; e++) { 48482b77998SStan Tomov CeedScalar BEu[Q*nc*(dim+2)], BEv[Q*nc*(dim+2)], *out[5] = {0,0,0,0,0}; 48582b77998SStan Tomov const CeedScalar *in[5] = {0,0,0,0,0}; 48682b77998SStan Tomov // TODO: quadrature weights can be computed just once 48782b77998SStan Tomov ierr = CeedBasisApply(op->basis, CEED_NOTRANSPOSE, op->qf->inmode, 48882b77998SStan Tomov &Eu[e*op->Erestrict->elemsize*nc], BEu); 48982b77998SStan Tomov CeedChk(ierr); 49082b77998SStan Tomov CeedScalar *u_ptr = BEu, *v_ptr = BEv; 49182b77998SStan Tomov if (op->qf->inmode & CEED_EVAL_INTERP) { in[0] = u_ptr; u_ptr += Q*nc; } 49282b77998SStan Tomov if (op->qf->inmode & CEED_EVAL_GRAD) { in[1] = u_ptr; u_ptr += Q*nc*dim; } 49382b77998SStan Tomov if (op->qf->inmode & CEED_EVAL_WEIGHT) { in[4] = u_ptr; u_ptr += Q; } 49482b77998SStan Tomov if (op->qf->outmode & CEED_EVAL_INTERP) { out[0] = v_ptr; v_ptr += Q*nc; } 49582b77998SStan Tomov if (op->qf->outmode & CEED_EVAL_GRAD) { out[1] = v_ptr; v_ptr += Q*nc*dim; } 49682b77998SStan Tomov ierr = CeedQFunctionApply(op->qf, &qd[e*Q*op->qf->qdatasize], Q, in, out); 49782b77998SStan Tomov CeedChk(ierr); 49882b77998SStan Tomov ierr = CeedBasisApply(op->basis, CEED_TRANSPOSE, op->qf->outmode, BEv, 49982b77998SStan Tomov &Eu[e*op->Erestrict->elemsize*nc]); 50082b77998SStan Tomov CeedChk(ierr); 50182b77998SStan Tomov } 50282b77998SStan Tomov ierr = CeedVectorRestoreArray(etmp, &Eu); CeedChk(ierr); 50382b77998SStan Tomov if (residual) { 50482b77998SStan Tomov CeedScalar *res; 50582b77998SStan Tomov CeedVectorGetArray(residual, CEED_MEM_HOST, &res); 50682b77998SStan Tomov for (int i = 0; i < residual->length; i++) 50782b77998SStan Tomov res[i] = (CeedScalar)0; 50882b77998SStan Tomov ierr = CeedElemRestrictionApply(op->Erestrict, CEED_TRANSPOSE, 50982b77998SStan Tomov nc, lmode, etmp, residual, 51082b77998SStan Tomov CEED_REQUEST_IMMEDIATE); CeedChk(ierr); 51182b77998SStan Tomov } 51282b77998SStan Tomov if (request != CEED_REQUEST_IMMEDIATE && request != CEED_REQUEST_ORDERED) 51382b77998SStan Tomov *request = NULL; 51482b77998SStan Tomov return 0; 51582b77998SStan Tomov } 51682b77998SStan Tomov 51782b77998SStan Tomov static int CeedOperatorGetQData_Magma(CeedOperator op, CeedVector *qdata) { 51882b77998SStan Tomov CeedOperator_Magma *impl = op->data; 51982b77998SStan Tomov int ierr; 52082b77998SStan Tomov 52182b77998SStan Tomov if (!impl->qdata) { 52282b77998SStan Tomov CeedInt Q; 52382b77998SStan Tomov ierr = CeedBasisGetNumQuadraturePoints(op->basis, &Q); CeedChk(ierr); 52482b77998SStan Tomov ierr = CeedVectorCreate(op->ceed, 52582b77998SStan Tomov op->Erestrict->nelem * Q 52682b77998SStan Tomov * op->qf->qdatasize / sizeof(CeedScalar), 52782b77998SStan Tomov &impl->qdata); CeedChk(ierr); 52882b77998SStan Tomov } 52982b77998SStan Tomov *qdata = impl->qdata; 53082b77998SStan Tomov return 0; 53182b77998SStan Tomov } 53282b77998SStan Tomov 53382b77998SStan Tomov static int CeedOperatorCreate_Magma(CeedOperator op) { 53482b77998SStan Tomov CeedOperator_Magma *impl; 53582b77998SStan Tomov int ierr; 53682b77998SStan Tomov 53782b77998SStan Tomov ierr = CeedCalloc(1, &impl); CeedChk(ierr); 53882b77998SStan Tomov op->data = impl; 53982b77998SStan Tomov op->Destroy = CeedOperatorDestroy_Magma; 54082b77998SStan Tomov op->Apply = CeedOperatorApply_Magma; 54182b77998SStan Tomov op->GetQData = CeedOperatorGetQData_Magma; 54282b77998SStan Tomov return 0; 54382b77998SStan Tomov } 54482b77998SStan Tomov 54582b77998SStan Tomov // ***************************************************************************** 54682b77998SStan Tomov // * INIT 54782b77998SStan Tomov // ***************************************************************************** 54882b77998SStan Tomov static int CeedInit_Magma(const char *resource, Ceed ceed) { 54982b77998SStan Tomov if (strcmp(resource, "/cpu/magma") 55082b77998SStan Tomov && strcmp(resource, "/gpu/magma")) 55182b77998SStan Tomov return CeedError(ceed, 1, "MAGMA backend cannot use resource: %s", resource); 552*93fbe329SStan Tomov 553*93fbe329SStan Tomov magma_init(); 554*93fbe329SStan Tomov //magma_print_environment(); 555*93fbe329SStan Tomov 55682b77998SStan Tomov ceed->VecCreate = CeedVectorCreate_Magma; 55782b77998SStan Tomov ceed->BasisCreateTensorH1 = CeedBasisCreateTensorH1_Magma; 55882b77998SStan Tomov ceed->ElemRestrictionCreate = CeedElemRestrictionCreate_Magma; 55982b77998SStan Tomov ceed->QFunctionCreate = CeedQFunctionCreate_Magma; 56082b77998SStan Tomov ceed->OperatorCreate = CeedOperatorCreate_Magma; 56182b77998SStan Tomov return 0; 56282b77998SStan Tomov } 56382b77998SStan Tomov 56482b77998SStan Tomov // ***************************************************************************** 56582b77998SStan Tomov // * REGISTER 56682b77998SStan Tomov // ***************************************************************************** 56782b77998SStan Tomov __attribute__((constructor)) 56882b77998SStan Tomov static void Register(void) { 56982b77998SStan Tomov CeedRegister("/cpu/magma", CeedInit_Magma); 57082b77998SStan Tomov CeedRegister("/gpu/magma", CeedInit_Magma); 57182b77998SStan Tomov } 572