xref: /libCEED/rust/libceed-sys/c-src/backends/magma/ceed-magma.c (revision 93fbe329e5fde74447dcd5c0cb0ca879fdf7f6a5)
182b77998SStan Tomov // Copyright (c) 2017, Lawrence Livermore National Security, LLC. Produced at
282b77998SStan Tomov // the Lawrence Livermore National Laboratory. LLNL-CODE-734707. All Rights
382b77998SStan Tomov // reserved. See files LICENSE and NOTICE for details.
482b77998SStan Tomov //
582b77998SStan Tomov // This file is part of CEED, a collection of benchmarks, miniapps, software
682b77998SStan Tomov // libraries and APIs for efficient high-order finite element and spectral
782b77998SStan Tomov // element discretizations for exascale applications. For more information and
882b77998SStan Tomov // source code availability see http://github.com/ceed.
982b77998SStan Tomov //
1082b77998SStan Tomov // The CEED research is supported by the Exascale Computing Project 17-SC-20-SC,
1182b77998SStan Tomov // a collaborative effort of two U.S. Department of Energy organizations (Office
1282b77998SStan Tomov // of Science and the National Nuclear Security Administration) responsible for
1382b77998SStan Tomov // the planning and preparation of a capable exascale ecosystem, including
1482b77998SStan Tomov // software, applications, hardware, advanced system engineering and early
1582b77998SStan Tomov // testbed platforms, in support of the nation's exascale computing imperative.
1682b77998SStan Tomov 
1782b77998SStan Tomov #include <ceed-impl.h>
1882b77998SStan Tomov #include <string.h>
19*93fbe329SStan Tomov #include "magma.h"
2082b77998SStan Tomov 
2182b77998SStan Tomov typedef struct {
2282b77998SStan Tomov   CeedScalar *array;
23*93fbe329SStan Tomov   CeedScalar *darray;
2482b77998SStan Tomov } CeedVector_Magma;
2582b77998SStan Tomov 
2682b77998SStan Tomov typedef struct {
2782b77998SStan Tomov   const CeedInt *indices;
2882b77998SStan Tomov   CeedInt *indices_allocated;
2982b77998SStan Tomov } CeedElemRestriction_Magma;
3082b77998SStan Tomov 
3182b77998SStan Tomov typedef struct {
3282b77998SStan Tomov   CeedVector etmp;
3382b77998SStan Tomov   CeedVector qdata;
3482b77998SStan Tomov } CeedOperator_Magma;
3582b77998SStan Tomov 
36*93fbe329SStan Tomov // *****************************************************************************
37*93fbe329SStan Tomov // * Initialize vector vec (after free mem) with values from array based on cmode
38*93fbe329SStan Tomov // *   CEED_COPY_VALUES: memory is allocated in vec->array_allocated, made equal
39*93fbe329SStan Tomov // *                     to array, and data is copied (not store passed pointer)
40*93fbe329SStan Tomov // *   CEED_OWN_POINTER: vec->data->array_allocated and vec->data->array = array
41*93fbe329SStan Tomov // *   CEED_USE_POINTER: vec->data->array = array (can modify; no ownership)
42*93fbe329SStan Tomov // * mtype: CEED_MEM_HOST or CEED_MEM_DEVICE
43*93fbe329SStan Tomov // *****************************************************************************
4482b77998SStan Tomov static int CeedVectorSetArray_Magma(CeedVector vec, CeedMemType mtype,
4582b77998SStan Tomov                                     CeedCopyMode cmode, CeedScalar *array) {
4682b77998SStan Tomov     CeedVector_Magma *impl = vec->data;
4782b77998SStan Tomov     int ierr;
4882b77998SStan Tomov 
4982b77998SStan Tomov     if (mtype != CEED_MEM_HOST)
5082b77998SStan Tomov         return CeedError(vec->ceed, 1, "Only MemType = HOST supported");
51*93fbe329SStan Tomov 
5282b77998SStan Tomov     switch (cmode) {
5382b77998SStan Tomov     case CEED_COPY_VALUES:
54*93fbe329SStan Tomov         if (impl->darray == NULL) {
55*93fbe329SStan Tomov             ierr = magma_malloc( (void**)&impl->darray,
56*93fbe329SStan Tomov                                  vec->length * sizeof(CeedScalar));
57*93fbe329SStan Tomov             CeedChk(ierr);
58*93fbe329SStan Tomov         }
59*93fbe329SStan Tomov         if (array)
60*93fbe329SStan Tomov             magma_setvector(vec->length, sizeof(array[0]),
61*93fbe329SStan Tomov                             array, 1, impl->darray, 1);
6282b77998SStan Tomov         break;
6382b77998SStan Tomov     case CEED_OWN_POINTER:
64*93fbe329SStan Tomov         if (impl->darray == NULL){
65*93fbe329SStan Tomov             ierr = magma_malloc( (void**)&impl->darray,
66*93fbe329SStan Tomov                                  vec->length * sizeof(CeedScalar));
67*93fbe329SStan Tomov             CeedChk(ierr);
68*93fbe329SStan Tomov         }
69*93fbe329SStan Tomov         if (array)
70*93fbe329SStan Tomov             magma_setvector(vec->length, sizeof(array[0]),
71*93fbe329SStan Tomov                             array, 1, impl->darray, 1);
72*93fbe329SStan Tomov 
73*93fbe329SStan Tomov         ierr = CeedFree(array); CeedChk(ierr);
7482b77998SStan Tomov         break;
7582b77998SStan Tomov     case CEED_USE_POINTER:
76*93fbe329SStan Tomov         if (impl->darray != NULL){
77*93fbe329SStan Tomov             ierr = magma_free(impl->darray);
78*93fbe329SStan Tomov             CeedChk(ierr);
79*93fbe329SStan Tomov             impl->darray = NULL;
80*93fbe329SStan Tomov         }
8182b77998SStan Tomov         impl->array = array;
8282b77998SStan Tomov     }
8382b77998SStan Tomov     return 0;
8482b77998SStan Tomov }
8582b77998SStan Tomov 
86*93fbe329SStan Tomov // *****************************************************************************
87*93fbe329SStan Tomov // * Get data from Device vector vec into CeedScalar array on the CPU
88*93fbe329SStan Tomov // *****************************************************************************
8982b77998SStan Tomov static int CeedVectorGetArray_Magma(CeedVector vec, CeedMemType mtype,
9082b77998SStan Tomov                                     CeedScalar **array) {
9182b77998SStan Tomov     CeedVector_Magma *impl = vec->data;
9282b77998SStan Tomov     int ierr;
9382b77998SStan Tomov 
9482b77998SStan Tomov     if (mtype != CEED_MEM_HOST)
9582b77998SStan Tomov         return CeedError(vec->ceed, 1, "Can only provide to HOST memory");
96*93fbe329SStan Tomov 
9782b77998SStan Tomov     if (!impl->array) { // Allocate if array is not yet allocated
98*93fbe329SStan Tomov         ierr = CeedMalloc(vec->length, &impl->array); CeedChk(ierr);
99*93fbe329SStan Tomov     }
100*93fbe329SStan Tomov     if (!impl->darray){ // Get data from the Device if owned (darray!=NULL)
101*93fbe329SStan Tomov         magma_getvector(vec->length, sizeof(array[0]),
102*93fbe329SStan Tomov                         impl->darray, 1, array, 1);
10382b77998SStan Tomov     }
10482b77998SStan Tomov     *array = impl->array;
10582b77998SStan Tomov     return 0;
10682b77998SStan Tomov }
10782b77998SStan Tomov 
108*93fbe329SStan Tomov // *****************************************************************************
109*93fbe329SStan Tomov // * Get data from Device vector vec into CeedScalar array on the CPU for reading
110*93fbe329SStan Tomov // *****************************************************************************
11182b77998SStan Tomov static int CeedVectorGetArrayRead_Magma(CeedVector vec, CeedMemType mtype,
11282b77998SStan Tomov                                       const CeedScalar **array) {
11382b77998SStan Tomov     CeedVector_Magma *impl = vec->data;
11482b77998SStan Tomov     int ierr;
11582b77998SStan Tomov 
11682b77998SStan Tomov     if (mtype != CEED_MEM_HOST)
11782b77998SStan Tomov         return CeedError(vec->ceed, 1, "Can only provide to HOST memory");
118*93fbe329SStan Tomov 
11982b77998SStan Tomov     if (!impl->array) { // Allocate if array is not yet allocated
120*93fbe329SStan Tomov         ierr = CeedMalloc(vec->length, &impl->array); CeedChk(ierr);
12182b77998SStan Tomov     }
122*93fbe329SStan Tomov     if (!impl->darray){ // Get data from the Device if owned (darray!=NULL)
123*93fbe329SStan Tomov         magma_getvector(vec->length, sizeof(array[0]),
124*93fbe329SStan Tomov                         impl->darray, 1, array, 1);
125*93fbe329SStan Tomov     }
126*93fbe329SStan Tomov 
12782b77998SStan Tomov     *array = impl->array;
12882b77998SStan Tomov     return 0;
12982b77998SStan Tomov }
13082b77998SStan Tomov 
131*93fbe329SStan Tomov // *****************************************************************************
132*93fbe329SStan Tomov // * Restore vector vec with values from array, where array received its values
133*93fbe329SStan Tomov // * from vec and possibly modified them.
134*93fbe329SStan Tomov // *****************************************************************************
13582b77998SStan Tomov static int CeedVectorRestoreArray_Magma(CeedVector vec, CeedScalar **array) {
136*93fbe329SStan Tomov     CeedVector_Magma *impl = vec->data;
137*93fbe329SStan Tomov 
138*93fbe329SStan Tomov     if (!impl->darray) {// vec owns the data, so it is on GPU
139*93fbe329SStan Tomov         magma_setvector(vec->length, sizeof(*array[0]),
140*93fbe329SStan Tomov                         *array, 1, impl->darray, 1);
141*93fbe329SStan Tomov     }
14282b77998SStan Tomov     *array = NULL;
14382b77998SStan Tomov     return 0;
14482b77998SStan Tomov }
14582b77998SStan Tomov 
146*93fbe329SStan Tomov // *****************************************************************************
147*93fbe329SStan Tomov // * Restore vector vec with values from array, where array received its values
148*93fbe329SStan Tomov // * from vec to only read them; in this case vec may have been modified meanwhile
149*93fbe329SStan Tomov // * and needs to be restored here.
150*93fbe329SStan Tomov // *****************************************************************************
15182b77998SStan Tomov static int CeedVectorRestoreArrayRead_Magma(CeedVector vec,
15282b77998SStan Tomov                                             const CeedScalar **array) {
153*93fbe329SStan Tomov     CeedVector_Magma *impl = vec->data;
154*93fbe329SStan Tomov 
155*93fbe329SStan Tomov     if (!impl->darray) {// vec owns the data, so it is on GPU
156*93fbe329SStan Tomov         magma_setvector(vec->length, sizeof(*array[0]),
157*93fbe329SStan Tomov                         *array, 1, impl->darray, 1);
158*93fbe329SStan Tomov     }
15982b77998SStan Tomov     *array = NULL;
16082b77998SStan Tomov     return 0;
16182b77998SStan Tomov }
16282b77998SStan Tomov 
16382b77998SStan Tomov static int CeedVectorDestroy_Magma(CeedVector vec) {
16482b77998SStan Tomov   CeedVector_Magma *impl = vec->data;
16582b77998SStan Tomov   int ierr;
16682b77998SStan Tomov 
167*93fbe329SStan Tomov   if (!impl->array) {ierr = CeedFree(&impl->array); CeedChk(ierr);}
168*93fbe329SStan Tomov   ierr = magma_free(impl->darray); CeedChk(ierr);
16982b77998SStan Tomov   ierr = CeedFree(&vec->data); CeedChk(ierr);
17082b77998SStan Tomov   return 0;
17182b77998SStan Tomov }
17282b77998SStan Tomov 
173*93fbe329SStan Tomov // *****************************************************************************
174*93fbe329SStan Tomov // * Create vector vec of size n
175*93fbe329SStan Tomov // *****************************************************************************
17682b77998SStan Tomov static int CeedVectorCreate_Magma(Ceed ceed, CeedInt n, CeedVector vec) {
17782b77998SStan Tomov   CeedVector_Magma *impl;
17882b77998SStan Tomov   int ierr;
17982b77998SStan Tomov 
18082b77998SStan Tomov   vec->SetArray = CeedVectorSetArray_Magma;
18182b77998SStan Tomov   vec->GetArray = CeedVectorGetArray_Magma;
18282b77998SStan Tomov   vec->GetArrayRead = CeedVectorGetArrayRead_Magma;
18382b77998SStan Tomov   vec->RestoreArray = CeedVectorRestoreArray_Magma;
18482b77998SStan Tomov   vec->RestoreArrayRead = CeedVectorRestoreArrayRead_Magma;
18582b77998SStan Tomov   vec->Destroy = CeedVectorDestroy_Magma;
18682b77998SStan Tomov   ierr = CeedCalloc(1,&impl); CeedChk(ierr);
187*93fbe329SStan Tomov   impl->darray = NULL;
18882b77998SStan Tomov   vec->data = impl;
18982b77998SStan Tomov   return 0;
19082b77998SStan Tomov }
19182b77998SStan Tomov 
192*93fbe329SStan Tomov // *****************************************************************************
193*93fbe329SStan Tomov // * Apply restriction operator r to u: v = r(rmode) u
194*93fbe329SStan Tomov // *****************************************************************************
19582b77998SStan Tomov static int CeedElemRestrictionApply_Magma(CeedElemRestriction r,
19682b77998SStan Tomov                                         CeedTransposeMode tmode, CeedInt ncomp,
19782b77998SStan Tomov                                         CeedTransposeMode lmode, CeedVector u,
19882b77998SStan Tomov                                         CeedVector v, CeedRequest *request) {
19982b77998SStan Tomov   CeedElemRestriction_Magma *impl = r->data;
20082b77998SStan Tomov   int ierr;
20182b77998SStan Tomov   const CeedScalar *uu;
20282b77998SStan Tomov   CeedScalar *vv;
20382b77998SStan Tomov   CeedInt esize = r->nelem*r->elemsize;
20482b77998SStan Tomov 
20582b77998SStan Tomov   ierr = CeedVectorGetArrayRead(u, CEED_MEM_HOST, &uu); CeedChk(ierr);
20682b77998SStan Tomov   ierr = CeedVectorGetArray(v, CEED_MEM_HOST, &vv); CeedChk(ierr);
20782b77998SStan Tomov   if (tmode == CEED_NOTRANSPOSE) {
20882b77998SStan Tomov     // Perform: v = r * u
20982b77998SStan Tomov     if (ncomp == 1) {
21082b77998SStan Tomov       for (CeedInt i=0; i<esize; i++) vv[i] = uu[impl->indices[i]];
21182b77998SStan Tomov     } else {
21282b77998SStan Tomov       // vv is (elemsize x ncomp x nelem), column-major
21382b77998SStan Tomov       if (lmode == CEED_NOTRANSPOSE) { // u is (ndof x ncomp), column-major
21482b77998SStan Tomov         for (CeedInt e = 0; e < r->nelem; e++)
21582b77998SStan Tomov           for (CeedInt d = 0; d < ncomp; d++)
21682b77998SStan Tomov             for (CeedInt i=0; i<r->elemsize; i++) {
21782b77998SStan Tomov               vv[i+r->elemsize*(d+ncomp*e)] =
21882b77998SStan Tomov                 uu[impl->indices[i+r->elemsize*e]+r->ndof*d];
21982b77998SStan Tomov             }
22082b77998SStan Tomov       } else { // u is (ncomp x ndof), column-major
22182b77998SStan Tomov         for (CeedInt e = 0; e < r->nelem; e++)
22282b77998SStan Tomov           for (CeedInt d = 0; d < ncomp; d++)
22382b77998SStan Tomov             for (CeedInt i=0; i<r->elemsize; i++) {
22482b77998SStan Tomov               vv[i+r->elemsize*(d+ncomp*e)] =
22582b77998SStan Tomov                 uu[d+ncomp*impl->indices[i+r->elemsize*e]];
22682b77998SStan Tomov             }
22782b77998SStan Tomov       }
22882b77998SStan Tomov     }
22982b77998SStan Tomov   } else {
23082b77998SStan Tomov     // Note: in transpose mode, we perform: v += r^t * u
23182b77998SStan Tomov     if (ncomp == 1) {
23282b77998SStan Tomov       for (CeedInt i=0; i<esize; i++) vv[impl->indices[i]] += uu[i];
23382b77998SStan Tomov     } else {
23482b77998SStan Tomov       // u is (elemsize x ncomp x nelem)
23582b77998SStan Tomov       if (lmode == CEED_NOTRANSPOSE) { // vv is (ndof x ncomp), column-major
23682b77998SStan Tomov         for (CeedInt e = 0; e < r->nelem; e++)
23782b77998SStan Tomov           for (CeedInt d = 0; d < ncomp; d++)
23882b77998SStan Tomov             for (CeedInt i=0; i<r->elemsize; i++) {
23982b77998SStan Tomov               vv[impl->indices[i+r->elemsize*e]+r->ndof*d] +=
24082b77998SStan Tomov                 uu[i+r->elemsize*(d+e*ncomp)];
24182b77998SStan Tomov             }
24282b77998SStan Tomov       } else { // vv is (ncomp x ndof), column-major
24382b77998SStan Tomov         for (CeedInt e = 0; e < r->nelem; e++)
24482b77998SStan Tomov           for (CeedInt d = 0; d < ncomp; d++)
24582b77998SStan Tomov             for (CeedInt i=0; i<r->elemsize; i++) {
24682b77998SStan Tomov               vv[d+ncomp*impl->indices[i+r->elemsize*e]] +=
24782b77998SStan Tomov                 uu[i+r->elemsize*(d+e*ncomp)];
24882b77998SStan Tomov             }
24982b77998SStan Tomov       }
25082b77998SStan Tomov     }
25182b77998SStan Tomov   }
25282b77998SStan Tomov   ierr = CeedVectorRestoreArrayRead(u, &uu); CeedChk(ierr);
25382b77998SStan Tomov   ierr = CeedVectorRestoreArray(v, &vv); CeedChk(ierr);
25482b77998SStan Tomov   if (request != CEED_REQUEST_IMMEDIATE && request != CEED_REQUEST_ORDERED)
25582b77998SStan Tomov     *request = NULL;
25682b77998SStan Tomov   return 0;
25782b77998SStan Tomov }
25882b77998SStan Tomov 
25982b77998SStan Tomov static int CeedElemRestrictionDestroy_Magma(CeedElemRestriction r) {
26082b77998SStan Tomov   CeedElemRestriction_Magma *impl = r->data;
26182b77998SStan Tomov   int ierr;
26282b77998SStan Tomov 
26382b77998SStan Tomov   ierr = CeedFree(&impl->indices_allocated); CeedChk(ierr);
26482b77998SStan Tomov   ierr = CeedFree(&r->data); CeedChk(ierr);
26582b77998SStan Tomov   return 0;
26682b77998SStan Tomov }
26782b77998SStan Tomov 
26882b77998SStan Tomov static int CeedElemRestrictionCreate_Magma(CeedElemRestriction r,
26982b77998SStan Tomov                                            CeedMemType mtype,
270*93fbe329SStan Tomov                                            CeedCopyMode cmode,
271*93fbe329SStan Tomov                                            const CeedInt *indices) {
27282b77998SStan Tomov     int ierr;
27382b77998SStan Tomov     CeedElemRestriction_Magma *impl;
27482b77998SStan Tomov 
27582b77998SStan Tomov     if (mtype != CEED_MEM_HOST)
27682b77998SStan Tomov         return CeedError(r->ceed, 1, "Only MemType = HOST supported");
27782b77998SStan Tomov     ierr = CeedCalloc(1,&impl); CeedChk(ierr);
27882b77998SStan Tomov     switch (cmode) {
27982b77998SStan Tomov     case CEED_COPY_VALUES:
28082b77998SStan Tomov         ierr = CeedMalloc(r->nelem*r->elemsize, &impl->indices_allocated);
28182b77998SStan Tomov         CeedChk(ierr);
28282b77998SStan Tomov         memcpy(impl->indices_allocated, indices,
28382b77998SStan Tomov                r->nelem * r->elemsize * sizeof(indices[0]));
28482b77998SStan Tomov         impl->indices = impl->indices_allocated;
28582b77998SStan Tomov         break;
28682b77998SStan Tomov     case CEED_OWN_POINTER:
28782b77998SStan Tomov         impl->indices_allocated = (CeedInt *)indices;
28882b77998SStan Tomov         impl->indices = impl->indices_allocated;
28982b77998SStan Tomov         break;
29082b77998SStan Tomov     case CEED_USE_POINTER:
29182b77998SStan Tomov         impl->indices = indices;
29282b77998SStan Tomov     }
29382b77998SStan Tomov     r->data = impl;
29482b77998SStan Tomov     r->Apply = CeedElemRestrictionApply_Magma;
29582b77998SStan Tomov     r->Destroy = CeedElemRestrictionDestroy_Magma;
29682b77998SStan Tomov     return 0;
29782b77998SStan Tomov }
29882b77998SStan Tomov 
29982b77998SStan Tomov // Contracts on the middle index
30082b77998SStan Tomov // NOTRANSPOSE: V_ajc = T_jb U_abc
30182b77998SStan Tomov // TRANSPOSE:   V_ajc = T_bj U_abc
30282b77998SStan Tomov // If Add != 0, "=" is replaced by "+="
30382b77998SStan Tomov static int CeedTensorContract_Magma(Ceed ceed,
30482b77998SStan Tomov                                     CeedInt A, CeedInt B, CeedInt C, CeedInt J,
30582b77998SStan Tomov                                     const CeedScalar *t, CeedTransposeMode tmode,
30682b77998SStan Tomov                                     const CeedInt Add,
30782b77998SStan Tomov                                     const CeedScalar *u, CeedScalar *v) {
30882b77998SStan Tomov     CeedInt tstride0 = B, tstride1 = 1;
30982b77998SStan Tomov     if (tmode == CEED_TRANSPOSE) {
31082b77998SStan Tomov         tstride0 = 1; tstride1 = J;
31182b77998SStan Tomov     }
31282b77998SStan Tomov 
31382b77998SStan Tomov     for (CeedInt a=0; a<A; a++) {
31482b77998SStan Tomov         for (CeedInt j=0; j<J; j++) {
31582b77998SStan Tomov             if (!Add) {
31682b77998SStan Tomov                 for (CeedInt c=0; c<C; c++)
31782b77998SStan Tomov                     v[(a*J+j)*C+c] = 0;
31882b77998SStan Tomov             }
31982b77998SStan Tomov             for (CeedInt b=0; b<B; b++) {
32082b77998SStan Tomov                 for (CeedInt c=0; c<C; c++) {
32182b77998SStan Tomov                     v[(a*J+j)*C+c] += t[j*tstride0 + b*tstride1] * u[(a*B+b)*C+c];
32282b77998SStan Tomov                 }
32382b77998SStan Tomov             }
32482b77998SStan Tomov         }
32582b77998SStan Tomov     }
32682b77998SStan Tomov     return 0;
32782b77998SStan Tomov }
32882b77998SStan Tomov 
32982b77998SStan Tomov static int CeedBasisApply_Magma(CeedBasis basis, CeedTransposeMode tmode,
33082b77998SStan Tomov                                 CeedEvalMode emode,
33182b77998SStan Tomov                                 const CeedScalar *u, CeedScalar *v) {
33282b77998SStan Tomov     int ierr;
33382b77998SStan Tomov     const CeedInt dim = basis->dim;
33482b77998SStan Tomov     const CeedInt ndof = basis->ndof;
33582b77998SStan Tomov     const CeedInt nqpt = ndof*CeedPowInt(basis->Q1d, dim);
33682b77998SStan Tomov     const CeedInt add = (tmode == CEED_TRANSPOSE);
33782b77998SStan Tomov 
33882b77998SStan Tomov     if (tmode == CEED_TRANSPOSE) {
33982b77998SStan Tomov         const CeedInt vsize = ndof*CeedPowInt(basis->P1d, dim);
34082b77998SStan Tomov         for (CeedInt i = 0; i < vsize; i++)
34182b77998SStan Tomov             v[i] = (CeedScalar) 0;
34282b77998SStan Tomov     }
34382b77998SStan Tomov     if (emode & CEED_EVAL_INTERP) {
34482b77998SStan Tomov         CeedInt P = basis->P1d, Q = basis->Q1d;
34582b77998SStan Tomov         if (tmode == CEED_TRANSPOSE) {
34682b77998SStan Tomov             P = basis->Q1d; Q = basis->P1d;
34782b77998SStan Tomov         }
34882b77998SStan Tomov         CeedInt pre = ndof*CeedPowInt(P, dim-1), post = 1;
34982b77998SStan Tomov         CeedScalar tmp[2][ndof*Q*CeedPowInt(P>Q?P:Q, dim-1)];
35082b77998SStan Tomov         for (CeedInt d=0; d<dim; d++) {
35182b77998SStan Tomov             ierr = CeedTensorContract_Magma(basis->ceed, pre, P, post, Q, basis->interp1d,
35282b77998SStan Tomov                                             tmode, add&&(d==dim-1),
35382b77998SStan Tomov                                             d==0?u:tmp[d%2], d==dim-1?v:tmp[(d+1)%2]);
35482b77998SStan Tomov             CeedChk(ierr);
35582b77998SStan Tomov             pre /= P;
35682b77998SStan Tomov             post *= Q;
35782b77998SStan Tomov         }
35882b77998SStan Tomov         if (tmode == CEED_NOTRANSPOSE) {
35982b77998SStan Tomov             v += nqpt;
36082b77998SStan Tomov         } else {
36182b77998SStan Tomov             u += nqpt;
36282b77998SStan Tomov         }
36382b77998SStan Tomov     }
36482b77998SStan Tomov     if (emode & CEED_EVAL_GRAD) {
36582b77998SStan Tomov         CeedInt P = basis->P1d, Q = basis->Q1d;
36682b77998SStan Tomov         // In CEED_NOTRANSPOSE mode:
36782b77998SStan Tomov         // u is (P^dim x nc), column-major layout (nc = ndof)
36882b77998SStan Tomov         // v is (Q^dim x nc x dim), column-major layout (nc = ndof)
36982b77998SStan Tomov         // In CEED_TRANSPOSE mode, the sizes of u and v are switched.
37082b77998SStan Tomov         if (tmode == CEED_TRANSPOSE) {
37182b77998SStan Tomov             P = basis->Q1d, Q = basis->P1d;
37282b77998SStan Tomov         }
37382b77998SStan Tomov         CeedScalar tmp[2][ndof*Q*CeedPowInt(P>Q?P:Q, dim-1)];
37482b77998SStan Tomov         for (CeedInt p = 0; p < dim; p++) {
37582b77998SStan Tomov             CeedInt pre = ndof*CeedPowInt(P, dim-1), post = 1;
37682b77998SStan Tomov             for (CeedInt d=0; d<dim; d++) {
37782b77998SStan Tomov                 ierr = CeedTensorContract_Magma(basis->ceed, pre, P, post, Q,
37882b77998SStan Tomov                                                 (p==d)?basis->grad1d:basis->interp1d,
37982b77998SStan Tomov                                                 tmode, add&&(d==dim-1),
38082b77998SStan Tomov                                                 d==0?u:tmp[d%2], d==dim-1?v:tmp[(d+1)%2]);
38182b77998SStan Tomov                 CeedChk(ierr);
38282b77998SStan Tomov                 pre /= P;
38382b77998SStan Tomov                 post *= Q;
38482b77998SStan Tomov             }
38582b77998SStan Tomov             if (tmode == CEED_NOTRANSPOSE) {
38682b77998SStan Tomov                 v += nqpt;
38782b77998SStan Tomov             } else {
38882b77998SStan Tomov                 u += nqpt;
38982b77998SStan Tomov             }
39082b77998SStan Tomov         }
39182b77998SStan Tomov     }
39282b77998SStan Tomov     if (emode & CEED_EVAL_WEIGHT) {
39382b77998SStan Tomov         if (tmode == CEED_TRANSPOSE)
39482b77998SStan Tomov             return CeedError(basis->ceed, 1,
39582b77998SStan Tomov                              "CEED_EVAL_WEIGHT incompatible with CEED_TRANSPOSE");
39682b77998SStan Tomov         CeedInt Q = basis->Q1d;
39782b77998SStan Tomov         for (CeedInt d=0; d<dim; d++) {
39882b77998SStan Tomov             CeedInt pre = CeedPowInt(Q, dim-d-1), post = CeedPowInt(Q, d);
39982b77998SStan Tomov             for (CeedInt i=0; i<pre; i++) {
40082b77998SStan Tomov                 for (CeedInt j=0; j<Q; j++) {
40182b77998SStan Tomov                     for (CeedInt k=0; k<post; k++) {
40282b77998SStan Tomov                         v[(i*Q + j)*post + k] = basis->qweight1d[j]
40382b77998SStan Tomov                             * (d == 0 ? 1 : v[(i*Q + j)*post + k]);
40482b77998SStan Tomov                     }
40582b77998SStan Tomov                 }
40682b77998SStan Tomov             }
40782b77998SStan Tomov         }
40882b77998SStan Tomov     }
40982b77998SStan Tomov     return 0;
41082b77998SStan Tomov }
41182b77998SStan Tomov 
41282b77998SStan Tomov static int CeedBasisDestroy_Magma(CeedBasis basis) {
41382b77998SStan Tomov     return 0;
41482b77998SStan Tomov }
41582b77998SStan Tomov 
41682b77998SStan Tomov static int CeedBasisCreateTensorH1_Magma(Ceed ceed, CeedInt dim, CeedInt P1d,
41782b77998SStan Tomov                                          CeedInt Q1d, const CeedScalar *interp1d,
41882b77998SStan Tomov                                          const CeedScalar *grad1d,
41982b77998SStan Tomov                                          const CeedScalar *qref1d,
42082b77998SStan Tomov                                          const CeedScalar *qweight1d,
42182b77998SStan Tomov                                          CeedBasis basis) {
42282b77998SStan Tomov     basis->Apply = CeedBasisApply_Magma;
42382b77998SStan Tomov     basis->Destroy = CeedBasisDestroy_Magma;
42482b77998SStan Tomov     return 0;
42582b77998SStan Tomov }
42682b77998SStan Tomov 
42782b77998SStan Tomov static int CeedQFunctionApply_Magma(CeedQFunction qf, void *qdata, CeedInt Q,
42882b77998SStan Tomov                                     const CeedScalar *const *u,
42982b77998SStan Tomov                                     CeedScalar *const *v) {
43082b77998SStan Tomov     int ierr;
43182b77998SStan Tomov     ierr = qf->function(qf->ctx, qdata, Q, u, v); CeedChk(ierr);
43282b77998SStan Tomov     return 0;
43382b77998SStan Tomov }
43482b77998SStan Tomov 
43582b77998SStan Tomov static int CeedQFunctionDestroy_Magma(CeedQFunction qf) {
43682b77998SStan Tomov     return 0;
43782b77998SStan Tomov }
43882b77998SStan Tomov 
43982b77998SStan Tomov static int CeedQFunctionCreate_Magma(CeedQFunction qf) {
44082b77998SStan Tomov     qf->Apply = CeedQFunctionApply_Magma;
44182b77998SStan Tomov     qf->Destroy = CeedQFunctionDestroy_Magma;
44282b77998SStan Tomov     return 0;
44382b77998SStan Tomov }
44482b77998SStan Tomov 
44582b77998SStan Tomov static int CeedOperatorDestroy_Magma(CeedOperator op) {
44682b77998SStan Tomov     CeedOperator_Magma *impl = op->data;
44782b77998SStan Tomov     int ierr;
44882b77998SStan Tomov 
44982b77998SStan Tomov     ierr = CeedVectorDestroy(&impl->etmp); CeedChk(ierr);
45082b77998SStan Tomov     ierr = CeedVectorDestroy(&impl->qdata); CeedChk(ierr);
45182b77998SStan Tomov     ierr = CeedFree(&op->data); CeedChk(ierr);
45282b77998SStan Tomov     return 0;
45382b77998SStan Tomov }
45482b77998SStan Tomov 
45582b77998SStan Tomov static int CeedOperatorApply_Magma(CeedOperator op, CeedVector qdata,
45682b77998SStan Tomov                                    CeedVector ustate,
45782b77998SStan Tomov                                    CeedVector residual, CeedRequest *request) {
45882b77998SStan Tomov     CeedOperator_Magma *impl = op->data;
45982b77998SStan Tomov     CeedVector etmp;
46082b77998SStan Tomov     CeedInt Q;
46182b77998SStan Tomov     const CeedInt nc = op->basis->ndof, dim = op->basis->dim;
46282b77998SStan Tomov     CeedScalar *Eu;
46382b77998SStan Tomov     char *qd;
46482b77998SStan Tomov     int ierr;
46582b77998SStan Tomov     CeedTransposeMode lmode = CEED_NOTRANSPOSE;
46682b77998SStan Tomov 
46782b77998SStan Tomov     if (!impl->etmp) {
46882b77998SStan Tomov         ierr = CeedVectorCreate(op->ceed,
46982b77998SStan Tomov                                 nc * op->Erestrict->nelem * op->Erestrict->elemsize,
47082b77998SStan Tomov                                 &impl->etmp); CeedChk(ierr);
47182b77998SStan Tomov         // etmp is allocated when CeedVectorGetArray is called below
47282b77998SStan Tomov     }
47382b77998SStan Tomov     etmp = impl->etmp;
47482b77998SStan Tomov     if (op->qf->inmode & ~CEED_EVAL_WEIGHT) {
47582b77998SStan Tomov         ierr = CeedElemRestrictionApply(op->Erestrict, CEED_NOTRANSPOSE,
47682b77998SStan Tomov                                         nc, lmode, ustate, etmp,
47782b77998SStan Tomov                                         CEED_REQUEST_IMMEDIATE); CeedChk(ierr);
47882b77998SStan Tomov     }
47982b77998SStan Tomov     ierr = CeedBasisGetNumQuadraturePoints(op->basis, &Q); CeedChk(ierr);
48082b77998SStan Tomov     ierr = CeedVectorGetArray(etmp, CEED_MEM_HOST, &Eu); CeedChk(ierr);
48182b77998SStan Tomov     ierr = CeedVectorGetArray(qdata, CEED_MEM_HOST, (CeedScalar**)&qd);
48282b77998SStan Tomov     CeedChk(ierr);
48382b77998SStan Tomov     for (CeedInt e=0; e<op->Erestrict->nelem; e++) {
48482b77998SStan Tomov         CeedScalar BEu[Q*nc*(dim+2)], BEv[Q*nc*(dim+2)], *out[5] = {0,0,0,0,0};
48582b77998SStan Tomov         const CeedScalar *in[5] = {0,0,0,0,0};
48682b77998SStan Tomov         // TODO: quadrature weights can be computed just once
48782b77998SStan Tomov         ierr = CeedBasisApply(op->basis, CEED_NOTRANSPOSE, op->qf->inmode,
48882b77998SStan Tomov                               &Eu[e*op->Erestrict->elemsize*nc], BEu);
48982b77998SStan Tomov         CeedChk(ierr);
49082b77998SStan Tomov         CeedScalar *u_ptr = BEu, *v_ptr = BEv;
49182b77998SStan Tomov         if (op->qf->inmode & CEED_EVAL_INTERP) { in[0] = u_ptr; u_ptr += Q*nc; }
49282b77998SStan Tomov         if (op->qf->inmode & CEED_EVAL_GRAD) { in[1] = u_ptr; u_ptr += Q*nc*dim; }
49382b77998SStan Tomov         if (op->qf->inmode & CEED_EVAL_WEIGHT) { in[4] = u_ptr; u_ptr += Q; }
49482b77998SStan Tomov         if (op->qf->outmode & CEED_EVAL_INTERP) { out[0] = v_ptr; v_ptr += Q*nc; }
49582b77998SStan Tomov         if (op->qf->outmode & CEED_EVAL_GRAD) { out[1] = v_ptr; v_ptr += Q*nc*dim; }
49682b77998SStan Tomov         ierr = CeedQFunctionApply(op->qf, &qd[e*Q*op->qf->qdatasize], Q, in, out);
49782b77998SStan Tomov         CeedChk(ierr);
49882b77998SStan Tomov         ierr = CeedBasisApply(op->basis, CEED_TRANSPOSE, op->qf->outmode, BEv,
49982b77998SStan Tomov                               &Eu[e*op->Erestrict->elemsize*nc]);
50082b77998SStan Tomov         CeedChk(ierr);
50182b77998SStan Tomov     }
50282b77998SStan Tomov     ierr = CeedVectorRestoreArray(etmp, &Eu); CeedChk(ierr);
50382b77998SStan Tomov     if (residual) {
50482b77998SStan Tomov         CeedScalar *res;
50582b77998SStan Tomov         CeedVectorGetArray(residual, CEED_MEM_HOST, &res);
50682b77998SStan Tomov         for (int i = 0; i < residual->length; i++)
50782b77998SStan Tomov             res[i] = (CeedScalar)0;
50882b77998SStan Tomov         ierr = CeedElemRestrictionApply(op->Erestrict, CEED_TRANSPOSE,
50982b77998SStan Tomov                                         nc, lmode, etmp, residual,
51082b77998SStan Tomov                                         CEED_REQUEST_IMMEDIATE); CeedChk(ierr);
51182b77998SStan Tomov     }
51282b77998SStan Tomov     if (request != CEED_REQUEST_IMMEDIATE && request != CEED_REQUEST_ORDERED)
51382b77998SStan Tomov         *request = NULL;
51482b77998SStan Tomov     return 0;
51582b77998SStan Tomov }
51682b77998SStan Tomov 
51782b77998SStan Tomov static int CeedOperatorGetQData_Magma(CeedOperator op, CeedVector *qdata) {
51882b77998SStan Tomov     CeedOperator_Magma *impl = op->data;
51982b77998SStan Tomov     int ierr;
52082b77998SStan Tomov 
52182b77998SStan Tomov     if (!impl->qdata) {
52282b77998SStan Tomov         CeedInt Q;
52382b77998SStan Tomov         ierr = CeedBasisGetNumQuadraturePoints(op->basis, &Q); CeedChk(ierr);
52482b77998SStan Tomov         ierr = CeedVectorCreate(op->ceed,
52582b77998SStan Tomov                                 op->Erestrict->nelem * Q
52682b77998SStan Tomov                                 * op->qf->qdatasize / sizeof(CeedScalar),
52782b77998SStan Tomov                                 &impl->qdata); CeedChk(ierr);
52882b77998SStan Tomov     }
52982b77998SStan Tomov     *qdata = impl->qdata;
53082b77998SStan Tomov     return 0;
53182b77998SStan Tomov }
53282b77998SStan Tomov 
53382b77998SStan Tomov static int CeedOperatorCreate_Magma(CeedOperator op) {
53482b77998SStan Tomov     CeedOperator_Magma *impl;
53582b77998SStan Tomov     int ierr;
53682b77998SStan Tomov 
53782b77998SStan Tomov     ierr = CeedCalloc(1, &impl); CeedChk(ierr);
53882b77998SStan Tomov     op->data = impl;
53982b77998SStan Tomov     op->Destroy = CeedOperatorDestroy_Magma;
54082b77998SStan Tomov     op->Apply = CeedOperatorApply_Magma;
54182b77998SStan Tomov     op->GetQData = CeedOperatorGetQData_Magma;
54282b77998SStan Tomov     return 0;
54382b77998SStan Tomov }
54482b77998SStan Tomov 
54582b77998SStan Tomov // *****************************************************************************
54682b77998SStan Tomov // * INIT
54782b77998SStan Tomov // *****************************************************************************
54882b77998SStan Tomov static int CeedInit_Magma(const char *resource, Ceed ceed) {
54982b77998SStan Tomov     if (strcmp(resource, "/cpu/magma")
55082b77998SStan Tomov         && strcmp(resource, "/gpu/magma"))
55182b77998SStan Tomov         return CeedError(ceed, 1, "MAGMA backend cannot use resource: %s", resource);
552*93fbe329SStan Tomov 
553*93fbe329SStan Tomov     magma_init();
554*93fbe329SStan Tomov     //magma_print_environment();
555*93fbe329SStan Tomov 
55682b77998SStan Tomov     ceed->VecCreate = CeedVectorCreate_Magma;
55782b77998SStan Tomov     ceed->BasisCreateTensorH1 = CeedBasisCreateTensorH1_Magma;
55882b77998SStan Tomov     ceed->ElemRestrictionCreate = CeedElemRestrictionCreate_Magma;
55982b77998SStan Tomov     ceed->QFunctionCreate = CeedQFunctionCreate_Magma;
56082b77998SStan Tomov     ceed->OperatorCreate = CeedOperatorCreate_Magma;
56182b77998SStan Tomov     return 0;
56282b77998SStan Tomov }
56382b77998SStan Tomov 
56482b77998SStan Tomov // *****************************************************************************
56582b77998SStan Tomov // * REGISTER
56682b77998SStan Tomov // *****************************************************************************
56782b77998SStan Tomov __attribute__((constructor))
56882b77998SStan Tomov static void Register(void) {
56982b77998SStan Tomov     CeedRegister("/cpu/magma", CeedInit_Magma);
57082b77998SStan Tomov     CeedRegister("/gpu/magma", CeedInit_Magma);
57182b77998SStan Tomov }
572