xref: /libCEED/rust/libceed-sys/c-src/backends/magma/ceed-magma.c (revision 52d6035f927efec34920a771ebaa7a03e4ffa966)
182b77998SStan Tomov // Copyright (c) 2017, Lawrence Livermore National Security, LLC. Produced at
282b77998SStan Tomov // the Lawrence Livermore National Laboratory. LLNL-CODE-734707. All Rights
382b77998SStan Tomov // reserved. See files LICENSE and NOTICE for details.
482b77998SStan Tomov //
582b77998SStan Tomov // This file is part of CEED, a collection of benchmarks, miniapps, software
682b77998SStan Tomov // libraries and APIs for efficient high-order finite element and spectral
782b77998SStan Tomov // element discretizations for exascale applications. For more information and
882b77998SStan Tomov // source code availability see http://github.com/ceed.
982b77998SStan Tomov //
1082b77998SStan Tomov // The CEED research is supported by the Exascale Computing Project 17-SC-20-SC,
1182b77998SStan Tomov // a collaborative effort of two U.S. Department of Energy organizations (Office
1282b77998SStan Tomov // of Science and the National Nuclear Security Administration) responsible for
1382b77998SStan Tomov // the planning and preparation of a capable exascale ecosystem, including
1482b77998SStan Tomov // software, applications, hardware, advanced system engineering and early
1582b77998SStan Tomov // testbed platforms, in support of the nation's exascale computing imperative.
1682b77998SStan Tomov 
1738612b08SStan Tomov #include "ceed-magma.h"
185a9ca9adSVeselin Dobrev #include <string.h>
1982b77998SStan Tomov 
2082b77998SStan Tomov typedef struct {
2182b77998SStan Tomov   CeedScalar *array;
2293fbe329SStan Tomov   CeedScalar *darray;
23e2900954SStan Tomov   int  own_;
24c119dad6SStan Tomov   int down_;
2582b77998SStan Tomov } CeedVector_Magma;
2682b77998SStan Tomov 
2782b77998SStan Tomov typedef struct {
281751b0a9SStan Tomov   CeedInt *indices;
291751b0a9SStan Tomov   CeedInt *dindices;
301751b0a9SStan Tomov   int  own_;
31c119dad6SStan Tomov   int down_;            // cover a case where we own Device memory
3282b77998SStan Tomov } CeedElemRestriction_Magma;
3382b77998SStan Tomov 
3482b77998SStan Tomov typedef struct {
3516383ffcSjeremylt   const CeedScalar **inputs;
3616383ffcSjeremylt   CeedScalar **outputs;
3716383ffcSjeremylt } CeedQFunction_Magma;
3816383ffcSjeremylt 
3916383ffcSjeremylt typedef struct {
4016383ffcSjeremylt   CeedVector *Evecs; /// E-vectors needed to apply operator (in followed by out)
4116383ffcSjeremylt   CeedScalar **Edata;
4216383ffcSjeremylt   CeedVector *evecsin;   /// Input E-vectors needed to apply operator
4316383ffcSjeremylt   CeedVector *evecsout;   /// Output E-vectors needed to apply operator
4416383ffcSjeremylt   CeedVector *qvecsin;   /// Input Q-vectors needed to apply operator
4516383ffcSjeremylt   CeedVector *qvecsout;   /// Output Q-vectors needed to apply operator
463bfcb0deSjeremylt   CeedInt    numein;
473bfcb0deSjeremylt   CeedInt    numeout;
4882b77998SStan Tomov } CeedOperator_Magma;
4982b77998SStan Tomov 
5093fbe329SStan Tomov // *****************************************************************************
5193fbe329SStan Tomov // * Initialize vector vec (after free mem) with values from array based on cmode
5293fbe329SStan Tomov // *   CEED_COPY_VALUES: memory is allocated in vec->array_allocated, made equal
5393fbe329SStan Tomov // *                     to array, and data is copied (not store passed pointer)
5493fbe329SStan Tomov // *   CEED_OWN_POINTER: vec->data->array_allocated and vec->data->array = array
5593fbe329SStan Tomov // *   CEED_USE_POINTER: vec->data->array = array (can modify; no ownership)
5693fbe329SStan Tomov // * mtype: CEED_MEM_HOST or CEED_MEM_DEVICE
5793fbe329SStan Tomov // *****************************************************************************
5882b77998SStan Tomov static int CeedVectorSetArray_Magma(CeedVector vec, CeedMemType mtype,
5982b77998SStan Tomov                                     CeedCopyMode cmode, CeedScalar *array) {
6082b77998SStan Tomov   CeedVector_Magma *impl = vec->data;
6182b77998SStan Tomov   int ierr;
6282b77998SStan Tomov 
63c119dad6SStan Tomov   // If own data, free the "old" data, e.g., as it may be of different size
64e2900954SStan Tomov   if (impl->own_) {
65e2900954SStan Tomov     magma_free( impl->darray );
66e2900954SStan Tomov     magma_free_pinned( impl->array );
67e2900954SStan Tomov     impl->darray = NULL;
68e2900954SStan Tomov     impl->array  = NULL;
69e2900954SStan Tomov     impl->own_ = 0;
70c119dad6SStan Tomov     impl->down_= 0;
71e2900954SStan Tomov   }
7293fbe329SStan Tomov 
73e2900954SStan Tomov   if (mtype == CEED_MEM_HOST) {
74e2900954SStan Tomov     // memory is on the host; own_ = 0
7582b77998SStan Tomov     switch (cmode) {
7682b77998SStan Tomov     case CEED_COPY_VALUES:
7793fbe329SStan Tomov       ierr = magma_malloc( (void **)&impl->darray,
78e2900954SStan Tomov                            vec->length * sizeof(CeedScalar)); CeedChk(ierr);
79e2900954SStan Tomov       ierr = magma_malloc_pinned( (void **)&impl->array,
80e2900954SStan Tomov                                   vec->length * sizeof(CeedScalar)); CeedChk(ierr);
81e2900954SStan Tomov       impl->own_ = 1;
82e2900954SStan Tomov 
83c119dad6SStan Tomov       if (array != NULL)
8493fbe329SStan Tomov         magma_setvector(vec->length, sizeof(array[0]),
8593fbe329SStan Tomov                         array, 1, impl->darray, 1);
8682b77998SStan Tomov       break;
8782b77998SStan Tomov     case CEED_OWN_POINTER:
8893fbe329SStan Tomov       ierr = magma_malloc( (void **)&impl->darray,
89e2900954SStan Tomov                            vec->length * sizeof(CeedScalar)); CeedChk(ierr);
901751b0a9SStan Tomov       // TODO: possible problem here is if we are passed non-pinned memory;
911751b0a9SStan Tomov       //       (as we own it, lter in destroy, we use free for pinned memory).
92e2900954SStan Tomov       impl->array = array;
93e2900954SStan Tomov       impl->own_ = 1;
94e2900954SStan Tomov 
95c119dad6SStan Tomov       if (array != NULL)
9693fbe329SStan Tomov         magma_setvector(vec->length, sizeof(array[0]),
9793fbe329SStan Tomov                         array, 1, impl->darray, 1);
9882b77998SStan Tomov       break;
9982b77998SStan Tomov     case CEED_USE_POINTER:
100c119dad6SStan Tomov       ierr = magma_malloc( (void **)&impl->darray,
101c119dad6SStan Tomov                            vec->length * sizeof(CeedScalar)); CeedChk(ierr);
102c119dad6SStan Tomov       magma_setvector(vec->length, sizeof(array[0]),
103c119dad6SStan Tomov                       array, 1, impl->darray, 1);
10406b636dbSStan Tomov 
10506b636dbSStan Tomov       impl->down_  = 1;
10682b77998SStan Tomov       impl->array  = array;
10782b77998SStan Tomov     }
108e2900954SStan Tomov   } else if (mtype == CEED_MEM_DEVICE) {
109e2900954SStan Tomov     // memory is on the device; own = 0
110e2900954SStan Tomov     switch (cmode) {
111e2900954SStan Tomov     case CEED_COPY_VALUES:
112e2900954SStan Tomov       ierr = magma_malloc( (void **)&impl->darray,
113e2900954SStan Tomov                            vec->length * sizeof(CeedScalar)); CeedChk(ierr);
114e2900954SStan Tomov       ierr = magma_malloc_pinned( (void **)&impl->array,
115e2900954SStan Tomov                                   vec->length * sizeof(CeedScalar)); CeedChk(ierr);
116e2900954SStan Tomov       impl->own_ = 1;
117e2900954SStan Tomov 
118e2900954SStan Tomov       if (array)
119e2900954SStan Tomov         magma_copyvector(vec->length, sizeof(array[0]),
120e2900954SStan Tomov                          array, 1, impl->darray, 1);
12106b636dbSStan Tomov       else
12206b636dbSStan Tomov         // t30 assumes allocation initializes with 0s
12306b636dbSStan Tomov         magma_setvector(vec->length, sizeof(array[0]),
12406b636dbSStan Tomov                         impl->array, 1, impl->darray, 1);
125e2900954SStan Tomov       break;
126e2900954SStan Tomov     case CEED_OWN_POINTER:
127e2900954SStan Tomov       impl->darray = array;
128e2900954SStan Tomov       ierr = magma_malloc_pinned( (void **)&impl->array,
129e2900954SStan Tomov                                   vec->length * sizeof(CeedScalar)); CeedChk(ierr);
130e2900954SStan Tomov       impl->own_ = 1;
131e2900954SStan Tomov 
132e2900954SStan Tomov       break;
133e2900954SStan Tomov     case CEED_USE_POINTER:
134e2900954SStan Tomov       impl->darray = array;
135e2900954SStan Tomov       impl->array  = NULL;
136e2900954SStan Tomov     }
137e2900954SStan Tomov 
138e2900954SStan Tomov   } else
139e2900954SStan Tomov     return CeedError(vec->ceed, 1, "Only MemType = HOST or DEVICE supported");
140e2900954SStan Tomov 
14182b77998SStan Tomov   return 0;
14282b77998SStan Tomov }
14382b77998SStan Tomov 
14493fbe329SStan Tomov // *****************************************************************************
145e2900954SStan Tomov // * Give data pointer from vector vec to array (on HOST or DEVICE)
14693fbe329SStan Tomov // *****************************************************************************
14782b77998SStan Tomov static int CeedVectorGetArray_Magma(CeedVector vec, CeedMemType mtype,
14882b77998SStan Tomov                                     CeedScalar **array) {
14982b77998SStan Tomov   CeedVector_Magma *impl = vec->data;
15082b77998SStan Tomov   int ierr;
15182b77998SStan Tomov 
152e2900954SStan Tomov   if (mtype == CEED_MEM_HOST) {
153c119dad6SStan Tomov     if (impl->own_) {
154e2900954SStan Tomov       // data is owned so GPU had the most up-to-date version; copy it
15506b636dbSStan Tomov       // TTT - apparantly it doesn't have most up to date data
156e2900954SStan Tomov       magma_getvector(vec->length, sizeof(*array[0]),
157e2900954SStan Tomov                       impl->darray, 1, impl->array, 1);
15806b636dbSStan Tomov       CeedDebug("\033[31m[CeedVectorGetArray_Magma]");
15906b636dbSStan Tomov       //fprintf(stderr,"rrrrrrrrrrrrrrr\n");
160c119dad6SStan Tomov     } else if (impl->array == NULL) {
161c119dad6SStan Tomov       // Vector doesn't own the data and was set on GPU
162c119dad6SStan Tomov       if (impl->darray == NULL) {
163c119dad6SStan Tomov         // call was made just to allocate memory
164c119dad6SStan Tomov         ierr = CeedVectorSetArray(vec, mtype, CEED_COPY_VALUES, NULL);
165c119dad6SStan Tomov         CeedChk(ierr);
166c119dad6SStan Tomov       } else
167c119dad6SStan Tomov         return CeedError(vec->ceed, 1, "Can not access DEVICE vector on HOST");
16882b77998SStan Tomov     }
16982b77998SStan Tomov     *array = impl->array;
170e2900954SStan Tomov   } else if (mtype == CEED_MEM_DEVICE) {
171c119dad6SStan Tomov     if (impl->darray == NULL) {
172c119dad6SStan Tomov       // Vector doesn't own the data and was set on the CPU
173c119dad6SStan Tomov       if (impl->array == NULL) {
174c119dad6SStan Tomov         // call was made just to allocate memory
175e2900954SStan Tomov         ierr = CeedVectorSetArray(vec, mtype, CEED_COPY_VALUES, NULL);
176e2900954SStan Tomov         CeedChk(ierr);
177c119dad6SStan Tomov       } else
178c119dad6SStan Tomov         return CeedError(vec->ceed, 1, "Can not access HOST vector on DEVICE");
179e2900954SStan Tomov     }
180e2900954SStan Tomov     *array = impl->darray;
181e2900954SStan Tomov   } else
182e2900954SStan Tomov     return CeedError(vec->ceed, 1, "Can only provide to HOST or DEVICE memory");
183e2900954SStan Tomov 
18482b77998SStan Tomov   return 0;
18582b77998SStan Tomov }
18682b77998SStan Tomov 
18793fbe329SStan Tomov // *****************************************************************************
188e2900954SStan Tomov // * Give data pointer from vector vec to array (on HOST or DEVICE) to read it
18993fbe329SStan Tomov // *****************************************************************************
19082b77998SStan Tomov static int CeedVectorGetArrayRead_Magma(CeedVector vec, CeedMemType mtype,
19182b77998SStan Tomov                                         const CeedScalar **array) {
19282b77998SStan Tomov   CeedVector_Magma *impl = vec->data;
19382b77998SStan Tomov   int ierr;
19482b77998SStan Tomov 
195e2900954SStan Tomov   if (mtype == CEED_MEM_HOST) {
196c119dad6SStan Tomov     if (impl->own_) {
197e2900954SStan Tomov       // data is owned so GPU had the most up-to-date version; copy it
198e2900954SStan Tomov       magma_getvector(vec->length, sizeof(*array[0]),
199e2900954SStan Tomov                       impl->darray, 1, impl->array, 1);
200c119dad6SStan Tomov     } else if (impl->array == NULL) {
201c119dad6SStan Tomov       // Vector doesn't own the data and was set on GPU
202c119dad6SStan Tomov       if (impl->darray == NULL) {
203c119dad6SStan Tomov         // call was made just to allocate memory
204c119dad6SStan Tomov         ierr = CeedVectorSetArray(vec, mtype, CEED_COPY_VALUES, NULL);
205c119dad6SStan Tomov         CeedChk(ierr);
206c119dad6SStan Tomov       } else
207c119dad6SStan Tomov         return CeedError(vec->ceed, 1, "Can not access DEVICE vector on HOST");
20882b77998SStan Tomov     }
20982b77998SStan Tomov     *array = impl->array;
210e2900954SStan Tomov   } else if (mtype == CEED_MEM_DEVICE) {
211c119dad6SStan Tomov     if (impl->darray == NULL) {
212c119dad6SStan Tomov       // Vector doesn't own the data and was set on the CPU
213c119dad6SStan Tomov       if (impl->array == NULL) {
214c119dad6SStan Tomov         // call was made just to allocate memory
215e2900954SStan Tomov         ierr = CeedVectorSetArray(vec, mtype, CEED_COPY_VALUES, NULL);
216e2900954SStan Tomov         CeedChk(ierr);
217c119dad6SStan Tomov       } else
218c119dad6SStan Tomov         return CeedError(vec->ceed, 1, "Can not access HOST vector on DEVICE");
219e2900954SStan Tomov     }
220e2900954SStan Tomov     *array = impl->darray;
221e2900954SStan Tomov   } else
222e2900954SStan Tomov     return CeedError(vec->ceed, 1, "Can only provide to HOST or DEVICE memory");
223e2900954SStan Tomov 
22482b77998SStan Tomov   return 0;
22582b77998SStan Tomov }
22682b77998SStan Tomov 
22793fbe329SStan Tomov // *****************************************************************************
228e2900954SStan Tomov // * There is no mtype here for array so it is not clear if we restore from HOST
229e2900954SStan Tomov // * memory or from DEVICE memory. We assume that it is CPU memory because if
230e2900954SStan Tomov // * it was GPU memory we would not call this routine at all.
23193fbe329SStan Tomov // * Restore vector vec with values from array, where array received its values
23293fbe329SStan Tomov // * from vec and possibly modified them.
23393fbe329SStan Tomov // *****************************************************************************
23482b77998SStan Tomov static int CeedVectorRestoreArray_Magma(CeedVector vec, CeedScalar **array) {
23593fbe329SStan Tomov   CeedVector_Magma *impl = vec->data;
23693fbe329SStan Tomov 
237c119dad6SStan Tomov   // Check if the array is a CPU pointer
238c119dad6SStan Tomov   if (*array == impl->array) {
239c119dad6SStan Tomov     // Update device, if the device pointer is not NULL
240c119dad6SStan Tomov     if (impl->darray != NULL) {
24193fbe329SStan Tomov       magma_setvector(vec->length, sizeof(*array[0]),
24293fbe329SStan Tomov                       *array, 1, impl->darray, 1);
2435a9ca9adSVeselin Dobrev     } else {
2445a9ca9adSVeselin Dobrev       // nothing to do (case of CPU use pointer)
2455a9ca9adSVeselin Dobrev     }
246c119dad6SStan Tomov 
247c119dad6SStan Tomov   } else if (impl->down_) {
248c119dad6SStan Tomov     // nothing to do if array is on GPU, except if down_=1(case CPU use pointer)
249c119dad6SStan Tomov     magma_getvector(vec->length, sizeof(*array[0]),
250c119dad6SStan Tomov                     impl->darray, 1, impl->array, 1);
251c119dad6SStan Tomov   }
252e2900954SStan Tomov 
25382b77998SStan Tomov   *array = NULL;
25482b77998SStan Tomov   return 0;
25582b77998SStan Tomov }
25682b77998SStan Tomov 
25793fbe329SStan Tomov // *****************************************************************************
258e2900954SStan Tomov // * There is no mtype here for array so it is not clear if we restore from HOST
259e2900954SStan Tomov // * memory or from DEVICE memory. We assume that it is CPU memory because if
260e2900954SStan Tomov // * it was GPU memory we would not call this routine at all.
26193fbe329SStan Tomov // * Restore vector vec with values from array, where array received its values
26293fbe329SStan Tomov // * from vec to only read them; in this case vec may have been modified meanwhile
26393fbe329SStan Tomov // * and needs to be restored here.
26493fbe329SStan Tomov // *****************************************************************************
26582b77998SStan Tomov static int CeedVectorRestoreArrayRead_Magma(CeedVector vec,
26682b77998SStan Tomov     const CeedScalar **array) {
26793fbe329SStan Tomov   CeedVector_Magma *impl = vec->data;
26893fbe329SStan Tomov 
269c119dad6SStan Tomov   // Check if the array is a CPU pointer
270c119dad6SStan Tomov   if (*array == impl->array) {
271c119dad6SStan Tomov     // Update device, if the device pointer is not NULL
2725a9ca9adSVeselin Dobrev     if (impl->darray != NULL) {
27393fbe329SStan Tomov       magma_setvector(vec->length, sizeof(*array[0]),
27493fbe329SStan Tomov                       *array, 1, impl->darray, 1);
2755a9ca9adSVeselin Dobrev     } else {
2765a9ca9adSVeselin Dobrev       // nothing to do (case of CPU use pointer)
2775a9ca9adSVeselin Dobrev     }
278c119dad6SStan Tomov 
279c119dad6SStan Tomov   } else if (impl->down_) {
280c119dad6SStan Tomov     // nothing to do if array is on GPU, except if down_=1(case CPU use pointer)
281c119dad6SStan Tomov     magma_getvector(vec->length, sizeof(*array[0]),
282c119dad6SStan Tomov                     impl->darray, 1, impl->array, 1);
283c119dad6SStan Tomov   }
284e2900954SStan Tomov 
28582b77998SStan Tomov   *array = NULL;
28682b77998SStan Tomov   return 0;
28782b77998SStan Tomov }
28882b77998SStan Tomov 
28982b77998SStan Tomov static int CeedVectorDestroy_Magma(CeedVector vec) {
29082b77998SStan Tomov   CeedVector_Magma *impl = vec->data;
29182b77998SStan Tomov   int ierr;
29282b77998SStan Tomov 
293e2900954SStan Tomov   // Free if we own the data
294e2900954SStan Tomov   if (impl->own_) {
295e2900954SStan Tomov     ierr = magma_free_pinned(impl->array); CeedChk(ierr);
29693fbe329SStan Tomov     ierr = magma_free(impl->darray);       CeedChk(ierr);
297c119dad6SStan Tomov   } else if (impl->down_) {
298c119dad6SStan Tomov     ierr = magma_free(impl->darray);       CeedChk(ierr);
299e2900954SStan Tomov   }
30082b77998SStan Tomov   ierr = CeedFree(&vec->data); CeedChk(ierr);
30182b77998SStan Tomov   return 0;
30282b77998SStan Tomov }
30382b77998SStan Tomov 
30493fbe329SStan Tomov // *****************************************************************************
30593fbe329SStan Tomov // * Create vector vec of size n
30693fbe329SStan Tomov // *****************************************************************************
307667bc5fcSjeremylt static int CeedVectorCreate_Magma(CeedInt n, CeedVector vec) {
30882b77998SStan Tomov   CeedVector_Magma *impl;
30982b77998SStan Tomov   int ierr;
31082b77998SStan Tomov 
31182b77998SStan Tomov   vec->SetArray = CeedVectorSetArray_Magma;
31282b77998SStan Tomov   vec->GetArray = CeedVectorGetArray_Magma;
31382b77998SStan Tomov   vec->GetArrayRead = CeedVectorGetArrayRead_Magma;
31482b77998SStan Tomov   vec->RestoreArray = CeedVectorRestoreArray_Magma;
31582b77998SStan Tomov   vec->RestoreArrayRead = CeedVectorRestoreArrayRead_Magma;
31682b77998SStan Tomov   vec->Destroy = CeedVectorDestroy_Magma;
31782b77998SStan Tomov   ierr = CeedCalloc(1,&impl); CeedChk(ierr);
31893fbe329SStan Tomov   impl->darray = NULL;
319e2900954SStan Tomov   impl->array  = NULL;
320e2900954SStan Tomov   impl->own_ = 0;
321c119dad6SStan Tomov   impl->down_= 0;
32282b77998SStan Tomov   vec->data = impl;
32382b77998SStan Tomov   return 0;
32482b77998SStan Tomov }
32582b77998SStan Tomov 
3263bfcb0deSjeremylt 
32793fbe329SStan Tomov // *****************************************************************************
32893fbe329SStan Tomov // * Apply restriction operator r to u: v = r(rmode) u
32993fbe329SStan Tomov // *****************************************************************************
33082b77998SStan Tomov static int CeedElemRestrictionApply_Magma(CeedElemRestriction r,
3313bfcb0deSjeremylt     CeedTransposeMode tmode,
33282b77998SStan Tomov     CeedTransposeMode lmode, CeedVector u,
33382b77998SStan Tomov     CeedVector v, CeedRequest *request) {
33482b77998SStan Tomov   CeedElemRestriction_Magma *impl = r->data;
33582b77998SStan Tomov   int ierr;
33682b77998SStan Tomov   const CeedScalar *uu;
33782b77998SStan Tomov   CeedScalar *vv;
3383bfcb0deSjeremylt   CeedInt nelem = r->nelem, elemsize = r->elemsize, ndof = r->ndof,
3393bfcb0deSjeremylt           ncomp=r->ncomp;
340c119dad6SStan Tomov   CeedInt esize = nelem * elemsize;
34106b636dbSStan Tomov 
34206b636dbSStan Tomov   #ifdef USE_MAGMA_BATCH2
343137a0714SStan Tomov   CeedInt *dindices = impl->dindices;
344c119dad6SStan Tomov   // Get pointers on the device
345c119dad6SStan Tomov   ierr = CeedVectorGetArrayRead(u, CEED_MEM_DEVICE, &uu); CeedChk(ierr);
346c119dad6SStan Tomov   ierr = CeedVectorGetArray(v, CEED_MEM_DEVICE, &vv); CeedChk(ierr);
34706b636dbSStan Tomov   #else
34806b636dbSStan Tomov   CeedInt *indices = impl->indices;
34906b636dbSStan Tomov   ierr = CeedVectorGetArrayRead(u, CEED_MEM_HOST, &uu); CeedChk(ierr);
35006b636dbSStan Tomov   ierr = CeedVectorGetArray(v, CEED_MEM_HOST, &vv); CeedChk(ierr);
35106b636dbSStan Tomov   #endif
352c119dad6SStan Tomov 
35382b77998SStan Tomov   if (tmode == CEED_NOTRANSPOSE) {
35482b77998SStan Tomov     // Perform: v = r * u
355135a076eSjeremylt     if (!impl->indices) {
356135a076eSjeremylt       for (CeedInt i=0; i<esize*ncomp; i++) vv[i] = uu[i];
357135a076eSjeremylt     } else if (ncomp == 1) {
35806b636dbSStan Tomov       #ifdef USE_MAGMA_BATCH2
359c119dad6SStan Tomov magma_template<<i=0:esize>>
360c119dad6SStan Tomov       (const CeedScalar *uu, CeedScalar *vv, CeedInt *dindices) {
361c119dad6SStan Tomov         vv[i] = uu[dindices[i]];
362e2900954SStan Tomov       }
36306b636dbSStan Tomov       #else
36406b636dbSStan Tomov       for (CeedInt i=0; i<esize; i++) vv[i] = uu[indices[i]];
36506b636dbSStan Tomov       #endif
36682b77998SStan Tomov     } else {
36782b77998SStan Tomov       // vv is (elemsize x ncomp x nelem), column-major
36882b77998SStan Tomov       if (lmode == CEED_NOTRANSPOSE) { // u is (ndof x ncomp), column-major
36906b636dbSStan Tomov         #ifdef USE_MAGMA_BATCH2
37006b636dbSStan Tomov magma_template<<e=0:nelem, d=0:ncomp, i=0:elemsize>>
37106b636dbSStan Tomov         (const CeedScalar *uu, CeedScalar *vv, CeedInt *dindices, int ndof) {
37206b636dbSStan Tomov           vv[i + iend*(d+dend*e)] = uu[dindices[i+iend*e]+ndof*d];
37306b636dbSStan Tomov         }
37406b636dbSStan Tomov         #else
375c119dad6SStan Tomov         for (CeedInt e = 0; e < nelem; e++)
37682b77998SStan Tomov           for (CeedInt d = 0; d < ncomp; d++)
377c119dad6SStan Tomov             for (CeedInt i=0; i < elemsize; i++) {
378c119dad6SStan Tomov               vv[i + elemsize*(d+ncomp*e)] =
379c119dad6SStan Tomov                 uu[indices[i+elemsize*e]+ndof*d];
380c119dad6SStan Tomov             }
38106b636dbSStan Tomov         #endif
38282b77998SStan Tomov       } else { // u is (ncomp x ndof), column-major
38306b636dbSStan Tomov         #ifdef USE_MAGMA_BATCH2
38406b636dbSStan Tomov magma_template<<e=0:nelem, d=0:ncomp, i=0:elemsize>>
38506b636dbSStan Tomov         (const CeedScalar *uu, CeedScalar *vv, CeedInt *dindices) {
38606b636dbSStan Tomov           vv[i + iend*(d+dend*e)] = uu[d+dend*dindices[i + iend*e]];
38706b636dbSStan Tomov         }
38806b636dbSStan Tomov         #else
389c119dad6SStan Tomov         for (CeedInt e = 0; e < nelem; e++)
39082b77998SStan Tomov           for (CeedInt d = 0; d < ncomp; d++)
391c119dad6SStan Tomov             for (CeedInt i=0; i< elemsize; i++) {
392c119dad6SStan Tomov               vv[i + elemsize*(d+ncomp*e)] =
393c119dad6SStan Tomov                 uu[d+ncomp*indices[i+elemsize*e]];
394c119dad6SStan Tomov             }
39506b636dbSStan Tomov         #endif
39682b77998SStan Tomov       }
39782b77998SStan Tomov     }
39882b77998SStan Tomov   } else {
39982b77998SStan Tomov     // Note: in transpose mode, we perform: v += r^t * u
400135a076eSjeremylt     if (!impl->indices) {
401135a076eSjeremylt       for (CeedInt i=0; i<esize; i++) vv[i] += uu[i];
402135a076eSjeremylt     } else if (ncomp == 1) {
403c119dad6SStan Tomov       // fprintf(stderr,"3 ---------\n");
40406b636dbSStan Tomov       #ifdef USE_MAGMA_BATCH2
405c119dad6SStan Tomov magma_template<<i=0:esize>>
406c119dad6SStan Tomov       (const CeedScalar *uu, CeedScalar *vv, CeedInt *dindices) {
407c119dad6SStan Tomov         magmablas_datomic_add( &vv[dindices[i]], uu[i]);
408c119dad6SStan Tomov       }
40906b636dbSStan Tomov       #else
41006b636dbSStan Tomov       for (CeedInt i=0; i<esize; i++) vv[indices[i]] += uu[i];
41106b636dbSStan Tomov       #endif
412c119dad6SStan Tomov     } else { // u is (elemsize x ncomp x nelem)
413c119dad6SStan Tomov       fprintf(stderr,"2 ---------\n");
414c119dad6SStan Tomov 
41582b77998SStan Tomov       if (lmode == CEED_NOTRANSPOSE) { // vv is (ndof x ncomp), column-major
41606b636dbSStan Tomov         #ifdef USE_MAGMA_BATCH2
41706b636dbSStan Tomov magma_template<<e=0:nelem, d=0:ncomp, i=0:elemsize>>
41806b636dbSStan Tomov         (const CeedScalar *uu, CeedScalar *vv, CeedInt *dindices, CeedInt ndof) {
41906b636dbSStan Tomov           magmablas_datomic_add( &vv[dindices[i+iend*e]+ndof*d],
42006b636dbSStan Tomov                                  uu[i+iend*(d+e*dend)]);
42106b636dbSStan Tomov         }
42206b636dbSStan Tomov         #else
423c119dad6SStan Tomov         for (CeedInt e = 0; e < nelem; e++)
42482b77998SStan Tomov           for (CeedInt d = 0; d < ncomp; d++)
425c119dad6SStan Tomov             for (CeedInt i=0; i < elemsize; i++) {
426c119dad6SStan Tomov               vv[indices[i + elemsize*e]+ndof*d] +=
427c119dad6SStan Tomov                 uu[i + elemsize*(d+e*ncomp)];
428c119dad6SStan Tomov             }
42906b636dbSStan Tomov         #endif
43006b636dbSStan Tomov       } else { // vv is (ncomp x ndof), column-major
43106b636dbSStan Tomov         #ifdef USE_MAGMA_BATCH2
432c119dad6SStan Tomov magma_template<<e=0:nelem, d=0:ncomp, i=0:elemsize>>
43306b636dbSStan Tomov         (const CeedScalar *uu, CeedScalar *vv, CeedInt *dindices) {
43406b636dbSStan Tomov           magmablas_datomic_add( &vv[d+dend*dindices[i + iend*e]],
435c119dad6SStan Tomov                                  uu[i+iend*(d+e*dend)]);
43682b77998SStan Tomov         }
43706b636dbSStan Tomov         #else
438c119dad6SStan Tomov         for (CeedInt e = 0; e < nelem; e++)
43982b77998SStan Tomov           for (CeedInt d = 0; d < ncomp; d++)
440c119dad6SStan Tomov             for (CeedInt i=0; i < elemsize; i++) {
441c119dad6SStan Tomov               vv[d+ncomp*indices[i + elemsize*e]] +=
442c119dad6SStan Tomov                 uu[i + elemsize*(d+e*ncomp)];
443c119dad6SStan Tomov             }
44406b636dbSStan Tomov         #endif
44582b77998SStan Tomov       }
44682b77998SStan Tomov     }
44782b77998SStan Tomov   }
44806b636dbSStan Tomov 
44982b77998SStan Tomov   ierr = CeedVectorRestoreArrayRead(u, &uu); CeedChk(ierr);
45082b77998SStan Tomov   ierr = CeedVectorRestoreArray(v, &vv); CeedChk(ierr);
451c119dad6SStan Tomov 
45282b77998SStan Tomov   if (request != CEED_REQUEST_IMMEDIATE && request != CEED_REQUEST_ORDERED)
45382b77998SStan Tomov     *request = NULL;
45482b77998SStan Tomov   return 0;
45582b77998SStan Tomov }
45682b77998SStan Tomov 
45782b77998SStan Tomov static int CeedElemRestrictionDestroy_Magma(CeedElemRestriction r) {
45882b77998SStan Tomov   CeedElemRestriction_Magma *impl = r->data;
45982b77998SStan Tomov   int ierr;
46082b77998SStan Tomov 
4611751b0a9SStan Tomov   // Free if we own the data
4621751b0a9SStan Tomov   if (impl->own_) {
4631751b0a9SStan Tomov     ierr = magma_free_pinned(impl->indices); CeedChk(ierr);
4641751b0a9SStan Tomov     ierr = magma_free(impl->dindices);       CeedChk(ierr);
4651981b6e4STzanio   } else if (impl->down_) {
466c119dad6SStan Tomov     ierr = magma_free(impl->dindices);       CeedChk(ierr);
467c119dad6SStan Tomov   }
46882b77998SStan Tomov   ierr = CeedFree(&r->data); CeedChk(ierr);
46982b77998SStan Tomov   return 0;
47082b77998SStan Tomov }
47182b77998SStan Tomov 
472667bc5fcSjeremylt static int CeedElemRestrictionCreate_Magma(CeedMemType mtype,
47393fbe329SStan Tomov     CeedCopyMode cmode,
474667bc5fcSjeremylt     const CeedInt *indices, CeedElemRestriction r) {
4751751b0a9SStan Tomov   int ierr, size = r->nelem*r->elemsize;
47682b77998SStan Tomov   CeedElemRestriction_Magma *impl;
47782b77998SStan Tomov 
4781751b0a9SStan Tomov   // Allocate memory for the MAGMA Restricton and initializa pointers to NULL
47982b77998SStan Tomov   ierr = CeedCalloc(1,&impl); CeedChk(ierr);
4801751b0a9SStan Tomov   impl->dindices = NULL;
4811751b0a9SStan Tomov   impl->indices  = NULL;
4821751b0a9SStan Tomov   impl->own_ = 0;
483c119dad6SStan Tomov   impl->down_= 0;
4841751b0a9SStan Tomov 
4851751b0a9SStan Tomov   if (mtype == CEED_MEM_HOST) {
4861751b0a9SStan Tomov     // memory is on the host; own_ = 0
48782b77998SStan Tomov     switch (cmode) {
48882b77998SStan Tomov     case CEED_COPY_VALUES:
4891751b0a9SStan Tomov       ierr = magma_malloc( (void **)&impl->dindices,
4901751b0a9SStan Tomov                            size * sizeof(CeedInt)); CeedChk(ierr);
4911751b0a9SStan Tomov       ierr = magma_malloc_pinned( (void **)&impl->indices,
4921751b0a9SStan Tomov                                   size * sizeof(CeedInt)); CeedChk(ierr);
4931751b0a9SStan Tomov       impl->own_ = 1;
4941751b0a9SStan Tomov 
49506b636dbSStan Tomov       if (indices != NULL) {
49606b636dbSStan Tomov         memcpy(impl->indices, indices, size * sizeof(indices[0]));
4971751b0a9SStan Tomov         magma_setvector(size, sizeof(CeedInt),
49806b636dbSStan Tomov                         impl->indices, 1, impl->dindices, 1);
49906b636dbSStan Tomov       }
50082b77998SStan Tomov       break;
50182b77998SStan Tomov     case CEED_OWN_POINTER:
5021751b0a9SStan Tomov       ierr = magma_malloc( (void **)&impl->dindices,
5031751b0a9SStan Tomov                            size * sizeof(CeedInt)); CeedChk(ierr);
5041751b0a9SStan Tomov       // TODO: possible problem here is if we are passed non-pinned memory;
5051751b0a9SStan Tomov       //       (as we own it, lter in destroy, we use free for pinned memory).
506137a0714SStan Tomov       impl->indices = (CeedInt *)indices;
5071751b0a9SStan Tomov       impl->own_ = 1;
5081751b0a9SStan Tomov 
509c119dad6SStan Tomov       if (indices != NULL)
5101751b0a9SStan Tomov         magma_setvector(size, sizeof(CeedInt),
5111751b0a9SStan Tomov                         indices, 1, impl->dindices, 1);
51282b77998SStan Tomov       break;
51382b77998SStan Tomov     case CEED_USE_POINTER:
514c119dad6SStan Tomov       ierr = magma_malloc( (void **)&impl->dindices,
515c119dad6SStan Tomov                            size * sizeof(CeedInt)); CeedChk(ierr);
516c119dad6SStan Tomov       magma_setvector(size, sizeof(CeedInt),
517c119dad6SStan Tomov                       indices, 1, impl->dindices, 1);
518c119dad6SStan Tomov       impl->down_ = 1;
519137a0714SStan Tomov       impl->indices  = (CeedInt *)indices;
52082b77998SStan Tomov     }
5211751b0a9SStan Tomov   } else if (mtype == CEED_MEM_DEVICE) {
5221751b0a9SStan Tomov     // memory is on the device; own = 0
5231751b0a9SStan Tomov     switch (cmode) {
5241751b0a9SStan Tomov     case CEED_COPY_VALUES:
5251751b0a9SStan Tomov       ierr = magma_malloc( (void **)&impl->dindices,
5261751b0a9SStan Tomov                            size * sizeof(CeedInt)); CeedChk(ierr);
5271751b0a9SStan Tomov       ierr = magma_malloc_pinned( (void **)&impl->indices,
5281751b0a9SStan Tomov                                   size * sizeof(CeedInt)); CeedChk(ierr);
5291751b0a9SStan Tomov       impl->own_ = 1;
5301751b0a9SStan Tomov 
5311751b0a9SStan Tomov       if (indices)
5321751b0a9SStan Tomov         magma_copyvector(size, sizeof(CeedInt),
5331751b0a9SStan Tomov                          indices, 1, impl->dindices, 1);
5341751b0a9SStan Tomov       break;
5351751b0a9SStan Tomov     case CEED_OWN_POINTER:
536137a0714SStan Tomov       impl->dindices = (CeedInt *)indices;
5371751b0a9SStan Tomov       ierr = magma_malloc_pinned( (void **)&impl->indices,
5381751b0a9SStan Tomov                                   size * sizeof(CeedInt)); CeedChk(ierr);
5391751b0a9SStan Tomov       impl->own_ = 1;
5401751b0a9SStan Tomov 
5411751b0a9SStan Tomov       break;
5421751b0a9SStan Tomov     case CEED_USE_POINTER:
543137a0714SStan Tomov       impl->dindices = (CeedInt *)indices;
5441751b0a9SStan Tomov       impl->indices  = NULL;
5451751b0a9SStan Tomov     }
5461751b0a9SStan Tomov 
5471751b0a9SStan Tomov   } else
5481751b0a9SStan Tomov     return CeedError(r->ceed, 1, "Only MemType = HOST or DEVICE supported");
5491751b0a9SStan Tomov 
55082b77998SStan Tomov   r->data    = impl;
55182b77998SStan Tomov   r->Apply   = CeedElemRestrictionApply_Magma;
55282b77998SStan Tomov   r->Destroy = CeedElemRestrictionDestroy_Magma;
5531751b0a9SStan Tomov 
55482b77998SStan Tomov   return 0;
55582b77998SStan Tomov }
55682b77998SStan Tomov 
557667bc5fcSjeremylt static int CeedElemRestrictionCreateBlocked_Magma(CeedMemType mtype,
5584cc6aec3Sjeremylt     CeedCopyMode cmode,
559667bc5fcSjeremylt     const CeedInt *indices, CeedElemRestriction r) {
5604cc6aec3Sjeremylt   return CeedError(r->ceed, 1, "Backend does not implement blocked restrictions");
5614cc6aec3Sjeremylt }
5624cc6aec3Sjeremylt 
56382b77998SStan Tomov // Contracts on the middle index
56482b77998SStan Tomov // NOTRANSPOSE: V_ajc = T_jb U_abc
56582b77998SStan Tomov // TRANSPOSE:   V_ajc = T_bj U_abc
56682b77998SStan Tomov // If Add != 0, "=" is replaced by "+="
56782b77998SStan Tomov static int CeedTensorContract_Magma(Ceed ceed,
56882b77998SStan Tomov                                     CeedInt A, CeedInt B, CeedInt C, CeedInt J,
56982b77998SStan Tomov                                     const CeedScalar *t, CeedTransposeMode tmode,
57082b77998SStan Tomov                                     const CeedInt Add,
57182b77998SStan Tomov                                     const CeedScalar *u, CeedScalar *v) {
57238612b08SStan Tomov   #ifdef USE_MAGMA_BATCH
57338612b08SStan Tomov   magma_dtensor_contract(ceed, A, B, C, J, t, tmode, Add, u, v);
57438612b08SStan Tomov   #else
57582b77998SStan Tomov   CeedInt tstride0 = B, tstride1 = 1;
57682b77998SStan Tomov   if (tmode == CEED_TRANSPOSE) {
57782b77998SStan Tomov     tstride0 = 1; tstride1 = J;
57882b77998SStan Tomov   }
57938612b08SStan Tomov   CeedDebug("\033[31m[CeedTensorContract] A=%d, J=%d, C=%d, B=%d: %d %d %d",
58038612b08SStan Tomov             A,J,C,B,A*J*B*C, C*J*A, C*B*A);
58182b77998SStan Tomov   for (CeedInt a=0; a<A; a++) {
58282b77998SStan Tomov     for (CeedInt j=0; j<J; j++) {
58382b77998SStan Tomov       if (!Add) {
58482b77998SStan Tomov         for (CeedInt c=0; c<C; c++)
58582b77998SStan Tomov           v[(a*J+j)*C+c] = 0;
58682b77998SStan Tomov       }
58782b77998SStan Tomov       for (CeedInt b=0; b<B; b++) {
58882b77998SStan Tomov         for (CeedInt c=0; c<C; c++) {
58982b77998SStan Tomov           v[(a*J+j)*C+c] += t[j*tstride0 + b*tstride1] * u[(a*B+b)*C+c];
59082b77998SStan Tomov         }
59182b77998SStan Tomov       }
59282b77998SStan Tomov     }
59382b77998SStan Tomov   }
59438612b08SStan Tomov   #endif
59582b77998SStan Tomov   return 0;
59682b77998SStan Tomov }
59782b77998SStan Tomov 
598d3181881Sjeremylt static int CeedBasisApply_Magma(CeedBasis basis, CeedInt nelem,
599d3181881Sjeremylt                                 CeedTransposeMode tmode, CeedEvalMode emode,
60016383ffcSjeremylt                                 CeedVector U, CeedVector V) {
60182b77998SStan Tomov   int ierr;
60282b77998SStan Tomov   const CeedInt dim = basis->dim;
6030f5de9e9Sjeremylt   const CeedInt ncomp = basis->ncomp;
6040f5de9e9Sjeremylt   const CeedInt nqpt = ncomp*CeedPowInt(basis->Q1d, dim);
60582b77998SStan Tomov   const CeedInt add = (tmode == CEED_TRANSPOSE);
60616383ffcSjeremylt   const CeedScalar *u;
60716383ffcSjeremylt   CeedScalar *v;
60816383ffcSjeremylt   if (U) {
60916383ffcSjeremylt     ierr = CeedVectorGetArrayRead(U, CEED_MEM_HOST, &u); CeedChk(ierr);
61016383ffcSjeremylt   } else if (emode != CEED_EVAL_WEIGHT) {
61116383ffcSjeremylt     return CeedError(ceed, 1,
61216383ffcSjeremylt                      "An input vector is required for this CeedEvalMode");
61316383ffcSjeremylt   }
61416383ffcSjeremylt   ierr = CeedVectorGetArray(V, CEED_MEM_HOST, &v); CeedChk(ierr);
61582b77998SStan Tomov 
616c44a85f4Sjeremylt   if (nelem != 1)
617c44a85f4Sjeremylt     return CeedError(basis->ceed, 1,
618c44a85f4Sjeremylt                      "This backend does not support BasisApply for multiple elements");
619c44a85f4Sjeremylt 
62038612b08SStan Tomov   CeedDebug("\033[01m[CeedBasisApply_Magma] vsize=%d",
6210f5de9e9Sjeremylt             ncomp*CeedPowInt(basis->P1d, dim));
62238612b08SStan Tomov 
62382b77998SStan Tomov   if (tmode == CEED_TRANSPOSE) {
6240f5de9e9Sjeremylt     const CeedInt vsize = ncomp*CeedPowInt(basis->P1d, dim);
62582b77998SStan Tomov     for (CeedInt i = 0; i < vsize; i++)
62682b77998SStan Tomov       v[i] = (CeedScalar) 0;
62782b77998SStan Tomov   }
62882b77998SStan Tomov   if (emode & CEED_EVAL_INTERP) {
62982b77998SStan Tomov     CeedInt P = basis->P1d, Q = basis->Q1d;
63082b77998SStan Tomov     if (tmode == CEED_TRANSPOSE) {
63182b77998SStan Tomov       P = basis->Q1d; Q = basis->P1d;
63282b77998SStan Tomov     }
6330f5de9e9Sjeremylt     CeedInt pre = ncomp*CeedPowInt(P, dim-1), post = 1;
6340f5de9e9Sjeremylt     CeedScalar tmp[2][ncomp*Q*CeedPowInt(P>Q?P:Q, dim-1)];
63538612b08SStan Tomov     CeedDebug("\033[01m[CeedBasisApply_Magma] tmpsize = %d",
6360f5de9e9Sjeremylt               ncomp*Q*CeedPowInt(P>Q?P:Q, dim-1));
63782b77998SStan Tomov     for (CeedInt d=0; d<dim; d++) {
63882b77998SStan Tomov       ierr = CeedTensorContract_Magma(basis->ceed, pre, P, post, Q, basis->interp1d,
63982b77998SStan Tomov                                       tmode, add&&(d==dim-1),
64082b77998SStan Tomov                                       d==0?u:tmp[d%2], d==dim-1?v:tmp[(d+1)%2]);
64182b77998SStan Tomov       CeedChk(ierr);
64282b77998SStan Tomov       pre /= P;
64382b77998SStan Tomov       post *= Q;
64482b77998SStan Tomov     }
64582b77998SStan Tomov     if (tmode == CEED_NOTRANSPOSE) {
64682b77998SStan Tomov       v += nqpt;
64782b77998SStan Tomov     } else {
64882b77998SStan Tomov       u += nqpt;
64982b77998SStan Tomov     }
65082b77998SStan Tomov   }
65182b77998SStan Tomov   if (emode & CEED_EVAL_GRAD) {
65282b77998SStan Tomov     CeedInt P = basis->P1d, Q = basis->Q1d;
65382b77998SStan Tomov     // In CEED_NOTRANSPOSE mode:
6540f5de9e9Sjeremylt     // u is (P^dim x nc), column-major layout (nc = ncomp)
6550f5de9e9Sjeremylt     // v is (Q^dim x nc x dim), column-major layout (nc = ncomp)
65682b77998SStan Tomov     // In CEED_TRANSPOSE mode, the sizes of u and v are switched.
65782b77998SStan Tomov     if (tmode == CEED_TRANSPOSE) {
65882b77998SStan Tomov       P = basis->Q1d, Q = basis->P1d;
65982b77998SStan Tomov     }
6600f5de9e9Sjeremylt     CeedScalar tmp[2][ncomp*Q*CeedPowInt(P>Q?P:Q, dim-1)];
66138612b08SStan Tomov     CeedDebug("\033[01m[CeedBasisApply_Magma] tmpsize = %d",
6620f5de9e9Sjeremylt               ncomp*Q*CeedPowInt(P>Q?P:Q, dim-1));
66382b77998SStan Tomov     for (CeedInt p = 0; p < dim; p++) {
6640f5de9e9Sjeremylt       CeedInt pre = ncomp*CeedPowInt(P, dim-1), post = 1;
66582b77998SStan Tomov       for (CeedInt d=0; d<dim; d++) {
66682b77998SStan Tomov         ierr = CeedTensorContract_Magma(basis->ceed, pre, P, post, Q,
66782b77998SStan Tomov                                         (p==d)?basis->grad1d:basis->interp1d,
66882b77998SStan Tomov                                         tmode, add&&(d==dim-1),
66982b77998SStan Tomov                                         d==0?u:tmp[d%2], d==dim-1?v:tmp[(d+1)%2]);
67082b77998SStan Tomov         CeedChk(ierr);
67182b77998SStan Tomov         pre /= P;
67282b77998SStan Tomov         post *= Q;
67382b77998SStan Tomov       }
67482b77998SStan Tomov       if (tmode == CEED_NOTRANSPOSE) {
67582b77998SStan Tomov         v += nqpt;
67682b77998SStan Tomov       } else {
67782b77998SStan Tomov         u += nqpt;
67882b77998SStan Tomov       }
67982b77998SStan Tomov     }
68082b77998SStan Tomov   }
68182b77998SStan Tomov   if (emode & CEED_EVAL_WEIGHT) {
68282b77998SStan Tomov     if (tmode == CEED_TRANSPOSE)
68382b77998SStan Tomov       return CeedError(basis->ceed, 1,
68482b77998SStan Tomov                        "CEED_EVAL_WEIGHT incompatible with CEED_TRANSPOSE");
68582b77998SStan Tomov     CeedInt Q = basis->Q1d;
68682b77998SStan Tomov     for (CeedInt d=0; d<dim; d++) {
68782b77998SStan Tomov       CeedInt pre = CeedPowInt(Q, dim-d-1), post = CeedPowInt(Q, d);
68882b77998SStan Tomov       for (CeedInt i=0; i<pre; i++) {
68982b77998SStan Tomov         for (CeedInt j=0; j<Q; j++) {
69082b77998SStan Tomov           for (CeedInt k=0; k<post; k++) {
69182b77998SStan Tomov             v[(i*Q + j)*post + k] = basis->qweight1d[j]
69282b77998SStan Tomov                                     * (d == 0 ? 1 : v[(i*Q + j)*post + k]);
69382b77998SStan Tomov           }
69482b77998SStan Tomov         }
69582b77998SStan Tomov       }
69682b77998SStan Tomov     }
69782b77998SStan Tomov   }
69816383ffcSjeremylt   if (U) {
69916383ffcSjeremylt     ierr = CeedVectorRestoreArrayRead(U, &u); CeedChk(ierr);
70016383ffcSjeremylt   }
70116383ffcSjeremylt   ierr = CeedVectorRestoreArray(V, &v); CeedChk(ierr);
70282b77998SStan Tomov   return 0;
70382b77998SStan Tomov }
70482b77998SStan Tomov 
70582b77998SStan Tomov static int CeedBasisDestroy_Magma(CeedBasis basis) {
70682b77998SStan Tomov   return 0;
70782b77998SStan Tomov }
70882b77998SStan Tomov 
709667bc5fcSjeremylt static int CeedBasisCreateTensorH1_Magma(CeedInt dim, CeedInt P1d,
71082b77998SStan Tomov     CeedInt Q1d, const CeedScalar *interp1d,
71182b77998SStan Tomov     const CeedScalar *grad1d,
71282b77998SStan Tomov     const CeedScalar *qref1d,
71382b77998SStan Tomov     const CeedScalar *qweight1d,
71482b77998SStan Tomov     CeedBasis basis) {
71582b77998SStan Tomov   basis->Apply = CeedBasisApply_Magma;
71682b77998SStan Tomov   basis->Destroy = CeedBasisDestroy_Magma;
71782b77998SStan Tomov   return 0;
71882b77998SStan Tomov }
71982b77998SStan Tomov 
720667bc5fcSjeremylt static int CeedBasisCreateH1_Magma(CeedElemTopology topo, CeedInt dim,
721a8de75f0Sjeremylt                                    CeedInt ndof, CeedInt nqpts,
722a8de75f0Sjeremylt                                    const CeedScalar *interp,
723a8de75f0Sjeremylt                                    const CeedScalar *grad,
724a8de75f0Sjeremylt                                    const CeedScalar *qref,
725a8de75f0Sjeremylt                                    const CeedScalar *qweight,
726a8de75f0Sjeremylt                                    CeedBasis basis) {
727a8de75f0Sjeremylt   return CeedError(basis->ceed, 1, "Backend does not implement non-tensor bases");
728a8de75f0Sjeremylt }
729a8de75f0Sjeremylt 
7303bfcb0deSjeremylt static int CeedQFunctionApply_Magma(CeedQFunction qf, CeedInt Q,
73116383ffcSjeremylt                                     CeedVector *U, CeedVector *V) {
73282b77998SStan Tomov   int ierr;
73316383ffcSjeremylt   CeedQFunction_Ref *impl;
73416383ffcSjeremylt   ierr = CeedQFunctionGetData(qf, (void *)&impl); CeedChk(ierr);
73516383ffcSjeremylt 
73616383ffcSjeremylt   void *ctx;
73716383ffcSjeremylt   ierr = CeedQFunctionGetContext(qf, &ctx); CeedChk(ierr);
73816383ffcSjeremylt 
73916383ffcSjeremylt   int (*f)() = NULL;
74016383ffcSjeremylt   ierr = CeedQFunctionGetUserFunction(qf, (int (* *)())&f); CeedChk(ierr);
74116383ffcSjeremylt 
74216383ffcSjeremylt   CeedInt nIn, nOut;
74316383ffcSjeremylt   ierr = CeedQFunctionGetNumArgs(qf, &nIn, &nOut); CeedChk(ierr);
74416383ffcSjeremylt 
74516383ffcSjeremylt   for (int i = 0; i<nIn; i++) {
74616383ffcSjeremylt     if (U[i]) {
74716383ffcSjeremylt       ierr = CeedVectorGetArrayRead(U[i], CEED_MEM_HOST, &impl->inputs[i]);
74816383ffcSjeremylt       CeedChk(ierr);
74916383ffcSjeremylt     }
75016383ffcSjeremylt   }
75116383ffcSjeremylt   for (int i = 0; i<nOut; i++) {
75216383ffcSjeremylt     if (U[i]) {
75316383ffcSjeremylt       ierr = CeedVectorGetArray(V[i], CEED_MEM_HOST, &impl->outputs[i]);
75416383ffcSjeremylt       CeedChk(ierr);
75516383ffcSjeremylt     }
75616383ffcSjeremylt   }
75716383ffcSjeremylt 
75816383ffcSjeremylt   ierr = f(ctx, Q, impl->inputs, impl->outputs); CeedChk(ierr);
75916383ffcSjeremylt 
76016383ffcSjeremylt   for (int i = 0; i<nIn; i++) {
76116383ffcSjeremylt     if (U[i]) {
76216383ffcSjeremylt       ierr = CeedVectorRestoreArrayRead(U[i], &impl->inputs[i]); CeedChk(ierr);
76316383ffcSjeremylt     }
76416383ffcSjeremylt   }
76516383ffcSjeremylt   for (int i = 0; i<nOut; i++) {
76616383ffcSjeremylt     if (U[i]) {
76716383ffcSjeremylt       ierr = CeedVectorRestoreArray(V[i], &impl->outputs[i]); CeedChk(ierr);
76816383ffcSjeremylt     }
76916383ffcSjeremylt   }
77082b77998SStan Tomov   return 0;
77182b77998SStan Tomov }
77282b77998SStan Tomov 
77382b77998SStan Tomov static int CeedQFunctionDestroy_Magma(CeedQFunction qf) {
77416383ffcSjeremylt   int ierr;
77516383ffcSjeremylt   CeedQFunction_Magma *impl;
77616383ffcSjeremylt   ierr = CeedQFunctionGetData(qf, (void *)&impl); CeedChk(ierr);
77716383ffcSjeremylt 
77816383ffcSjeremylt   ierr = CeedFree(&impl->inputs); CeedChk(ierr);
77916383ffcSjeremylt   ierr = CeedFree(&impl->outputs); CeedChk(ierr);
78016383ffcSjeremylt   ierr = CeedFree(&impl); CeedChk(ierr);
78116383ffcSjeremylt 
78282b77998SStan Tomov   return 0;
78382b77998SStan Tomov }
78482b77998SStan Tomov 
78582b77998SStan Tomov static int CeedQFunctionCreate_Magma(CeedQFunction qf) {
78616383ffcSjeremylt   int ierr;
78716383ffcSjeremylt   Ceed ceed;
78816383ffcSjeremylt   ierr = CeedQFunctionGetCeed(qf, &ceed); CeedChk(ierr);
78916383ffcSjeremylt 
79016383ffcSjeremylt   CeedQFunction_Magma *impl;
79116383ffcSjeremylt   ierr = CeedCalloc(1, &impl); CeedChk(ierr);
79216383ffcSjeremylt   ierr = CeedCalloc(16, &impl->inputs); CeedChk(ierr);
79316383ffcSjeremylt   ierr = CeedCalloc(16, &impl->outputs); CeedChk(ierr);
79416383ffcSjeremylt   ierr = CeedQFunctionSetData(qf, (void *)&impl); CeedChk(ierr);
79516383ffcSjeremylt 
79682b77998SStan Tomov   qf->Apply = CeedQFunctionApply_Magma;
79782b77998SStan Tomov   qf->Destroy = CeedQFunctionDestroy_Magma;
79882b77998SStan Tomov   return 0;
79982b77998SStan Tomov }
80082b77998SStan Tomov 
80182b77998SStan Tomov static int CeedOperatorDestroy_Magma(CeedOperator op) {
80282b77998SStan Tomov   int ierr;
80316383ffcSjeremylt   CeedOperator_Magma *impl;
80416383ffcSjeremylt   ierr = CeedOperatorGetData(op, (void *)&impl); CeedChk(ierr);
80582b77998SStan Tomov 
8063bfcb0deSjeremylt   for (CeedInt i=0; i<impl->numein+impl->numeout; i++) {
80716383ffcSjeremylt     if (impl->Evecs[i]) {
80816383ffcSjeremylt       ierr = CeedVectorDestroy(&impl->Evecs[i]); CeedChk(ierr);
8093bfcb0deSjeremylt     }
8103bfcb0deSjeremylt   }
81116383ffcSjeremylt   ierr = CeedFree(&impl->Evecs); CeedChk(ierr);
81216383ffcSjeremylt   ierr = CeedFree(&impl->Edata); CeedChk(ierr);
81339dea11bSjeremylt 
81416383ffcSjeremylt   for (CeedInt i=0; i<impl->numein; i++) {
81516383ffcSjeremylt     ierr = CeedVectorDestroy(&impl->evecsin[i]); CeedChk(ierr);
81616383ffcSjeremylt     ierr = CeedVectorDestroy(&impl->qvecsin[i]); CeedChk(ierr);
81716383ffcSjeremylt   }
81816383ffcSjeremylt   ierr = CeedFree(&impl->evecsin); CeedChk(ierr);
81916383ffcSjeremylt   ierr = CeedFree(&impl->qvecsin); CeedChk(ierr);
8203bfcb0deSjeremylt 
82116383ffcSjeremylt   for (CeedInt i=0; i<impl->numeout; i++) {
82216383ffcSjeremylt     ierr = CeedVectorDestroy(&impl->evecsout[i]); CeedChk(ierr);
82316383ffcSjeremylt     ierr = CeedVectorDestroy(&impl->qvecsout[i]); CeedChk(ierr);
82416383ffcSjeremylt   }
82516383ffcSjeremylt   ierr = CeedFree(&impl->evecsout); CeedChk(ierr);
82616383ffcSjeremylt   ierr = CeedFree(&impl->qvecsout); CeedChk(ierr);
8273bfcb0deSjeremylt 
82816383ffcSjeremylt 
82916383ffcSjeremylt   ierr = CeedFree(&impl); CeedChk(ierr);
83082b77998SStan Tomov   return 0;
83182b77998SStan Tomov }
83282b77998SStan Tomov 
83308ed4d02Sjeremylt 
8343bfcb0deSjeremylt /*
8353bfcb0deSjeremylt   Setup infields or outfields
8363bfcb0deSjeremylt  */
83716383ffcSjeremylt static int CeedOperatorSetupFields_Magma(CeedQFunction qf, CeedOperator op,
83816383ffcSjeremylt     bool inOrOut,
83916383ffcSjeremylt     CeedVector *fullevecs, CeedVector *evecs,
84016383ffcSjeremylt     CeedVector *qvecs, CeedInt starte,
841135a076eSjeremylt     CeedInt numfields, CeedInt Q) {
84216383ffcSjeremylt   CeedInt dim = 1, ierr, ncomp;
84316383ffcSjeremylt   Ceed ceed;
84416383ffcSjeremylt   ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr);
84516383ffcSjeremylt   CeedQFunction_Magma *qf_data;
84616383ffcSjeremylt   ierr = CeedQFunctionGetData(qf, (void *)&qf_data); CeedChk(ierr);
84716383ffcSjeremylt   CeedBasis basis;
84816383ffcSjeremylt   CeedElemRestriction Erestrict;
84916383ffcSjeremylt   CeedOperatorField *opfields;
85016383ffcSjeremylt   CeedQFunctionField *qffields;
85116383ffcSjeremylt   if (inOrOut) {
85216383ffcSjeremylt     ierr = CeedOperatorGetFields(op, NULL, &opfields);
85316383ffcSjeremylt     CeedChk(ierr);
85416383ffcSjeremylt     ierr = CeedQFunctionGetFields(qf, NULL, &qffields);
85516383ffcSjeremylt     CeedChk(ierr);
85616383ffcSjeremylt   } else {
85716383ffcSjeremylt     ierr = CeedOperatorGetFields(op, &opfields, NULL);
85816383ffcSjeremylt     CeedChk(ierr);
85916383ffcSjeremylt     ierr = CeedQFunctionGetFields(qf, &qffields, NULL);
86016383ffcSjeremylt     CeedChk(ierr);
86116383ffcSjeremylt   }
86282b77998SStan Tomov 
8633bfcb0deSjeremylt   // Loop over fields
8643bfcb0deSjeremylt   for (CeedInt i=0; i<numfields; i++) {
86516383ffcSjeremylt     CeedEvalMode emode;
86616383ffcSjeremylt     ierr = CeedQFunctionFieldGetEvalMode(qffields[i], &emode); CeedChk(ierr);
867135a076eSjeremylt     if (emode != CEED_EVAL_WEIGHT) {
86816383ffcSjeremylt       ierr = CeedOperatorFieldGetElemRestriction(opfields[i], &Erestrict);
869135a076eSjeremylt       CeedChk(ierr);
87016383ffcSjeremylt       ierr = CeedElemRestrictionCreateVector(Erestrict, NULL, &fullevecs[i+starte]);
87116383ffcSjeremylt       CeedChk(ierr);
87216383ffcSjeremylt     } else {
873135a076eSjeremylt     }
8743bfcb0deSjeremylt     switch(emode) {
8753bfcb0deSjeremylt     case CEED_EVAL_NONE:
87616383ffcSjeremylt       ierr = CeedQFunctionFieldGetNumComponents(qffields[i], &ncomp);
87716383ffcSjeremylt       CeedChk(ierr);
87816383ffcSjeremylt       ierr = CeedVectorCreate(ceed, Q*ncomp, &evecs[i]); CeedChk(ierr);
87916383ffcSjeremylt       ierr = CeedVectorCreate(ceed, Q*ncomp, &qvecs[i]); CeedChk(ierr);
8803bfcb0deSjeremylt       break; // No action
8813bfcb0deSjeremylt     case CEED_EVAL_INTERP:
88216383ffcSjeremylt       ierr = CeedOperatorFieldGetBasis(opfields[i], &basis); CeedChk(ierr);
88316383ffcSjeremylt       ierr = CeedQFunctionFieldGetNumComponents(qffields[i], &ncomp);
88416383ffcSjeremylt       CeedChk(ierr);
88516383ffcSjeremylt       ierr = CeedVectorCreate(ceed, Q*ncomp, &evecs[i]); CeedChk(ierr);
88616383ffcSjeremylt       ierr = CeedVectorCreate(ceed, Q*ncomp, &qvecs[i]); CeedChk(ierr);
8873bfcb0deSjeremylt       break;
8883bfcb0deSjeremylt     case CEED_EVAL_GRAD:
88916383ffcSjeremylt       ierr = CeedOperatorFieldGetBasis(opfields[i], &basis); CeedChk(ierr);
89016383ffcSjeremylt       ierr = CeedQFunctionFieldGetNumComponents(qffields[i], &ncomp);
89116383ffcSjeremylt       ierr = CeedBasisGetDimension(basis, &dim); CeedChk(ierr);
89216383ffcSjeremylt       ierr = CeedQFunctionFieldGetNumComponents(qffields[i], &ncomp);
89316383ffcSjeremylt       CeedChk(ierr);
89416383ffcSjeremylt       ierr = CeedVectorCreate(ceed, Q*ncomp, &evecs[i]); CeedChk(ierr);
89516383ffcSjeremylt       ierr = CeedVectorCreate(ceed, Q*ncomp*dim, &qvecs[i]); CeedChk(ierr);
8963bfcb0deSjeremylt       break;
8973bfcb0deSjeremylt     case CEED_EVAL_WEIGHT: // Only on input fields
89816383ffcSjeremylt       ierr = CeedOperatorFieldGetBasis(opfields[i], &basis); CeedChk(ierr);
89916383ffcSjeremylt       ierr = CeedBasisGetDimension(basis, &dim); CeedChk(ierr);
90016383ffcSjeremylt       ierr = CeedVectorCreate(ceed, Q, &qvecs[i]); CeedChk(ierr);
90116383ffcSjeremylt       ierr = CeedBasisApply(basis, 1, CEED_NOTRANSPOSE, CEED_EVAL_WEIGHT,
90216383ffcSjeremylt                             NULL, qvecs[i]); CeedChk(ierr);
90316383ffcSjeremylt       assert(starte==0);
9043bfcb0deSjeremylt       break;
90516383ffcSjeremylt     case CEED_EVAL_DIV: break; // Not implemented
90616383ffcSjeremylt     case CEED_EVAL_CURL: break; // Not implemented
90782b77998SStan Tomov     }
9083bfcb0deSjeremylt   }
90982b77998SStan Tomov   return 0;
91082b77998SStan Tomov }
91182b77998SStan Tomov 
9123bfcb0deSjeremylt /*
9133bfcb0deSjeremylt   CeedOperator needs to connect all the named fields (be they active or passive)
9143bfcb0deSjeremylt   to the named inputs and outputs of its CeedQFunction.
9153bfcb0deSjeremylt  */
9163bfcb0deSjeremylt static int CeedOperatorSetup_Magma(CeedOperator op) {
91782b77998SStan Tomov   int ierr;
91816383ffcSjeremylt   bool setupdone;
91916383ffcSjeremylt   ierr = CeedOperatorGetSetupStatus(op, &setupdone); CeedChk(ierr);
92016383ffcSjeremylt   if (setupdone) return 0;
92116383ffcSjeremylt   Ceed ceed;
92216383ffcSjeremylt   ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr);
92316383ffcSjeremylt   CeedOperator_Magma *data;
92416383ffcSjeremylt   ierr = CeedOperatorGetData(op, (void *)&data); CeedChk(ierr);
92516383ffcSjeremylt   CeedQFunction qf;
92616383ffcSjeremylt   ierr = CeedOperatorGetQFunction(op, &qf); CeedChk(ierr);
92716383ffcSjeremylt   CeedInt Q, numinputfields, numoutputfields;
92816383ffcSjeremylt   ierr = CeedOperatorGetNumQuadraturePoints(op, &Q); CeedChk(ierr);
92916383ffcSjeremylt   ierr = CeedQFunctionGetNumArgs(qf, &numinputfields, &numoutputfields);
93016383ffcSjeremylt   CeedOperatorField *opinputfields, *opoutputfields;
93116383ffcSjeremylt   ierr = CeedOperatorGetFields(op, &opinputfields, &opoutputfields);
93216383ffcSjeremylt   CeedChk(ierr);
93316383ffcSjeremylt   CeedQFunctionField *qfinputfields, *qfoutputfields;
93416383ffcSjeremylt   ierr = CeedQFunctionGetFields(qf, &qfinputfields, &qfoutputfields);
93516383ffcSjeremylt   CeedChk(ierr);
93682b77998SStan Tomov 
93716383ffcSjeremylt   data->numein = numinputfields;
93816383ffcSjeremylt   data->numeout = numoutputfields;
9393bfcb0deSjeremylt 
9403bfcb0deSjeremylt   // Allocate
94116383ffcSjeremylt   const CeedInt numIO = numinputfields + numoutputfields;
94216383ffcSjeremylt 
94316383ffcSjeremylt   ierr = CeedCalloc(numinputfields + numoutputfields, &data->Evecs);
94416383ffcSjeremylt   CeedChk(ierr);
94516383ffcSjeremylt   ierr = CeedCalloc(numinputfields + numoutputfields, &data->Edata);
9463bfcb0deSjeremylt   CeedChk(ierr);
9473bfcb0deSjeremylt 
94816383ffcSjeremylt   ierr = CeedCalloc(16, &data->evecsin); CeedChk(ierr);
94916383ffcSjeremylt   ierr = CeedCalloc(16, &data->evecsout); CeedChk(ierr);
95016383ffcSjeremylt   ierr = CeedCalloc(16, &data->qvecsin); CeedChk(ierr);
95116383ffcSjeremylt   ierr = CeedCalloc(16, &data->qvecsout); CeedChk(ierr);
9523bfcb0deSjeremylt 
9533bfcb0deSjeremylt   // Set up infield and outfield pointer arrays
9543bfcb0deSjeremylt   // Infields
95516383ffcSjeremylt   ierr = CeedOperatorSetupFields_Magma(qf, op, 0, data->Evecs,
95616383ffcSjeremylt                                        data->evecsin, data->qvecsin, 0,
95716383ffcSjeremylt                                        numinputfields, Q);
95816383ffcSjeremylt   CeedChk(ierr);
9593bfcb0deSjeremylt   // Outfields
96016383ffcSjeremylt   ierr = CeedOperatorSetupFields_Magma(qf, op, 1, data->Evecs,
96116383ffcSjeremylt                                        data->evecsout, data->qvecsout,
96216383ffcSjeremylt                                        numinputfields, numoutputfields, Q);
96316383ffcSjeremylt   CeedChk(ierr);
96416383ffcSjeremylt   ierr = CeedOperatorSetSetupDone(op); CeedChk(ierr);
9653bfcb0deSjeremylt   return 0;
9663bfcb0deSjeremylt }
9673bfcb0deSjeremylt 
96868e7ed8aSjeremylt static int CeedOperatorApply_Magma(CeedOperator op, CeedVector invec,
9693bfcb0deSjeremylt                                    CeedVector outvec, CeedRequest *request) {
9703bfcb0deSjeremylt   int ierr;
97116383ffcSjeremylt   Ceed ceed;
97216383ffcSjeremylt   ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr);
97316383ffcSjeremylt   CeedOperator_Magma *data;
97416383ffcSjeremylt   ierr = CeedOperatorGetData(op, (void *)&data); CeedChk(ierr);
97516383ffcSjeremylt   //CeedVector *E = data->Evecs, *D = data->D, outvec;
97616383ffcSjeremylt   CeedInt Q, elemsize, numelements, numinputfields, numoutputfields, ncomp;
97716383ffcSjeremylt   ierr = CeedOperatorGetNumQuadraturePoints(op, &Q); CeedChk(ierr);
97816383ffcSjeremylt   ierr = CeedOperatorGetNumElements(op, &numelements); CeedChk(ierr);
97916383ffcSjeremylt   CeedQFunction qf;
98016383ffcSjeremylt   ierr = CeedOperatorGetQFunction(op, &qf); CeedChk(ierr);
98116383ffcSjeremylt   CeedTransposeMode lmode;
98216383ffcSjeremylt   CeedOperatorField *opinputfields, *opoutputfields;
98316383ffcSjeremylt   ierr = CeedOperatorGetFields(op, &opinputfields, &opoutputfields);
98416383ffcSjeremylt   CeedChk(ierr);
98516383ffcSjeremylt   CeedQFunctionField *qfinputfields, *qfoutputfields;
98616383ffcSjeremylt   ierr = CeedQFunctionGetFields(qf, &qfinputfields, &qfoutputfields);
98716383ffcSjeremylt   CeedChk(ierr);
98816383ffcSjeremylt   CeedEvalMode emode;
98916383ffcSjeremylt   CeedVector vec;
99016383ffcSjeremylt   CeedBasis basis;
99116383ffcSjeremylt   CeedElemRestriction Erestrict;
9923bfcb0deSjeremylt 
99368e7ed8aSjeremylt   ierr = CeedOperatorSetup_Magma(op); CeedChk(ierr);
9949f0427d9SYohann   ierr= CeedQFunctionGetNumArgs(qf, &numinputfields, &numoutputfields);
9959f0427d9SYohann   CeedChk(ierr);
9963bfcb0deSjeremylt 
9973bfcb0deSjeremylt   // Input Evecs and Restriction
99816383ffcSjeremylt   for (CeedInt i=0; i<numinputfields; i++) {
99916383ffcSjeremylt     ierr = CeedQFunctionFieldGetEvalMode(qfinputfields[i], &emode);
10000f5de9e9Sjeremylt     CeedChk(ierr);
100116383ffcSjeremylt     if (emode & CEED_EVAL_WEIGHT) {
100216383ffcSjeremylt     } else { // Restriction
100316383ffcSjeremylt       // Get input vector
100416383ffcSjeremylt       ierr = CeedOperatorFieldGetVector(opinputfields[i], &vec); CeedChk(ierr);
100516383ffcSjeremylt       if (vec == CEED_VECTOR_ACTIVE)
100616383ffcSjeremylt         vec = invec;
1007668048e2SJed Brown       // Restrict
100816383ffcSjeremylt       ierr = CeedOperatorFieldGetElemRestriction(opinputfields[i], &Erestrict);
100916383ffcSjeremylt       CeedChk(ierr);
101016383ffcSjeremylt       ierr = CeedOperatorFieldGetLMode(opinputfields[i], &lmode); CeedChk(ierr);
101116383ffcSjeremylt       ierr = CeedElemRestrictionApply(Erestrict, CEED_NOTRANSPOSE,
101216383ffcSjeremylt                                       lmode, vec, data->Evecs[i],
1013668048e2SJed Brown                                       request); CeedChk(ierr);
1014668048e2SJed Brown       // Get evec
101516383ffcSjeremylt       ierr = CeedVectorGetArrayRead(data->Evecs[i], CEED_MEM_HOST,
101616383ffcSjeremylt                                     (const CeedScalar **) &data->Edata[i]);
101716383ffcSjeremylt       CeedChk(ierr);
10183bfcb0deSjeremylt     }
10193bfcb0deSjeremylt   }
10203bfcb0deSjeremylt 
10213bfcb0deSjeremylt   // Output Evecs
102216383ffcSjeremylt   for (CeedInt i=0; i<numoutputfields; i++) {
102316383ffcSjeremylt     ierr = CeedVectorGetArray(data->Evecs[i+data->numein], CEED_MEM_HOST,
102416383ffcSjeremylt                               &data->Edata[i + numinputfields]); CeedChk(ierr);
10253bfcb0deSjeremylt   }
10263bfcb0deSjeremylt 
10273bfcb0deSjeremylt   // Loop through elements
102816383ffcSjeremylt   for (CeedInt e=0; e<numelements; e++) {
10293bfcb0deSjeremylt     // Input basis apply if needed
103016383ffcSjeremylt     for (CeedInt i=0; i<numinputfields; i++) {
1031135a076eSjeremylt       // Get elemsize, emode, ncomp
103216383ffcSjeremylt       ierr = CeedOperatorFieldGetElemRestriction(opinputfields[i], &Erestrict);
103316383ffcSjeremylt       CeedChk(ierr);
103416383ffcSjeremylt       ierr = CeedElemRestrictionGetElementSize(Erestrict, &elemsize);
103516383ffcSjeremylt       CeedChk(ierr);
103616383ffcSjeremylt       ierr = CeedQFunctionFieldGetEvalMode(qfinputfields[i], &emode);
103716383ffcSjeremylt       CeedChk(ierr);
103816383ffcSjeremylt       ierr = CeedQFunctionFieldGetNumComponents(qfinputfields[i], &ncomp);
103916383ffcSjeremylt       CeedChk(ierr);
10403bfcb0deSjeremylt       // Basis action
10413bfcb0deSjeremylt       switch(emode) {
10423bfcb0deSjeremylt       case CEED_EVAL_NONE:
104316383ffcSjeremylt         ierr = CeedVectorSetArray(data->qvecsin[i], CEED_MEM_HOST,
104416383ffcSjeremylt                                   CEED_USE_POINTER,
104516383ffcSjeremylt                                   &data->Edata[i][e*Q*ncomp]); CeedChk(ierr);
10463bfcb0deSjeremylt         break;
10473bfcb0deSjeremylt       case CEED_EVAL_INTERP:
104816383ffcSjeremylt         ierr = CeedOperatorFieldGetBasis(opinputfields[i], &basis); CeedChk(ierr);
104916383ffcSjeremylt         ierr = CeedVectorSetArray(data->evecsin[i], CEED_MEM_HOST,
105016383ffcSjeremylt                                   CEED_USE_POINTER,
105116383ffcSjeremylt                                   &data->Edata[i][e*elemsize*ncomp]);
10523bfcb0deSjeremylt         CeedChk(ierr);
105316383ffcSjeremylt         ierr = CeedBasisApply(basis, 1, CEED_NOTRANSPOSE,
105416383ffcSjeremylt                               CEED_EVAL_INTERP, data->evecsin[i],
105516383ffcSjeremylt                               data->qvecsin[i]); CeedChk(ierr);
10563bfcb0deSjeremylt         break;
10573bfcb0deSjeremylt       case CEED_EVAL_GRAD:
105816383ffcSjeremylt         ierr = CeedOperatorFieldGetBasis(opinputfields[i], &basis); CeedChk(ierr);
105916383ffcSjeremylt         ierr = CeedVectorSetArray(data->evecsin[i], CEED_MEM_HOST,
106016383ffcSjeremylt                                   CEED_USE_POINTER,
106116383ffcSjeremylt                                   &data->Edata[i][e*elemsize*ncomp]);
10623bfcb0deSjeremylt         CeedChk(ierr);
106316383ffcSjeremylt         ierr = CeedBasisApply(basis, 1, CEED_NOTRANSPOSE,
106416383ffcSjeremylt                               CEED_EVAL_GRAD, data->evecsin[i],
106516383ffcSjeremylt                               data->qvecsin[i]); CeedChk(ierr);
10663bfcb0deSjeremylt         break;
10673bfcb0deSjeremylt       case CEED_EVAL_WEIGHT:
10683bfcb0deSjeremylt         break;  // No action
10693bfcb0deSjeremylt       case CEED_EVAL_DIV:
107016383ffcSjeremylt         break; // Not implemented
10713bfcb0deSjeremylt       case CEED_EVAL_CURL:
107216383ffcSjeremylt         break; // Not implemented
10733bfcb0deSjeremylt       }
10743bfcb0deSjeremylt     }
10753bfcb0deSjeremylt     // Output pointers
107616383ffcSjeremylt     for (CeedInt i=0; i<numoutputfields; i++) {
107716383ffcSjeremylt       ierr = CeedQFunctionFieldGetEvalMode(qfoutputfields[i], &emode);
107816383ffcSjeremylt       CeedChk(ierr);
10793bfcb0deSjeremylt       if (emode == CEED_EVAL_NONE) {
108016383ffcSjeremylt         ierr = CeedQFunctionFieldGetNumComponents(qfoutputfields[i], &ncomp);
108116383ffcSjeremylt         CeedChk(ierr);
108216383ffcSjeremylt         ierr = CeedVectorSetArray(data->qvecsout[i], CEED_MEM_HOST,
108316383ffcSjeremylt                                   CEED_USE_POINTER,
108416383ffcSjeremylt                                   &data->Edata[i + numinputfields][e*Q*ncomp]);
108516383ffcSjeremylt         CeedChk(ierr);
108616383ffcSjeremylt       }
108716383ffcSjeremylt       if (emode == CEED_EVAL_INTERP) {
108816383ffcSjeremylt       }
108916383ffcSjeremylt       if (emode == CEED_EVAL_GRAD) {
109016383ffcSjeremylt       }
109116383ffcSjeremylt       if (emode == CEED_EVAL_WEIGHT) {
10923bfcb0deSjeremylt       }
10933bfcb0deSjeremylt     }
109416383ffcSjeremylt 
10953bfcb0deSjeremylt     // Q function
109616383ffcSjeremylt     ierr = CeedQFunctionApply(qf, Q, data->qvecsin, data->qvecsout); CeedChk(ierr);
10973bfcb0deSjeremylt 
10983bfcb0deSjeremylt     // Output basis apply if needed
109916383ffcSjeremylt     for (CeedInt i=0; i<numoutputfields; i++) {
1100135a076eSjeremylt       // Get elemsize, emode, ncomp
110116383ffcSjeremylt       ierr = CeedOperatorFieldGetElemRestriction(opoutputfields[i], &Erestrict);
110216383ffcSjeremylt       CeedChk(ierr);
110316383ffcSjeremylt       ierr = CeedElemRestrictionGetElementSize(Erestrict, &elemsize);
110416383ffcSjeremylt       CeedChk(ierr);
110516383ffcSjeremylt       ierr = CeedQFunctionFieldGetEvalMode(qfoutputfields[i], &emode);
110616383ffcSjeremylt       CeedChk(ierr);
110716383ffcSjeremylt       ierr = CeedQFunctionFieldGetNumComponents(qfoutputfields[i], &ncomp);
110816383ffcSjeremylt       CeedChk(ierr);
11093bfcb0deSjeremylt       // Basis action
11103bfcb0deSjeremylt       switch(emode) {
11113bfcb0deSjeremylt       case CEED_EVAL_NONE:
11123bfcb0deSjeremylt         break; // No action
11133bfcb0deSjeremylt       case CEED_EVAL_INTERP:
111416383ffcSjeremylt         ierr = CeedOperatorFieldGetBasis(opoutputfields[i], &basis);
111516383ffcSjeremylt         CeedChk(ierr);
111616383ffcSjeremylt         ierr = CeedVectorSetArray(data->evecsout[i], CEED_MEM_HOST,
111716383ffcSjeremylt                                   CEED_USE_POINTER,
111816383ffcSjeremylt                                   &data->Edata[i + numinputfields][e*elemsize*ncomp]);
111916383ffcSjeremylt         ierr = CeedBasisApply(basis, 1, CEED_TRANSPOSE,
112016383ffcSjeremylt                               CEED_EVAL_INTERP, data->qvecsout[i],
112116383ffcSjeremylt                               data->evecsout[i]); CeedChk(ierr);
11223bfcb0deSjeremylt         break;
11233bfcb0deSjeremylt       case CEED_EVAL_GRAD:
112416383ffcSjeremylt         ierr = CeedOperatorFieldGetBasis(opoutputfields[i], &basis);
11253bfcb0deSjeremylt         CeedChk(ierr);
112616383ffcSjeremylt         ierr = CeedVectorSetArray(data->evecsout[i], CEED_MEM_HOST,
112716383ffcSjeremylt                                   CEED_USE_POINTER,
112816383ffcSjeremylt                                   &data->Edata[i + numinputfields][e*elemsize*ncomp]);
112916383ffcSjeremylt         ierr = CeedBasisApply(basis, 1, CEED_TRANSPOSE,
113016383ffcSjeremylt                               CEED_EVAL_GRAD, data->qvecsout[i],
113116383ffcSjeremylt                               data->evecsout[i]); CeedChk(ierr);
11323bfcb0deSjeremylt         break;
113316383ffcSjeremylt       case CEED_EVAL_WEIGHT: break; // Should not occur
113416383ffcSjeremylt       case CEED_EVAL_DIV: break; // Not implemented
113516383ffcSjeremylt       case CEED_EVAL_CURL: break; // Not implemented
11363bfcb0deSjeremylt       }
11373bfcb0deSjeremylt     }
113816383ffcSjeremylt   } // numelements
113916383ffcSjeremylt 
114016383ffcSjeremylt   // Zero lvecs
114116383ffcSjeremylt   for (CeedInt i=0; i<numoutputfields; i++) {
114216383ffcSjeremylt     ierr = CeedOperatorFieldGetVector(opoutputfields[i], &vec); CeedChk(ierr);
114316383ffcSjeremylt     if (vec == CEED_VECTOR_ACTIVE)
114416383ffcSjeremylt       vec = outvec;
114516383ffcSjeremylt     ierr = CeedVectorSetValue(vec, 0.0); CeedChk(ierr);
11463bfcb0deSjeremylt   }
11473bfcb0deSjeremylt 
11483bfcb0deSjeremylt   // Output restriction
114916383ffcSjeremylt   for (CeedInt i=0; i<numoutputfields; i++) {
115016383ffcSjeremylt     // Restore evec
115116383ffcSjeremylt     ierr = CeedVectorRestoreArray(data->Evecs[i+data->numein],
115216383ffcSjeremylt                                   &data->Edata[i + numinputfields]);
115316383ffcSjeremylt     CeedChk(ierr);
115416383ffcSjeremylt     // Get output vector
115516383ffcSjeremylt     ierr = CeedOperatorFieldGetVector(opoutputfields[i], &vec); CeedChk(ierr);
1156668048e2SJed Brown     // Active
115716383ffcSjeremylt     if (vec == CEED_VECTOR_ACTIVE)
115816383ffcSjeremylt       vec = outvec;
11596a35ea15Sjeremylt     // Restrict
116016383ffcSjeremylt     ierr = CeedOperatorFieldGetElemRestriction(opoutputfields[i], &Erestrict);
11613bfcb0deSjeremylt     CeedChk(ierr);
116216383ffcSjeremylt     ierr = CeedOperatorFieldGetLMode(opoutputfields[i], &lmode); CeedChk(ierr);
116316383ffcSjeremylt     ierr = CeedElemRestrictionApply(Erestrict, CEED_TRANSPOSE,
116416383ffcSjeremylt                                     lmode, data->Evecs[i+data->numein], vec,
1165668048e2SJed Brown                                     request); CeedChk(ierr);
11663bfcb0deSjeremylt   }
11673bfcb0deSjeremylt 
11686a35ea15Sjeremylt   // Restore input arrays
116916383ffcSjeremylt   for (CeedInt i=0; i<numinputfields; i++) {
117016383ffcSjeremylt     ierr = CeedQFunctionFieldGetEvalMode(qfinputfields[i], &emode);
117116383ffcSjeremylt     CeedChk(ierr);
11726a35ea15Sjeremylt     if (emode & CEED_EVAL_WEIGHT) {
11736a35ea15Sjeremylt     } else {
117416383ffcSjeremylt       // Restriction
117516383ffcSjeremylt       ierr = CeedVectorRestoreArrayRead(data->Evecs[i],
117616383ffcSjeremylt                                         (const CeedScalar **) &data->Edata[i]);
117716383ffcSjeremylt       CeedChk(ierr);
11786a35ea15Sjeremylt     }
11796a35ea15Sjeremylt   }
118082b77998SStan Tomov   return 0;
118182b77998SStan Tomov }
118282b77998SStan Tomov 
118382b77998SStan Tomov static int CeedOperatorCreate_Magma(CeedOperator op) {
118482b77998SStan Tomov   int ierr;
118516383ffcSjeremylt   CeedOperator_Magma *impl;
118616383ffcSjeremylt   Ceed ceed;
118716383ffcSjeremylt   ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr);
118882b77998SStan Tomov 
118982b77998SStan Tomov   ierr = CeedCalloc(1, &impl); CeedChk(ierr);
119016383ffcSjeremylt   ierr = CeedOperatorSetData(op, (void *)&impl);
119116383ffcSjeremylt 
119216383ffcSjeremylt   ierr = CeedSetBackendFunction(ceed, "Operator", op, "Apply",
119316383ffcSjeremylt                                 CeedOperatorApply_Magma); CeedChk(ierr);
119416383ffcSjeremylt   ierr = CeedSetBackendFunction(ceed, "Operator", op, "Destroy",
119516383ffcSjeremylt                                 CeedOperatorDestroy_Magma); CeedChk(ierr);
119682b77998SStan Tomov   return 0;
119782b77998SStan Tomov }
119882b77998SStan Tomov 
1199*52d6035fSJeremy L Thompson int CeedCompositeOperatorCreate_Magma(CeedOperator op) {
1200*52d6035fSJeremy L Thompson   int ierr;
1201*52d6035fSJeremy L Thompson   Ceed ceed;
1202*52d6035fSJeremy L Thompson   ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr);
1203*52d6035fSJeremy L Thompson   return CeedError(ceed, 1, "Backend does not support composite operators");
1204*52d6035fSJeremy L Thompson }
1205*52d6035fSJeremy L Thompson 
120682b77998SStan Tomov // *****************************************************************************
120782b77998SStan Tomov // * INIT
120882b77998SStan Tomov // *****************************************************************************
120982b77998SStan Tomov static int CeedInit_Magma(const char *resource, Ceed ceed) {
12101dc2661bSVeselin Dobrev   int ierr;
12112a847359SStan Tomov   if (strcmp(resource, "/gpu/magma"))
121282b77998SStan Tomov     return CeedError(ceed, 1, "MAGMA backend cannot use resource: %s", resource);
121393fbe329SStan Tomov 
12141dc2661bSVeselin Dobrev   ierr = magma_init();
12151dc2661bSVeselin Dobrev   if (ierr) return CeedError(ceed, 1, "error in magma_init(): %d\n", ierr);
121693fbe329SStan Tomov   //magma_print_environment();
121793fbe329SStan Tomov 
121882b77998SStan Tomov   ceed->VecCreate = CeedVectorCreate_Magma;
121982b77998SStan Tomov   ceed->BasisCreateTensorH1 = CeedBasisCreateTensorH1_Magma;
1220a8de75f0Sjeremylt   ceed->BasisCreateH1 = CeedBasisCreateH1_Magma;
122182b77998SStan Tomov   ceed->ElemRestrictionCreate = CeedElemRestrictionCreate_Magma;
12224cc6aec3Sjeremylt   ceed->ElemRestrictionCreateBlocked = CeedElemRestrictionCreateBlocked_Magma;
122382b77998SStan Tomov   ceed->QFunctionCreate = CeedQFunctionCreate_Magma;
122482b77998SStan Tomov   ceed->OperatorCreate = CeedOperatorCreate_Magma;
1225*52d6035fSJeremy L Thompson   ceed->CompositeOperatorCreate = CeedCompositeOperatorCreate_Magma;
122682b77998SStan Tomov   return 0;
122782b77998SStan Tomov }
122882b77998SStan Tomov 
122982b77998SStan Tomov // *****************************************************************************
123082b77998SStan Tomov // * REGISTER
123182b77998SStan Tomov // *****************************************************************************
123282b77998SStan Tomov __attribute__((constructor))
123382b77998SStan Tomov static void Register(void) {
123444951fc6Sjeremylt   CeedRegister("/gpu/magma", CeedInit_Magma, 20);
123582b77998SStan Tomov }
1236