xref: /libCEED/backends/ref/ceed-ref.c (revision 3ea7ca84b52992f74389dd487277a43bb59ac7ed)
1 // Copyright (c) 2017, Lawrence Livermore National Security, LLC. Produced at
2 // the Lawrence Livermore National Laboratory. LLNL-CODE-734707. All Rights
3 // reserved. See files LICENSE and NOTICE for details.
4 //
5 // This file is part of CEED, a collection of benchmarks, miniapps, software
6 // libraries and APIs for efficient high-order finite element and spectral
7 // element discretizations for exascale applications. For more information and
8 // source code availability see http://github.com/ceed.
9 //
10 // The CEED research is supported by the Exascale Computing Project 17-SC-20-SC,
11 // a collaborative effort of two U.S. Department of Energy organizations (Office
12 // of Science and the National Nuclear Security Administration) responsible for
13 // the planning and preparation of a capable exascale ecosystem, including
14 // software, applications, hardware, advanced system engineering and early
15 // testbed platforms, in support of the nation's exascale computing imperative.
16 
17 #include <ceed-impl.h>
18 #include <string.h>
19 
20 typedef struct {
21   CeedScalar *array;
22   CeedScalar *array_allocated;
23 } CeedVector_Ref;
24 
25 typedef struct {
26   const CeedInt *indices;
27   CeedInt *indices_allocated;
28 } CeedElemRestriction_Ref;
29 
30 typedef struct {
31   CeedVector etmp;
32   CeedVector qdata;
33 } CeedOperator_Ref;
34 
35 static int CeedVectorSetArray_Ref(CeedVector vec, CeedMemType mtype,
36                                   CeedCopyMode cmode, CeedScalar *array) {
37   CeedVector_Ref *impl = vec->data;
38   int ierr;
39 
40   if (mtype != CEED_MEM_HOST)
41     return CeedError(vec->ceed, 1, "Only MemType = HOST supported");
42   ierr = CeedFree(&impl->array_allocated); CeedChk(ierr);
43   switch (cmode) {
44   case CEED_COPY_VALUES:
45     ierr = CeedMalloc(vec->length, &impl->array_allocated); CeedChk(ierr);
46     impl->array = impl->array_allocated;
47     if (array) memcpy(impl->array, array, vec->length * sizeof(array[0]));
48     break;
49   case CEED_OWN_POINTER:
50     impl->array_allocated = array;
51     impl->array = array;
52     break;
53   case CEED_USE_POINTER:
54     impl->array = array;
55   }
56   return 0;
57 }
58 
59 static int CeedVectorGetArray_Ref(CeedVector vec, CeedMemType mtype,
60                                   CeedScalar **array) {
61   CeedVector_Ref *impl = vec->data;
62   int ierr;
63 
64   if (mtype != CEED_MEM_HOST)
65     return CeedError(vec->ceed, 1, "Can only provide to HOST memory");
66   if (!impl->array) { // Allocate if array is not yet allocated
67     ierr = CeedVectorSetArray(vec, CEED_MEM_HOST, CEED_COPY_VALUES, NULL);
68     CeedChk(ierr);
69   }
70   *array = impl->array;
71   return 0;
72 }
73 
74 static int CeedVectorGetArrayRead_Ref(CeedVector vec, CeedMemType mtype,
75                                       const CeedScalar **array) {
76   CeedVector_Ref *impl = vec->data;
77   int ierr;
78 
79   if (mtype != CEED_MEM_HOST)
80     return CeedError(vec->ceed, 1, "Can only provide to HOST memory");
81   if (!impl->array) { // Allocate if array is not yet allocated
82     ierr = CeedVectorSetArray(vec, CEED_MEM_HOST, CEED_COPY_VALUES, NULL);
83     CeedChk(ierr);
84   }
85   *array = impl->array;
86   return 0;
87 }
88 
89 static int CeedVectorRestoreArray_Ref(CeedVector vec, CeedScalar **array) {
90   *array = NULL;
91   return 0;
92 }
93 
94 static int CeedVectorRestoreArrayRead_Ref(CeedVector vec,
95     const CeedScalar **array) {
96   *array = NULL;
97   return 0;
98 }
99 
100 static int CeedVectorDestroy_Ref(CeedVector vec) {
101   CeedVector_Ref *impl = vec->data;
102   int ierr;
103 
104   ierr = CeedFree(&impl->array_allocated); CeedChk(ierr);
105   ierr = CeedFree(&vec->data); CeedChk(ierr);
106   return 0;
107 }
108 
109 static int CeedVectorCreate_Ref(Ceed ceed, CeedInt n, CeedVector vec) {
110   CeedVector_Ref *impl;
111   int ierr;
112 
113   vec->SetArray = CeedVectorSetArray_Ref;
114   vec->GetArray = CeedVectorGetArray_Ref;
115   vec->GetArrayRead = CeedVectorGetArrayRead_Ref;
116   vec->RestoreArray = CeedVectorRestoreArray_Ref;
117   vec->RestoreArrayRead = CeedVectorRestoreArrayRead_Ref;
118   vec->Destroy = CeedVectorDestroy_Ref;
119   ierr = CeedCalloc(1,&impl); CeedChk(ierr);
120   vec->data = impl;
121   return 0;
122 }
123 
124 static int CeedElemRestrictionApply_Ref(CeedElemRestriction r,
125                                         CeedTransposeMode tmode, CeedInt ncomp,
126                                         CeedTransposeMode lmode, CeedVector u,
127                                         CeedVector v, CeedRequest *request) {
128   CeedElemRestriction_Ref *impl = r->data;
129   int ierr;
130   const CeedScalar *uu;
131   CeedScalar *vv;
132   CeedInt esize = r->nelem*r->elemsize;
133 
134   ierr = CeedVectorGetArrayRead(u, CEED_MEM_HOST, &uu); CeedChk(ierr);
135   ierr = CeedVectorGetArray(v, CEED_MEM_HOST, &vv); CeedChk(ierr);
136   if (tmode == CEED_NOTRANSPOSE) {
137     // Perform: v = r * u
138     if (ncomp == 1) {
139       for (CeedInt i=0; i<esize; i++) vv[i] = uu[impl->indices[i]];
140     } else {
141       // vv is (elemsize x ncomp x nelem), column-major
142       if (lmode == CEED_NOTRANSPOSE) { // u is (ndof x ncomp), column-major
143         for (CeedInt e = 0; e < r->nelem; e++)
144           for (CeedInt d = 0; d < ncomp; d++)
145             for (CeedInt i=0; i<r->elemsize; i++) {
146               vv[i+r->elemsize*(d+ncomp*e)] =
147                 uu[impl->indices[i+r->elemsize*e]+r->ndof*d];
148             }
149       } else { // u is (ncomp x ndof), column-major
150         for (CeedInt e = 0; e < r->nelem; e++)
151           for (CeedInt d = 0; d < ncomp; d++)
152             for (CeedInt i=0; i<r->elemsize; i++) {
153               vv[i+r->elemsize*(d+ncomp*e)] =
154                 uu[d+ncomp*impl->indices[i+r->elemsize*e]];
155             }
156       }
157     }
158   } else {
159     // Note: in transpose mode, we perform: v += r^t * u
160     if (ncomp == 1) {
161       for (CeedInt i=0; i<esize; i++) vv[impl->indices[i]] += uu[i];
162     } else {
163       // u is (elemsize x ncomp x nelem)
164       if (lmode == CEED_NOTRANSPOSE) { // vv is (ndof x ncomp), column-major
165         for (CeedInt e = 0; e < r->nelem; e++)
166           for (CeedInt d = 0; d < ncomp; d++)
167             for (CeedInt i=0; i<r->elemsize; i++) {
168               vv[impl->indices[i+r->elemsize*e]+r->ndof*d] +=
169                 uu[i+r->elemsize*(d+e*ncomp)];
170             }
171       } else { // vv is (ncomp x ndof), column-major
172         for (CeedInt e = 0; e < r->nelem; e++)
173           for (CeedInt d = 0; d < ncomp; d++)
174             for (CeedInt i=0; i<r->elemsize; i++) {
175               vv[d+ncomp*impl->indices[i+r->elemsize*e]] +=
176                 uu[i+r->elemsize*(d+e*ncomp)];
177             }
178       }
179     }
180   }
181   ierr = CeedVectorRestoreArrayRead(u, &uu); CeedChk(ierr);
182   ierr = CeedVectorRestoreArray(v, &vv); CeedChk(ierr);
183   if (request != CEED_REQUEST_IMMEDIATE && request != CEED_REQUEST_ORDERED)
184     *request = NULL;
185   return 0;
186 }
187 
188 static int CeedElemRestrictionDestroy_Ref(CeedElemRestriction r) {
189   CeedElemRestriction_Ref *impl = r->data;
190   int ierr;
191 
192   ierr = CeedFree(&impl->indices_allocated); CeedChk(ierr);
193   ierr = CeedFree(&r->data); CeedChk(ierr);
194   return 0;
195 }
196 
197 static int CeedElemRestrictionCreate_Ref(CeedElemRestriction r,
198     CeedMemType mtype,
199     CeedCopyMode cmode, const CeedInt *indices) {
200   int ierr;
201   CeedElemRestriction_Ref *impl;
202 
203   if (mtype != CEED_MEM_HOST)
204     return CeedError(r->ceed, 1, "Only MemType = HOST supported");
205   ierr = CeedCalloc(1,&impl); CeedChk(ierr);
206   switch (cmode) {
207   case CEED_COPY_VALUES:
208     ierr = CeedMalloc(r->nelem*r->elemsize, &impl->indices_allocated);
209     CeedChk(ierr);
210     memcpy(impl->indices_allocated, indices,
211            r->nelem * r->elemsize * sizeof(indices[0]));
212     impl->indices = impl->indices_allocated;
213     break;
214   case CEED_OWN_POINTER:
215     impl->indices_allocated = (CeedInt *)indices;
216     impl->indices = impl->indices_allocated;
217     break;
218   case CEED_USE_POINTER:
219     impl->indices = indices;
220   }
221   r->data = impl;
222   r->Apply = CeedElemRestrictionApply_Ref;
223   r->Destroy = CeedElemRestrictionDestroy_Ref;
224   return 0;
225 }
226 
227 // Contracts on the middle index
228 // NOTRANSPOSE: V_ajc = T_jb U_abc
229 // TRANSPOSE:   V_ajc = T_bj U_abc
230 // If Add != 0, "=" is replaced by "+="
231 static int CeedTensorContract_Ref(Ceed ceed,
232                                   CeedInt A, CeedInt B, CeedInt C, CeedInt J,
233                                   const CeedScalar *t, CeedTransposeMode tmode,
234                                   const CeedInt Add,
235                                   const CeedScalar *u, CeedScalar *v) {
236   CeedInt tstride0 = B, tstride1 = 1;
237   if (tmode == CEED_TRANSPOSE) {
238     tstride0 = 1; tstride1 = J;
239   }
240 
241   for (CeedInt a=0; a<A; a++) {
242     for (CeedInt j=0; j<J; j++) {
243       if (!Add) {
244         for (CeedInt c=0; c<C; c++)
245           v[(a*J+j)*C+c] = 0;
246       }
247       for (CeedInt b=0; b<B; b++) {
248         for (CeedInt c=0; c<C; c++) {
249           v[(a*J+j)*C+c] += t[j*tstride0 + b*tstride1] * u[(a*B+b)*C+c];
250         }
251       }
252     }
253   }
254   return 0;
255 }
256 
257 static int CeedBasisApply_Ref(CeedBasis basis, CeedTransposeMode tmode,
258                               CeedEvalMode emode,
259                               const CeedScalar *u, CeedScalar *v) {
260   int ierr;
261   const CeedInt dim = basis->dim;
262   const CeedInt ndof = basis->ndof;
263   const CeedInt nqpt = ndof*CeedPowInt(basis->Q1d, dim);
264   const CeedInt add = (tmode == CEED_TRANSPOSE);
265 
266   if (tmode == CEED_TRANSPOSE) {
267     const CeedInt vsize = ndof*CeedPowInt(basis->P1d, dim);
268     for (CeedInt i = 0; i < vsize; i++)
269       v[i] = (CeedScalar) 0;
270   }
271   if (emode & CEED_EVAL_INTERP) {
272     CeedInt P = basis->P1d, Q = basis->Q1d;
273     if (tmode == CEED_TRANSPOSE) {
274       P = basis->Q1d; Q = basis->P1d;
275     }
276     CeedInt pre = ndof*CeedPowInt(P, dim-1), post = 1;
277     CeedScalar tmp[2][ndof*Q*CeedPowInt(P>Q?P:Q, dim-1)];
278     for (CeedInt d=0; d<dim; d++) {
279       ierr = CeedTensorContract_Ref(basis->ceed, pre, P, post, Q, basis->interp1d,
280                                     tmode, add&&(d==dim-1),
281                                     d==0?u:tmp[d%2], d==dim-1?v:tmp[(d+1)%2]);
282       CeedChk(ierr);
283       pre /= P;
284       post *= Q;
285     }
286     if (tmode == CEED_NOTRANSPOSE) {
287       v += nqpt;
288     } else {
289       u += nqpt;
290     }
291   }
292   if (emode & CEED_EVAL_GRAD) {
293     CeedInt P = basis->P1d, Q = basis->Q1d;
294     // In CEED_NOTRANSPOSE mode:
295     // u is (P^dim x nc), column-major layout (nc = ndof)
296     // v is (Q^dim x nc x dim), column-major layout (nc = ndof)
297     // In CEED_TRANSPOSE mode, the sizes of u and v are switched.
298     if (tmode == CEED_TRANSPOSE) {
299       P = basis->Q1d, Q = basis->P1d;
300     }
301     CeedScalar tmp[2][ndof*Q*CeedPowInt(P>Q?P:Q, dim-1)];
302     for (CeedInt p = 0; p < dim; p++) {
303       CeedInt pre = ndof*CeedPowInt(P, dim-1), post = 1;
304       for (CeedInt d=0; d<dim; d++) {
305         ierr = CeedTensorContract_Ref(basis->ceed, pre, P, post, Q,
306                                       (p==d)?basis->grad1d:basis->interp1d,
307                                       tmode, add&&(d==dim-1),
308                                       d==0?u:tmp[d%2], d==dim-1?v:tmp[(d+1)%2]);
309         CeedChk(ierr);
310         pre /= P;
311         post *= Q;
312       }
313       if (tmode == CEED_NOTRANSPOSE) {
314         v += nqpt;
315       } else {
316         u += nqpt;
317       }
318     }
319   }
320   if (emode & CEED_EVAL_WEIGHT) {
321     if (tmode == CEED_TRANSPOSE)
322       return CeedError(basis->ceed, 1,
323                        "CEED_EVAL_WEIGHT incompatible with CEED_TRANSPOSE");
324     CeedInt Q = basis->Q1d;
325     for (CeedInt d=0; d<dim; d++) {
326       CeedInt pre = CeedPowInt(Q, dim-d-1), post = CeedPowInt(Q, d);
327       for (CeedInt i=0; i<pre; i++) {
328         for (CeedInt j=0; j<Q; j++) {
329           for (CeedInt k=0; k<post; k++) {
330             v[(i*Q + j)*post + k] = basis->qweight1d[j]
331                                     * (d == 0 ? 1 : v[(i*Q + j)*post + k]);
332           }
333         }
334       }
335     }
336   }
337   return 0;
338 }
339 
340 static int CeedBasisDestroy_Ref(CeedBasis basis) {
341   return 0;
342 }
343 
344 static int CeedBasisCreateTensorH1_Ref(Ceed ceed, CeedInt dim, CeedInt P1d,
345                                        CeedInt Q1d, const CeedScalar *interp1d,
346                                        const CeedScalar *grad1d,
347                                        const CeedScalar *qref1d,
348                                        const CeedScalar *qweight1d,
349                                        CeedBasis basis) {
350   basis->Apply = CeedBasisApply_Ref;
351   basis->Destroy = CeedBasisDestroy_Ref;
352   return 0;
353 }
354 
355 static int CeedQFunctionApply_Ref(CeedQFunction qf, void *qdata, CeedInt Q,
356                                   const CeedScalar *const *u,
357                                   CeedScalar *const *v) {
358   int ierr;
359   ierr = qf->function(qf->ctx, qdata, Q, u, v); CeedChk(ierr);
360   return 0;
361 }
362 
363 static int CeedQFunctionDestroy_Ref(CeedQFunction qf) {
364   return 0;
365 }
366 
367 static int CeedQFunctionCreate_Ref(CeedQFunction qf) {
368   qf->Apply = CeedQFunctionApply_Ref;
369   qf->Destroy = CeedQFunctionDestroy_Ref;
370   return 0;
371 }
372 
373 static int CeedOperatorDestroy_Ref(CeedOperator op) {
374   CeedOperator_Ref *impl = op->data;
375   int ierr;
376 
377   ierr = CeedVectorDestroy(&impl->etmp); CeedChk(ierr);
378   ierr = CeedVectorDestroy(&impl->qdata); CeedChk(ierr);
379   ierr = CeedFree(&op->data); CeedChk(ierr);
380   return 0;
381 }
382 
383 static int CeedOperatorApply_Ref(CeedOperator op, CeedVector qdata,
384                                  CeedVector ustate,
385                                  CeedVector residual, CeedRequest *request) {
386   //CeedVectorView(ustate,"%g",stdout);
387   CeedOperator_Ref *impl = op->data;
388   CeedVector etmp;
389   CeedInt Q;
390   const CeedInt nc = op->basis->ndof, dim = op->basis->dim;
391   CeedScalar *Eu;
392   char *qd;
393   int ierr;
394   CeedTransposeMode lmode = CEED_NOTRANSPOSE;
395 
396   if (!impl->etmp) {
397     ierr = CeedVectorCreate(op->ceed,
398                             nc * op->Erestrict->nelem * op->Erestrict->elemsize,
399                             &impl->etmp); CeedChk(ierr);
400     // etmp is allocated when CeedVectorGetArray is called below
401   }
402   etmp = impl->etmp;
403   if (op->qf->inmode & ~CEED_EVAL_WEIGHT) {
404     ierr = CeedElemRestrictionApply(op->Erestrict, CEED_NOTRANSPOSE,
405                                     nc, lmode, ustate, etmp,
406                                     CEED_REQUEST_IMMEDIATE); CeedChk(ierr);
407   }
408   //CeedVectorView(etmp,"%g",stdout);
409   ierr = CeedBasisGetNumQuadraturePoints(op->basis, &Q); CeedChk(ierr);
410   ierr = CeedVectorGetArray(etmp, CEED_MEM_HOST, &Eu); CeedChk(ierr);
411   //for(int i=0;i<etmp->length;i+=1) printf(" %f",Eu[i]);
412   ierr = CeedVectorGetArray(qdata, CEED_MEM_HOST, (CeedScalar**)&qd);
413   //CeedVectorView(qdata,"%g",stdout);
414   CeedChk(ierr);
415   for (CeedInt e=0; e<op->Erestrict->nelem; e++) {
416     CeedScalar BEu[Q*nc*(dim+2)], BEv[Q*nc*(dim+2)], *out[5] = {0,0,0,0,0};
417     const CeedScalar *in[5] = {0,0,0,0,0};
418     // TODO: quadrature weights can be computed just once
419     ierr = CeedBasisApply(op->basis, CEED_NOTRANSPOSE, op->qf->inmode,
420                           &Eu[e*op->Erestrict->elemsize*nc], BEu);
421     //for(int i=0;i<Q*nc*(dim+2);i+=1) printf(" %f",BEu[i]);
422     CeedChk(ierr);
423     CeedScalar *u_ptr = BEu, *v_ptr = BEv;
424     if (op->qf->inmode & CEED_EVAL_INTERP) { in[0] = u_ptr; u_ptr += Q*nc; }
425     if (op->qf->inmode & CEED_EVAL_GRAD) { in[1] = u_ptr; u_ptr += Q*nc*dim; }
426     if (op->qf->inmode & CEED_EVAL_WEIGHT) { in[4] = u_ptr; u_ptr += Q; }
427     if (op->qf->outmode & CEED_EVAL_INTERP) { out[0] = v_ptr; v_ptr += Q*nc; }
428     if (op->qf->outmode & CEED_EVAL_GRAD) { out[1] = v_ptr; v_ptr += Q*nc*dim; }
429     ierr = CeedQFunctionApply(op->qf, &qd[e*Q*op->qf->qdatasize], Q, in, out);
430     CeedChk(ierr);
431     //for(int i=0;i<Q*nc;i+=1) printf(" %f",BEv[i]);
432     ierr = CeedBasisApply(op->basis, CEED_TRANSPOSE, op->qf->outmode, BEv,
433                           &Eu[e*op->Erestrict->elemsize*nc]);
434     //for(int i=0;i<Q*nc*(dim+2);i+=1) printf(" %f",Eu[e*op->Erestrict->elemsize*nc+i]);
435     CeedChk(ierr);
436   }
437   ierr = CeedVectorRestoreArray(etmp, &Eu); CeedChk(ierr);
438   //CeedVectorView(etmp,"%g",stdout);
439   //CeedVectorView(qdata,"%g",stdout);
440   if (residual) {
441     CeedScalar *res;
442     CeedVectorGetArray(residual, CEED_MEM_HOST, &res);
443     for (int i = 0; i < residual->length; i++)
444       res[i] = (CeedScalar)0;
445     ierr = CeedElemRestrictionApply(op->Erestrict, CEED_TRANSPOSE,
446                                     nc, lmode, etmp, residual,
447                                     CEED_REQUEST_IMMEDIATE); CeedChk(ierr);
448   }
449   if (request != CEED_REQUEST_IMMEDIATE && request != CEED_REQUEST_ORDERED)
450     *request = NULL;
451   return 0;
452 }
453 
454 static int CeedOperatorGetQData_Ref(CeedOperator op, CeedVector *qdata) {
455   CeedOperator_Ref *impl = op->data;
456   int ierr;
457 
458   if (!impl->qdata) {
459     CeedInt Q;
460     ierr = CeedBasisGetNumQuadraturePoints(op->basis, &Q); CeedChk(ierr);
461     ierr = CeedVectorCreate(op->ceed,
462                             op->Erestrict->nelem * Q
463                             * op->qf->qdatasize / sizeof(CeedScalar),
464                             &impl->qdata); CeedChk(ierr);
465     //CeedVectorView(impl->qdata,"%g",stdout);
466   }
467   *qdata = impl->qdata;
468   return 0;
469 }
470 
471 static int CeedOperatorCreate_Ref(CeedOperator op) {
472   CeedOperator_Ref *impl;
473   int ierr;
474 
475   ierr = CeedCalloc(1, &impl); CeedChk(ierr);
476   op->data = impl;
477   op->Destroy = CeedOperatorDestroy_Ref;
478   op->Apply = CeedOperatorApply_Ref;
479   op->GetQData = CeedOperatorGetQData_Ref;
480   return 0;
481 }
482 
483 static int CeedInit_Ref(const char *resource, Ceed ceed) {
484   if (strcmp(resource, "/cpu/self")
485       && strcmp(resource, "/cpu/self/ref"))
486     return CeedError(ceed, 1, "Ref backend cannot use resource: %s", resource);
487   ceed->VecCreate = CeedVectorCreate_Ref;
488   ceed->BasisCreateTensorH1 = CeedBasisCreateTensorH1_Ref;
489   ceed->ElemRestrictionCreate = CeedElemRestrictionCreate_Ref;
490   ceed->QFunctionCreate = CeedQFunctionCreate_Ref;
491   ceed->OperatorCreate = CeedOperatorCreate_Ref;
492   return 0;
493 }
494 
495 __attribute__((constructor))
496 static void Register(void) {
497   CeedRegister("/cpu/self/ref", CeedInit_Ref);
498 }
499