xref: /libCEED/backends/ref/ceed-ref.c (revision ae3cba8242975de624b6eb0f70528af789964ca1)
1 // Copyright (c) 2017, Lawrence Livermore National Security, LLC. Produced at
2 // the Lawrence Livermore National Laboratory. LLNL-CODE-734707. All Rights
3 // reserved. See files LICENSE and NOTICE for details.
4 //
5 // This file is part of CEED, a collection of benchmarks, miniapps, software
6 // libraries and APIs for efficient high-order finite element and spectral
7 // element discretizations for exascale applications. For more information and
8 // source code availability see http://github.com/ceed.
9 //
10 // The CEED research is supported by the Exascale Computing Project 17-SC-20-SC,
11 // a collaborative effort of two U.S. Department of Energy organizations (Office
12 // of Science and the National Nuclear Security Administration) responsible for
13 // the planning and preparation of a capable exascale ecosystem, including
14 // software, applications, hardware, advanced system engineering and early
15 // testbed platforms, in support of the nation's exascale computing imperative.
16 
17 #include <ceed-impl.h>
18 #include <string.h>
19 
20 typedef struct {
21   CeedScalar *array;
22   CeedScalar *array_allocated;
23 } CeedVector_Ref;
24 
25 typedef struct {
26   const CeedInt *indices;
27   CeedInt *indices_allocated;
28 } CeedElemRestriction_Ref;
29 
30 typedef struct {
31   CeedVector etmp;
32   CeedVector qdata;
33 } CeedOperator_Ref;
34 
35 static int CeedVectorSetArray_Ref(CeedVector vec, CeedMemType mtype,
36                                   CeedCopyMode cmode, CeedScalar *array) {
37   CeedVector_Ref *impl = vec->data;
38   int ierr;
39 
40   if (mtype != CEED_MEM_HOST)
41     return CeedError(vec->ceed, 1, "Only MemType = HOST supported");
42   ierr = CeedFree(&impl->array_allocated); CeedChk(ierr);
43   switch (cmode) {
44   case CEED_COPY_VALUES:
45     ierr = CeedMalloc(vec->length, &impl->array_allocated); CeedChk(ierr);
46     impl->array = impl->array_allocated;
47     if (array) memcpy(impl->array, array, vec->length * sizeof(array[0]));
48     break;
49   case CEED_OWN_POINTER:
50     impl->array_allocated = array;
51     impl->array = array;
52     break;
53   case CEED_USE_POINTER:
54     impl->array = array;
55   }
56   return 0;
57 }
58 
59 static int CeedVectorGetArray_Ref(CeedVector vec, CeedMemType mtype,
60                                   CeedScalar **array) {
61   CeedVector_Ref *impl = vec->data;
62   int ierr;
63 
64   if (mtype != CEED_MEM_HOST)
65     return CeedError(vec->ceed, 1, "Can only provide to HOST memory");
66   if (!impl->array) { // Allocate if array is not yet allocated
67     ierr = CeedVectorSetArray(vec, CEED_MEM_HOST, CEED_COPY_VALUES, NULL);
68     CeedChk(ierr);
69   }
70   *array = impl->array;
71   return 0;
72 }
73 
74 static int CeedVectorGetArrayRead_Ref(CeedVector vec, CeedMemType mtype,
75                                       const CeedScalar **array) {
76   CeedVector_Ref *impl = vec->data;
77   int ierr;
78 
79   if (mtype != CEED_MEM_HOST)
80     return CeedError(vec->ceed, 1, "Can only provide to HOST memory");
81   if (!impl->array) { // Allocate if array is not yet allocated
82     ierr = CeedVectorSetArray(vec, CEED_MEM_HOST, CEED_COPY_VALUES, NULL);
83     CeedChk(ierr);
84   }
85   *array = impl->array;
86   return 0;
87 }
88 
89 static int CeedVectorRestoreArray_Ref(CeedVector vec, CeedScalar **array) {
90   *array = NULL;
91   return 0;
92 }
93 
94 static int CeedVectorRestoreArrayRead_Ref(CeedVector vec,
95     const CeedScalar **array) {
96   *array = NULL;
97   return 0;
98 }
99 
100 static int CeedVectorDestroy_Ref(CeedVector vec) {
101   CeedVector_Ref *impl = vec->data;
102   int ierr;
103 
104   ierr = CeedFree(&impl->array_allocated); CeedChk(ierr);
105   ierr = CeedFree(&vec->data); CeedChk(ierr);
106   return 0;
107 }
108 
109 static int CeedVectorCreate_Ref(Ceed ceed, CeedInt n, CeedVector vec) {
110   CeedVector_Ref *impl;
111   int ierr;
112 
113   vec->SetArray = CeedVectorSetArray_Ref;
114   vec->GetArray = CeedVectorGetArray_Ref;
115   vec->GetArrayRead = CeedVectorGetArrayRead_Ref;
116   vec->RestoreArray = CeedVectorRestoreArray_Ref;
117   vec->RestoreArrayRead = CeedVectorRestoreArrayRead_Ref;
118   vec->Destroy = CeedVectorDestroy_Ref;
119   ierr = CeedCalloc(1,&impl); CeedChk(ierr);
120   vec->data = impl;
121   return 0;
122 }
123 
124 static int CeedElemRestrictionApply_Ref(CeedElemRestriction r,
125                                         CeedTransposeMode tmode, CeedVector u,
126                                         CeedVector v, CeedRequest *request) {
127   CeedElemRestriction_Ref *impl = r->data;
128   int ierr;
129   const CeedScalar *uu;
130   CeedScalar *vv;
131 
132   ierr = CeedVectorGetArrayRead(u, CEED_MEM_HOST, &uu); CeedChk(ierr);
133   ierr = CeedVectorGetArray(v, CEED_MEM_HOST, &vv); CeedChk(ierr);
134   if (tmode == CEED_NOTRANSPOSE) {
135     for (CeedInt i=0; i<r->nelem*r->elemsize; i++) vv[i] = uu[impl->indices[i]];
136   } else {
137     for (CeedInt i=0; i<r->nelem*r->elemsize; i++) vv[impl->indices[i]] += uu[i];
138   }
139   ierr = CeedVectorRestoreArrayRead(u, &uu); CeedChk(ierr);
140   ierr = CeedVectorRestoreArray(v, &vv); CeedChk(ierr);
141   if (request != CEED_REQUEST_IMMEDIATE) *request = NULL;
142   return 0;
143 }
144 
145 static int CeedElemRestrictionDestroy_Ref(CeedElemRestriction r) {
146   CeedElemRestriction_Ref *impl = r->data;
147   int ierr;
148 
149   ierr = CeedFree(&impl->indices_allocated); CeedChk(ierr);
150   ierr = CeedFree(&r->data); CeedChk(ierr);
151   return 0;
152 }
153 
154 static int CeedElemRestrictionCreate_Ref(CeedElemRestriction r,
155     CeedMemType mtype,
156     CeedCopyMode cmode, const CeedInt *indices) {
157   int ierr;
158   CeedElemRestriction_Ref *impl;
159 
160   if (mtype != CEED_MEM_HOST)
161     return CeedError(r->ceed, 1, "Only MemType = HOST supported");
162   ierr = CeedCalloc(1,&impl); CeedChk(ierr);
163   switch (cmode) {
164   case CEED_COPY_VALUES:
165     ierr = CeedMalloc(r->nelem*r->elemsize, &impl->indices_allocated);
166     CeedChk(ierr);
167     memcpy(impl->indices_allocated, indices,
168            r->nelem * r->elemsize * sizeof(indices[0]));
169     impl->indices = impl->indices_allocated;
170     break;
171   case CEED_OWN_POINTER:
172     impl->indices_allocated = (CeedInt*)indices;
173     impl->indices = impl->indices_allocated;
174     break;
175   case CEED_USE_POINTER:
176     impl->indices = indices;
177   }
178   r->data = impl;
179   r->Apply = CeedElemRestrictionApply_Ref;
180   r->Destroy = CeedElemRestrictionDestroy_Ref;
181   return 0;
182 }
183 
184 // Contracts on the middle index
185 // NOTRANSPOSE: V_ajc = T_jb U_abc
186 // TRANSPOSE:   V_ajc = T_bj U_abc
187 static int CeedTensorContract_Ref(Ceed ceed,
188                                   CeedInt A, CeedInt B, CeedInt C, CeedInt J,
189                                   const CeedScalar *t, CeedTransposeMode tmode,
190                                   const CeedScalar *u, CeedScalar *v) {
191   CeedInt tstride0 = B, tstride1 = 1;
192   if (tmode == CEED_TRANSPOSE) {
193     tstride0 = 1; tstride1 = J;
194   }
195 
196   for (CeedInt a=0; a<A; a++) {
197     for (CeedInt j=0; j<J; j++) {
198       for (CeedInt c=0; c<C; c++)
199         v[(a*J+j)*C+c] = 0;
200       for (CeedInt b=0; b<B; b++) {
201         for (CeedInt c=0; c<C; c++) {
202           v[(a*J+j)*C+c] += t[j*tstride0 + b*tstride1] * u[(a*B+b)*C+c];
203         }
204       }
205     }
206   }
207   return 0;
208 }
209 
210 static int CeedBasisApply_Ref(CeedBasis basis, CeedTransposeMode tmode,
211                               CeedEvalMode emode,
212                               const CeedScalar *u, CeedScalar *v) {
213   int ierr;
214   const CeedInt dim = basis->dim;
215   const CeedInt ndof = basis->ndof;
216 
217   switch (emode) {
218   case CEED_EVAL_NONE: break;
219   case CEED_EVAL_INTERP: {
220     CeedInt P = basis->P1d, Q = basis->Q1d;
221     if (tmode == CEED_TRANSPOSE) {
222       P = basis->Q1d; Q = basis->P1d;
223     }
224     CeedInt pre = ndof*CeedPowInt(P, dim-1), post = 1;
225     CeedScalar tmp[2][Q*CeedPowInt(P>Q?P:Q, dim-1)];
226     for (CeedInt d=0; d<dim; d++) {
227       ierr = CeedTensorContract_Ref(basis->ceed, pre, P, post, Q, basis->interp1d,
228                                     tmode,
229                                     d==0?u:tmp[d%2], d==dim-1?v:tmp[(d+1)%2]); CeedChk(ierr);
230       pre /= P;
231       post *= Q;
232     }
233   } break;
234   case CEED_EVAL_WEIGHT: {
235     if (tmode == CEED_TRANSPOSE)
236       return CeedError(basis->ceed, 1, "CEED_EVAL_WEIGHT incompatible with CEED_TRANSPOSE");
237     CeedInt Q = basis->Q1d;
238     for (CeedInt d=0; d<dim; d++) {
239       CeedInt pre = CeedPowInt(Q, dim-d-1), post = CeedPowInt(Q, d);
240       for (CeedInt i=0; i<pre; i++) {
241         for (CeedInt j=0; j<Q; j++) {
242           for (CeedInt k=0; k<post; k++) {
243             v[(i*Q + j)*post + k] = basis->qweight1d[j]
244               * (d == 0 ? 1 : v[(i*Q + j)*post + k]);
245           }
246         }
247       }
248     }
249   } break;
250   default:
251     return CeedError(basis->ceed, 1, "EvalMode %d not supported", emode);
252   }
253   return 0;
254 }
255 
256 static int CeedBasisDestroy_Ref(CeedBasis basis) {
257   return 0;
258 }
259 
260 static int CeedBasisCreateTensorH1_Ref(Ceed ceed, CeedInt dim, CeedInt P1d,
261                                        CeedInt Q1d, const CeedScalar *interp1d,
262                                        const CeedScalar *grad1d,
263                                        const CeedScalar *qref1d,
264                                        const CeedScalar *qweight1d,
265                                        CeedBasis basis) {
266   basis->Apply = CeedBasisApply_Ref;
267   basis->Destroy = CeedBasisDestroy_Ref;
268   return 0;
269 }
270 
271 static int CeedQFunctionApply_Ref(CeedQFunction qf, void *qdata, CeedInt Q,
272                                   const CeedScalar *const *u,
273                                   CeedScalar *const *v) {
274   int ierr;
275   ierr = qf->function(qf->ctx, qdata, Q, u, v); CeedChk(ierr);
276   return 0;
277 }
278 
279 static int CeedQFunctionDestroy_Ref(CeedQFunction qf) {
280   return 0;
281 }
282 
283 static int CeedQFunctionCreate_Ref(CeedQFunction qf) {
284   qf->Apply = CeedQFunctionApply_Ref;
285   qf->Destroy = CeedQFunctionDestroy_Ref;
286   return 0;
287 }
288 
289 static int CeedOperatorDestroy_Ref(CeedOperator op) {
290   CeedOperator_Ref *impl = op->data;
291   int ierr;
292 
293   ierr = CeedVectorDestroy(&impl->etmp); CeedChk(ierr);
294   ierr = CeedVectorDestroy(&impl->qdata); CeedChk(ierr);
295   ierr = CeedFree(&op->data); CeedChk(ierr);
296   return 0;
297 }
298 
299 static int CeedOperatorApply_Ref(CeedOperator op, CeedVector qdata,
300                                  CeedVector ustate,
301                                  CeedVector residual, CeedRequest *request) {
302   CeedOperator_Ref *impl = op->data;
303   CeedVector etmp;
304   CeedInt Q;
305   CeedScalar *Eu;
306   char *qd;
307   int ierr;
308 
309   if (!impl->etmp) {
310     ierr = CeedVectorCreate(op->ceed,
311                             op->Erestrict->nelem * op->Erestrict->elemsize,
312                             &impl->etmp); CeedChk(ierr);
313   }
314   etmp = impl->etmp;
315   if (op->qf->inmode != CEED_EVAL_NONE || op->qf->inmode != CEED_EVAL_WEIGHT) {
316     ierr = CeedElemRestrictionApply(op->Erestrict, CEED_NOTRANSPOSE, ustate, etmp,
317                                     CEED_REQUEST_IMMEDIATE); CeedChk(ierr);
318   }
319   ierr = CeedBasisGetNumQuadraturePoints(op->basis, &Q); CeedChk(ierr);
320   ierr = CeedVectorGetArray(etmp, CEED_MEM_HOST, &Eu); CeedChk(ierr);
321   ierr = CeedVectorGetArray(qdata, CEED_MEM_HOST, (CeedScalar**)&qd); CeedChk(ierr);
322   for (CeedInt e=0; e<op->Erestrict->nelem; e++) {
323     CeedScalar BEu[Q], BEv[Q], *out[1];
324     const CeedScalar *in[1];
325     ierr = CeedBasisApply(op->basis, CEED_NOTRANSPOSE, op->qf->inmode,
326                           &Eu[e*op->Erestrict->elemsize], BEu); CeedChk(ierr);
327     in[0] = BEu;
328     out[0] = BEv;
329     ierr = CeedQFunctionApply(op->qf, &qd[e*Q*op->qf->qdatasize], Q, in, out);
330     CeedChk(ierr);
331     ierr = CeedBasisApply(op->basis, CEED_TRANSPOSE, op->qf->outmode, BEv,
332                           &Eu[e*op->Erestrict->elemsize]); CeedChk(ierr);
333   }
334   ierr = CeedVectorRestoreArray(etmp, &Eu); CeedChk(ierr);
335   if (residual) {
336     ierr = CeedElemRestrictionApply(op->Erestrict, CEED_TRANSPOSE, etmp, residual,
337                                     CEED_REQUEST_IMMEDIATE); CeedChk(ierr);
338   }
339   if (request != CEED_REQUEST_IMMEDIATE) *request = NULL;
340   return 0;
341 }
342 
343 static int CeedOperatorGetQData_Ref(CeedOperator op, CeedVector *qdata) {
344   CeedOperator_Ref *impl = op->data;
345   int ierr;
346 
347   if (!impl->qdata) {
348     CeedInt Q;
349     ierr = CeedBasisGetNumQuadraturePoints(op->basis, &Q); CeedChk(ierr);
350     ierr = CeedVectorCreate(op->ceed,
351                             op->Erestrict->nelem * Q * op->basis->ndof,
352                             &impl->qdata); CeedChk(ierr);
353   }
354   *qdata = impl->qdata;
355   return 0;
356 }
357 
358 static int CeedOperatorCreate_Ref(CeedOperator op) {
359   CeedOperator_Ref *impl;
360   int ierr;
361 
362   ierr = CeedCalloc(1, &impl); CeedChk(ierr);
363   op->data = impl;
364   op->Destroy = CeedOperatorDestroy_Ref;
365   op->Apply = CeedOperatorApply_Ref;
366   op->GetQData = CeedOperatorGetQData_Ref;
367   return 0;
368 }
369 
370 static int CeedInit_Ref(const char *resource, Ceed ceed) {
371   if (strcmp(resource, "/cpu/self")
372       && strcmp(resource, "/cpu/self/ref"))
373     return CeedError(ceed, 1, "Ref backend cannot use resource: %s", resource);
374   ceed->VecCreate = CeedVectorCreate_Ref;
375   ceed->BasisCreateTensorH1 = CeedBasisCreateTensorH1_Ref;
376   ceed->ElemRestrictionCreate = CeedElemRestrictionCreate_Ref;
377   ceed->QFunctionCreate = CeedQFunctionCreate_Ref;
378   ceed->OperatorCreate = CeedOperatorCreate_Ref;
379   return 0;
380 }
381 
382 __attribute__((constructor))
383 static void Register(void) {
384   CeedRegister("/cpu/self/ref", CeedInit_Ref);
385 }
386