xref: /libCEED/backends/opt/ceed-opt-operator.c (revision 437930d19388999b5cc2d76e2fe0d14f58fb41f3)
1 // Copyright (c) 2017-2018, Lawrence Livermore National Security, LLC.
2 // Produced at the Lawrence Livermore National Laboratory. LLNL-CODE-734707.
3 // All Rights reserved. See files LICENSE and NOTICE for details.
4 //
5 // This file is part of CEED, a collection of benchmarks, miniapps, software
6 // libraries and APIs for efficient high-order finite element and spectral
7 // element discretizations for exascale applications. For more information and
8 // source code availability see http://github.com/ceed.
9 //
10 // The CEED research is supported by the Exascale Computing Project 17-SC-20-SC,
11 // a collaborative effort of two U.S. Department of Energy organizations (Office
12 // of Science and the National Nuclear Security Administration) responsible for
13 // the planning and preparation of a capable exascale ecosystem, including
14 // software, applications, hardware, advanced system engineering and early
15 // testbed platforms, in support of the nation's exascale computing imperative.
16 
17 #include <ceed/ceed.h>
18 #include <ceed/backend.h>
19 #include <stdbool.h>
20 #include <stdint.h>
21 #include <string.h>
22 #include "ceed-opt.h"
23 
24 //------------------------------------------------------------------------------
25 // Setup Input/Output Fields
26 //------------------------------------------------------------------------------
27 static int CeedOperatorSetupFields_Opt(CeedQFunction qf, CeedOperator op,
28                                        bool is_input, const CeedInt blk_size,
29                                        CeedElemRestriction *blk_restr,
30                                        CeedVector *e_vecs_full, CeedVector *e_vecs,
31                                        CeedVector *q_vecs, CeedInt start_e,
32                                        CeedInt num_fields, CeedInt Q) {
33   CeedInt ierr, num_comp, size, P;
34   Ceed ceed;
35   ierr = CeedOperatorGetCeed(op, &ceed); CeedChkBackend(ierr);
36   CeedBasis basis;
37   CeedElemRestriction r;
38   CeedOperatorField *op_fields;
39   CeedQFunctionField *qf_fields;
40   if (is_input) {
41     ierr = CeedOperatorGetFields(op, NULL, &op_fields, NULL, NULL);
42     CeedChkBackend(ierr);
43     ierr = CeedQFunctionGetFields(qf, NULL, &qf_fields, NULL, NULL);
44     CeedChkBackend(ierr);
45   } else {
46     ierr = CeedOperatorGetFields(op, NULL, NULL, NULL,&op_fields);
47     CeedChkBackend(ierr);
48     ierr = CeedQFunctionGetFields(qf, NULL, NULL, NULL, &qf_fields);
49     CeedChkBackend(ierr);
50   }
51 
52   // Loop over fields
53   for (CeedInt i=0; i<num_fields; i++) {
54     CeedEvalMode eval_mode;
55     ierr = CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode);
56     CeedChkBackend(ierr);
57 
58     if (eval_mode != CEED_EVAL_WEIGHT) {
59       ierr = CeedOperatorFieldGetElemRestriction(op_fields[i], &r);
60       CeedChkBackend(ierr);
61       Ceed ceed;
62       ierr = CeedElemRestrictionGetCeed(r, &ceed); CeedChkBackend(ierr);
63       CeedInt num_elem, elem_size, l_size, comp_stride;
64       ierr = CeedElemRestrictionGetNumElements(r, &num_elem); CeedChkBackend(ierr);
65       ierr = CeedElemRestrictionGetElementSize(r, &elem_size); CeedChkBackend(ierr);
66       ierr = CeedElemRestrictionGetLVectorSize(r, &l_size); CeedChkBackend(ierr);
67       ierr = CeedElemRestrictionGetNumComponents(r, &num_comp); CeedChkBackend(ierr);
68 
69       bool strided;
70       ierr = CeedElemRestrictionIsStrided(r, &strided); CeedChkBackend(ierr);
71       if (strided) {
72         CeedInt strides[3];
73         ierr = CeedElemRestrictionGetStrides(r, &strides); CeedChkBackend(ierr);
74         ierr = CeedElemRestrictionCreateBlockedStrided(ceed, num_elem, elem_size,
75                blk_size, num_comp, l_size, strides, &blk_restr[i+start_e]);
76         CeedChkBackend(ierr);
77       } else {
78         const CeedInt *offsets = NULL;
79         ierr = CeedElemRestrictionGetOffsets(r, CEED_MEM_HOST, &offsets);
80         CeedChkBackend(ierr);
81         ierr = CeedElemRestrictionGetCompStride(r, &comp_stride); CeedChkBackend(ierr);
82         ierr = CeedElemRestrictionCreateBlocked(ceed, num_elem, elem_size,
83                                                 blk_size, num_comp, comp_stride,
84                                                 l_size, CEED_MEM_HOST,
85                                                 CEED_COPY_VALUES, offsets,
86                                                 &blk_restr[i+start_e]);
87         CeedChkBackend(ierr);
88         ierr = CeedElemRestrictionRestoreOffsets(r, &offsets); CeedChkBackend(ierr);
89       }
90       ierr = CeedElemRestrictionCreateVector(blk_restr[i+start_e], NULL,
91                                              &e_vecs_full[i+start_e]);
92       CeedChkBackend(ierr);
93     }
94 
95     switch(eval_mode) {
96     case CEED_EVAL_NONE:
97       ierr = CeedQFunctionFieldGetSize(qf_fields[i], &size); CeedChkBackend(ierr);
98       ierr = CeedVectorCreate(ceed, Q*size*blk_size, &e_vecs[i]);
99       CeedChkBackend(ierr);
100       ierr = CeedVectorCreate(ceed, Q*size*blk_size, &q_vecs[i]);
101       CeedChkBackend(ierr);
102       break;
103     case CEED_EVAL_INTERP:
104     case CEED_EVAL_GRAD:
105       ierr = CeedOperatorFieldGetBasis(op_fields[i], &basis); CeedChkBackend(ierr);
106       ierr = CeedQFunctionFieldGetSize(qf_fields[i], &size); CeedChkBackend(ierr);
107       ierr = CeedBasisGetNumNodes(basis, &P); CeedChkBackend(ierr);
108       ierr = CeedBasisGetNumComponents(basis, &num_comp); CeedChkBackend(ierr);
109       ierr = CeedVectorCreate(ceed, P*num_comp*blk_size, &e_vecs[i]);
110       CeedChkBackend(ierr);
111       ierr = CeedVectorCreate(ceed, Q*size*blk_size, &q_vecs[i]);
112       CeedChkBackend(ierr);
113       break;
114     case CEED_EVAL_WEIGHT: // Only on input fields
115       ierr = CeedOperatorFieldGetBasis(op_fields[i], &basis); CeedChkBackend(ierr);
116       ierr = CeedVectorCreate(ceed, Q*blk_size, &q_vecs[i]); CeedChkBackend(ierr);
117       ierr = CeedBasisApply(basis, blk_size, CEED_NOTRANSPOSE,
118                             CEED_EVAL_WEIGHT, CEED_VECTOR_NONE, q_vecs[i]);
119       CeedChkBackend(ierr);
120 
121       break;
122     case CEED_EVAL_DIV:
123       break; // Not implemented
124     case CEED_EVAL_CURL:
125       break; // Not implemented
126     }
127     if (is_input && !!e_vecs[i]) {
128       ierr = CeedVectorSetArray(e_vecs[i], CEED_MEM_HOST,
129                                 CEED_COPY_VALUES, NULL); CeedChkBackend(ierr);
130     }
131   }
132   return CEED_ERROR_SUCCESS;
133 }
134 
135 //------------------------------------------------------------------------------
136 // Setup Operator
137 //------------------------------------------------------------------------------
138 static int CeedOperatorSetup_Opt(CeedOperator op) {
139   int ierr;
140   bool is_setup_done;
141   ierr = CeedOperatorIsSetupDone(op, &is_setup_done); CeedChkBackend(ierr);
142   if (is_setup_done) return CEED_ERROR_SUCCESS;
143   Ceed ceed;
144   ierr = CeedOperatorGetCeed(op, &ceed); CeedChkBackend(ierr);
145   Ceed_Opt *ceed_impl;
146   ierr = CeedGetData(ceed, &ceed_impl); CeedChkBackend(ierr);
147   const CeedInt blk_size = ceed_impl->blk_size;
148   CeedOperator_Opt *impl;
149   ierr = CeedOperatorGetData(op, &impl); CeedChkBackend(ierr);
150   CeedQFunction qf;
151   ierr = CeedOperatorGetQFunction(op, &qf); CeedChkBackend(ierr);
152   CeedInt Q, num_input_fields, num_output_fields;
153   ierr = CeedOperatorGetNumQuadraturePoints(op, &Q); CeedChkBackend(ierr);
154   ierr = CeedQFunctionIsIdentity(qf, &impl->is_identity_qf); CeedChkBackend(ierr);
155   CeedOperatorField *op_input_fields, *op_output_fields;
156   ierr = CeedOperatorGetFields(op, &num_input_fields, &op_input_fields,
157                                &num_output_fields, &op_output_fields);
158   CeedChkBackend(ierr);
159   CeedQFunctionField *qf_input_fields, *qf_output_fields;
160   ierr = CeedQFunctionGetFields(qf, NULL, &qf_input_fields, NULL,
161                                 &qf_output_fields);
162   CeedChkBackend(ierr);
163 
164   // Allocate
165   ierr = CeedCalloc(num_input_fields + num_output_fields, &impl->blk_restr);
166   CeedChkBackend(ierr);
167   ierr = CeedCalloc(num_input_fields + num_output_fields, &impl->e_vecs_full);
168   CeedChkBackend(ierr);
169 
170   ierr = CeedCalloc(CEED_FIELD_MAX, &impl->input_states); CeedChkBackend(ierr);
171   ierr = CeedCalloc(CEED_FIELD_MAX, &impl->e_vecs_in); CeedChkBackend(ierr);
172   ierr = CeedCalloc(CEED_FIELD_MAX, &impl->e_vecs_out); CeedChkBackend(ierr);
173   ierr = CeedCalloc(CEED_FIELD_MAX, &impl->q_vecs_in); CeedChkBackend(ierr);
174   ierr = CeedCalloc(CEED_FIELD_MAX, &impl->q_vecs_out); CeedChkBackend(ierr);
175 
176   impl->num_inputs = num_input_fields;
177   impl->num_outputs = num_output_fields;
178 
179   // Set up infield and outfield pointer arrays
180   // Infields
181   ierr = CeedOperatorSetupFields_Opt(qf, op, true, blk_size, impl->blk_restr,
182                                      impl->e_vecs_full, impl->e_vecs_in,
183                                      impl->q_vecs_in, 0, num_input_fields, Q);
184   CeedChkBackend(ierr);
185   // Outfields
186   ierr = CeedOperatorSetupFields_Opt(qf, op, false, blk_size, impl->blk_restr,
187                                      impl->e_vecs_full, impl->e_vecs_out,
188                                      impl->q_vecs_out, num_input_fields,
189                                      num_output_fields, Q);
190   CeedChkBackend(ierr);
191 
192   // Identity QFunctions
193   if (impl->is_identity_qf) {
194     CeedEvalMode in_mode, out_mode;
195     CeedQFunctionField *in_fields, *out_fields;
196     ierr = CeedQFunctionGetFields(qf, NULL, &in_fields, NULL, &out_fields);
197     CeedChkBackend(ierr);
198     ierr = CeedQFunctionFieldGetEvalMode(in_fields[0], &in_mode);
199     CeedChkBackend(ierr);
200     ierr = CeedQFunctionFieldGetEvalMode(out_fields[0], &out_mode);
201     CeedChkBackend(ierr);
202 
203     if (in_mode == CEED_EVAL_NONE && out_mode == CEED_EVAL_NONE) {
204       impl->is_identity_restr_op = true;
205     } else {
206       ierr = CeedVectorDestroy(&impl->q_vecs_out[0]); CeedChkBackend(ierr);
207       impl->q_vecs_out[0] = impl->q_vecs_in[0];
208       ierr = CeedVectorAddReference(impl->q_vecs_in[0]); CeedChkBackend(ierr);
209     }
210   }
211 
212   ierr = CeedOperatorSetSetupDone(op); CeedChkBackend(ierr);
213 
214   return CEED_ERROR_SUCCESS;
215 }
216 
217 //------------------------------------------------------------------------------
218 // Setup Input Fields
219 //------------------------------------------------------------------------------
220 static inline int CeedOperatorSetupInputs_Opt(CeedInt num_input_fields,
221     CeedQFunctionField *qf_input_fields, CeedOperatorField *op_input_fields,
222     CeedVector in_vec, CeedScalar *e_data[2*CEED_FIELD_MAX], CeedOperator_Opt *impl,
223     CeedRequest *request) {
224   CeedInt ierr;
225   CeedEvalMode eval_mode;
226   CeedVector vec;
227   uint64_t state;
228 
229   for (CeedInt i=0; i<num_input_fields; i++) {
230     ierr = CeedQFunctionFieldGetEvalMode(qf_input_fields[i], &eval_mode);
231     CeedChkBackend(ierr);
232     if (eval_mode == CEED_EVAL_WEIGHT) { // Skip
233     } else {
234       // Get input vector
235       ierr = CeedOperatorFieldGetVector(op_input_fields[i], &vec);
236       CeedChkBackend(ierr);
237       if (vec != CEED_VECTOR_ACTIVE) {
238         // Restrict
239         ierr = CeedVectorGetState(vec, &state); CeedChkBackend(ierr);
240         if (state != impl->input_states[i]) {
241           ierr = CeedElemRestrictionApply(impl->blk_restr[i], CEED_NOTRANSPOSE,
242                                           vec, impl->e_vecs_full[i], request);
243           CeedChkBackend(ierr);
244           impl->input_states[i] = state;
245         }
246         // Get evec
247         ierr = CeedVectorGetArrayRead(impl->e_vecs_full[i], CEED_MEM_HOST,
248                                       (const CeedScalar **) &e_data[i]);
249         CeedChkBackend(ierr);
250       } else {
251         // Set Qvec for CEED_EVAL_NONE
252         if (eval_mode == CEED_EVAL_NONE) {
253           ierr = CeedVectorGetArrayRead(impl->e_vecs_in[i], CEED_MEM_HOST,
254                                         (const CeedScalar **)&e_data[i]);
255           CeedChkBackend(ierr);
256           ierr = CeedVectorSetArray(impl->q_vecs_in[i], CEED_MEM_HOST,
257                                     CEED_USE_POINTER, e_data[i]); CeedChkBackend(ierr);
258           ierr = CeedVectorRestoreArrayRead(impl->e_vecs_in[i],
259                                             (const CeedScalar **)&e_data[i]);
260           CeedChkBackend(ierr);
261         }
262       }
263     }
264   }
265   return CEED_ERROR_SUCCESS;
266 }
267 
268 //------------------------------------------------------------------------------
269 // Input Basis Action
270 //------------------------------------------------------------------------------
271 static inline int CeedOperatorInputBasis_Opt(CeedInt e, CeedInt Q,
272     CeedQFunctionField *qf_input_fields, CeedOperatorField *op_input_fields,
273     CeedInt num_input_fields, CeedInt blk_size, CeedVector in_vec, bool skip_active,
274     CeedScalar *e_data[2*CEED_FIELD_MAX], CeedOperator_Opt *impl,
275     CeedRequest *request) {
276   CeedInt ierr;
277   CeedInt dim, elem_size, size;
278   CeedElemRestriction elem_restr;
279   CeedEvalMode eval_mode;
280   CeedBasis basis;
281   CeedVector vec;
282 
283   for (CeedInt i=0; i<num_input_fields; i++) {
284     ierr = CeedOperatorFieldGetVector(op_input_fields[i], &vec);
285     CeedChkBackend(ierr);
286     // Skip active input
287     if (skip_active) {
288       if (vec == CEED_VECTOR_ACTIVE)
289         continue;
290     }
291 
292     CeedInt active_in = 0;
293     // Get elem_size, eval_mode, size
294     ierr = CeedOperatorFieldGetElemRestriction(op_input_fields[i], &elem_restr);
295     CeedChkBackend(ierr);
296     ierr = CeedElemRestrictionGetElementSize(elem_restr, &elem_size);
297     CeedChkBackend(ierr);
298     ierr = CeedQFunctionFieldGetEvalMode(qf_input_fields[i], &eval_mode);
299     CeedChkBackend(ierr);
300     ierr = CeedQFunctionFieldGetSize(qf_input_fields[i], &size);
301     CeedChkBackend(ierr);
302     // Restrict block active input
303     if (vec == CEED_VECTOR_ACTIVE) {
304       ierr = CeedElemRestrictionApplyBlock(impl->blk_restr[i], e/blk_size,
305                                            CEED_NOTRANSPOSE, in_vec,
306                                            impl->e_vecs_in[i], request);
307       CeedChkBackend(ierr);
308       active_in = 1;
309     }
310     // Basis action
311     switch(eval_mode) {
312     case CEED_EVAL_NONE:
313       if (!active_in) {
314         ierr = CeedVectorSetArray(impl->q_vecs_in[i], CEED_MEM_HOST,
315                                   CEED_USE_POINTER, &e_data[i][e*Q*size]);
316         CeedChkBackend(ierr);
317       }
318       break;
319     case CEED_EVAL_INTERP:
320       ierr = CeedOperatorFieldGetBasis(op_input_fields[i], &basis);
321       CeedChkBackend(ierr);
322       if (!active_in) {
323         ierr = CeedVectorSetArray(impl->e_vecs_in[i], CEED_MEM_HOST,
324                                   CEED_USE_POINTER, &e_data[i][e*elem_size*size]);
325         CeedChkBackend(ierr);
326       }
327       ierr = CeedBasisApply(basis, blk_size, CEED_NOTRANSPOSE,
328                             CEED_EVAL_INTERP, impl->e_vecs_in[i],
329                             impl->q_vecs_in[i]); CeedChkBackend(ierr);
330       break;
331     case CEED_EVAL_GRAD:
332       ierr = CeedOperatorFieldGetBasis(op_input_fields[i], &basis);
333       CeedChkBackend(ierr);
334       if (!active_in) {
335         ierr = CeedBasisGetDimension(basis, &dim); CeedChkBackend(ierr);
336         ierr = CeedVectorSetArray(impl->e_vecs_in[i], CEED_MEM_HOST,
337                                   CEED_USE_POINTER,
338                                   &e_data[i][e*elem_size*size/dim]);
339         CeedChkBackend(ierr);
340       }
341       ierr = CeedBasisApply(basis, blk_size, CEED_NOTRANSPOSE,
342                             CEED_EVAL_GRAD, impl->e_vecs_in[i],
343                             impl->q_vecs_in[i]); CeedChkBackend(ierr);
344       break;
345     case CEED_EVAL_WEIGHT:
346       break;  // No action
347     // LCOV_EXCL_START
348     case CEED_EVAL_DIV:
349     case CEED_EVAL_CURL: {
350       ierr = CeedOperatorFieldGetBasis(op_input_fields[i], &basis);
351       CeedChkBackend(ierr);
352       Ceed ceed;
353       ierr = CeedBasisGetCeed(basis, &ceed); CeedChkBackend(ierr);
354       return CeedError(ceed, CEED_ERROR_BACKEND,
355                        "Ceed evaluation mode not implemented");
356       // LCOV_EXCL_STOP
357     }
358     }
359   }
360   return CEED_ERROR_SUCCESS;
361 }
362 
363 //------------------------------------------------------------------------------
364 // Output Basis Action
365 //------------------------------------------------------------------------------
366 static inline int CeedOperatorOutputBasis_Opt(CeedInt e, CeedInt Q,
367     CeedQFunctionField *qf_output_fields, CeedOperatorField *op_output_fields,
368     CeedInt blk_size, CeedInt num_input_fields, CeedInt num_output_fields,
369     CeedOperator op, CeedVector out_vec, CeedOperator_Opt *impl,
370     CeedRequest *request) {
371   CeedInt ierr;
372   CeedElemRestriction elem_restr;
373   CeedEvalMode eval_mode;
374   CeedBasis basis;
375   CeedVector vec;
376 
377   for (CeedInt i=0; i<num_output_fields; i++) {
378     // Get elem_size, eval_mode, size
379     ierr = CeedOperatorFieldGetElemRestriction(op_output_fields[i], &elem_restr);
380     CeedChkBackend(ierr);
381     ierr = CeedQFunctionFieldGetEvalMode(qf_output_fields[i], &eval_mode);
382     CeedChkBackend(ierr);
383     // Basis action
384     switch(eval_mode) {
385     case CEED_EVAL_NONE:
386       break; // No action
387     case CEED_EVAL_INTERP:
388       ierr = CeedOperatorFieldGetBasis(op_output_fields[i], &basis);
389       CeedChkBackend(ierr);
390       ierr = CeedBasisApply(basis, blk_size, CEED_TRANSPOSE,
391                             CEED_EVAL_INTERP, impl->q_vecs_out[i],
392                             impl->e_vecs_out[i]); CeedChkBackend(ierr);
393       break;
394     case CEED_EVAL_GRAD:
395       ierr = CeedOperatorFieldGetBasis(op_output_fields[i], &basis);
396       CeedChkBackend(ierr);
397       ierr = CeedBasisApply(basis, blk_size, CEED_TRANSPOSE,
398                             CEED_EVAL_GRAD, impl->q_vecs_out[i],
399                             impl->e_vecs_out[i]); CeedChkBackend(ierr);
400       break;
401     // LCOV_EXCL_START
402     case CEED_EVAL_WEIGHT: {
403       Ceed ceed;
404       ierr = CeedOperatorGetCeed(op, &ceed); CeedChkBackend(ierr);
405       return CeedError(ceed, CEED_ERROR_BACKEND,
406                        "CEED_EVAL_WEIGHT cannot be an output "
407                        "evaluation mode");
408     }
409     case CEED_EVAL_DIV:
410     case CEED_EVAL_CURL: {
411       Ceed ceed;
412       ierr = CeedOperatorGetCeed(op, &ceed); CeedChkBackend(ierr);
413       return CeedError(ceed, CEED_ERROR_BACKEND,
414                        "Ceed evaluation mode not implemented");
415       // LCOV_EXCL_STOP
416     }
417     }
418     // Restrict output block
419     // Get output vector
420     ierr = CeedOperatorFieldGetVector(op_output_fields[i], &vec);
421     CeedChkBackend(ierr);
422     if (vec == CEED_VECTOR_ACTIVE)
423       vec = out_vec;
424     // Restrict
425     ierr = CeedElemRestrictionApplyBlock(impl->blk_restr[i+impl->num_inputs],
426                                          e/blk_size, CEED_TRANSPOSE,
427                                          impl->e_vecs_out[i], vec, request);
428     CeedChkBackend(ierr);
429   }
430   return CEED_ERROR_SUCCESS;
431 }
432 
433 //------------------------------------------------------------------------------
434 // Restore Input Vectors
435 //------------------------------------------------------------------------------
436 static inline int CeedOperatorRestoreInputs_Opt(CeedInt num_input_fields,
437     CeedQFunctionField *qf_input_fields, CeedOperatorField *op_input_fields,
438     CeedScalar *e_data[2*CEED_FIELD_MAX], CeedOperator_Opt *impl) {
439   CeedInt ierr;
440 
441   for (CeedInt i=0; i<num_input_fields; i++) {
442     CeedEvalMode eval_mode;
443     CeedVector vec;
444     ierr = CeedQFunctionFieldGetEvalMode(qf_input_fields[i], &eval_mode);
445     CeedChkBackend(ierr);
446     ierr = CeedOperatorFieldGetVector(op_input_fields[i], &vec);
447     CeedChkBackend(ierr);
448     if (eval_mode != CEED_EVAL_WEIGHT && vec != CEED_VECTOR_ACTIVE) {
449       ierr = CeedVectorRestoreArrayRead(impl->e_vecs_full[i],
450                                         (const CeedScalar **) &e_data[i]);
451       CeedChkBackend(ierr);
452     }
453   }
454   return CEED_ERROR_SUCCESS;
455 }
456 
457 //------------------------------------------------------------------------------
458 // Operator Apply
459 //------------------------------------------------------------------------------
460 static int CeedOperatorApplyAdd_Opt(CeedOperator op, CeedVector in_vec,
461                                     CeedVector out_vec, CeedRequest *request) {
462   int ierr;
463   Ceed ceed;
464   ierr = CeedOperatorGetCeed(op, &ceed); CeedChkBackend(ierr);
465   Ceed_Opt *ceed_impl;
466   ierr = CeedGetData(ceed, &ceed_impl); CeedChkBackend(ierr);
467   CeedInt blk_size = ceed_impl->blk_size;
468   CeedOperator_Opt *impl;
469   ierr = CeedOperatorGetData(op, &impl); CeedChkBackend(ierr);
470   CeedInt Q, num_input_fields, num_output_fields, num_elem;
471   ierr = CeedOperatorGetNumElements(op, &num_elem); CeedChkBackend(ierr);
472   ierr = CeedOperatorGetNumQuadraturePoints(op, &Q); CeedChkBackend(ierr);
473   CeedInt num_blks = (num_elem/blk_size) + !!(num_elem%blk_size);
474   CeedQFunction qf;
475   ierr = CeedOperatorGetQFunction(op, &qf); CeedChkBackend(ierr);
476   CeedOperatorField *op_input_fields, *op_output_fields;
477   ierr = CeedOperatorGetFields(op, &num_input_fields, &op_input_fields,
478                                &num_output_fields, &op_output_fields);
479   CeedChkBackend(ierr);
480   CeedQFunctionField *qf_input_fields, *qf_output_fields;
481   ierr = CeedQFunctionGetFields(qf, NULL, &qf_input_fields, NULL,
482                                 &qf_output_fields);
483   CeedChkBackend(ierr);
484   CeedEvalMode eval_mode;
485   CeedScalar *e_data[2*CEED_FIELD_MAX] = {0};
486 
487   // Setup
488   ierr = CeedOperatorSetup_Opt(op); CeedChkBackend(ierr);
489 
490   // Restriction only operator
491   if (impl->is_identity_restr_op) {
492     for (CeedInt b=0; b<num_blks; b++) {
493       ierr = CeedElemRestrictionApplyBlock(impl->blk_restr[0], b, CEED_NOTRANSPOSE,
494                                            in_vec, impl->e_vecs_in[0], request); CeedChkBackend(ierr);
495       ierr = CeedElemRestrictionApplyBlock(impl->blk_restr[1], b, CEED_TRANSPOSE,
496                                            impl->e_vecs_in[0], out_vec, request); CeedChkBackend(ierr);
497     }
498     return CEED_ERROR_SUCCESS;
499   }
500 
501   // Input Evecs and Restriction
502   ierr = CeedOperatorSetupInputs_Opt(num_input_fields, qf_input_fields,
503                                      op_input_fields, in_vec, e_data,
504                                      impl, request); CeedChkBackend(ierr);
505 
506   // Output Lvecs, Evecs, and Qvecs
507   for (CeedInt i=0; i<num_output_fields; i++) {
508     // Set Qvec if needed
509     ierr = CeedQFunctionFieldGetEvalMode(qf_output_fields[i], &eval_mode);
510     CeedChkBackend(ierr);
511     if (eval_mode == CEED_EVAL_NONE) {
512       // Set qvec to single block evec
513       ierr = CeedVectorGetArrayWrite(impl->e_vecs_out[i], CEED_MEM_HOST,
514                                      &e_data[i + num_input_fields]);
515       CeedChkBackend(ierr);
516       ierr = CeedVectorSetArray(impl->q_vecs_out[i], CEED_MEM_HOST,
517                                 CEED_USE_POINTER, e_data[i + num_input_fields]);
518       CeedChkBackend(ierr);
519       ierr = CeedVectorRestoreArray(impl->e_vecs_out[i],
520                                     &e_data[i + num_input_fields]);
521       CeedChkBackend(ierr);
522     }
523   }
524 
525   // Loop through elements
526   for (CeedInt e=0; e<num_blks*blk_size; e+=blk_size) {
527     // Input basis apply
528     ierr = CeedOperatorInputBasis_Opt(e, Q, qf_input_fields, op_input_fields,
529                                       num_input_fields, blk_size, in_vec, false,
530                                       e_data, impl, request); CeedChkBackend(ierr);
531 
532     // Q function
533     if (!impl->is_identity_qf) {
534       ierr = CeedQFunctionApply(qf, Q*blk_size, impl->q_vecs_in, impl->q_vecs_out);
535       CeedChkBackend(ierr);
536     }
537 
538     // Output basis apply and restrict
539     ierr = CeedOperatorOutputBasis_Opt(e, Q, qf_output_fields, op_output_fields,
540                                        blk_size, num_input_fields, num_output_fields,
541                                        op, out_vec, impl, request);
542     CeedChkBackend(ierr);
543   }
544 
545   // Restore input arrays
546   ierr = CeedOperatorRestoreInputs_Opt(num_input_fields, qf_input_fields,
547                                        op_input_fields, e_data, impl);
548   CeedChkBackend(ierr);
549 
550   return CEED_ERROR_SUCCESS;
551 }
552 
553 //------------------------------------------------------------------------------
554 // Core code for linear QFunction assembly
555 //------------------------------------------------------------------------------
556 static inline int CeedOperatorLinearAssembleQFunctionCore_Opt(CeedOperator op,
557     bool build_objects, CeedVector *assembled, CeedElemRestriction *rstr,
558     CeedRequest *request) {
559   int ierr;
560   Ceed ceed;
561   ierr = CeedOperatorGetCeed(op, &ceed); CeedChkBackend(ierr);
562   Ceed_Opt *ceed_impl;
563   ierr = CeedGetData(ceed, &ceed_impl); CeedChkBackend(ierr);
564   const CeedInt blk_size = ceed_impl->blk_size;
565   CeedOperator_Opt *impl;
566   ierr = CeedOperatorGetData(op, &impl); CeedChkBackend(ierr);
567   CeedInt Q, num_input_fields, num_output_fields, num_elem, size;
568   ierr = CeedOperatorGetNumElements(op, &num_elem); CeedChkBackend(ierr);
569   ierr = CeedOperatorGetNumQuadraturePoints(op, &Q); CeedChkBackend(ierr);
570   CeedInt num_blks = (num_elem/blk_size) + !!(num_elem%blk_size);
571   CeedQFunction qf;
572   ierr = CeedOperatorGetQFunction(op, &qf); CeedChkBackend(ierr);
573   CeedOperatorField *op_input_fields, *op_output_fields;
574   ierr = CeedOperatorGetFields(op, &num_input_fields, &op_input_fields,
575                                &num_output_fields, &op_output_fields);
576   CeedChkBackend(ierr);
577   CeedQFunctionField *qf_input_fields, *qf_output_fields;
578   ierr = CeedQFunctionGetFields(qf, NULL, &qf_input_fields, NULL,
579                                 &qf_output_fields);
580   CeedChkBackend(ierr);
581   CeedVector vec, l_vec = impl->qf_l_vec;
582   CeedInt num_active_in = impl->num_active_in,
583           num_active_out = impl->num_active_out;
584   CeedVector *active_in = impl->qf_active_in;
585   CeedScalar *a, *tmp;
586   CeedScalar *e_data[2*CEED_FIELD_MAX] = {0};
587 
588   // Setup
589   ierr = CeedOperatorSetup_Opt(op); CeedChkBackend(ierr);
590 
591   // Check for identity
592   if (impl->is_identity_qf)
593     // LCOV_EXCL_START
594     return CeedError(ceed, CEED_ERROR_BACKEND,
595                      "Assembling identity qfunctions not supported");
596   // LCOV_EXCL_STOP
597 
598   // Input Evecs and Restriction
599   ierr = CeedOperatorSetupInputs_Opt(num_input_fields, qf_input_fields,
600                                      op_input_fields, NULL, e_data,
601                                      impl, request); CeedChkBackend(ierr);
602 
603   // Count number of active input fields
604   if (!num_active_in) {
605     for (CeedInt i=0; i<num_input_fields; i++) {
606       // Get input vector
607       ierr = CeedOperatorFieldGetVector(op_input_fields[i], &vec);
608       CeedChkBackend(ierr);
609       // Check if active input
610       if (vec == CEED_VECTOR_ACTIVE) {
611         ierr = CeedQFunctionFieldGetSize(qf_input_fields[i], &size);
612         CeedChkBackend(ierr);
613         ierr = CeedVectorSetValue(impl->q_vecs_in[i], 0.0); CeedChkBackend(ierr);
614         ierr = CeedVectorGetArray(impl->q_vecs_in[i], CEED_MEM_HOST, &tmp);
615         CeedChkBackend(ierr);
616         ierr = CeedRealloc(num_active_in + size, &active_in); CeedChkBackend(ierr);
617         for (CeedInt field=0; field<size; field++) {
618           ierr = CeedVectorCreate(ceed, Q*blk_size, &active_in[num_active_in+field]);
619           CeedChkBackend(ierr);
620           ierr = CeedVectorSetArray(active_in[num_active_in+field], CEED_MEM_HOST,
621                                     CEED_USE_POINTER, &tmp[field*Q*blk_size]);
622           CeedChkBackend(ierr);
623         }
624         num_active_in += size;
625         ierr = CeedVectorRestoreArray(impl->q_vecs_in[i], &tmp); CeedChkBackend(ierr);
626       }
627     }
628     impl->num_active_in = num_active_in;
629     impl->qf_active_in = active_in;
630   }
631 
632   // Count number of active output fields
633   if (!num_active_out) {
634     for (CeedInt i=0; i<num_output_fields; i++) {
635       // Get output vector
636       ierr = CeedOperatorFieldGetVector(op_output_fields[i], &vec);
637       CeedChkBackend(ierr);
638       // Check if active output
639       if (vec == CEED_VECTOR_ACTIVE) {
640         ierr = CeedQFunctionFieldGetSize(qf_output_fields[i], &size);
641         CeedChkBackend(ierr);
642         num_active_out += size;
643       }
644     }
645     impl->num_active_out = num_active_out;
646   }
647 
648   // Check sizes
649   if (!num_active_in || !num_active_out)
650     // LCOV_EXCL_START
651     return CeedError(ceed, CEED_ERROR_BACKEND,
652                      "Cannot assemble QFunction without active inputs "
653                      "and outputs");
654   // LCOV_EXCL_STOP
655 
656   // Setup l_vec
657   if (!l_vec) {
658     ierr = CeedVectorCreate(ceed, num_blks*blk_size*Q*num_active_in*num_active_out,
659                             &l_vec); CeedChkBackend(ierr);
660     ierr = CeedVectorSetValue(l_vec, 0.0); CeedChkBackend(ierr);
661     impl->qf_l_vec = l_vec;
662   }
663   ierr = CeedVectorGetArray(l_vec, CEED_MEM_HOST, &a); CeedChkBackend(ierr);
664 
665   // Build objects if needed
666   CeedInt strides[3] = {1, Q, num_active_in *num_active_out*Q};
667   if (build_objects) {
668     // Create output restriction
669     ierr = CeedElemRestrictionCreateStrided(ceed, num_elem, Q,
670                                             num_active_in*num_active_out,
671                                             num_active_in*num_active_out*num_elem*Q,
672                                             strides, rstr); CeedChkBackend(ierr);
673     // Create assembled vector
674     ierr = CeedVectorCreate(ceed, num_elem*Q*num_active_in*num_active_out,
675                             assembled); CeedChkBackend(ierr);
676   }
677 
678   // Loop through elements
679   for (CeedInt e=0; e<num_blks*blk_size; e+=blk_size) {
680     // Input basis apply
681     ierr = CeedOperatorInputBasis_Opt(e, Q, qf_input_fields, op_input_fields,
682                                       num_input_fields, blk_size, NULL, true,
683                                       e_data, impl, request); CeedChkBackend(ierr);
684 
685     // Assemble QFunction
686     for (CeedInt in=0; in<num_active_in; in++) {
687       // Set Inputs
688       ierr = CeedVectorSetValue(active_in[in], 1.0); CeedChkBackend(ierr);
689       if (num_active_in > 1) {
690         ierr = CeedVectorSetValue(active_in[(in+num_active_in-1)%num_active_in],
691                                   0.0); CeedChkBackend(ierr);
692       }
693       // Set Outputs
694       for (CeedInt out=0; out<num_output_fields; out++) {
695         // Get output vector
696         ierr = CeedOperatorFieldGetVector(op_output_fields[out], &vec);
697         CeedChkBackend(ierr);
698         // Check if active output
699         if (vec == CEED_VECTOR_ACTIVE) {
700           CeedVectorSetArray(impl->q_vecs_out[out], CEED_MEM_HOST,
701                              CEED_USE_POINTER, a); CeedChkBackend(ierr);
702           ierr = CeedQFunctionFieldGetSize(qf_output_fields[out], &size);
703           CeedChkBackend(ierr);
704           a += size*Q*blk_size; // Advance the pointer by the size of the output
705         }
706       }
707       // Apply QFunction
708       ierr = CeedQFunctionApply(qf, Q*blk_size, impl->q_vecs_in, impl->q_vecs_out);
709       CeedChkBackend(ierr);
710     }
711   }
712 
713   // Un-set output Qvecs to prevent accidental overwrite of Assembled
714   for (CeedInt out=0; out<num_output_fields; out++) {
715     // Get output vector
716     ierr = CeedOperatorFieldGetVector(op_output_fields[out], &vec);
717     CeedChkBackend(ierr);
718     // Check if active output
719     if (vec == CEED_VECTOR_ACTIVE) {
720       CeedVectorSetArray(impl->q_vecs_out[out], CEED_MEM_HOST, CEED_COPY_VALUES,
721                          NULL); CeedChkBackend(ierr);
722     }
723   }
724 
725   // Restore input arrays
726   ierr = CeedOperatorRestoreInputs_Opt(num_input_fields, qf_input_fields,
727                                        op_input_fields, e_data, impl);
728   CeedChkBackend(ierr);
729 
730   // Output blocked restriction
731   ierr = CeedVectorRestoreArray(l_vec, &a); CeedChkBackend(ierr);
732   ierr = CeedVectorSetValue(*assembled, 0.0); CeedChkBackend(ierr);
733   CeedElemRestriction blk_rstr = impl->qf_blk_rstr;
734   if (!blk_rstr) {
735     ierr = CeedElemRestrictionCreateBlockedStrided(ceed, num_elem, Q, blk_size,
736            num_active_in*num_active_out, num_active_in*num_active_out*num_elem*Q,
737            strides, &blk_rstr); CeedChkBackend(ierr);
738     impl->qf_blk_rstr = blk_rstr;
739   }
740   ierr = CeedElemRestrictionApply(blk_rstr, CEED_TRANSPOSE, l_vec, *assembled,
741                                   request); CeedChkBackend(ierr);
742 
743   return CEED_ERROR_SUCCESS;
744 }
745 
746 //------------------------------------------------------------------------------
747 // Assemble Linear QFunction
748 //------------------------------------------------------------------------------
749 static int CeedOperatorLinearAssembleQFunction_Opt(CeedOperator op,
750     CeedVector *assembled, CeedElemRestriction *rstr, CeedRequest *request) {
751   return CeedOperatorLinearAssembleQFunctionCore_Opt(op, true, assembled, rstr,
752          request);
753 }
754 
755 //------------------------------------------------------------------------------
756 // Update Assembled Linear QFunction
757 //------------------------------------------------------------------------------
758 static int CeedOperatorLinearAssembleQFunctionUpdate_Opt(CeedOperator op,
759     CeedVector assembled, CeedElemRestriction rstr, CeedRequest *request) {
760   return CeedOperatorLinearAssembleQFunctionCore_Opt(op, false, &assembled,
761          &rstr, request);
762 }
763 
764 //------------------------------------------------------------------------------
765 // Operator Destroy
766 //------------------------------------------------------------------------------
767 static int CeedOperatorDestroy_Opt(CeedOperator op) {
768   int ierr;
769   CeedOperator_Opt *impl;
770   ierr = CeedOperatorGetData(op, &impl); CeedChkBackend(ierr);
771 
772   for (CeedInt i=0; i<impl->num_inputs+impl->num_outputs; i++) {
773     ierr = CeedElemRestrictionDestroy(&impl->blk_restr[i]); CeedChkBackend(ierr);
774     ierr = CeedVectorDestroy(&impl->e_vecs_full[i]); CeedChkBackend(ierr);
775   }
776   ierr = CeedFree(&impl->blk_restr); CeedChkBackend(ierr);
777   ierr = CeedFree(&impl->e_vecs_full); CeedChkBackend(ierr);
778   ierr = CeedFree(&impl->input_states); CeedChkBackend(ierr);
779 
780   for (CeedInt i=0; i<impl->num_inputs; i++) {
781     ierr = CeedVectorDestroy(&impl->e_vecs_in[i]); CeedChkBackend(ierr);
782     ierr = CeedVectorDestroy(&impl->q_vecs_in[i]); CeedChkBackend(ierr);
783   }
784   ierr = CeedFree(&impl->e_vecs_in); CeedChkBackend(ierr);
785   ierr = CeedFree(&impl->q_vecs_in); CeedChkBackend(ierr);
786 
787   for (CeedInt i=0; i<impl->num_outputs; i++) {
788     ierr = CeedVectorDestroy(&impl->e_vecs_out[i]); CeedChkBackend(ierr);
789     ierr = CeedVectorDestroy(&impl->q_vecs_out[i]); CeedChkBackend(ierr);
790   }
791   ierr = CeedFree(&impl->e_vecs_out); CeedChkBackend(ierr);
792   ierr = CeedFree(&impl->q_vecs_out); CeedChkBackend(ierr);
793 
794   // QFunction assembly data
795   for (CeedInt i=0; i<impl->num_active_in; i++) {
796     ierr = CeedVectorDestroy(&impl->qf_active_in[i]); CeedChkBackend(ierr);
797   }
798   ierr = CeedFree(&impl->qf_active_in); CeedChkBackend(ierr);
799   ierr = CeedVectorDestroy(&impl->qf_l_vec); CeedChkBackend(ierr);
800   ierr = CeedElemRestrictionDestroy(&impl->qf_blk_rstr); CeedChkBackend(ierr);
801 
802   ierr = CeedFree(&impl); CeedChkBackend(ierr);
803   return CEED_ERROR_SUCCESS;
804 }
805 
806 //------------------------------------------------------------------------------
807 // Operator Create
808 //------------------------------------------------------------------------------
809 int CeedOperatorCreate_Opt(CeedOperator op) {
810   int ierr;
811   Ceed ceed;
812   ierr = CeedOperatorGetCeed(op, &ceed); CeedChkBackend(ierr);
813   Ceed_Opt *ceed_impl;
814   ierr = CeedGetData(ceed, &ceed_impl); CeedChkBackend(ierr);
815   CeedInt blk_size = ceed_impl->blk_size;
816   CeedOperator_Opt *impl;
817 
818   ierr = CeedCalloc(1, &impl); CeedChkBackend(ierr);
819   ierr = CeedOperatorSetData(op, impl); CeedChkBackend(ierr);
820 
821   if (blk_size != 1 && blk_size != 8)
822     // LCOV_EXCL_START
823     return CeedError(ceed, CEED_ERROR_BACKEND,
824                      "Opt backend cannot use blocksize: %d", blk_size);
825   // LCOV_EXCL_STOP
826 
827   ierr = CeedSetBackendFunction(ceed, "Operator", op, "LinearAssembleQFunction",
828                                 CeedOperatorLinearAssembleQFunction_Opt);
829   CeedChkBackend(ierr);
830   ierr = CeedSetBackendFunction(ceed, "Operator", op,
831                                 "LinearAssembleQFunctionUpdate",
832                                 CeedOperatorLinearAssembleQFunctionUpdate_Opt);
833   CeedChkBackend(ierr);
834   ierr = CeedSetBackendFunction(ceed, "Operator", op, "ApplyAdd",
835                                 CeedOperatorApplyAdd_Opt); CeedChkBackend(ierr);
836   ierr = CeedSetBackendFunction(ceed, "Operator", op, "Destroy",
837                                 CeedOperatorDestroy_Opt); CeedChkBackend(ierr);
838   return CEED_ERROR_SUCCESS;
839 }
840 //------------------------------------------------------------------------------
841