xref: /libCEED/backends/opt/ceed-opt-operator.c (revision 13964f0727a62e5421e6d3b433e838b96a9ce891)
1 // Copyright (c) 2017-2018, Lawrence Livermore National Security, LLC.
2 // Produced at the Lawrence Livermore National Laboratory. LLNL-CODE-734707.
3 // All Rights reserved. See files LICENSE and NOTICE for details.
4 //
5 // This file is part of CEED, a collection of benchmarks, miniapps, software
6 // libraries and APIs for efficient high-order finite element and spectral
7 // element discretizations for exascale applications. For more information and
8 // source code availability see http://github.com/ceed.
9 //
10 // The CEED research is supported by the Exascale Computing Project 17-SC-20-SC,
11 // a collaborative effort of two U.S. Department of Energy organizations (Office
12 // of Science and the National Nuclear Security Administration) responsible for
13 // the planning and preparation of a capable exascale ecosystem, including
14 // software, applications, hardware, advanced system engineering and early
15 // testbed platforms, in support of the nation's exascale computing imperative.
16 
17 #include <ceed/ceed.h>
18 #include <ceed/backend.h>
19 #include <stdbool.h>
20 #include <stdint.h>
21 #include <string.h>
22 #include "ceed-opt.h"
23 
24 //------------------------------------------------------------------------------
25 // Setup Input/Output Fields
26 //------------------------------------------------------------------------------
27 static int CeedOperatorSetupFields_Opt(CeedQFunction qf, CeedOperator op,
28                                        bool is_input, const CeedInt blk_size,
29                                        CeedElemRestriction *blk_restr,
30                                        CeedVector *e_vecs_full, CeedVector *e_vecs,
31                                        CeedVector *q_vecs, CeedInt start_e,
32                                        CeedInt num_fields, CeedInt Q) {
33   CeedInt ierr, num_comp, size, P;
34   Ceed ceed;
35   ierr = CeedOperatorGetCeed(op, &ceed); CeedChkBackend(ierr);
36   CeedBasis basis;
37   CeedElemRestriction r;
38   CeedOperatorField *op_fields;
39   CeedQFunctionField *qf_fields;
40   if (is_input) {
41     ierr = CeedOperatorGetFields(op, NULL, &op_fields, NULL, NULL);
42     CeedChkBackend(ierr);
43     ierr = CeedQFunctionGetFields(qf, NULL, &qf_fields, NULL, NULL);
44     CeedChkBackend(ierr);
45   } else {
46     ierr = CeedOperatorGetFields(op, NULL, NULL, NULL,&op_fields);
47     CeedChkBackend(ierr);
48     ierr = CeedQFunctionGetFields(qf, NULL, NULL, NULL, &qf_fields);
49     CeedChkBackend(ierr);
50   }
51 
52   // Loop over fields
53   for (CeedInt i=0; i<num_fields; i++) {
54     CeedEvalMode eval_mode;
55     ierr = CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode);
56     CeedChkBackend(ierr);
57 
58     if (eval_mode != CEED_EVAL_WEIGHT) {
59       ierr = CeedOperatorFieldGetElemRestriction(op_fields[i], &r);
60       CeedChkBackend(ierr);
61       Ceed ceed;
62       ierr = CeedElemRestrictionGetCeed(r, &ceed); CeedChkBackend(ierr);
63       CeedSize l_size;
64       CeedInt num_elem, elem_size, comp_stride;
65       ierr = CeedElemRestrictionGetNumElements(r, &num_elem); CeedChkBackend(ierr);
66       ierr = CeedElemRestrictionGetElementSize(r, &elem_size); CeedChkBackend(ierr);
67       ierr = CeedElemRestrictionGetLVectorSize(r, &l_size); CeedChkBackend(ierr);
68       ierr = CeedElemRestrictionGetNumComponents(r, &num_comp); CeedChkBackend(ierr);
69 
70       bool strided;
71       ierr = CeedElemRestrictionIsStrided(r, &strided); CeedChkBackend(ierr);
72       if (strided) {
73         CeedInt strides[3];
74         ierr = CeedElemRestrictionGetStrides(r, &strides); CeedChkBackend(ierr);
75         ierr = CeedElemRestrictionCreateBlockedStrided(ceed, num_elem, elem_size,
76                blk_size, num_comp, l_size, strides, &blk_restr[i+start_e]);
77         CeedChkBackend(ierr);
78       } else {
79         const CeedInt *offsets = NULL;
80         ierr = CeedElemRestrictionGetOffsets(r, CEED_MEM_HOST, &offsets);
81         CeedChkBackend(ierr);
82         ierr = CeedElemRestrictionGetCompStride(r, &comp_stride); CeedChkBackend(ierr);
83         ierr = CeedElemRestrictionCreateBlocked(ceed, num_elem, elem_size,
84                                                 blk_size, num_comp, comp_stride,
85                                                 l_size, CEED_MEM_HOST,
86                                                 CEED_COPY_VALUES, offsets,
87                                                 &blk_restr[i+start_e]);
88         CeedChkBackend(ierr);
89         ierr = CeedElemRestrictionRestoreOffsets(r, &offsets); CeedChkBackend(ierr);
90       }
91       ierr = CeedElemRestrictionCreateVector(blk_restr[i+start_e], NULL,
92                                              &e_vecs_full[i+start_e]);
93       CeedChkBackend(ierr);
94     }
95 
96     switch(eval_mode) {
97     case CEED_EVAL_NONE:
98       ierr = CeedQFunctionFieldGetSize(qf_fields[i], &size); CeedChkBackend(ierr);
99       ierr = CeedVectorCreate(ceed, Q*size*blk_size, &e_vecs[i]);
100       CeedChkBackend(ierr);
101       ierr = CeedVectorCreate(ceed, Q*size*blk_size, &q_vecs[i]);
102       CeedChkBackend(ierr);
103       break;
104     case CEED_EVAL_INTERP:
105     case CEED_EVAL_GRAD:
106       ierr = CeedOperatorFieldGetBasis(op_fields[i], &basis); CeedChkBackend(ierr);
107       ierr = CeedQFunctionFieldGetSize(qf_fields[i], &size); CeedChkBackend(ierr);
108       ierr = CeedBasisGetNumNodes(basis, &P); CeedChkBackend(ierr);
109       ierr = CeedBasisGetNumComponents(basis, &num_comp); CeedChkBackend(ierr);
110       ierr = CeedVectorCreate(ceed, P*num_comp*blk_size, &e_vecs[i]);
111       CeedChkBackend(ierr);
112       ierr = CeedVectorCreate(ceed, Q*size*blk_size, &q_vecs[i]);
113       CeedChkBackend(ierr);
114       break;
115     case CEED_EVAL_WEIGHT: // Only on input fields
116       ierr = CeedOperatorFieldGetBasis(op_fields[i], &basis); CeedChkBackend(ierr);
117       ierr = CeedVectorCreate(ceed, Q*blk_size, &q_vecs[i]); CeedChkBackend(ierr);
118       ierr = CeedBasisApply(basis, blk_size, CEED_NOTRANSPOSE,
119                             CEED_EVAL_WEIGHT, CEED_VECTOR_NONE, q_vecs[i]);
120       CeedChkBackend(ierr);
121 
122       break;
123     case CEED_EVAL_DIV:
124       break; // Not implemented
125     case CEED_EVAL_CURL:
126       break; // Not implemented
127     }
128     if (is_input && !!e_vecs[i]) {
129       ierr = CeedVectorSetArray(e_vecs[i], CEED_MEM_HOST,
130                                 CEED_COPY_VALUES, NULL); CeedChkBackend(ierr);
131     }
132   }
133   return CEED_ERROR_SUCCESS;
134 }
135 
136 //------------------------------------------------------------------------------
137 // Setup Operator
138 //------------------------------------------------------------------------------
139 static int CeedOperatorSetup_Opt(CeedOperator op) {
140   int ierr;
141   bool is_setup_done;
142   ierr = CeedOperatorIsSetupDone(op, &is_setup_done); CeedChkBackend(ierr);
143   if (is_setup_done) return CEED_ERROR_SUCCESS;
144   Ceed ceed;
145   ierr = CeedOperatorGetCeed(op, &ceed); CeedChkBackend(ierr);
146   Ceed_Opt *ceed_impl;
147   ierr = CeedGetData(ceed, &ceed_impl); CeedChkBackend(ierr);
148   const CeedInt blk_size = ceed_impl->blk_size;
149   CeedOperator_Opt *impl;
150   ierr = CeedOperatorGetData(op, &impl); CeedChkBackend(ierr);
151   CeedQFunction qf;
152   ierr = CeedOperatorGetQFunction(op, &qf); CeedChkBackend(ierr);
153   CeedInt Q, num_input_fields, num_output_fields;
154   ierr = CeedOperatorGetNumQuadraturePoints(op, &Q); CeedChkBackend(ierr);
155   ierr = CeedQFunctionIsIdentity(qf, &impl->is_identity_qf); CeedChkBackend(ierr);
156   CeedOperatorField *op_input_fields, *op_output_fields;
157   ierr = CeedOperatorGetFields(op, &num_input_fields, &op_input_fields,
158                                &num_output_fields, &op_output_fields);
159   CeedChkBackend(ierr);
160   CeedQFunctionField *qf_input_fields, *qf_output_fields;
161   ierr = CeedQFunctionGetFields(qf, NULL, &qf_input_fields, NULL,
162                                 &qf_output_fields);
163   CeedChkBackend(ierr);
164 
165   // Allocate
166   ierr = CeedCalloc(num_input_fields + num_output_fields, &impl->blk_restr);
167   CeedChkBackend(ierr);
168   ierr = CeedCalloc(num_input_fields + num_output_fields, &impl->e_vecs_full);
169   CeedChkBackend(ierr);
170 
171   ierr = CeedCalloc(CEED_FIELD_MAX, &impl->input_states); CeedChkBackend(ierr);
172   ierr = CeedCalloc(CEED_FIELD_MAX, &impl->e_vecs_in); CeedChkBackend(ierr);
173   ierr = CeedCalloc(CEED_FIELD_MAX, &impl->e_vecs_out); CeedChkBackend(ierr);
174   ierr = CeedCalloc(CEED_FIELD_MAX, &impl->q_vecs_in); CeedChkBackend(ierr);
175   ierr = CeedCalloc(CEED_FIELD_MAX, &impl->q_vecs_out); CeedChkBackend(ierr);
176 
177   impl->num_inputs = num_input_fields;
178   impl->num_outputs = num_output_fields;
179 
180   // Set up infield and outfield pointer arrays
181   // Infields
182   ierr = CeedOperatorSetupFields_Opt(qf, op, true, blk_size, impl->blk_restr,
183                                      impl->e_vecs_full, impl->e_vecs_in,
184                                      impl->q_vecs_in, 0, num_input_fields, Q);
185   CeedChkBackend(ierr);
186   // Outfields
187   ierr = CeedOperatorSetupFields_Opt(qf, op, false, blk_size, impl->blk_restr,
188                                      impl->e_vecs_full, impl->e_vecs_out,
189                                      impl->q_vecs_out, num_input_fields,
190                                      num_output_fields, Q);
191   CeedChkBackend(ierr);
192 
193   // Identity QFunctions
194   if (impl->is_identity_qf) {
195     CeedEvalMode in_mode, out_mode;
196     CeedQFunctionField *in_fields, *out_fields;
197     ierr = CeedQFunctionGetFields(qf, NULL, &in_fields, NULL, &out_fields);
198     CeedChkBackend(ierr);
199     ierr = CeedQFunctionFieldGetEvalMode(in_fields[0], &in_mode);
200     CeedChkBackend(ierr);
201     ierr = CeedQFunctionFieldGetEvalMode(out_fields[0], &out_mode);
202     CeedChkBackend(ierr);
203 
204     if (in_mode == CEED_EVAL_NONE && out_mode == CEED_EVAL_NONE) {
205       impl->is_identity_restr_op = true;
206     } else {
207       ierr = CeedVectorDestroy(&impl->q_vecs_out[0]); CeedChkBackend(ierr);
208       impl->q_vecs_out[0] = impl->q_vecs_in[0];
209       ierr = CeedVectorAddReference(impl->q_vecs_in[0]); CeedChkBackend(ierr);
210     }
211   }
212 
213   ierr = CeedOperatorSetSetupDone(op); CeedChkBackend(ierr);
214 
215   return CEED_ERROR_SUCCESS;
216 }
217 
218 //------------------------------------------------------------------------------
219 // Setup Input Fields
220 //------------------------------------------------------------------------------
221 static inline int CeedOperatorSetupInputs_Opt(CeedInt num_input_fields,
222     CeedQFunctionField *qf_input_fields, CeedOperatorField *op_input_fields,
223     CeedVector in_vec, CeedScalar *e_data[2*CEED_FIELD_MAX], CeedOperator_Opt *impl,
224     CeedRequest *request) {
225   CeedInt ierr;
226   CeedEvalMode eval_mode;
227   CeedVector vec;
228   uint64_t state;
229 
230   for (CeedInt i=0; i<num_input_fields; i++) {
231     ierr = CeedQFunctionFieldGetEvalMode(qf_input_fields[i], &eval_mode);
232     CeedChkBackend(ierr);
233     if (eval_mode == CEED_EVAL_WEIGHT) { // Skip
234     } else {
235       // Get input vector
236       ierr = CeedOperatorFieldGetVector(op_input_fields[i], &vec);
237       CeedChkBackend(ierr);
238       if (vec != CEED_VECTOR_ACTIVE) {
239         // Restrict
240         ierr = CeedVectorGetState(vec, &state); CeedChkBackend(ierr);
241         if (state != impl->input_states[i]) {
242           ierr = CeedElemRestrictionApply(impl->blk_restr[i], CEED_NOTRANSPOSE,
243                                           vec, impl->e_vecs_full[i], request);
244           CeedChkBackend(ierr);
245           impl->input_states[i] = state;
246         }
247         // Get evec
248         ierr = CeedVectorGetArrayRead(impl->e_vecs_full[i], CEED_MEM_HOST,
249                                       (const CeedScalar **) &e_data[i]);
250         CeedChkBackend(ierr);
251       } else {
252         // Set Qvec for CEED_EVAL_NONE
253         if (eval_mode == CEED_EVAL_NONE) {
254           ierr = CeedVectorGetArrayRead(impl->e_vecs_in[i], CEED_MEM_HOST,
255                                         (const CeedScalar **)&e_data[i]);
256           CeedChkBackend(ierr);
257           ierr = CeedVectorSetArray(impl->q_vecs_in[i], CEED_MEM_HOST,
258                                     CEED_USE_POINTER, e_data[i]); CeedChkBackend(ierr);
259           ierr = CeedVectorRestoreArrayRead(impl->e_vecs_in[i],
260                                             (const CeedScalar **)&e_data[i]);
261           CeedChkBackend(ierr);
262         }
263       }
264     }
265   }
266   return CEED_ERROR_SUCCESS;
267 }
268 
269 //------------------------------------------------------------------------------
270 // Input Basis Action
271 //------------------------------------------------------------------------------
272 static inline int CeedOperatorInputBasis_Opt(CeedInt e, CeedInt Q,
273     CeedQFunctionField *qf_input_fields, CeedOperatorField *op_input_fields,
274     CeedInt num_input_fields, CeedInt blk_size, CeedVector in_vec, bool skip_active,
275     CeedScalar *e_data[2*CEED_FIELD_MAX], CeedOperator_Opt *impl,
276     CeedRequest *request) {
277   CeedInt ierr;
278   CeedInt dim, elem_size, size;
279   CeedElemRestriction elem_restr;
280   CeedEvalMode eval_mode;
281   CeedBasis basis;
282   CeedVector vec;
283 
284   for (CeedInt i=0; i<num_input_fields; i++) {
285     ierr = CeedOperatorFieldGetVector(op_input_fields[i], &vec);
286     CeedChkBackend(ierr);
287     // Skip active input
288     if (skip_active) {
289       if (vec == CEED_VECTOR_ACTIVE)
290         continue;
291     }
292 
293     CeedInt active_in = 0;
294     // Get elem_size, eval_mode, size
295     ierr = CeedOperatorFieldGetElemRestriction(op_input_fields[i], &elem_restr);
296     CeedChkBackend(ierr);
297     ierr = CeedElemRestrictionGetElementSize(elem_restr, &elem_size);
298     CeedChkBackend(ierr);
299     ierr = CeedQFunctionFieldGetEvalMode(qf_input_fields[i], &eval_mode);
300     CeedChkBackend(ierr);
301     ierr = CeedQFunctionFieldGetSize(qf_input_fields[i], &size);
302     CeedChkBackend(ierr);
303     // Restrict block active input
304     if (vec == CEED_VECTOR_ACTIVE) {
305       ierr = CeedElemRestrictionApplyBlock(impl->blk_restr[i], e/blk_size,
306                                            CEED_NOTRANSPOSE, in_vec,
307                                            impl->e_vecs_in[i], request);
308       CeedChkBackend(ierr);
309       active_in = 1;
310     }
311     // Basis action
312     switch(eval_mode) {
313     case CEED_EVAL_NONE:
314       if (!active_in) {
315         ierr = CeedVectorSetArray(impl->q_vecs_in[i], CEED_MEM_HOST,
316                                   CEED_USE_POINTER, &e_data[i][e*Q*size]);
317         CeedChkBackend(ierr);
318       }
319       break;
320     case CEED_EVAL_INTERP:
321       ierr = CeedOperatorFieldGetBasis(op_input_fields[i], &basis);
322       CeedChkBackend(ierr);
323       if (!active_in) {
324         ierr = CeedVectorSetArray(impl->e_vecs_in[i], CEED_MEM_HOST,
325                                   CEED_USE_POINTER, &e_data[i][e*elem_size*size]);
326         CeedChkBackend(ierr);
327       }
328       ierr = CeedBasisApply(basis, blk_size, CEED_NOTRANSPOSE,
329                             CEED_EVAL_INTERP, impl->e_vecs_in[i],
330                             impl->q_vecs_in[i]); CeedChkBackend(ierr);
331       break;
332     case CEED_EVAL_GRAD:
333       ierr = CeedOperatorFieldGetBasis(op_input_fields[i], &basis);
334       CeedChkBackend(ierr);
335       if (!active_in) {
336         ierr = CeedBasisGetDimension(basis, &dim); CeedChkBackend(ierr);
337         ierr = CeedVectorSetArray(impl->e_vecs_in[i], CEED_MEM_HOST,
338                                   CEED_USE_POINTER,
339                                   &e_data[i][e*elem_size*size/dim]);
340         CeedChkBackend(ierr);
341       }
342       ierr = CeedBasisApply(basis, blk_size, CEED_NOTRANSPOSE,
343                             CEED_EVAL_GRAD, impl->e_vecs_in[i],
344                             impl->q_vecs_in[i]); CeedChkBackend(ierr);
345       break;
346     case CEED_EVAL_WEIGHT:
347       break;  // No action
348     // LCOV_EXCL_START
349     case CEED_EVAL_DIV:
350     case CEED_EVAL_CURL: {
351       ierr = CeedOperatorFieldGetBasis(op_input_fields[i], &basis);
352       CeedChkBackend(ierr);
353       Ceed ceed;
354       ierr = CeedBasisGetCeed(basis, &ceed); CeedChkBackend(ierr);
355       return CeedError(ceed, CEED_ERROR_BACKEND,
356                        "Ceed evaluation mode not implemented");
357       // LCOV_EXCL_STOP
358     }
359     }
360   }
361   return CEED_ERROR_SUCCESS;
362 }
363 
364 //------------------------------------------------------------------------------
365 // Output Basis Action
366 //------------------------------------------------------------------------------
367 static inline int CeedOperatorOutputBasis_Opt(CeedInt e, CeedInt Q,
368     CeedQFunctionField *qf_output_fields, CeedOperatorField *op_output_fields,
369     CeedInt blk_size, CeedInt num_input_fields, CeedInt num_output_fields,
370     CeedOperator op, CeedVector out_vec, CeedOperator_Opt *impl,
371     CeedRequest *request) {
372   CeedInt ierr;
373   CeedElemRestriction elem_restr;
374   CeedEvalMode eval_mode;
375   CeedBasis basis;
376   CeedVector vec;
377 
378   for (CeedInt i=0; i<num_output_fields; i++) {
379     // Get elem_size, eval_mode, size
380     ierr = CeedOperatorFieldGetElemRestriction(op_output_fields[i], &elem_restr);
381     CeedChkBackend(ierr);
382     ierr = CeedQFunctionFieldGetEvalMode(qf_output_fields[i], &eval_mode);
383     CeedChkBackend(ierr);
384     // Basis action
385     switch(eval_mode) {
386     case CEED_EVAL_NONE:
387       break; // No action
388     case CEED_EVAL_INTERP:
389       ierr = CeedOperatorFieldGetBasis(op_output_fields[i], &basis);
390       CeedChkBackend(ierr);
391       ierr = CeedBasisApply(basis, blk_size, CEED_TRANSPOSE,
392                             CEED_EVAL_INTERP, impl->q_vecs_out[i],
393                             impl->e_vecs_out[i]); CeedChkBackend(ierr);
394       break;
395     case CEED_EVAL_GRAD:
396       ierr = CeedOperatorFieldGetBasis(op_output_fields[i], &basis);
397       CeedChkBackend(ierr);
398       ierr = CeedBasisApply(basis, blk_size, CEED_TRANSPOSE,
399                             CEED_EVAL_GRAD, impl->q_vecs_out[i],
400                             impl->e_vecs_out[i]); CeedChkBackend(ierr);
401       break;
402     // LCOV_EXCL_START
403     case CEED_EVAL_WEIGHT: {
404       Ceed ceed;
405       ierr = CeedOperatorGetCeed(op, &ceed); CeedChkBackend(ierr);
406       return CeedError(ceed, CEED_ERROR_BACKEND,
407                        "CEED_EVAL_WEIGHT cannot be an output "
408                        "evaluation mode");
409     }
410     case CEED_EVAL_DIV:
411     case CEED_EVAL_CURL: {
412       Ceed ceed;
413       ierr = CeedOperatorGetCeed(op, &ceed); CeedChkBackend(ierr);
414       return CeedError(ceed, CEED_ERROR_BACKEND,
415                        "Ceed evaluation mode not implemented");
416       // LCOV_EXCL_STOP
417     }
418     }
419     // Restrict output block
420     // Get output vector
421     ierr = CeedOperatorFieldGetVector(op_output_fields[i], &vec);
422     CeedChkBackend(ierr);
423     if (vec == CEED_VECTOR_ACTIVE)
424       vec = out_vec;
425     // Restrict
426     ierr = CeedElemRestrictionApplyBlock(impl->blk_restr[i+impl->num_inputs],
427                                          e/blk_size, CEED_TRANSPOSE,
428                                          impl->e_vecs_out[i], vec, request);
429     CeedChkBackend(ierr);
430   }
431   return CEED_ERROR_SUCCESS;
432 }
433 
434 //------------------------------------------------------------------------------
435 // Restore Input Vectors
436 //------------------------------------------------------------------------------
437 static inline int CeedOperatorRestoreInputs_Opt(CeedInt num_input_fields,
438     CeedQFunctionField *qf_input_fields, CeedOperatorField *op_input_fields,
439     CeedScalar *e_data[2*CEED_FIELD_MAX], CeedOperator_Opt *impl) {
440   CeedInt ierr;
441 
442   for (CeedInt i=0; i<num_input_fields; i++) {
443     CeedEvalMode eval_mode;
444     CeedVector vec;
445     ierr = CeedQFunctionFieldGetEvalMode(qf_input_fields[i], &eval_mode);
446     CeedChkBackend(ierr);
447     ierr = CeedOperatorFieldGetVector(op_input_fields[i], &vec);
448     CeedChkBackend(ierr);
449     if (eval_mode != CEED_EVAL_WEIGHT && vec != CEED_VECTOR_ACTIVE) {
450       ierr = CeedVectorRestoreArrayRead(impl->e_vecs_full[i],
451                                         (const CeedScalar **) &e_data[i]);
452       CeedChkBackend(ierr);
453     }
454   }
455   return CEED_ERROR_SUCCESS;
456 }
457 
458 //------------------------------------------------------------------------------
459 // Operator Apply
460 //------------------------------------------------------------------------------
461 static int CeedOperatorApplyAdd_Opt(CeedOperator op, CeedVector in_vec,
462                                     CeedVector out_vec, CeedRequest *request) {
463   int ierr;
464   Ceed ceed;
465   ierr = CeedOperatorGetCeed(op, &ceed); CeedChkBackend(ierr);
466   Ceed_Opt *ceed_impl;
467   ierr = CeedGetData(ceed, &ceed_impl); CeedChkBackend(ierr);
468   CeedInt blk_size = ceed_impl->blk_size;
469   CeedOperator_Opt *impl;
470   ierr = CeedOperatorGetData(op, &impl); CeedChkBackend(ierr);
471   CeedInt Q, num_input_fields, num_output_fields, num_elem;
472   ierr = CeedOperatorGetNumElements(op, &num_elem); CeedChkBackend(ierr);
473   ierr = CeedOperatorGetNumQuadraturePoints(op, &Q); CeedChkBackend(ierr);
474   CeedInt num_blks = (num_elem/blk_size) + !!(num_elem%blk_size);
475   CeedQFunction qf;
476   ierr = CeedOperatorGetQFunction(op, &qf); CeedChkBackend(ierr);
477   CeedOperatorField *op_input_fields, *op_output_fields;
478   ierr = CeedOperatorGetFields(op, &num_input_fields, &op_input_fields,
479                                &num_output_fields, &op_output_fields);
480   CeedChkBackend(ierr);
481   CeedQFunctionField *qf_input_fields, *qf_output_fields;
482   ierr = CeedQFunctionGetFields(qf, NULL, &qf_input_fields, NULL,
483                                 &qf_output_fields);
484   CeedChkBackend(ierr);
485   CeedEvalMode eval_mode;
486   CeedScalar *e_data[2*CEED_FIELD_MAX] = {0};
487 
488   // Setup
489   ierr = CeedOperatorSetup_Opt(op); CeedChkBackend(ierr);
490 
491   // Restriction only operator
492   if (impl->is_identity_restr_op) {
493     for (CeedInt b=0; b<num_blks; b++) {
494       ierr = CeedElemRestrictionApplyBlock(impl->blk_restr[0], b, CEED_NOTRANSPOSE,
495                                            in_vec, impl->e_vecs_in[0], request); CeedChkBackend(ierr);
496       ierr = CeedElemRestrictionApplyBlock(impl->blk_restr[1], b, CEED_TRANSPOSE,
497                                            impl->e_vecs_in[0], out_vec, request); CeedChkBackend(ierr);
498     }
499     return CEED_ERROR_SUCCESS;
500   }
501 
502   // Input Evecs and Restriction
503   ierr = CeedOperatorSetupInputs_Opt(num_input_fields, qf_input_fields,
504                                      op_input_fields, in_vec, e_data,
505                                      impl, request); CeedChkBackend(ierr);
506 
507   // Output Lvecs, Evecs, and Qvecs
508   for (CeedInt i=0; i<num_output_fields; i++) {
509     // Set Qvec if needed
510     ierr = CeedQFunctionFieldGetEvalMode(qf_output_fields[i], &eval_mode);
511     CeedChkBackend(ierr);
512     if (eval_mode == CEED_EVAL_NONE) {
513       // Set qvec to single block evec
514       ierr = CeedVectorGetArrayWrite(impl->e_vecs_out[i], CEED_MEM_HOST,
515                                      &e_data[i + num_input_fields]);
516       CeedChkBackend(ierr);
517       ierr = CeedVectorSetArray(impl->q_vecs_out[i], CEED_MEM_HOST,
518                                 CEED_USE_POINTER, e_data[i + num_input_fields]);
519       CeedChkBackend(ierr);
520       ierr = CeedVectorRestoreArray(impl->e_vecs_out[i],
521                                     &e_data[i + num_input_fields]);
522       CeedChkBackend(ierr);
523     }
524   }
525 
526   // Loop through elements
527   for (CeedInt e=0; e<num_blks*blk_size; e+=blk_size) {
528     // Input basis apply
529     ierr = CeedOperatorInputBasis_Opt(e, Q, qf_input_fields, op_input_fields,
530                                       num_input_fields, blk_size, in_vec, false,
531                                       e_data, impl, request); CeedChkBackend(ierr);
532 
533     // Q function
534     if (!impl->is_identity_qf) {
535       ierr = CeedQFunctionApply(qf, Q*blk_size, impl->q_vecs_in, impl->q_vecs_out);
536       CeedChkBackend(ierr);
537     }
538 
539     // Output basis apply and restrict
540     ierr = CeedOperatorOutputBasis_Opt(e, Q, qf_output_fields, op_output_fields,
541                                        blk_size, num_input_fields, num_output_fields,
542                                        op, out_vec, impl, request);
543     CeedChkBackend(ierr);
544   }
545 
546   // Restore input arrays
547   ierr = CeedOperatorRestoreInputs_Opt(num_input_fields, qf_input_fields,
548                                        op_input_fields, e_data, impl);
549   CeedChkBackend(ierr);
550 
551   return CEED_ERROR_SUCCESS;
552 }
553 
554 //------------------------------------------------------------------------------
555 // Core code for linear QFunction assembly
556 //------------------------------------------------------------------------------
557 static inline int CeedOperatorLinearAssembleQFunctionCore_Opt(CeedOperator op,
558     bool build_objects, CeedVector *assembled, CeedElemRestriction *rstr,
559     CeedRequest *request) {
560   int ierr;
561   Ceed ceed;
562   ierr = CeedOperatorGetCeed(op, &ceed); CeedChkBackend(ierr);
563   Ceed_Opt *ceed_impl;
564   ierr = CeedGetData(ceed, &ceed_impl); CeedChkBackend(ierr);
565   const CeedInt blk_size = ceed_impl->blk_size;
566   CeedOperator_Opt *impl;
567   ierr = CeedOperatorGetData(op, &impl); CeedChkBackend(ierr);
568   CeedInt Q, num_input_fields, num_output_fields, num_elem, size;
569   ierr = CeedOperatorGetNumElements(op, &num_elem); CeedChkBackend(ierr);
570   ierr = CeedOperatorGetNumQuadraturePoints(op, &Q); CeedChkBackend(ierr);
571   CeedInt num_blks = (num_elem/blk_size) + !!(num_elem%blk_size);
572   CeedQFunction qf;
573   ierr = CeedOperatorGetQFunction(op, &qf); CeedChkBackend(ierr);
574   CeedOperatorField *op_input_fields, *op_output_fields;
575   ierr = CeedOperatorGetFields(op, &num_input_fields, &op_input_fields,
576                                &num_output_fields, &op_output_fields);
577   CeedChkBackend(ierr);
578   CeedQFunctionField *qf_input_fields, *qf_output_fields;
579   ierr = CeedQFunctionGetFields(qf, NULL, &qf_input_fields, NULL,
580                                 &qf_output_fields);
581   CeedChkBackend(ierr);
582   CeedVector vec, l_vec = impl->qf_l_vec;
583   CeedInt num_active_in = impl->num_active_in,
584           num_active_out = impl->num_active_out;
585   CeedVector *active_in = impl->qf_active_in;
586   CeedScalar *a, *tmp;
587   CeedScalar *e_data[2*CEED_FIELD_MAX] = {0};
588 
589   // Setup
590   ierr = CeedOperatorSetup_Opt(op); CeedChkBackend(ierr);
591 
592   // Check for identity
593   if (impl->is_identity_qf)
594     // LCOV_EXCL_START
595     return CeedError(ceed, CEED_ERROR_BACKEND,
596                      "Assembling identity qfunctions not supported");
597   // LCOV_EXCL_STOP
598 
599   // Input Evecs and Restriction
600   ierr = CeedOperatorSetupInputs_Opt(num_input_fields, qf_input_fields,
601                                      op_input_fields, NULL, e_data,
602                                      impl, request); CeedChkBackend(ierr);
603 
604   // Count number of active input fields
605   if (!num_active_in) {
606     for (CeedInt i=0; i<num_input_fields; i++) {
607       // Get input vector
608       ierr = CeedOperatorFieldGetVector(op_input_fields[i], &vec);
609       CeedChkBackend(ierr);
610       // Check if active input
611       if (vec == CEED_VECTOR_ACTIVE) {
612         ierr = CeedQFunctionFieldGetSize(qf_input_fields[i], &size);
613         CeedChkBackend(ierr);
614         ierr = CeedVectorSetValue(impl->q_vecs_in[i], 0.0); CeedChkBackend(ierr);
615         ierr = CeedVectorGetArray(impl->q_vecs_in[i], CEED_MEM_HOST, &tmp);
616         CeedChkBackend(ierr);
617         ierr = CeedRealloc(num_active_in + size, &active_in); CeedChkBackend(ierr);
618         for (CeedInt field=0; field<size; field++) {
619           ierr = CeedVectorCreate(ceed, Q*blk_size, &active_in[num_active_in+field]);
620           CeedChkBackend(ierr);
621           ierr = CeedVectorSetArray(active_in[num_active_in+field], CEED_MEM_HOST,
622                                     CEED_USE_POINTER, &tmp[field*Q*blk_size]);
623           CeedChkBackend(ierr);
624         }
625         num_active_in += size;
626         ierr = CeedVectorRestoreArray(impl->q_vecs_in[i], &tmp); CeedChkBackend(ierr);
627       }
628     }
629     impl->num_active_in = num_active_in;
630     impl->qf_active_in = active_in;
631   }
632 
633   // Count number of active output fields
634   if (!num_active_out) {
635     for (CeedInt i=0; i<num_output_fields; i++) {
636       // Get output vector
637       ierr = CeedOperatorFieldGetVector(op_output_fields[i], &vec);
638       CeedChkBackend(ierr);
639       // Check if active output
640       if (vec == CEED_VECTOR_ACTIVE) {
641         ierr = CeedQFunctionFieldGetSize(qf_output_fields[i], &size);
642         CeedChkBackend(ierr);
643         num_active_out += size;
644       }
645     }
646     impl->num_active_out = num_active_out;
647   }
648 
649   // Check sizes
650   if (!num_active_in || !num_active_out)
651     // LCOV_EXCL_START
652     return CeedError(ceed, CEED_ERROR_BACKEND,
653                      "Cannot assemble QFunction without active inputs "
654                      "and outputs");
655   // LCOV_EXCL_STOP
656 
657   // Setup l_vec
658   if (!l_vec) {
659     ierr = CeedVectorCreate(ceed, blk_size*Q*num_active_in*num_active_out,
660                             &l_vec); CeedChkBackend(ierr);
661     ierr = CeedVectorSetValue(l_vec, 0.0); CeedChkBackend(ierr);
662     impl->qf_l_vec = l_vec;
663   }
664 
665   // Build objects if needed
666   CeedInt strides[3] = {1, Q, num_active_in *num_active_out*Q};
667   if (build_objects) {
668     // Create output restriction
669     ierr = CeedElemRestrictionCreateStrided(ceed, num_elem, Q,
670                                             num_active_in*num_active_out,
671                                             num_active_in*num_active_out*num_elem*Q,
672                                             strides, rstr); CeedChkBackend(ierr);
673     // Create assembled vector
674     ierr = CeedVectorCreate(ceed, num_elem*Q*num_active_in*num_active_out,
675                             assembled); CeedChkBackend(ierr);
676   }
677 
678   // Output blocked restriction
679   CeedElemRestriction blk_rstr = impl->qf_blk_rstr;
680   if (!blk_rstr) {
681     ierr = CeedElemRestrictionCreateBlockedStrided(ceed, num_elem, Q, blk_size,
682            num_active_in*num_active_out, num_active_in*num_active_out*num_elem*Q,
683            strides, &blk_rstr); CeedChkBackend(ierr);
684     impl->qf_blk_rstr = blk_rstr;
685   }
686 
687   // Loop through elements
688   ierr = CeedVectorSetValue(*assembled, 0.0); CeedChkBackend(ierr);
689   for (CeedInt e=0; e<num_blks*blk_size; e+=blk_size) {
690     ierr = CeedVectorGetArray(l_vec, CEED_MEM_HOST, &a); CeedChkBackend(ierr);
691 
692     // Input basis apply
693     ierr = CeedOperatorInputBasis_Opt(e, Q, qf_input_fields, op_input_fields,
694                                       num_input_fields, blk_size, NULL, true,
695                                       e_data, impl, request); CeedChkBackend(ierr);
696 
697     // Assemble QFunction
698     for (CeedInt in=0; in<num_active_in; in++) {
699       // Set Inputs
700       ierr = CeedVectorSetValue(active_in[in], 1.0); CeedChkBackend(ierr);
701       if (num_active_in > 1) {
702         ierr = CeedVectorSetValue(active_in[(in+num_active_in-1)%num_active_in],
703                                   0.0); CeedChkBackend(ierr);
704       }
705       // Set Outputs
706       for (CeedInt out=0; out<num_output_fields; out++) {
707         // Get output vector
708         ierr = CeedOperatorFieldGetVector(op_output_fields[out], &vec);
709         CeedChkBackend(ierr);
710         // Check if active output
711         if (vec == CEED_VECTOR_ACTIVE) {
712           CeedVectorSetArray(impl->q_vecs_out[out], CEED_MEM_HOST,
713                              CEED_USE_POINTER, a); CeedChkBackend(ierr);
714           ierr = CeedQFunctionFieldGetSize(qf_output_fields[out], &size);
715           CeedChkBackend(ierr);
716           a += size*Q*blk_size; // Advance the pointer by the size of the output
717         }
718       }
719       // Apply QFunction
720       ierr = CeedQFunctionApply(qf, Q*blk_size, impl->q_vecs_in, impl->q_vecs_out);
721       CeedChkBackend(ierr);
722     }
723 
724     // Assemble into assembled vector
725     ierr = CeedVectorRestoreArray(l_vec, &a); CeedChkBackend(ierr);
726     ierr = CeedElemRestrictionApplyBlock(blk_rstr, e/blk_size, CEED_TRANSPOSE,
727                                          l_vec, *assembled, request); CeedChkBackend(ierr);
728   }
729 
730   // Un-set output Qvecs to prevent accidental overwrite of Assembled
731   for (CeedInt out=0; out<num_output_fields; out++) {
732     // Get output vector
733     ierr = CeedOperatorFieldGetVector(op_output_fields[out], &vec);
734     CeedChkBackend(ierr);
735     // Check if active output
736     if (vec == CEED_VECTOR_ACTIVE) {
737       CeedVectorSetArray(impl->q_vecs_out[out], CEED_MEM_HOST, CEED_COPY_VALUES,
738                          NULL); CeedChkBackend(ierr);
739     }
740   }
741 
742   // Restore input arrays
743   ierr = CeedOperatorRestoreInputs_Opt(num_input_fields, qf_input_fields,
744                                        op_input_fields, e_data, impl);
745   CeedChkBackend(ierr);
746 
747   return CEED_ERROR_SUCCESS;
748 }
749 
750 //------------------------------------------------------------------------------
751 // Assemble Linear QFunction
752 //------------------------------------------------------------------------------
753 static int CeedOperatorLinearAssembleQFunction_Opt(CeedOperator op,
754     CeedVector *assembled, CeedElemRestriction *rstr, CeedRequest *request) {
755   return CeedOperatorLinearAssembleQFunctionCore_Opt(op, true, assembled, rstr,
756          request);
757 }
758 
759 //------------------------------------------------------------------------------
760 // Update Assembled Linear QFunction
761 //------------------------------------------------------------------------------
762 static int CeedOperatorLinearAssembleQFunctionUpdate_Opt(CeedOperator op,
763     CeedVector assembled, CeedElemRestriction rstr, CeedRequest *request) {
764   return CeedOperatorLinearAssembleQFunctionCore_Opt(op, false, &assembled,
765          &rstr, request);
766 }
767 
768 //------------------------------------------------------------------------------
769 // Operator Destroy
770 //------------------------------------------------------------------------------
771 static int CeedOperatorDestroy_Opt(CeedOperator op) {
772   int ierr;
773   CeedOperator_Opt *impl;
774   ierr = CeedOperatorGetData(op, &impl); CeedChkBackend(ierr);
775 
776   for (CeedInt i=0; i<impl->num_inputs+impl->num_outputs; i++) {
777     ierr = CeedElemRestrictionDestroy(&impl->blk_restr[i]); CeedChkBackend(ierr);
778     ierr = CeedVectorDestroy(&impl->e_vecs_full[i]); CeedChkBackend(ierr);
779   }
780   ierr = CeedFree(&impl->blk_restr); CeedChkBackend(ierr);
781   ierr = CeedFree(&impl->e_vecs_full); CeedChkBackend(ierr);
782   ierr = CeedFree(&impl->input_states); CeedChkBackend(ierr);
783 
784   for (CeedInt i=0; i<impl->num_inputs; i++) {
785     ierr = CeedVectorDestroy(&impl->e_vecs_in[i]); CeedChkBackend(ierr);
786     ierr = CeedVectorDestroy(&impl->q_vecs_in[i]); CeedChkBackend(ierr);
787   }
788   ierr = CeedFree(&impl->e_vecs_in); CeedChkBackend(ierr);
789   ierr = CeedFree(&impl->q_vecs_in); CeedChkBackend(ierr);
790 
791   for (CeedInt i=0; i<impl->num_outputs; i++) {
792     ierr = CeedVectorDestroy(&impl->e_vecs_out[i]); CeedChkBackend(ierr);
793     ierr = CeedVectorDestroy(&impl->q_vecs_out[i]); CeedChkBackend(ierr);
794   }
795   ierr = CeedFree(&impl->e_vecs_out); CeedChkBackend(ierr);
796   ierr = CeedFree(&impl->q_vecs_out); CeedChkBackend(ierr);
797 
798   // QFunction assembly data
799   for (CeedInt i=0; i<impl->num_active_in; i++) {
800     ierr = CeedVectorDestroy(&impl->qf_active_in[i]); CeedChkBackend(ierr);
801   }
802   ierr = CeedFree(&impl->qf_active_in); CeedChkBackend(ierr);
803   ierr = CeedVectorDestroy(&impl->qf_l_vec); CeedChkBackend(ierr);
804   ierr = CeedElemRestrictionDestroy(&impl->qf_blk_rstr); CeedChkBackend(ierr);
805 
806   ierr = CeedFree(&impl); CeedChkBackend(ierr);
807   return CEED_ERROR_SUCCESS;
808 }
809 
810 //------------------------------------------------------------------------------
811 // Operator Create
812 //------------------------------------------------------------------------------
813 int CeedOperatorCreate_Opt(CeedOperator op) {
814   int ierr;
815   Ceed ceed;
816   ierr = CeedOperatorGetCeed(op, &ceed); CeedChkBackend(ierr);
817   Ceed_Opt *ceed_impl;
818   ierr = CeedGetData(ceed, &ceed_impl); CeedChkBackend(ierr);
819   CeedInt blk_size = ceed_impl->blk_size;
820   CeedOperator_Opt *impl;
821 
822   ierr = CeedCalloc(1, &impl); CeedChkBackend(ierr);
823   ierr = CeedOperatorSetData(op, impl); CeedChkBackend(ierr);
824 
825   if (blk_size != 1 && blk_size != 8)
826     // LCOV_EXCL_START
827     return CeedError(ceed, CEED_ERROR_BACKEND,
828                      "Opt backend cannot use blocksize: %d", blk_size);
829   // LCOV_EXCL_STOP
830 
831   ierr = CeedSetBackendFunction(ceed, "Operator", op, "LinearAssembleQFunction",
832                                 CeedOperatorLinearAssembleQFunction_Opt);
833   CeedChkBackend(ierr);
834   ierr = CeedSetBackendFunction(ceed, "Operator", op,
835                                 "LinearAssembleQFunctionUpdate",
836                                 CeedOperatorLinearAssembleQFunctionUpdate_Opt);
837   CeedChkBackend(ierr);
838   ierr = CeedSetBackendFunction(ceed, "Operator", op, "ApplyAdd",
839                                 CeedOperatorApplyAdd_Opt); CeedChkBackend(ierr);
840   ierr = CeedSetBackendFunction(ceed, "Operator", op, "Destroy",
841                                 CeedOperatorDestroy_Opt); CeedChkBackend(ierr);
842   return CEED_ERROR_SUCCESS;
843 }
844 //------------------------------------------------------------------------------
845