xref: /libCEED/backends/opt/ceed-opt-operator.c (revision 965b642822863d4d52d1102a1aaaa76bfa2e3720)
1 // Copyright (c) 2017-2018, Lawrence Livermore National Security, LLC.
2 // Produced at the Lawrence Livermore National Laboratory. LLNL-CODE-734707.
3 // All Rights reserved. See files LICENSE and NOTICE for details.
4 //
5 // This file is part of CEED, a collection of benchmarks, miniapps, software
6 // libraries and APIs for efficient high-order finite element and spectral
7 // element discretizations for exascale applications. For more information and
8 // source code availability see http://github.com/ceed.
9 //
10 // The CEED research is supported by the Exascale Computing Project 17-SC-20-SC,
11 // a collaborative effort of two U.S. Department of Energy organizations (Office
12 // of Science and the National Nuclear Security Administration) responsible for
13 // the planning and preparation of a capable exascale ecosystem, including
14 // software, applications, hardware, advanced system engineering and early
15 // testbed platforms, in support of the nation's exascale computing imperative.
16 
17 #include <ceed/ceed.h>
18 #include <ceed/backend.h>
19 #include <stdbool.h>
20 #include <stdint.h>
21 #include <string.h>
22 #include "ceed-opt.h"
23 
24 //------------------------------------------------------------------------------
25 // Setup Input/Output Fields
26 //------------------------------------------------------------------------------
27 static int CeedOperatorSetupFields_Opt(CeedQFunction qf, CeedOperator op,
28                                        bool is_input, const CeedInt blk_size,
29                                        CeedElemRestriction *blk_restr,
30                                        CeedVector *e_vecs_full, CeedVector *e_vecs,
31                                        CeedVector *q_vecs, CeedInt start_e,
32                                        CeedInt num_fields, CeedInt Q) {
33   CeedInt dim, ierr, num_comp, size, P;
34   Ceed ceed;
35   ierr = CeedOperatorGetCeed(op, &ceed); CeedChkBackend(ierr);
36   CeedBasis basis;
37   CeedElemRestriction r;
38   CeedOperatorField *op_fields;
39   CeedQFunctionField *qf_fields;
40   if (is_input) {
41     ierr = CeedOperatorGetFields(op, NULL, &op_fields, NULL, NULL);
42     CeedChkBackend(ierr);
43     ierr = CeedQFunctionGetFields(qf, NULL, &qf_fields, NULL, NULL);
44     CeedChkBackend(ierr);
45   } else {
46     ierr = CeedOperatorGetFields(op, NULL, NULL, NULL,&op_fields);
47     CeedChkBackend(ierr);
48     ierr = CeedQFunctionGetFields(qf, NULL, NULL, NULL, &qf_fields);
49     CeedChkBackend(ierr);
50   }
51 
52   // Loop over fields
53   for (CeedInt i=0; i<num_fields; i++) {
54     CeedEvalMode eval_mode;
55     ierr = CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode);
56     CeedChkBackend(ierr);
57 
58     if (eval_mode != CEED_EVAL_WEIGHT) {
59       ierr = CeedOperatorFieldGetElemRestriction(op_fields[i], &r);
60       CeedChkBackend(ierr);
61       Ceed ceed;
62       ierr = CeedElemRestrictionGetCeed(r, &ceed); CeedChkBackend(ierr);
63       CeedInt num_elem, elem_size, l_size, comp_stride;
64       ierr = CeedElemRestrictionGetNumElements(r, &num_elem); CeedChkBackend(ierr);
65       ierr = CeedElemRestrictionGetElementSize(r, &elem_size); CeedChkBackend(ierr);
66       ierr = CeedElemRestrictionGetLVectorSize(r, &l_size); CeedChkBackend(ierr);
67       ierr = CeedElemRestrictionGetNumComponents(r, &num_comp); CeedChkBackend(ierr);
68 
69       bool strided;
70       ierr = CeedElemRestrictionIsStrided(r, &strided); CeedChkBackend(ierr);
71       if (strided) {
72         CeedInt strides[3];
73         ierr = CeedElemRestrictionGetStrides(r, &strides); CeedChkBackend(ierr);
74         ierr = CeedElemRestrictionCreateBlockedStrided(ceed, num_elem, elem_size,
75                blk_size, num_comp, l_size, strides, &blk_restr[i+start_e]);
76         CeedChkBackend(ierr);
77       } else {
78         const CeedInt *offsets = NULL;
79         ierr = CeedElemRestrictionGetOffsets(r, CEED_MEM_HOST, &offsets);
80         CeedChkBackend(ierr);
81         ierr = CeedElemRestrictionGetCompStride(r, &comp_stride); CeedChkBackend(ierr);
82         ierr = CeedElemRestrictionCreateBlocked(ceed, num_elem, elem_size,
83                                                 blk_size, num_comp, comp_stride,
84                                                 l_size, CEED_MEM_HOST,
85                                                 CEED_COPY_VALUES, offsets,
86                                                 &blk_restr[i+start_e]);
87         CeedChkBackend(ierr);
88         ierr = CeedElemRestrictionRestoreOffsets(r, &offsets); CeedChkBackend(ierr);
89       }
90       ierr = CeedElemRestrictionCreateVector(blk_restr[i+start_e], NULL,
91                                              &e_vecs_full[i+start_e]);
92       CeedChkBackend(ierr);
93     }
94 
95     switch(eval_mode) {
96     case CEED_EVAL_NONE:
97       ierr = CeedQFunctionFieldGetSize(qf_fields[i], &size); CeedChkBackend(ierr);
98       ierr = CeedVectorCreate(ceed, Q*size*blk_size, &e_vecs[i]);
99       CeedChkBackend(ierr);
100       ierr = CeedVectorCreate(ceed, Q*size*blk_size, &q_vecs[i]);
101       CeedChkBackend(ierr);
102       break;
103     case CEED_EVAL_INTERP:
104       ierr = CeedQFunctionFieldGetSize(qf_fields[i], &size); CeedChkBackend(ierr);
105       ierr = CeedElemRestrictionGetElementSize(r, &P);
106       CeedChkBackend(ierr);
107       ierr = CeedVectorCreate(ceed, P*size*blk_size, &e_vecs[i]);
108       CeedChkBackend(ierr);
109       ierr = CeedVectorCreate(ceed, Q*size*blk_size, &q_vecs[i]);
110       CeedChkBackend(ierr);
111       break;
112     case CEED_EVAL_GRAD:
113       ierr = CeedOperatorFieldGetBasis(op_fields[i], &basis); CeedChkBackend(ierr);
114       ierr = CeedQFunctionFieldGetSize(qf_fields[i], &size); CeedChkBackend(ierr);
115       ierr = CeedBasisGetDimension(basis, &dim); CeedChkBackend(ierr);
116       ierr = CeedElemRestrictionGetElementSize(r, &P);
117       CeedChkBackend(ierr);
118       ierr = CeedVectorCreate(ceed, P*size/dim*blk_size, &e_vecs[i]);
119       CeedChkBackend(ierr);
120       ierr = CeedVectorCreate(ceed, Q*size*blk_size, &q_vecs[i]);
121       CeedChkBackend(ierr);
122       break;
123     case CEED_EVAL_WEIGHT: // Only on input fields
124       ierr = CeedOperatorFieldGetBasis(op_fields[i], &basis); CeedChkBackend(ierr);
125       ierr = CeedVectorCreate(ceed, Q*blk_size, &q_vecs[i]); CeedChkBackend(ierr);
126       ierr = CeedBasisApply(basis, blk_size, CEED_NOTRANSPOSE,
127                             CEED_EVAL_WEIGHT, CEED_VECTOR_NONE, q_vecs[i]);
128       CeedChkBackend(ierr);
129 
130       break;
131     case CEED_EVAL_DIV:
132       break; // Not implemented
133     case CEED_EVAL_CURL:
134       break; // Not implemented
135     }
136   }
137   return CEED_ERROR_SUCCESS;
138 }
139 
140 //------------------------------------------------------------------------------
141 // Setup Operator
142 //------------------------------------------------------------------------------
143 static int CeedOperatorSetup_Opt(CeedOperator op) {
144   int ierr;
145   bool is_setup_done;
146   ierr = CeedOperatorIsSetupDone(op, &is_setup_done); CeedChkBackend(ierr);
147   if (is_setup_done) return CEED_ERROR_SUCCESS;
148   Ceed ceed;
149   ierr = CeedOperatorGetCeed(op, &ceed); CeedChkBackend(ierr);
150   Ceed_Opt *ceed_impl;
151   ierr = CeedGetData(ceed, &ceed_impl); CeedChkBackend(ierr);
152   const CeedInt blk_size = ceed_impl->blk_size;
153   CeedOperator_Opt *impl;
154   ierr = CeedOperatorGetData(op, &impl); CeedChkBackend(ierr);
155   CeedQFunction qf;
156   ierr = CeedOperatorGetQFunction(op, &qf); CeedChkBackend(ierr);
157   CeedInt Q, num_input_fields, num_output_fields;
158   ierr = CeedOperatorGetNumQuadraturePoints(op, &Q); CeedChkBackend(ierr);
159   ierr = CeedQFunctionIsIdentity(qf, &impl->is_identity_qf); CeedChkBackend(ierr);
160   CeedOperatorField *op_input_fields, *op_output_fields;
161   ierr = CeedOperatorGetFields(op, &num_input_fields, &op_input_fields,
162                                &num_output_fields, &op_output_fields);
163   CeedChkBackend(ierr);
164   CeedQFunctionField *qf_input_fields, *qf_output_fields;
165   ierr = CeedQFunctionGetFields(qf, NULL, &qf_input_fields, NULL,
166                                 &qf_output_fields);
167   CeedChkBackend(ierr);
168 
169   // Allocate
170   ierr = CeedCalloc(num_input_fields + num_output_fields, &impl->blk_restr);
171   CeedChkBackend(ierr);
172   ierr = CeedCalloc(num_input_fields + num_output_fields, &impl->e_vecs_full);
173   CeedChkBackend(ierr);
174 
175   ierr = CeedCalloc(CEED_FIELD_MAX, &impl->input_states); CeedChkBackend(ierr);
176   ierr = CeedCalloc(CEED_FIELD_MAX, &impl->e_vecs_in); CeedChkBackend(ierr);
177   ierr = CeedCalloc(CEED_FIELD_MAX, &impl->e_vecs_out); CeedChkBackend(ierr);
178   ierr = CeedCalloc(CEED_FIELD_MAX, &impl->q_vecs_in); CeedChkBackend(ierr);
179   ierr = CeedCalloc(CEED_FIELD_MAX, &impl->q_vecs_out); CeedChkBackend(ierr);
180 
181   impl->num_inputs = num_input_fields;
182   impl->num_outputs = num_output_fields;
183 
184   // Set up infield and outfield pointer arrays
185   // Infields
186   ierr = CeedOperatorSetupFields_Opt(qf, op, true, blk_size, impl->blk_restr,
187                                      impl->e_vecs_full, impl->e_vecs_in,
188                                      impl->q_vecs_in, 0, num_input_fields, Q);
189   CeedChkBackend(ierr);
190   // Outfields
191   ierr = CeedOperatorSetupFields_Opt(qf, op, false, blk_size, impl->blk_restr,
192                                      impl->e_vecs_full, impl->e_vecs_out,
193                                      impl->q_vecs_out, num_input_fields,
194                                      num_output_fields, Q);
195   CeedChkBackend(ierr);
196 
197   // Identity QFunctions
198   if (impl->is_identity_qf) {
199     CeedEvalMode in_mode, out_mode;
200     CeedQFunctionField *in_fields, *out_fields;
201     ierr = CeedQFunctionGetFields(qf, NULL, &in_fields, NULL, &out_fields);
202     CeedChkBackend(ierr);
203     ierr = CeedQFunctionFieldGetEvalMode(in_fields[0], &in_mode);
204     CeedChkBackend(ierr);
205     ierr = CeedQFunctionFieldGetEvalMode(out_fields[0], &out_mode);
206     CeedChkBackend(ierr);
207 
208     if (in_mode == CEED_EVAL_NONE && out_mode == CEED_EVAL_NONE) {
209       impl->is_identity_restr_op = true;
210     } else {
211       ierr = CeedVectorDestroy(&impl->q_vecs_out[0]); CeedChkBackend(ierr);
212       impl->q_vecs_out[0] = impl->q_vecs_in[0];
213       ierr = CeedVectorAddReference(impl->q_vecs_in[0]); CeedChkBackend(ierr);
214     }
215   }
216 
217   ierr = CeedOperatorSetSetupDone(op); CeedChkBackend(ierr);
218 
219   return CEED_ERROR_SUCCESS;
220 }
221 
222 //------------------------------------------------------------------------------
223 // Setup Input Fields
224 //------------------------------------------------------------------------------
225 static inline int CeedOperatorSetupInputs_Opt(CeedInt num_input_fields,
226     CeedQFunctionField *qf_input_fields, CeedOperatorField *op_input_fields,
227     CeedVector in_vec, CeedScalar *e_data[2*CEED_FIELD_MAX], CeedOperator_Opt *impl,
228     CeedRequest *request) {
229   CeedInt ierr;
230   CeedEvalMode eval_mode;
231   CeedVector vec;
232   uint64_t state;
233 
234   for (CeedInt i=0; i<num_input_fields; i++) {
235     ierr = CeedQFunctionFieldGetEvalMode(qf_input_fields[i], &eval_mode);
236     CeedChkBackend(ierr);
237     if (eval_mode == CEED_EVAL_WEIGHT) { // Skip
238     } else {
239       // Get input vector
240       ierr = CeedOperatorFieldGetVector(op_input_fields[i], &vec);
241       CeedChkBackend(ierr);
242       if (vec != CEED_VECTOR_ACTIVE) {
243         // Restrict
244         ierr = CeedVectorGetState(vec, &state); CeedChkBackend(ierr);
245         if (state != impl->input_states[i]) {
246           ierr = CeedElemRestrictionApply(impl->blk_restr[i], CEED_NOTRANSPOSE,
247                                           vec, impl->e_vecs_full[i], request);
248           CeedChkBackend(ierr);
249           impl->input_states[i] = state;
250         }
251       } else {
252         // Set Qvec for CEED_EVAL_NONE
253         if (eval_mode == CEED_EVAL_NONE) {
254           ierr = CeedVectorGetArray(impl->e_vecs_in[i], CEED_MEM_HOST,
255                                     &e_data[i]); CeedChkBackend(ierr);
256           ierr = CeedVectorSetArray(impl->q_vecs_in[i], CEED_MEM_HOST,
257                                     CEED_USE_POINTER, e_data[i]); CeedChkBackend(ierr);
258           ierr = CeedVectorRestoreArray(impl->e_vecs_in[i],
259                                         &e_data[i]); CeedChkBackend(ierr);
260         }
261       }
262       // Get evec
263       ierr = CeedVectorGetArrayRead(impl->e_vecs_full[i], CEED_MEM_HOST,
264                                     (const CeedScalar **) &e_data[i]);
265       CeedChkBackend(ierr);
266     }
267   }
268   return CEED_ERROR_SUCCESS;
269 }
270 
271 //------------------------------------------------------------------------------
272 // Input Basis Action
273 //------------------------------------------------------------------------------
274 static inline int CeedOperatorInputBasis_Opt(CeedInt e, CeedInt Q,
275     CeedQFunctionField *qf_input_fields, CeedOperatorField *op_input_fields,
276     CeedInt num_input_fields, CeedInt blk_size, CeedVector in_vec, bool skip_active,
277     CeedScalar *e_data[2*CEED_FIELD_MAX], CeedOperator_Opt *impl,
278     CeedRequest *request) {
279   CeedInt ierr;
280   CeedInt dim, elem_size, size;
281   CeedElemRestriction elem_restr;
282   CeedEvalMode eval_mode;
283   CeedBasis basis;
284   CeedVector vec;
285 
286   for (CeedInt i=0; i<num_input_fields; i++) {
287     ierr = CeedOperatorFieldGetVector(op_input_fields[i], &vec);
288     CeedChkBackend(ierr);
289     // Skip active input
290     if (skip_active) {
291       if (vec == CEED_VECTOR_ACTIVE)
292         continue;
293     }
294 
295     CeedInt active_in = 0;
296     // Get elem_size, eval_mode, size
297     ierr = CeedOperatorFieldGetElemRestriction(op_input_fields[i], &elem_restr);
298     CeedChkBackend(ierr);
299     ierr = CeedElemRestrictionGetElementSize(elem_restr, &elem_size);
300     CeedChkBackend(ierr);
301     ierr = CeedQFunctionFieldGetEvalMode(qf_input_fields[i], &eval_mode);
302     CeedChkBackend(ierr);
303     ierr = CeedQFunctionFieldGetSize(qf_input_fields[i], &size);
304     CeedChkBackend(ierr);
305     // Restrict block active input
306     if (vec == CEED_VECTOR_ACTIVE) {
307       ierr = CeedElemRestrictionApplyBlock(impl->blk_restr[i], e/blk_size,
308                                            CEED_NOTRANSPOSE, in_vec,
309                                            impl->e_vecs_in[i], request);
310       CeedChkBackend(ierr);
311       active_in = 1;
312     }
313     // Basis action
314     switch(eval_mode) {
315     case CEED_EVAL_NONE:
316       if (!active_in) {
317         ierr = CeedVectorSetArray(impl->q_vecs_in[i], CEED_MEM_HOST,
318                                   CEED_USE_POINTER, &e_data[i][e*Q*size]);
319         CeedChkBackend(ierr);
320       }
321       break;
322     case CEED_EVAL_INTERP:
323       ierr = CeedOperatorFieldGetBasis(op_input_fields[i], &basis);
324       CeedChkBackend(ierr);
325       if (!active_in) {
326         ierr = CeedVectorSetArray(impl->e_vecs_in[i], CEED_MEM_HOST,
327                                   CEED_USE_POINTER, &e_data[i][e*elem_size*size]);
328         CeedChkBackend(ierr);
329       }
330       ierr = CeedBasisApply(basis, blk_size, CEED_NOTRANSPOSE,
331                             CEED_EVAL_INTERP, impl->e_vecs_in[i],
332                             impl->q_vecs_in[i]); CeedChkBackend(ierr);
333       break;
334     case CEED_EVAL_GRAD:
335       ierr = CeedOperatorFieldGetBasis(op_input_fields[i], &basis);
336       CeedChkBackend(ierr);
337       if (!active_in) {
338         ierr = CeedBasisGetDimension(basis, &dim); CeedChkBackend(ierr);
339         ierr = CeedVectorSetArray(impl->e_vecs_in[i], CEED_MEM_HOST,
340                                   CEED_USE_POINTER,
341                                   &e_data[i][e*elem_size*size/dim]);
342         CeedChkBackend(ierr);
343       }
344       ierr = CeedBasisApply(basis, blk_size, CEED_NOTRANSPOSE,
345                             CEED_EVAL_GRAD, impl->e_vecs_in[i],
346                             impl->q_vecs_in[i]); CeedChkBackend(ierr);
347       break;
348     case CEED_EVAL_WEIGHT:
349       break;  // No action
350     // LCOV_EXCL_START
351     case CEED_EVAL_DIV:
352     case CEED_EVAL_CURL: {
353       ierr = CeedOperatorFieldGetBasis(op_input_fields[i], &basis);
354       CeedChkBackend(ierr);
355       Ceed ceed;
356       ierr = CeedBasisGetCeed(basis, &ceed); CeedChkBackend(ierr);
357       return CeedError(ceed, CEED_ERROR_BACKEND,
358                        "Ceed evaluation mode not implemented");
359       // LCOV_EXCL_STOP
360     }
361     }
362   }
363   return CEED_ERROR_SUCCESS;
364 }
365 
366 //------------------------------------------------------------------------------
367 // Output Basis Action
368 //------------------------------------------------------------------------------
369 static inline int CeedOperatorOutputBasis_Opt(CeedInt e, CeedInt Q,
370     CeedQFunctionField *qf_output_fields, CeedOperatorField *op_output_fields,
371     CeedInt blk_size, CeedInt num_input_fields, CeedInt num_output_fields,
372     CeedOperator op, CeedVector out_vec, CeedOperator_Opt *impl,
373     CeedRequest *request) {
374   CeedInt ierr;
375   CeedElemRestriction elem_restr;
376   CeedEvalMode eval_mode;
377   CeedBasis basis;
378   CeedVector vec;
379 
380   for (CeedInt i=0; i<num_output_fields; i++) {
381     // Get elem_size, eval_mode, size
382     ierr = CeedOperatorFieldGetElemRestriction(op_output_fields[i], &elem_restr);
383     CeedChkBackend(ierr);
384     ierr = CeedQFunctionFieldGetEvalMode(qf_output_fields[i], &eval_mode);
385     CeedChkBackend(ierr);
386     // Basis action
387     switch(eval_mode) {
388     case CEED_EVAL_NONE:
389       break; // No action
390     case CEED_EVAL_INTERP:
391       ierr = CeedOperatorFieldGetBasis(op_output_fields[i], &basis);
392       CeedChkBackend(ierr);
393       ierr = CeedBasisApply(basis, blk_size, CEED_TRANSPOSE,
394                             CEED_EVAL_INTERP, impl->q_vecs_out[i],
395                             impl->e_vecs_out[i]); CeedChkBackend(ierr);
396       break;
397     case CEED_EVAL_GRAD:
398       ierr = CeedOperatorFieldGetBasis(op_output_fields[i], &basis);
399       CeedChkBackend(ierr);
400       ierr = CeedBasisApply(basis, blk_size, CEED_TRANSPOSE,
401                             CEED_EVAL_GRAD, impl->q_vecs_out[i],
402                             impl->e_vecs_out[i]); CeedChkBackend(ierr);
403       break;
404     // LCOV_EXCL_START
405     case CEED_EVAL_WEIGHT: {
406       Ceed ceed;
407       ierr = CeedOperatorGetCeed(op, &ceed); CeedChkBackend(ierr);
408       return CeedError(ceed, CEED_ERROR_BACKEND,
409                        "CEED_EVAL_WEIGHT cannot be an output "
410                        "evaluation mode");
411     }
412     case CEED_EVAL_DIV:
413     case CEED_EVAL_CURL: {
414       Ceed ceed;
415       ierr = CeedOperatorGetCeed(op, &ceed); CeedChkBackend(ierr);
416       return CeedError(ceed, CEED_ERROR_BACKEND,
417                        "Ceed evaluation mode not implemented");
418       // LCOV_EXCL_STOP
419     }
420     }
421     // Restrict output block
422     // Get output vector
423     ierr = CeedOperatorFieldGetVector(op_output_fields[i], &vec);
424     CeedChkBackend(ierr);
425     if (vec == CEED_VECTOR_ACTIVE)
426       vec = out_vec;
427     // Restrict
428     ierr = CeedElemRestrictionApplyBlock(impl->blk_restr[i+impl->num_inputs],
429                                          e/blk_size, CEED_TRANSPOSE,
430                                          impl->e_vecs_out[i], vec, request);
431     CeedChkBackend(ierr);
432   }
433   return CEED_ERROR_SUCCESS;
434 }
435 
436 //------------------------------------------------------------------------------
437 // Restore Input Vectors
438 //------------------------------------------------------------------------------
439 static inline int CeedOperatorRestoreInputs_Opt(CeedInt num_input_fields,
440     CeedQFunctionField *qf_input_fields, CeedOperatorField *op_input_fields,
441     CeedScalar *e_data[2*CEED_FIELD_MAX], CeedOperator_Opt *impl) {
442   CeedInt ierr;
443   CeedEvalMode eval_mode;
444 
445   for (CeedInt i=0; i<num_input_fields; i++) {
446     ierr = CeedQFunctionFieldGetEvalMode(qf_input_fields[i], &eval_mode);
447     CeedChkBackend(ierr);
448     if (eval_mode == CEED_EVAL_WEIGHT) { // Skip
449     } else {
450       ierr = CeedVectorRestoreArrayRead(impl->e_vecs_full[i],
451                                         (const CeedScalar **) &e_data[i]);
452       CeedChkBackend(ierr);
453     }
454   }
455   return CEED_ERROR_SUCCESS;
456 }
457 
458 //------------------------------------------------------------------------------
459 // Operator Apply
460 //------------------------------------------------------------------------------
461 static int CeedOperatorApplyAdd_Opt(CeedOperator op, CeedVector in_vec,
462                                     CeedVector out_vec, CeedRequest *request) {
463   int ierr;
464   Ceed ceed;
465   ierr = CeedOperatorGetCeed(op, &ceed); CeedChkBackend(ierr);
466   Ceed_Opt *ceed_impl;
467   ierr = CeedGetData(ceed, &ceed_impl); CeedChkBackend(ierr);
468   CeedInt blk_size = ceed_impl->blk_size;
469   CeedOperator_Opt *impl;
470   ierr = CeedOperatorGetData(op, &impl); CeedChkBackend(ierr);
471   CeedInt Q, num_input_fields, num_output_fields, num_elem;
472   ierr = CeedOperatorGetNumElements(op, &num_elem); CeedChkBackend(ierr);
473   ierr = CeedOperatorGetNumQuadraturePoints(op, &Q); CeedChkBackend(ierr);
474   CeedInt num_blks = (num_elem/blk_size) + !!(num_elem%blk_size);
475   CeedQFunction qf;
476   ierr = CeedOperatorGetQFunction(op, &qf); CeedChkBackend(ierr);
477   CeedOperatorField *op_input_fields, *op_output_fields;
478   ierr = CeedOperatorGetFields(op, &num_input_fields, &op_input_fields,
479                                &num_output_fields, &op_output_fields);
480   CeedChkBackend(ierr);
481   CeedQFunctionField *qf_input_fields, *qf_output_fields;
482   ierr = CeedQFunctionGetFields(qf, NULL, &qf_input_fields, NULL,
483                                 &qf_output_fields);
484   CeedChkBackend(ierr);
485   CeedEvalMode eval_mode;
486   CeedScalar *e_data[2*CEED_FIELD_MAX] = {0};
487 
488   // Setup
489   ierr = CeedOperatorSetup_Opt(op); CeedChkBackend(ierr);
490 
491   // Restriction only operator
492   if (impl->is_identity_restr_op) {
493     for (CeedInt b=0; b<num_blks; b++) {
494       ierr = CeedElemRestrictionApplyBlock(impl->blk_restr[0], b, CEED_NOTRANSPOSE,
495                                            in_vec, impl->e_vecs_in[0], request); CeedChkBackend(ierr);
496       ierr = CeedElemRestrictionApplyBlock(impl->blk_restr[1], b, CEED_TRANSPOSE,
497                                            impl->e_vecs_in[0], out_vec, request); CeedChkBackend(ierr);
498     }
499     return CEED_ERROR_SUCCESS;
500   }
501 
502   // Input Evecs and Restriction
503   ierr = CeedOperatorSetupInputs_Opt(num_input_fields, qf_input_fields,
504                                      op_input_fields, in_vec, e_data,
505                                      impl, request); CeedChkBackend(ierr);
506 
507   // Output Lvecs, Evecs, and Qvecs
508   for (CeedInt i=0; i<num_output_fields; i++) {
509     // Set Qvec if needed
510     ierr = CeedQFunctionFieldGetEvalMode(qf_output_fields[i], &eval_mode);
511     CeedChkBackend(ierr);
512     if (eval_mode == CEED_EVAL_NONE) {
513       // Set qvec to single block evec
514       ierr = CeedVectorGetArray(impl->e_vecs_out[i], CEED_MEM_HOST,
515                                 &e_data[i + num_input_fields]);
516       CeedChkBackend(ierr);
517       ierr = CeedVectorSetArray(impl->q_vecs_out[i], CEED_MEM_HOST,
518                                 CEED_USE_POINTER, e_data[i + num_input_fields]);
519       CeedChkBackend(ierr);
520       ierr = CeedVectorRestoreArray(impl->e_vecs_out[i],
521                                     &e_data[i + num_input_fields]);
522       CeedChkBackend(ierr);
523     }
524   }
525 
526   // Loop through elements
527   for (CeedInt e=0; e<num_blks*blk_size; e+=blk_size) {
528     // Input basis apply
529     ierr = CeedOperatorInputBasis_Opt(e, Q, qf_input_fields, op_input_fields,
530                                       num_input_fields, blk_size, in_vec, false,
531                                       e_data, impl, request); CeedChkBackend(ierr);
532 
533     // Q function
534     if (!impl->is_identity_qf) {
535       ierr = CeedQFunctionApply(qf, Q*blk_size, impl->q_vecs_in, impl->q_vecs_out);
536       CeedChkBackend(ierr);
537     }
538 
539     // Output basis apply and restrict
540     ierr = CeedOperatorOutputBasis_Opt(e, Q, qf_output_fields, op_output_fields,
541                                        blk_size, num_input_fields, num_output_fields,
542                                        op, out_vec, impl, request);
543     CeedChkBackend(ierr);
544   }
545 
546   // Restore input arrays
547   ierr = CeedOperatorRestoreInputs_Opt(num_input_fields, qf_input_fields,
548                                        op_input_fields, e_data, impl);
549   CeedChkBackend(ierr);
550 
551   return CEED_ERROR_SUCCESS;
552 }
553 
554 //------------------------------------------------------------------------------
555 // Core code for linear QFunction assembly
556 //------------------------------------------------------------------------------
557 static inline int CeedOperatorLinearAssembleQFunctionCore_Opt(CeedOperator op,
558     bool build_objects, CeedVector *assembled, CeedElemRestriction *rstr,
559     CeedRequest *request) {
560   int ierr;
561   Ceed ceed;
562   ierr = CeedOperatorGetCeed(op, &ceed); CeedChkBackend(ierr);
563   Ceed_Opt *ceed_impl;
564   ierr = CeedGetData(ceed, &ceed_impl); CeedChkBackend(ierr);
565   const CeedInt blk_size = ceed_impl->blk_size;
566   CeedOperator_Opt *impl;
567   ierr = CeedOperatorGetData(op, &impl); CeedChkBackend(ierr);
568   CeedInt Q, num_input_fields, num_output_fields, num_elem, size;
569   ierr = CeedOperatorGetNumElements(op, &num_elem); CeedChkBackend(ierr);
570   ierr = CeedOperatorGetNumQuadraturePoints(op, &Q); CeedChkBackend(ierr);
571   CeedInt num_blks = (num_elem/blk_size) + !!(num_elem%blk_size);
572   CeedQFunction qf;
573   ierr = CeedOperatorGetQFunction(op, &qf); CeedChkBackend(ierr);
574   CeedOperatorField *op_input_fields, *op_output_fields;
575   ierr = CeedOperatorGetFields(op, &num_input_fields, &op_input_fields,
576                                &num_output_fields, &op_output_fields);
577   CeedChkBackend(ierr);
578   CeedQFunctionField *qf_input_fields, *qf_output_fields;
579   ierr = CeedQFunctionGetFields(qf, NULL, &qf_input_fields, NULL,
580                                 &qf_output_fields);
581   CeedChkBackend(ierr);
582   CeedVector vec, l_vec = impl->qf_l_vec;
583   CeedInt num_active_in = impl->num_active_in,
584           num_active_out = impl->num_active_out;
585   CeedVector *active_in = impl->qf_active_in;
586   CeedScalar *a, *tmp;
587   CeedScalar *e_data[2*CEED_FIELD_MAX] = {0};
588 
589   // Setup
590   ierr = CeedOperatorSetup_Opt(op); CeedChkBackend(ierr);
591 
592   // Check for identity
593   if (impl->is_identity_qf)
594     // LCOV_EXCL_START
595     return CeedError(ceed, CEED_ERROR_BACKEND,
596                      "Assembling identity qfunctions not supported");
597   // LCOV_EXCL_STOP
598 
599   // Input Evecs and Restriction
600   ierr = CeedOperatorSetupInputs_Opt(num_input_fields, qf_input_fields,
601                                      op_input_fields, NULL, e_data,
602                                      impl, request); CeedChkBackend(ierr);
603 
604   // Count number of active input fields
605   if (!num_active_in) {
606     for (CeedInt i=0; i<num_input_fields; i++) {
607       // Get input vector
608       ierr = CeedOperatorFieldGetVector(op_input_fields[i], &vec);
609       CeedChkBackend(ierr);
610       // Check if active input
611       if (vec == CEED_VECTOR_ACTIVE) {
612         ierr = CeedQFunctionFieldGetSize(qf_input_fields[i], &size);
613         CeedChkBackend(ierr);
614         ierr = CeedVectorSetValue(impl->q_vecs_in[i], 0.0); CeedChkBackend(ierr);
615         ierr = CeedVectorGetArray(impl->q_vecs_in[i], CEED_MEM_HOST, &tmp);
616         CeedChkBackend(ierr);
617         ierr = CeedRealloc(num_active_in + size, &active_in); CeedChkBackend(ierr);
618         for (CeedInt field=0; field<size; field++) {
619           ierr = CeedVectorCreate(ceed, Q*blk_size, &active_in[num_active_in+field]);
620           CeedChkBackend(ierr);
621           ierr = CeedVectorSetArray(active_in[num_active_in+field], CEED_MEM_HOST,
622                                     CEED_USE_POINTER, &tmp[field*Q*blk_size]);
623           CeedChkBackend(ierr);
624         }
625         num_active_in += size;
626         ierr = CeedVectorRestoreArray(impl->q_vecs_in[i], &tmp); CeedChkBackend(ierr);
627       }
628     }
629     impl->num_active_in = num_active_in;
630     impl->qf_active_in = active_in;
631   }
632 
633   // Count number of active output fields
634   if (!num_active_out) {
635     for (CeedInt i=0; i<num_output_fields; i++) {
636       // Get output vector
637       ierr = CeedOperatorFieldGetVector(op_output_fields[i], &vec);
638       CeedChkBackend(ierr);
639       // Check if active output
640       if (vec == CEED_VECTOR_ACTIVE) {
641         ierr = CeedQFunctionFieldGetSize(qf_output_fields[i], &size);
642         CeedChkBackend(ierr);
643         num_active_out += size;
644       }
645     }
646     impl->num_active_out = num_active_out;
647   }
648 
649   // Check sizes
650   if (!num_active_in || !num_active_out)
651     // LCOV_EXCL_START
652     return CeedError(ceed, CEED_ERROR_BACKEND,
653                      "Cannot assemble QFunction without active inputs "
654                      "and outputs");
655   // LCOV_EXCL_STOP
656 
657   // Setup l_vec
658   if (!l_vec) {
659     ierr = CeedVectorCreate(ceed, num_blks*blk_size*Q*num_active_in*num_active_out,
660                             &l_vec); CeedChkBackend(ierr);
661     impl->qf_l_vec = l_vec;
662   }
663   ierr = CeedVectorGetArray(l_vec, CEED_MEM_HOST, &a); CeedChkBackend(ierr);
664 
665   // Build objects if needed
666   CeedInt strides[3] = {1, Q, num_active_in *num_active_out*Q};
667   if (build_objects) {
668     // Create output restriction
669     ierr = CeedElemRestrictionCreateStrided(ceed, num_elem, Q,
670                                             num_active_in*num_active_out,
671                                             num_active_in*num_active_out*num_elem*Q,
672                                             strides, rstr); CeedChkBackend(ierr);
673     // Create assembled vector
674     ierr = CeedVectorCreate(ceed, num_elem*Q*num_active_in*num_active_out,
675                             assembled); CeedChkBackend(ierr);
676   }
677 
678   // Loop through elements
679   for (CeedInt e=0; e<num_blks*blk_size; e+=blk_size) {
680     // Input basis apply
681     ierr = CeedOperatorInputBasis_Opt(e, Q, qf_input_fields, op_input_fields,
682                                       num_input_fields, blk_size, NULL, true,
683                                       e_data, impl, request); CeedChkBackend(ierr);
684 
685     // Assemble QFunction
686     for (CeedInt in=0; in<num_active_in; in++) {
687       // Set Inputs
688       ierr = CeedVectorSetValue(active_in[in], 1.0); CeedChkBackend(ierr);
689       if (num_active_in > 1) {
690         ierr = CeedVectorSetValue(active_in[(in+num_active_in-1)%num_active_in],
691                                   0.0); CeedChkBackend(ierr);
692       }
693       // Set Outputs
694       for (CeedInt out=0; out<num_output_fields; out++) {
695         // Get output vector
696         ierr = CeedOperatorFieldGetVector(op_output_fields[out], &vec);
697         CeedChkBackend(ierr);
698         // Check if active output
699         if (vec == CEED_VECTOR_ACTIVE) {
700           CeedVectorSetArray(impl->q_vecs_out[out], CEED_MEM_HOST,
701                              CEED_USE_POINTER, a); CeedChkBackend(ierr);
702           ierr = CeedQFunctionFieldGetSize(qf_output_fields[out], &size);
703           CeedChkBackend(ierr);
704           a += size*Q*blk_size; // Advance the pointer by the size of the output
705         }
706       }
707       // Apply QFunction
708       ierr = CeedQFunctionApply(qf, Q*blk_size, impl->q_vecs_in, impl->q_vecs_out);
709       CeedChkBackend(ierr);
710     }
711   }
712 
713   // Un-set output Qvecs to prevent accidental overwrite of Assembled
714   for (CeedInt out=0; out<num_output_fields; out++) {
715     // Get output vector
716     ierr = CeedOperatorFieldGetVector(op_output_fields[out], &vec);
717     CeedChkBackend(ierr);
718     // Check if active output
719     if (vec == CEED_VECTOR_ACTIVE) {
720       CeedVectorSetArray(impl->q_vecs_out[out], CEED_MEM_HOST, CEED_COPY_VALUES,
721                          NULL); CeedChkBackend(ierr);
722     }
723   }
724 
725   // Restore input arrays
726   ierr = CeedOperatorRestoreInputs_Opt(num_input_fields, qf_input_fields,
727                                        op_input_fields, e_data, impl);
728   CeedChkBackend(ierr);
729 
730   // Output blocked restriction
731   ierr = CeedVectorRestoreArray(l_vec, &a); CeedChkBackend(ierr);
732   ierr = CeedVectorSetValue(*assembled, 0.0); CeedChkBackend(ierr);
733   CeedElemRestriction blk_rstr = impl->qf_blk_rstr;
734   if (!blk_rstr) {
735     ierr = CeedElemRestrictionCreateBlockedStrided(ceed, num_elem, Q, blk_size,
736            num_active_in*num_active_out, num_active_in*num_active_out*num_elem*Q,
737            strides, &blk_rstr); CeedChkBackend(ierr);
738     impl->qf_blk_rstr = blk_rstr;
739   }
740   ierr = CeedElemRestrictionApply(blk_rstr, CEED_TRANSPOSE, l_vec, *assembled,
741                                   request); CeedChkBackend(ierr);
742 
743   return CEED_ERROR_SUCCESS;
744 }
745 
746 //------------------------------------------------------------------------------
747 // Assemble Linear QFunction
748 //------------------------------------------------------------------------------
749 static int CeedOperatorLinearAssembleQFunction_Opt(CeedOperator op,
750     CeedVector *assembled, CeedElemRestriction *rstr, CeedRequest *request) {
751   return CeedOperatorLinearAssembleQFunctionCore_Opt(op, true, assembled, rstr,
752          request);
753 }
754 
755 //------------------------------------------------------------------------------
756 // Update Assembled Linear QFunction
757 //------------------------------------------------------------------------------
758 static int CeedOperatorLinearAssembleQFunctionUpdate_Opt(CeedOperator op,
759     CeedVector assembled, CeedElemRestriction rstr, CeedRequest *request) {
760   return CeedOperatorLinearAssembleQFunctionCore_Opt(op, false, &assembled,
761          &rstr, request);
762 }
763 
764 //------------------------------------------------------------------------------
765 // Operator Destroy
766 //------------------------------------------------------------------------------
767 static int CeedOperatorDestroy_Opt(CeedOperator op) {
768   int ierr;
769   CeedOperator_Opt *impl;
770   ierr = CeedOperatorGetData(op, &impl); CeedChkBackend(ierr);
771 
772   for (CeedInt i=0; i<impl->num_inputs+impl->num_outputs; i++) {
773     ierr = CeedElemRestrictionDestroy(&impl->blk_restr[i]); CeedChkBackend(ierr);
774     ierr = CeedVectorDestroy(&impl->e_vecs_full[i]); CeedChkBackend(ierr);
775   }
776   ierr = CeedFree(&impl->blk_restr); CeedChkBackend(ierr);
777   ierr = CeedFree(&impl->e_vecs_full); CeedChkBackend(ierr);
778   ierr = CeedFree(&impl->input_states); CeedChkBackend(ierr);
779 
780   for (CeedInt i=0; i<impl->num_inputs; i++) {
781     ierr = CeedVectorDestroy(&impl->e_vecs_in[i]); CeedChkBackend(ierr);
782     ierr = CeedVectorDestroy(&impl->q_vecs_in[i]); CeedChkBackend(ierr);
783   }
784   ierr = CeedFree(&impl->e_vecs_in); CeedChkBackend(ierr);
785   ierr = CeedFree(&impl->q_vecs_in); CeedChkBackend(ierr);
786 
787   for (CeedInt i=0; i<impl->num_outputs; i++) {
788     ierr = CeedVectorDestroy(&impl->e_vecs_out[i]); CeedChkBackend(ierr);
789     ierr = CeedVectorDestroy(&impl->q_vecs_out[i]); CeedChkBackend(ierr);
790   }
791   ierr = CeedFree(&impl->e_vecs_out); CeedChkBackend(ierr);
792   ierr = CeedFree(&impl->q_vecs_out); CeedChkBackend(ierr);
793 
794   // QFunction assembly data
795   for (CeedInt i=0; i<impl->num_active_in; i++) {
796     ierr = CeedVectorDestroy(&impl->qf_active_in[i]); CeedChkBackend(ierr);
797   }
798   ierr = CeedFree(&impl->qf_active_in); CeedChkBackend(ierr);
799   ierr = CeedVectorDestroy(&impl->qf_l_vec); CeedChkBackend(ierr);
800   ierr = CeedElemRestrictionDestroy(&impl->qf_blk_rstr); CeedChkBackend(ierr);
801 
802   ierr = CeedFree(&impl); CeedChkBackend(ierr);
803   return CEED_ERROR_SUCCESS;
804 }
805 
806 //------------------------------------------------------------------------------
807 // Operator Create
808 //------------------------------------------------------------------------------
809 int CeedOperatorCreate_Opt(CeedOperator op) {
810   int ierr;
811   Ceed ceed;
812   ierr = CeedOperatorGetCeed(op, &ceed); CeedChkBackend(ierr);
813   Ceed_Opt *ceed_impl;
814   ierr = CeedGetData(ceed, &ceed_impl); CeedChkBackend(ierr);
815   CeedInt blk_size = ceed_impl->blk_size;
816   CeedOperator_Opt *impl;
817 
818   ierr = CeedCalloc(1, &impl); CeedChkBackend(ierr);
819   ierr = CeedOperatorSetData(op, impl); CeedChkBackend(ierr);
820 
821   if (blk_size != 1 && blk_size != 8)
822     // LCOV_EXCL_START
823     return CeedError(ceed, CEED_ERROR_BACKEND,
824                      "Opt backend cannot use blocksize: %d", blk_size);
825   // LCOV_EXCL_STOP
826 
827   ierr = CeedSetBackendFunction(ceed, "Operator", op, "LinearAssembleQFunction",
828                                 CeedOperatorLinearAssembleQFunction_Opt);
829   CeedChkBackend(ierr);
830   ierr = CeedSetBackendFunction(ceed, "Operator", op,
831                                 "LinearAssembleQFunctionUpdate",
832                                 CeedOperatorLinearAssembleQFunctionUpdate_Opt);
833   CeedChkBackend(ierr);
834   ierr = CeedSetBackendFunction(ceed, "Operator", op, "ApplyAdd",
835                                 CeedOperatorApplyAdd_Opt); CeedChkBackend(ierr);
836   ierr = CeedSetBackendFunction(ceed, "Operator", op, "Destroy",
837                                 CeedOperatorDestroy_Opt); CeedChkBackend(ierr);
838   return CEED_ERROR_SUCCESS;
839 }
840 //------------------------------------------------------------------------------
841