xref: /libCEED/backends/opt/ceed-opt-operator.c (revision d264344313546611a0e282df1f09990e3ff407ce)
1 // Copyright (c) 2017-2018, Lawrence Livermore National Security, LLC.
2 // Produced at the Lawrence Livermore National Laboratory. LLNL-CODE-734707.
3 // All Rights reserved. See files LICENSE and NOTICE for details.
4 //
5 // This file is part of CEED, a collection of benchmarks, miniapps, software
6 // libraries and APIs for efficient high-order finite element and spectral
7 // element discretizations for exascale applications. For more information and
8 // source code availability see http://github.com/ceed.
9 //
10 // The CEED research is supported by the Exascale Computing Project 17-SC-20-SC,
11 // a collaborative effort of two U.S. Department of Energy organizations (Office
12 // of Science and the National Nuclear Security Administration) responsible for
13 // the planning and preparation of a capable exascale ecosystem, including
14 // software, applications, hardware, advanced system engineering and early
15 // testbed platforms, in support of the nation's exascale computing imperative.
16 
17 #include <ceed/ceed.h>
18 #include <ceed/backend.h>
19 #include <stdbool.h>
20 #include <stdint.h>
21 #include <string.h>
22 #include "ceed-opt.h"
23 
24 //------------------------------------------------------------------------------
25 // Setup Input/Output Fields
26 //------------------------------------------------------------------------------
27 static int CeedOperatorSetupFields_Opt(CeedQFunction qf, CeedOperator op,
28                                        bool is_input, const CeedInt blk_size,
29                                        CeedElemRestriction *blk_restr,
30                                        CeedVector *e_vecs_full, CeedVector *e_vecs,
31                                        CeedVector *q_vecs, CeedInt start_e,
32                                        CeedInt num_fields, CeedInt Q) {
33   CeedInt ierr, num_comp, size, P;
34   CeedSize e_size, q_size;
35   Ceed ceed;
36   ierr = CeedOperatorGetCeed(op, &ceed); CeedChkBackend(ierr);
37   CeedBasis basis;
38   CeedElemRestriction r;
39   CeedOperatorField *op_fields;
40   CeedQFunctionField *qf_fields;
41   if (is_input) {
42     ierr = CeedOperatorGetFields(op, NULL, &op_fields, NULL, NULL);
43     CeedChkBackend(ierr);
44     ierr = CeedQFunctionGetFields(qf, NULL, &qf_fields, NULL, NULL);
45     CeedChkBackend(ierr);
46   } else {
47     ierr = CeedOperatorGetFields(op, NULL, NULL, NULL,&op_fields);
48     CeedChkBackend(ierr);
49     ierr = CeedQFunctionGetFields(qf, NULL, NULL, NULL, &qf_fields);
50     CeedChkBackend(ierr);
51   }
52 
53   // Loop over fields
54   for (CeedInt i=0; i<num_fields; i++) {
55     CeedEvalMode eval_mode;
56     ierr = CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode);
57     CeedChkBackend(ierr);
58 
59     if (eval_mode != CEED_EVAL_WEIGHT) {
60       ierr = CeedOperatorFieldGetElemRestriction(op_fields[i], &r);
61       CeedChkBackend(ierr);
62       Ceed ceed;
63       ierr = CeedElemRestrictionGetCeed(r, &ceed); CeedChkBackend(ierr);
64       CeedSize l_size;
65       CeedInt num_elem, elem_size, comp_stride;
66       ierr = CeedElemRestrictionGetNumElements(r, &num_elem); CeedChkBackend(ierr);
67       ierr = CeedElemRestrictionGetElementSize(r, &elem_size); CeedChkBackend(ierr);
68       ierr = CeedElemRestrictionGetLVectorSize(r, &l_size); CeedChkBackend(ierr);
69       ierr = CeedElemRestrictionGetNumComponents(r, &num_comp); CeedChkBackend(ierr);
70 
71       bool strided;
72       ierr = CeedElemRestrictionIsStrided(r, &strided); CeedChkBackend(ierr);
73       if (strided) {
74         CeedInt strides[3];
75         ierr = CeedElemRestrictionGetStrides(r, &strides); CeedChkBackend(ierr);
76         ierr = CeedElemRestrictionCreateBlockedStrided(ceed, num_elem, elem_size,
77                blk_size, num_comp, l_size, strides, &blk_restr[i+start_e]);
78         CeedChkBackend(ierr);
79       } else {
80         const CeedInt *offsets = NULL;
81         ierr = CeedElemRestrictionGetOffsets(r, CEED_MEM_HOST, &offsets);
82         CeedChkBackend(ierr);
83         ierr = CeedElemRestrictionGetCompStride(r, &comp_stride); CeedChkBackend(ierr);
84         ierr = CeedElemRestrictionCreateBlocked(ceed, num_elem, elem_size,
85                                                 blk_size, num_comp, comp_stride,
86                                                 l_size, CEED_MEM_HOST,
87                                                 CEED_COPY_VALUES, offsets,
88                                                 &blk_restr[i+start_e]);
89         CeedChkBackend(ierr);
90         ierr = CeedElemRestrictionRestoreOffsets(r, &offsets); CeedChkBackend(ierr);
91       }
92       ierr = CeedElemRestrictionCreateVector(blk_restr[i+start_e], NULL,
93                                              &e_vecs_full[i+start_e]);
94       CeedChkBackend(ierr);
95     }
96 
97     switch(eval_mode) {
98     case CEED_EVAL_NONE:
99       ierr = CeedQFunctionFieldGetSize(qf_fields[i], &size); CeedChkBackend(ierr);
100       e_size = (CeedSize)Q*size*blk_size;
101       ierr = CeedVectorCreate(ceed, e_size, &e_vecs[i]); CeedChkBackend(ierr);
102       q_size = (CeedSize)Q*size*blk_size;
103       ierr = CeedVectorCreate(ceed, q_size, &q_vecs[i]); CeedChkBackend(ierr);
104       break;
105     case CEED_EVAL_INTERP:
106     case CEED_EVAL_GRAD:
107       ierr = CeedOperatorFieldGetBasis(op_fields[i], &basis); CeedChkBackend(ierr);
108       ierr = CeedQFunctionFieldGetSize(qf_fields[i], &size); CeedChkBackend(ierr);
109       ierr = CeedBasisGetNumNodes(basis, &P); CeedChkBackend(ierr);
110       ierr = CeedBasisGetNumComponents(basis, &num_comp); CeedChkBackend(ierr);
111       e_size = (CeedSize)P*num_comp*blk_size;
112       ierr = CeedVectorCreate(ceed, e_size, &e_vecs[i]); CeedChkBackend(ierr);
113       q_size = (CeedSize)Q*size*blk_size;
114       ierr = CeedVectorCreate(ceed, q_size, &q_vecs[i]); CeedChkBackend(ierr);
115       break;
116     case CEED_EVAL_WEIGHT: // Only on input fields
117       ierr = CeedOperatorFieldGetBasis(op_fields[i], &basis); CeedChkBackend(ierr);
118       q_size = (CeedSize)Q*blk_size;
119       ierr = CeedVectorCreate(ceed, q_size, &q_vecs[i]); CeedChkBackend(ierr);
120       ierr = CeedBasisApply(basis, blk_size, CEED_NOTRANSPOSE,
121                             CEED_EVAL_WEIGHT, CEED_VECTOR_NONE, q_vecs[i]);
122       CeedChkBackend(ierr);
123 
124       break;
125     case CEED_EVAL_DIV:
126       break; // Not implemented
127     case CEED_EVAL_CURL:
128       break; // Not implemented
129     }
130     if (is_input && !!e_vecs[i]) {
131       ierr = CeedVectorSetArray(e_vecs[i], CEED_MEM_HOST,
132                                 CEED_COPY_VALUES, NULL); CeedChkBackend(ierr);
133     }
134   }
135   return CEED_ERROR_SUCCESS;
136 }
137 
138 //------------------------------------------------------------------------------
139 // Setup Operator
140 //------------------------------------------------------------------------------
141 static int CeedOperatorSetup_Opt(CeedOperator op) {
142   int ierr;
143   bool is_setup_done;
144   ierr = CeedOperatorIsSetupDone(op, &is_setup_done); CeedChkBackend(ierr);
145   if (is_setup_done) return CEED_ERROR_SUCCESS;
146   Ceed ceed;
147   ierr = CeedOperatorGetCeed(op, &ceed); CeedChkBackend(ierr);
148   Ceed_Opt *ceed_impl;
149   ierr = CeedGetData(ceed, &ceed_impl); CeedChkBackend(ierr);
150   const CeedInt blk_size = ceed_impl->blk_size;
151   CeedOperator_Opt *impl;
152   ierr = CeedOperatorGetData(op, &impl); CeedChkBackend(ierr);
153   CeedQFunction qf;
154   ierr = CeedOperatorGetQFunction(op, &qf); CeedChkBackend(ierr);
155   CeedInt Q, num_input_fields, num_output_fields;
156   ierr = CeedOperatorGetNumQuadraturePoints(op, &Q); CeedChkBackend(ierr);
157   ierr = CeedQFunctionIsIdentity(qf, &impl->is_identity_qf); CeedChkBackend(ierr);
158   CeedOperatorField *op_input_fields, *op_output_fields;
159   ierr = CeedOperatorGetFields(op, &num_input_fields, &op_input_fields,
160                                &num_output_fields, &op_output_fields);
161   CeedChkBackend(ierr);
162   CeedQFunctionField *qf_input_fields, *qf_output_fields;
163   ierr = CeedQFunctionGetFields(qf, NULL, &qf_input_fields, NULL,
164                                 &qf_output_fields);
165   CeedChkBackend(ierr);
166 
167   // Allocate
168   ierr = CeedCalloc(num_input_fields + num_output_fields, &impl->blk_restr);
169   CeedChkBackend(ierr);
170   ierr = CeedCalloc(num_input_fields + num_output_fields, &impl->e_vecs_full);
171   CeedChkBackend(ierr);
172 
173   ierr = CeedCalloc(CEED_FIELD_MAX, &impl->input_states); CeedChkBackend(ierr);
174   ierr = CeedCalloc(CEED_FIELD_MAX, &impl->e_vecs_in); CeedChkBackend(ierr);
175   ierr = CeedCalloc(CEED_FIELD_MAX, &impl->e_vecs_out); CeedChkBackend(ierr);
176   ierr = CeedCalloc(CEED_FIELD_MAX, &impl->q_vecs_in); CeedChkBackend(ierr);
177   ierr = CeedCalloc(CEED_FIELD_MAX, &impl->q_vecs_out); CeedChkBackend(ierr);
178 
179   impl->num_inputs = num_input_fields;
180   impl->num_outputs = num_output_fields;
181 
182   // Set up infield and outfield pointer arrays
183   // Infields
184   ierr = CeedOperatorSetupFields_Opt(qf, op, true, blk_size, impl->blk_restr,
185                                      impl->e_vecs_full, impl->e_vecs_in,
186                                      impl->q_vecs_in, 0, num_input_fields, Q);
187   CeedChkBackend(ierr);
188   // Outfields
189   ierr = CeedOperatorSetupFields_Opt(qf, op, false, blk_size, impl->blk_restr,
190                                      impl->e_vecs_full, impl->e_vecs_out,
191                                      impl->q_vecs_out, num_input_fields,
192                                      num_output_fields, Q);
193   CeedChkBackend(ierr);
194 
195   // Identity QFunctions
196   if (impl->is_identity_qf) {
197     CeedEvalMode in_mode, out_mode;
198     CeedQFunctionField *in_fields, *out_fields;
199     ierr = CeedQFunctionGetFields(qf, NULL, &in_fields, NULL, &out_fields);
200     CeedChkBackend(ierr);
201     ierr = CeedQFunctionFieldGetEvalMode(in_fields[0], &in_mode);
202     CeedChkBackend(ierr);
203     ierr = CeedQFunctionFieldGetEvalMode(out_fields[0], &out_mode);
204     CeedChkBackend(ierr);
205 
206     if (in_mode == CEED_EVAL_NONE && out_mode == CEED_EVAL_NONE) {
207       impl->is_identity_restr_op = true;
208     } else {
209       ierr = CeedVectorDestroy(&impl->q_vecs_out[0]); CeedChkBackend(ierr);
210       impl->q_vecs_out[0] = impl->q_vecs_in[0];
211       ierr = CeedVectorAddReference(impl->q_vecs_in[0]); CeedChkBackend(ierr);
212     }
213   }
214 
215   ierr = CeedOperatorSetSetupDone(op); CeedChkBackend(ierr);
216 
217   return CEED_ERROR_SUCCESS;
218 }
219 
220 //------------------------------------------------------------------------------
221 // Setup Input Fields
222 //------------------------------------------------------------------------------
223 static inline int CeedOperatorSetupInputs_Opt(CeedInt num_input_fields,
224     CeedQFunctionField *qf_input_fields, CeedOperatorField *op_input_fields,
225     CeedVector in_vec, CeedScalar *e_data[2*CEED_FIELD_MAX], CeedOperator_Opt *impl,
226     CeedRequest *request) {
227   CeedInt ierr;
228   CeedEvalMode eval_mode;
229   CeedVector vec;
230   uint64_t state;
231 
232   for (CeedInt i=0; i<num_input_fields; i++) {
233     ierr = CeedQFunctionFieldGetEvalMode(qf_input_fields[i], &eval_mode);
234     CeedChkBackend(ierr);
235     if (eval_mode == CEED_EVAL_WEIGHT) { // Skip
236     } else {
237       // Get input vector
238       ierr = CeedOperatorFieldGetVector(op_input_fields[i], &vec);
239       CeedChkBackend(ierr);
240       if (vec != CEED_VECTOR_ACTIVE) {
241         // Restrict
242         ierr = CeedVectorGetState(vec, &state); CeedChkBackend(ierr);
243         if (state != impl->input_states[i]) {
244           ierr = CeedElemRestrictionApply(impl->blk_restr[i], CEED_NOTRANSPOSE,
245                                           vec, impl->e_vecs_full[i], request);
246           CeedChkBackend(ierr);
247           impl->input_states[i] = state;
248         }
249         // Get evec
250         ierr = CeedVectorGetArrayRead(impl->e_vecs_full[i], CEED_MEM_HOST,
251                                       (const CeedScalar **) &e_data[i]);
252         CeedChkBackend(ierr);
253       } else {
254         // Set Qvec for CEED_EVAL_NONE
255         if (eval_mode == CEED_EVAL_NONE) {
256           ierr = CeedVectorGetArrayRead(impl->e_vecs_in[i], CEED_MEM_HOST,
257                                         (const CeedScalar **)&e_data[i]);
258           CeedChkBackend(ierr);
259           ierr = CeedVectorSetArray(impl->q_vecs_in[i], CEED_MEM_HOST,
260                                     CEED_USE_POINTER, e_data[i]); CeedChkBackend(ierr);
261           ierr = CeedVectorRestoreArrayRead(impl->e_vecs_in[i],
262                                             (const CeedScalar **)&e_data[i]);
263           CeedChkBackend(ierr);
264         }
265       }
266     }
267   }
268   return CEED_ERROR_SUCCESS;
269 }
270 
271 //------------------------------------------------------------------------------
272 // Input Basis Action
273 //------------------------------------------------------------------------------
274 static inline int CeedOperatorInputBasis_Opt(CeedInt e, CeedInt Q,
275     CeedQFunctionField *qf_input_fields, CeedOperatorField *op_input_fields,
276     CeedInt num_input_fields, CeedInt blk_size, CeedVector in_vec, bool skip_active,
277     CeedScalar *e_data[2*CEED_FIELD_MAX], CeedOperator_Opt *impl,
278     CeedRequest *request) {
279   CeedInt ierr;
280   CeedInt dim, elem_size, size;
281   CeedElemRestriction elem_restr;
282   CeedEvalMode eval_mode;
283   CeedBasis basis;
284   CeedVector vec;
285 
286   for (CeedInt i=0; i<num_input_fields; i++) {
287     ierr = CeedOperatorFieldGetVector(op_input_fields[i], &vec);
288     CeedChkBackend(ierr);
289     // Skip active input
290     if (skip_active) {
291       if (vec == CEED_VECTOR_ACTIVE)
292         continue;
293     }
294 
295     CeedInt active_in = 0;
296     // Get elem_size, eval_mode, size
297     ierr = CeedOperatorFieldGetElemRestriction(op_input_fields[i], &elem_restr);
298     CeedChkBackend(ierr);
299     ierr = CeedElemRestrictionGetElementSize(elem_restr, &elem_size);
300     CeedChkBackend(ierr);
301     ierr = CeedQFunctionFieldGetEvalMode(qf_input_fields[i], &eval_mode);
302     CeedChkBackend(ierr);
303     ierr = CeedQFunctionFieldGetSize(qf_input_fields[i], &size);
304     CeedChkBackend(ierr);
305     // Restrict block active input
306     if (vec == CEED_VECTOR_ACTIVE) {
307       ierr = CeedElemRestrictionApplyBlock(impl->blk_restr[i], e/blk_size,
308                                            CEED_NOTRANSPOSE, in_vec,
309                                            impl->e_vecs_in[i], request);
310       CeedChkBackend(ierr);
311       active_in = 1;
312     }
313     // Basis action
314     switch(eval_mode) {
315     case CEED_EVAL_NONE:
316       if (!active_in) {
317         ierr = CeedVectorSetArray(impl->q_vecs_in[i], CEED_MEM_HOST,
318                                   CEED_USE_POINTER, &e_data[i][e*Q*size]);
319         CeedChkBackend(ierr);
320       }
321       break;
322     case CEED_EVAL_INTERP:
323       ierr = CeedOperatorFieldGetBasis(op_input_fields[i], &basis);
324       CeedChkBackend(ierr);
325       if (!active_in) {
326         ierr = CeedVectorSetArray(impl->e_vecs_in[i], CEED_MEM_HOST,
327                                   CEED_USE_POINTER, &e_data[i][e*elem_size*size]);
328         CeedChkBackend(ierr);
329       }
330       ierr = CeedBasisApply(basis, blk_size, CEED_NOTRANSPOSE,
331                             CEED_EVAL_INTERP, impl->e_vecs_in[i],
332                             impl->q_vecs_in[i]); CeedChkBackend(ierr);
333       break;
334     case CEED_EVAL_GRAD:
335       ierr = CeedOperatorFieldGetBasis(op_input_fields[i], &basis);
336       CeedChkBackend(ierr);
337       if (!active_in) {
338         ierr = CeedBasisGetDimension(basis, &dim); CeedChkBackend(ierr);
339         ierr = CeedVectorSetArray(impl->e_vecs_in[i], CEED_MEM_HOST,
340                                   CEED_USE_POINTER,
341                                   &e_data[i][e*elem_size*size/dim]);
342         CeedChkBackend(ierr);
343       }
344       ierr = CeedBasisApply(basis, blk_size, CEED_NOTRANSPOSE,
345                             CEED_EVAL_GRAD, impl->e_vecs_in[i],
346                             impl->q_vecs_in[i]); CeedChkBackend(ierr);
347       break;
348     case CEED_EVAL_WEIGHT:
349       break;  // No action
350     // LCOV_EXCL_START
351     case CEED_EVAL_DIV:
352     case CEED_EVAL_CURL: {
353       ierr = CeedOperatorFieldGetBasis(op_input_fields[i], &basis);
354       CeedChkBackend(ierr);
355       Ceed ceed;
356       ierr = CeedBasisGetCeed(basis, &ceed); CeedChkBackend(ierr);
357       return CeedError(ceed, CEED_ERROR_BACKEND,
358                        "Ceed evaluation mode not implemented");
359       // LCOV_EXCL_STOP
360     }
361     }
362   }
363   return CEED_ERROR_SUCCESS;
364 }
365 
366 //------------------------------------------------------------------------------
367 // Output Basis Action
368 //------------------------------------------------------------------------------
369 static inline int CeedOperatorOutputBasis_Opt(CeedInt e, CeedInt Q,
370     CeedQFunctionField *qf_output_fields, CeedOperatorField *op_output_fields,
371     CeedInt blk_size, CeedInt num_input_fields, CeedInt num_output_fields,
372     CeedOperator op, CeedVector out_vec, CeedOperator_Opt *impl,
373     CeedRequest *request) {
374   CeedInt ierr;
375   CeedElemRestriction elem_restr;
376   CeedEvalMode eval_mode;
377   CeedBasis basis;
378   CeedVector vec;
379 
380   for (CeedInt i=0; i<num_output_fields; i++) {
381     // Get elem_size, eval_mode, size
382     ierr = CeedOperatorFieldGetElemRestriction(op_output_fields[i], &elem_restr);
383     CeedChkBackend(ierr);
384     ierr = CeedQFunctionFieldGetEvalMode(qf_output_fields[i], &eval_mode);
385     CeedChkBackend(ierr);
386     // Basis action
387     switch(eval_mode) {
388     case CEED_EVAL_NONE:
389       break; // No action
390     case CEED_EVAL_INTERP:
391       ierr = CeedOperatorFieldGetBasis(op_output_fields[i], &basis);
392       CeedChkBackend(ierr);
393       ierr = CeedBasisApply(basis, blk_size, CEED_TRANSPOSE,
394                             CEED_EVAL_INTERP, impl->q_vecs_out[i],
395                             impl->e_vecs_out[i]); CeedChkBackend(ierr);
396       break;
397     case CEED_EVAL_GRAD:
398       ierr = CeedOperatorFieldGetBasis(op_output_fields[i], &basis);
399       CeedChkBackend(ierr);
400       ierr = CeedBasisApply(basis, blk_size, CEED_TRANSPOSE,
401                             CEED_EVAL_GRAD, impl->q_vecs_out[i],
402                             impl->e_vecs_out[i]); CeedChkBackend(ierr);
403       break;
404     // LCOV_EXCL_START
405     case CEED_EVAL_WEIGHT: {
406       Ceed ceed;
407       ierr = CeedOperatorGetCeed(op, &ceed); CeedChkBackend(ierr);
408       return CeedError(ceed, CEED_ERROR_BACKEND,
409                        "CEED_EVAL_WEIGHT cannot be an output "
410                        "evaluation mode");
411     }
412     case CEED_EVAL_DIV:
413     case CEED_EVAL_CURL: {
414       Ceed ceed;
415       ierr = CeedOperatorGetCeed(op, &ceed); CeedChkBackend(ierr);
416       return CeedError(ceed, CEED_ERROR_BACKEND,
417                        "Ceed evaluation mode not implemented");
418       // LCOV_EXCL_STOP
419     }
420     }
421     // Restrict output block
422     // Get output vector
423     ierr = CeedOperatorFieldGetVector(op_output_fields[i], &vec);
424     CeedChkBackend(ierr);
425     if (vec == CEED_VECTOR_ACTIVE)
426       vec = out_vec;
427     // Restrict
428     ierr = CeedElemRestrictionApplyBlock(impl->blk_restr[i+impl->num_inputs],
429                                          e/blk_size, CEED_TRANSPOSE,
430                                          impl->e_vecs_out[i], vec, request);
431     CeedChkBackend(ierr);
432   }
433   return CEED_ERROR_SUCCESS;
434 }
435 
436 //------------------------------------------------------------------------------
437 // Restore Input Vectors
438 //------------------------------------------------------------------------------
439 static inline int CeedOperatorRestoreInputs_Opt(CeedInt num_input_fields,
440     CeedQFunctionField *qf_input_fields, CeedOperatorField *op_input_fields,
441     CeedScalar *e_data[2*CEED_FIELD_MAX], CeedOperator_Opt *impl) {
442   CeedInt ierr;
443 
444   for (CeedInt i=0; i<num_input_fields; i++) {
445     CeedEvalMode eval_mode;
446     CeedVector vec;
447     ierr = CeedQFunctionFieldGetEvalMode(qf_input_fields[i], &eval_mode);
448     CeedChkBackend(ierr);
449     ierr = CeedOperatorFieldGetVector(op_input_fields[i], &vec);
450     CeedChkBackend(ierr);
451     if (eval_mode != CEED_EVAL_WEIGHT && vec != CEED_VECTOR_ACTIVE) {
452       ierr = CeedVectorRestoreArrayRead(impl->e_vecs_full[i],
453                                         (const CeedScalar **) &e_data[i]);
454       CeedChkBackend(ierr);
455     }
456   }
457   return CEED_ERROR_SUCCESS;
458 }
459 
460 //------------------------------------------------------------------------------
461 // Operator Apply
462 //------------------------------------------------------------------------------
463 static int CeedOperatorApplyAdd_Opt(CeedOperator op, CeedVector in_vec,
464                                     CeedVector out_vec, CeedRequest *request) {
465   int ierr;
466   Ceed ceed;
467   ierr = CeedOperatorGetCeed(op, &ceed); CeedChkBackend(ierr);
468   Ceed_Opt *ceed_impl;
469   ierr = CeedGetData(ceed, &ceed_impl); CeedChkBackend(ierr);
470   CeedInt blk_size = ceed_impl->blk_size;
471   CeedOperator_Opt *impl;
472   ierr = CeedOperatorGetData(op, &impl); CeedChkBackend(ierr);
473   CeedInt Q, num_input_fields, num_output_fields, num_elem;
474   ierr = CeedOperatorGetNumElements(op, &num_elem); CeedChkBackend(ierr);
475   ierr = CeedOperatorGetNumQuadraturePoints(op, &Q); CeedChkBackend(ierr);
476   CeedInt num_blks = (num_elem/blk_size) + !!(num_elem%blk_size);
477   CeedQFunction qf;
478   ierr = CeedOperatorGetQFunction(op, &qf); CeedChkBackend(ierr);
479   CeedOperatorField *op_input_fields, *op_output_fields;
480   ierr = CeedOperatorGetFields(op, &num_input_fields, &op_input_fields,
481                                &num_output_fields, &op_output_fields);
482   CeedChkBackend(ierr);
483   CeedQFunctionField *qf_input_fields, *qf_output_fields;
484   ierr = CeedQFunctionGetFields(qf, NULL, &qf_input_fields, NULL,
485                                 &qf_output_fields);
486   CeedChkBackend(ierr);
487   CeedEvalMode eval_mode;
488   CeedScalar *e_data[2*CEED_FIELD_MAX] = {0};
489 
490   // Setup
491   ierr = CeedOperatorSetup_Opt(op); CeedChkBackend(ierr);
492 
493   // Restriction only operator
494   if (impl->is_identity_restr_op) {
495     for (CeedInt b=0; b<num_blks; b++) {
496       ierr = CeedElemRestrictionApplyBlock(impl->blk_restr[0], b, CEED_NOTRANSPOSE,
497                                            in_vec, impl->e_vecs_in[0], request); CeedChkBackend(ierr);
498       ierr = CeedElemRestrictionApplyBlock(impl->blk_restr[1], b, CEED_TRANSPOSE,
499                                            impl->e_vecs_in[0], out_vec, request); CeedChkBackend(ierr);
500     }
501     return CEED_ERROR_SUCCESS;
502   }
503 
504   // Input Evecs and Restriction
505   ierr = CeedOperatorSetupInputs_Opt(num_input_fields, qf_input_fields,
506                                      op_input_fields, in_vec, e_data,
507                                      impl, request); CeedChkBackend(ierr);
508 
509   // Output Lvecs, Evecs, and Qvecs
510   for (CeedInt i=0; i<num_output_fields; i++) {
511     // Set Qvec if needed
512     ierr = CeedQFunctionFieldGetEvalMode(qf_output_fields[i], &eval_mode);
513     CeedChkBackend(ierr);
514     if (eval_mode == CEED_EVAL_NONE) {
515       // Set qvec to single block evec
516       ierr = CeedVectorGetArrayWrite(impl->e_vecs_out[i], CEED_MEM_HOST,
517                                      &e_data[i + num_input_fields]);
518       CeedChkBackend(ierr);
519       ierr = CeedVectorSetArray(impl->q_vecs_out[i], CEED_MEM_HOST,
520                                 CEED_USE_POINTER, e_data[i + num_input_fields]);
521       CeedChkBackend(ierr);
522       ierr = CeedVectorRestoreArray(impl->e_vecs_out[i],
523                                     &e_data[i + num_input_fields]);
524       CeedChkBackend(ierr);
525     }
526   }
527 
528   // Loop through elements
529   for (CeedInt e=0; e<num_blks*blk_size; e+=blk_size) {
530     // Input basis apply
531     ierr = CeedOperatorInputBasis_Opt(e, Q, qf_input_fields, op_input_fields,
532                                       num_input_fields, blk_size, in_vec, false,
533                                       e_data, impl, request); CeedChkBackend(ierr);
534 
535     // Q function
536     if (!impl->is_identity_qf) {
537       ierr = CeedQFunctionApply(qf, Q*blk_size, impl->q_vecs_in, impl->q_vecs_out);
538       CeedChkBackend(ierr);
539     }
540 
541     // Output basis apply and restrict
542     ierr = CeedOperatorOutputBasis_Opt(e, Q, qf_output_fields, op_output_fields,
543                                        blk_size, num_input_fields, num_output_fields,
544                                        op, out_vec, impl, request);
545     CeedChkBackend(ierr);
546   }
547 
548   // Restore input arrays
549   ierr = CeedOperatorRestoreInputs_Opt(num_input_fields, qf_input_fields,
550                                        op_input_fields, e_data, impl);
551   CeedChkBackend(ierr);
552 
553   return CEED_ERROR_SUCCESS;
554 }
555 
556 //------------------------------------------------------------------------------
557 // Core code for linear QFunction assembly
558 //------------------------------------------------------------------------------
559 static inline int CeedOperatorLinearAssembleQFunctionCore_Opt(CeedOperator op,
560     bool build_objects, CeedVector *assembled, CeedElemRestriction *rstr,
561     CeedRequest *request) {
562   int ierr;
563   Ceed ceed;
564   ierr = CeedOperatorGetCeed(op, &ceed); CeedChkBackend(ierr);
565   Ceed_Opt *ceed_impl;
566   ierr = CeedGetData(ceed, &ceed_impl); CeedChkBackend(ierr);
567   const CeedInt blk_size = ceed_impl->blk_size;
568   CeedSize q_size;
569   CeedOperator_Opt *impl;
570   ierr = CeedOperatorGetData(op, &impl); CeedChkBackend(ierr);
571   CeedInt Q, num_input_fields, num_output_fields, num_elem, size;
572   ierr = CeedOperatorGetNumElements(op, &num_elem); CeedChkBackend(ierr);
573   ierr = CeedOperatorGetNumQuadraturePoints(op, &Q); CeedChkBackend(ierr);
574   CeedInt num_blks = (num_elem/blk_size) + !!(num_elem%blk_size);
575   CeedQFunction qf;
576   ierr = CeedOperatorGetQFunction(op, &qf); CeedChkBackend(ierr);
577   CeedOperatorField *op_input_fields, *op_output_fields;
578   ierr = CeedOperatorGetFields(op, &num_input_fields, &op_input_fields,
579                                &num_output_fields, &op_output_fields);
580   CeedChkBackend(ierr);
581   CeedQFunctionField *qf_input_fields, *qf_output_fields;
582   ierr = CeedQFunctionGetFields(qf, NULL, &qf_input_fields, NULL,
583                                 &qf_output_fields);
584   CeedChkBackend(ierr);
585   CeedVector vec, l_vec = impl->qf_l_vec;
586   CeedInt num_active_in = impl->num_active_in,
587           num_active_out = impl->num_active_out;
588   CeedVector *active_in = impl->qf_active_in;
589   CeedScalar *a, *tmp;
590   CeedScalar *e_data[2*CEED_FIELD_MAX] = {0};
591 
592   // Setup
593   ierr = CeedOperatorSetup_Opt(op); CeedChkBackend(ierr);
594 
595   // Check for identity
596   if (impl->is_identity_qf)
597     // LCOV_EXCL_START
598     return CeedError(ceed, CEED_ERROR_BACKEND,
599                      "Assembling identity qfunctions not supported");
600   // LCOV_EXCL_STOP
601 
602   // Input Evecs and Restriction
603   ierr = CeedOperatorSetupInputs_Opt(num_input_fields, qf_input_fields,
604                                      op_input_fields, NULL, e_data,
605                                      impl, request); CeedChkBackend(ierr);
606 
607   // Count number of active input fields
608   if (!num_active_in) {
609     for (CeedInt i=0; i<num_input_fields; i++) {
610       // Get input vector
611       ierr = CeedOperatorFieldGetVector(op_input_fields[i], &vec);
612       CeedChkBackend(ierr);
613       // Check if active input
614       if (vec == CEED_VECTOR_ACTIVE) {
615         ierr = CeedQFunctionFieldGetSize(qf_input_fields[i], &size);
616         CeedChkBackend(ierr);
617         ierr = CeedVectorSetValue(impl->q_vecs_in[i], 0.0); CeedChkBackend(ierr);
618         ierr = CeedVectorGetArray(impl->q_vecs_in[i], CEED_MEM_HOST, &tmp);
619         CeedChkBackend(ierr);
620         ierr = CeedRealloc(num_active_in + size, &active_in); CeedChkBackend(ierr);
621         for (CeedInt field=0; field<size; field++) {
622           q_size = (CeedSize)Q*blk_size;
623           ierr = CeedVectorCreate(ceed, q_size, &active_in[num_active_in+field]);
624           CeedChkBackend(ierr);
625           ierr = CeedVectorSetArray(active_in[num_active_in+field], CEED_MEM_HOST,
626                                     CEED_USE_POINTER, &tmp[field*Q*blk_size]);
627           CeedChkBackend(ierr);
628         }
629         num_active_in += size;
630         ierr = CeedVectorRestoreArray(impl->q_vecs_in[i], &tmp); CeedChkBackend(ierr);
631       }
632     }
633     impl->num_active_in = num_active_in;
634     impl->qf_active_in = active_in;
635   }
636 
637   // Count number of active output fields
638   if (!num_active_out) {
639     for (CeedInt i=0; i<num_output_fields; i++) {
640       // Get output vector
641       ierr = CeedOperatorFieldGetVector(op_output_fields[i], &vec);
642       CeedChkBackend(ierr);
643       // Check if active output
644       if (vec == CEED_VECTOR_ACTIVE) {
645         ierr = CeedQFunctionFieldGetSize(qf_output_fields[i], &size);
646         CeedChkBackend(ierr);
647         num_active_out += size;
648       }
649     }
650     impl->num_active_out = num_active_out;
651   }
652 
653   // Check sizes
654   if (!num_active_in || !num_active_out)
655     // LCOV_EXCL_START
656     return CeedError(ceed, CEED_ERROR_BACKEND,
657                      "Cannot assemble QFunction without active inputs "
658                      "and outputs");
659   // LCOV_EXCL_STOP
660 
661   // Setup l_vec
662   if (!l_vec) {
663     CeedSize l_size = (CeedSize)blk_size*Q*num_active_in*num_active_out;
664     ierr = CeedVectorCreate(ceed, l_size, &l_vec); CeedChkBackend(ierr);
665     ierr = CeedVectorSetValue(l_vec, 0.0); CeedChkBackend(ierr);
666     impl->qf_l_vec = l_vec;
667   }
668 
669   // Build objects if needed
670   CeedInt strides[3] = {1, Q, num_active_in *num_active_out*Q};
671   if (build_objects) {
672     // Create output restriction
673     ierr = CeedElemRestrictionCreateStrided(ceed, num_elem, Q,
674                                             num_active_in*num_active_out,
675                                             num_active_in*num_active_out*num_elem*Q,
676                                             strides, rstr); CeedChkBackend(ierr);
677     // Create assembled vector
678     CeedSize l_size = (CeedSize)num_elem*Q*num_active_in*num_active_out;
679     ierr = CeedVectorCreate(ceed, l_size, assembled); CeedChkBackend(ierr);
680   }
681 
682   // Output blocked restriction
683   CeedElemRestriction blk_rstr = impl->qf_blk_rstr;
684   if (!blk_rstr) {
685     ierr = CeedElemRestrictionCreateBlockedStrided(ceed, num_elem, Q, blk_size,
686            num_active_in*num_active_out, num_active_in*num_active_out*num_elem*Q,
687            strides, &blk_rstr); CeedChkBackend(ierr);
688     impl->qf_blk_rstr = blk_rstr;
689   }
690 
691   // Loop through elements
692   ierr = CeedVectorSetValue(*assembled, 0.0); CeedChkBackend(ierr);
693   for (CeedInt e=0; e<num_blks*blk_size; e+=blk_size) {
694     ierr = CeedVectorGetArray(l_vec, CEED_MEM_HOST, &a); CeedChkBackend(ierr);
695 
696     // Input basis apply
697     ierr = CeedOperatorInputBasis_Opt(e, Q, qf_input_fields, op_input_fields,
698                                       num_input_fields, blk_size, NULL, true,
699                                       e_data, impl, request); CeedChkBackend(ierr);
700 
701     // Assemble QFunction
702     for (CeedInt in=0; in<num_active_in; in++) {
703       // Set Inputs
704       ierr = CeedVectorSetValue(active_in[in], 1.0); CeedChkBackend(ierr);
705       if (num_active_in > 1) {
706         ierr = CeedVectorSetValue(active_in[(in+num_active_in-1)%num_active_in],
707                                   0.0); CeedChkBackend(ierr);
708       }
709       // Set Outputs
710       for (CeedInt out=0; out<num_output_fields; out++) {
711         // Get output vector
712         ierr = CeedOperatorFieldGetVector(op_output_fields[out], &vec);
713         CeedChkBackend(ierr);
714         // Check if active output
715         if (vec == CEED_VECTOR_ACTIVE) {
716           CeedVectorSetArray(impl->q_vecs_out[out], CEED_MEM_HOST,
717                              CEED_USE_POINTER, a); CeedChkBackend(ierr);
718           ierr = CeedQFunctionFieldGetSize(qf_output_fields[out], &size);
719           CeedChkBackend(ierr);
720           a += size*Q*blk_size; // Advance the pointer by the size of the output
721         }
722       }
723       // Apply QFunction
724       ierr = CeedQFunctionApply(qf, Q*blk_size, impl->q_vecs_in, impl->q_vecs_out);
725       CeedChkBackend(ierr);
726     }
727 
728     // Assemble into assembled vector
729     ierr = CeedVectorRestoreArray(l_vec, &a); CeedChkBackend(ierr);
730     ierr = CeedElemRestrictionApplyBlock(blk_rstr, e/blk_size, CEED_TRANSPOSE,
731                                          l_vec, *assembled, request); CeedChkBackend(ierr);
732   }
733 
734   // Un-set output Qvecs to prevent accidental overwrite of Assembled
735   for (CeedInt out=0; out<num_output_fields; out++) {
736     // Get output vector
737     ierr = CeedOperatorFieldGetVector(op_output_fields[out], &vec);
738     CeedChkBackend(ierr);
739     // Check if active output
740     if (vec == CEED_VECTOR_ACTIVE) {
741       CeedVectorSetArray(impl->q_vecs_out[out], CEED_MEM_HOST, CEED_COPY_VALUES,
742                          NULL); CeedChkBackend(ierr);
743     }
744   }
745 
746   // Restore input arrays
747   ierr = CeedOperatorRestoreInputs_Opt(num_input_fields, qf_input_fields,
748                                        op_input_fields, e_data, impl);
749   CeedChkBackend(ierr);
750 
751   return CEED_ERROR_SUCCESS;
752 }
753 
754 //------------------------------------------------------------------------------
755 // Assemble Linear QFunction
756 //------------------------------------------------------------------------------
757 static int CeedOperatorLinearAssembleQFunction_Opt(CeedOperator op,
758     CeedVector *assembled, CeedElemRestriction *rstr, CeedRequest *request) {
759   return CeedOperatorLinearAssembleQFunctionCore_Opt(op, true, assembled, rstr,
760          request);
761 }
762 
763 //------------------------------------------------------------------------------
764 // Update Assembled Linear QFunction
765 //------------------------------------------------------------------------------
766 static int CeedOperatorLinearAssembleQFunctionUpdate_Opt(CeedOperator op,
767     CeedVector assembled, CeedElemRestriction rstr, CeedRequest *request) {
768   return CeedOperatorLinearAssembleQFunctionCore_Opt(op, false, &assembled,
769          &rstr, request);
770 }
771 
772 //------------------------------------------------------------------------------
773 // Operator Destroy
774 //------------------------------------------------------------------------------
775 static int CeedOperatorDestroy_Opt(CeedOperator op) {
776   int ierr;
777   CeedOperator_Opt *impl;
778   ierr = CeedOperatorGetData(op, &impl); CeedChkBackend(ierr);
779 
780   for (CeedInt i=0; i<impl->num_inputs+impl->num_outputs; i++) {
781     ierr = CeedElemRestrictionDestroy(&impl->blk_restr[i]); CeedChkBackend(ierr);
782     ierr = CeedVectorDestroy(&impl->e_vecs_full[i]); CeedChkBackend(ierr);
783   }
784   ierr = CeedFree(&impl->blk_restr); CeedChkBackend(ierr);
785   ierr = CeedFree(&impl->e_vecs_full); CeedChkBackend(ierr);
786   ierr = CeedFree(&impl->input_states); CeedChkBackend(ierr);
787 
788   for (CeedInt i=0; i<impl->num_inputs; i++) {
789     ierr = CeedVectorDestroy(&impl->e_vecs_in[i]); CeedChkBackend(ierr);
790     ierr = CeedVectorDestroy(&impl->q_vecs_in[i]); CeedChkBackend(ierr);
791   }
792   ierr = CeedFree(&impl->e_vecs_in); CeedChkBackend(ierr);
793   ierr = CeedFree(&impl->q_vecs_in); CeedChkBackend(ierr);
794 
795   for (CeedInt i=0; i<impl->num_outputs; i++) {
796     ierr = CeedVectorDestroy(&impl->e_vecs_out[i]); CeedChkBackend(ierr);
797     ierr = CeedVectorDestroy(&impl->q_vecs_out[i]); CeedChkBackend(ierr);
798   }
799   ierr = CeedFree(&impl->e_vecs_out); CeedChkBackend(ierr);
800   ierr = CeedFree(&impl->q_vecs_out); CeedChkBackend(ierr);
801 
802   // QFunction assembly data
803   for (CeedInt i=0; i<impl->num_active_in; i++) {
804     ierr = CeedVectorDestroy(&impl->qf_active_in[i]); CeedChkBackend(ierr);
805   }
806   ierr = CeedFree(&impl->qf_active_in); CeedChkBackend(ierr);
807   ierr = CeedVectorDestroy(&impl->qf_l_vec); CeedChkBackend(ierr);
808   ierr = CeedElemRestrictionDestroy(&impl->qf_blk_rstr); CeedChkBackend(ierr);
809 
810   ierr = CeedFree(&impl); CeedChkBackend(ierr);
811   return CEED_ERROR_SUCCESS;
812 }
813 
814 //------------------------------------------------------------------------------
815 // Operator Create
816 //------------------------------------------------------------------------------
817 int CeedOperatorCreate_Opt(CeedOperator op) {
818   int ierr;
819   Ceed ceed;
820   ierr = CeedOperatorGetCeed(op, &ceed); CeedChkBackend(ierr);
821   Ceed_Opt *ceed_impl;
822   ierr = CeedGetData(ceed, &ceed_impl); CeedChkBackend(ierr);
823   CeedInt blk_size = ceed_impl->blk_size;
824   CeedOperator_Opt *impl;
825 
826   ierr = CeedCalloc(1, &impl); CeedChkBackend(ierr);
827   ierr = CeedOperatorSetData(op, impl); CeedChkBackend(ierr);
828 
829   if (blk_size != 1 && blk_size != 8)
830     // LCOV_EXCL_START
831     return CeedError(ceed, CEED_ERROR_BACKEND,
832                      "Opt backend cannot use blocksize: %d", blk_size);
833   // LCOV_EXCL_STOP
834 
835   ierr = CeedSetBackendFunction(ceed, "Operator", op, "LinearAssembleQFunction",
836                                 CeedOperatorLinearAssembleQFunction_Opt);
837   CeedChkBackend(ierr);
838   ierr = CeedSetBackendFunction(ceed, "Operator", op,
839                                 "LinearAssembleQFunctionUpdate",
840                                 CeedOperatorLinearAssembleQFunctionUpdate_Opt);
841   CeedChkBackend(ierr);
842   ierr = CeedSetBackendFunction(ceed, "Operator", op, "ApplyAdd",
843                                 CeedOperatorApplyAdd_Opt); CeedChkBackend(ierr);
844   ierr = CeedSetBackendFunction(ceed, "Operator", op, "Destroy",
845                                 CeedOperatorDestroy_Opt); CeedChkBackend(ierr);
846   return CEED_ERROR_SUCCESS;
847 }
848 //------------------------------------------------------------------------------
849