xref: /libCEED/backends/opt/ceed-opt-operator.c (revision 9c774eddf8c0b4f5416196d32c5355c9591a7190)
1 // Copyright (c) 2017-2018, Lawrence Livermore National Security, LLC.
2 // Produced at the Lawrence Livermore National Laboratory. LLNL-CODE-734707.
3 // All Rights reserved. See files LICENSE and NOTICE for details.
4 //
5 // This file is part of CEED, a collection of benchmarks, miniapps, software
6 // libraries and APIs for efficient high-order finite element and spectral
7 // element discretizations for exascale applications. For more information and
8 // source code availability see http://github.com/ceed.
9 //
10 // The CEED research is supported by the Exascale Computing Project 17-SC-20-SC,
11 // a collaborative effort of two U.S. Department of Energy organizations (Office
12 // of Science and the National Nuclear Security Administration) responsible for
13 // the planning and preparation of a capable exascale ecosystem, including
14 // software, applications, hardware, advanced system engineering and early
15 // testbed platforms, in support of the nation's exascale computing imperative.
16 
17 #include <ceed/ceed.h>
18 #include <ceed/backend.h>
19 #include <stdbool.h>
20 #include <stdint.h>
21 #include <string.h>
22 #include "ceed-opt.h"
23 
24 //------------------------------------------------------------------------------
25 // Setup Input/Output Fields
26 //------------------------------------------------------------------------------
27 static int CeedOperatorSetupFields_Opt(CeedQFunction qf, CeedOperator op,
28                                        bool is_input, const CeedInt blk_size,
29                                        CeedElemRestriction *blk_restr,
30                                        CeedVector *e_vecs_full, CeedVector *e_vecs,
31                                        CeedVector *q_vecs, CeedInt start_e,
32                                        CeedInt num_fields, CeedInt Q) {
33   CeedInt dim, ierr, num_comp, size, P;
34   Ceed ceed;
35   ierr = CeedOperatorGetCeed(op, &ceed); CeedChkBackend(ierr);
36   CeedBasis basis;
37   CeedElemRestriction r;
38   CeedOperatorField *op_fields;
39   CeedQFunctionField *qf_fields;
40   if (is_input) {
41     ierr = CeedOperatorGetFields(op, NULL, &op_fields, NULL, NULL);
42     CeedChkBackend(ierr);
43     ierr = CeedQFunctionGetFields(qf, NULL, &qf_fields, NULL, NULL);
44     CeedChkBackend(ierr);
45   } else {
46     ierr = CeedOperatorGetFields(op, NULL, NULL, NULL,&op_fields);
47     CeedChkBackend(ierr);
48     ierr = CeedQFunctionGetFields(qf, NULL, NULL, NULL, &qf_fields);
49     CeedChkBackend(ierr);
50   }
51 
52   // Loop over fields
53   for (CeedInt i=0; i<num_fields; i++) {
54     CeedEvalMode eval_mode;
55     ierr = CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode);
56     CeedChkBackend(ierr);
57 
58     if (eval_mode != CEED_EVAL_WEIGHT) {
59       ierr = CeedOperatorFieldGetElemRestriction(op_fields[i], &r);
60       CeedChkBackend(ierr);
61       Ceed ceed;
62       ierr = CeedElemRestrictionGetCeed(r, &ceed); CeedChkBackend(ierr);
63       CeedInt num_elem, elem_size, l_size, comp_stride;
64       ierr = CeedElemRestrictionGetNumElements(r, &num_elem); CeedChkBackend(ierr);
65       ierr = CeedElemRestrictionGetElementSize(r, &elem_size); CeedChkBackend(ierr);
66       ierr = CeedElemRestrictionGetLVectorSize(r, &l_size); CeedChkBackend(ierr);
67       ierr = CeedElemRestrictionGetNumComponents(r, &num_comp); CeedChkBackend(ierr);
68 
69       bool strided;
70       ierr = CeedElemRestrictionIsStrided(r, &strided); CeedChkBackend(ierr);
71       if (strided) {
72         CeedInt strides[3];
73         ierr = CeedElemRestrictionGetStrides(r, &strides); CeedChkBackend(ierr);
74         ierr = CeedElemRestrictionCreateBlockedStrided(ceed, num_elem, elem_size,
75                blk_size, num_comp, l_size, strides, &blk_restr[i+start_e]);
76         CeedChkBackend(ierr);
77       } else {
78         const CeedInt *offsets = NULL;
79         ierr = CeedElemRestrictionGetOffsets(r, CEED_MEM_HOST, &offsets);
80         CeedChkBackend(ierr);
81         ierr = CeedElemRestrictionGetCompStride(r, &comp_stride); CeedChkBackend(ierr);
82         ierr = CeedElemRestrictionCreateBlocked(ceed, num_elem, elem_size,
83                                                 blk_size, num_comp, comp_stride,
84                                                 l_size, CEED_MEM_HOST,
85                                                 CEED_COPY_VALUES, offsets,
86                                                 &blk_restr[i+start_e]);
87         CeedChkBackend(ierr);
88         ierr = CeedElemRestrictionRestoreOffsets(r, &offsets); CeedChkBackend(ierr);
89       }
90       ierr = CeedElemRestrictionCreateVector(blk_restr[i+start_e], NULL,
91                                              &e_vecs_full[i+start_e]);
92       CeedChkBackend(ierr);
93     }
94 
95     switch(eval_mode) {
96     case CEED_EVAL_NONE:
97       ierr = CeedQFunctionFieldGetSize(qf_fields[i], &size); CeedChkBackend(ierr);
98       ierr = CeedVectorCreate(ceed, Q*size*blk_size, &e_vecs[i]);
99       CeedChkBackend(ierr);
100       ierr = CeedVectorCreate(ceed, Q*size*blk_size, &q_vecs[i]);
101       CeedChkBackend(ierr);
102       break;
103     case CEED_EVAL_INTERP:
104       ierr = CeedQFunctionFieldGetSize(qf_fields[i], &size); CeedChkBackend(ierr);
105       ierr = CeedElemRestrictionGetElementSize(r, &P);
106       CeedChkBackend(ierr);
107       ierr = CeedVectorCreate(ceed, P*size*blk_size, &e_vecs[i]);
108       CeedChkBackend(ierr);
109       ierr = CeedVectorCreate(ceed, Q*size*blk_size, &q_vecs[i]);
110       CeedChkBackend(ierr);
111       break;
112     case CEED_EVAL_GRAD:
113       ierr = CeedOperatorFieldGetBasis(op_fields[i], &basis); CeedChkBackend(ierr);
114       ierr = CeedQFunctionFieldGetSize(qf_fields[i], &size); CeedChkBackend(ierr);
115       ierr = CeedBasisGetDimension(basis, &dim); CeedChkBackend(ierr);
116       ierr = CeedElemRestrictionGetElementSize(r, &P);
117       CeedChkBackend(ierr);
118       ierr = CeedVectorCreate(ceed, P*size/dim*blk_size, &e_vecs[i]);
119       CeedChkBackend(ierr);
120       ierr = CeedVectorCreate(ceed, Q*size*blk_size, &q_vecs[i]);
121       CeedChkBackend(ierr);
122       break;
123     case CEED_EVAL_WEIGHT: // Only on input fields
124       ierr = CeedOperatorFieldGetBasis(op_fields[i], &basis); CeedChkBackend(ierr);
125       ierr = CeedVectorCreate(ceed, Q*blk_size, &q_vecs[i]); CeedChkBackend(ierr);
126       ierr = CeedBasisApply(basis, blk_size, CEED_NOTRANSPOSE,
127                             CEED_EVAL_WEIGHT, CEED_VECTOR_NONE, q_vecs[i]);
128       CeedChkBackend(ierr);
129 
130       break;
131     case CEED_EVAL_DIV:
132       break; // Not implemented
133     case CEED_EVAL_CURL:
134       break; // Not implemented
135     }
136     if (is_input && !!e_vecs[i]) {
137       ierr = CeedVectorSetArray(e_vecs[i], CEED_MEM_HOST,
138                                 CEED_COPY_VALUES, NULL); CeedChkBackend(ierr);
139     }
140   }
141   return CEED_ERROR_SUCCESS;
142 }
143 
144 //------------------------------------------------------------------------------
145 // Setup Operator
146 //------------------------------------------------------------------------------
147 static int CeedOperatorSetup_Opt(CeedOperator op) {
148   int ierr;
149   bool is_setup_done;
150   ierr = CeedOperatorIsSetupDone(op, &is_setup_done); CeedChkBackend(ierr);
151   if (is_setup_done) return CEED_ERROR_SUCCESS;
152   Ceed ceed;
153   ierr = CeedOperatorGetCeed(op, &ceed); CeedChkBackend(ierr);
154   Ceed_Opt *ceed_impl;
155   ierr = CeedGetData(ceed, &ceed_impl); CeedChkBackend(ierr);
156   const CeedInt blk_size = ceed_impl->blk_size;
157   CeedOperator_Opt *impl;
158   ierr = CeedOperatorGetData(op, &impl); CeedChkBackend(ierr);
159   CeedQFunction qf;
160   ierr = CeedOperatorGetQFunction(op, &qf); CeedChkBackend(ierr);
161   CeedInt Q, num_input_fields, num_output_fields;
162   ierr = CeedOperatorGetNumQuadraturePoints(op, &Q); CeedChkBackend(ierr);
163   ierr = CeedQFunctionIsIdentity(qf, &impl->is_identity_qf); CeedChkBackend(ierr);
164   CeedOperatorField *op_input_fields, *op_output_fields;
165   ierr = CeedOperatorGetFields(op, &num_input_fields, &op_input_fields,
166                                &num_output_fields, &op_output_fields);
167   CeedChkBackend(ierr);
168   CeedQFunctionField *qf_input_fields, *qf_output_fields;
169   ierr = CeedQFunctionGetFields(qf, NULL, &qf_input_fields, NULL,
170                                 &qf_output_fields);
171   CeedChkBackend(ierr);
172 
173   // Allocate
174   ierr = CeedCalloc(num_input_fields + num_output_fields, &impl->blk_restr);
175   CeedChkBackend(ierr);
176   ierr = CeedCalloc(num_input_fields + num_output_fields, &impl->e_vecs_full);
177   CeedChkBackend(ierr);
178 
179   ierr = CeedCalloc(CEED_FIELD_MAX, &impl->input_states); CeedChkBackend(ierr);
180   ierr = CeedCalloc(CEED_FIELD_MAX, &impl->e_vecs_in); CeedChkBackend(ierr);
181   ierr = CeedCalloc(CEED_FIELD_MAX, &impl->e_vecs_out); CeedChkBackend(ierr);
182   ierr = CeedCalloc(CEED_FIELD_MAX, &impl->q_vecs_in); CeedChkBackend(ierr);
183   ierr = CeedCalloc(CEED_FIELD_MAX, &impl->q_vecs_out); CeedChkBackend(ierr);
184 
185   impl->num_inputs = num_input_fields;
186   impl->num_outputs = num_output_fields;
187 
188   // Set up infield and outfield pointer arrays
189   // Infields
190   ierr = CeedOperatorSetupFields_Opt(qf, op, true, blk_size, impl->blk_restr,
191                                      impl->e_vecs_full, impl->e_vecs_in,
192                                      impl->q_vecs_in, 0, num_input_fields, Q);
193   CeedChkBackend(ierr);
194   // Outfields
195   ierr = CeedOperatorSetupFields_Opt(qf, op, false, blk_size, impl->blk_restr,
196                                      impl->e_vecs_full, impl->e_vecs_out,
197                                      impl->q_vecs_out, num_input_fields,
198                                      num_output_fields, Q);
199   CeedChkBackend(ierr);
200 
201   // Identity QFunctions
202   if (impl->is_identity_qf) {
203     CeedEvalMode in_mode, out_mode;
204     CeedQFunctionField *in_fields, *out_fields;
205     ierr = CeedQFunctionGetFields(qf, NULL, &in_fields, NULL, &out_fields);
206     CeedChkBackend(ierr);
207     ierr = CeedQFunctionFieldGetEvalMode(in_fields[0], &in_mode);
208     CeedChkBackend(ierr);
209     ierr = CeedQFunctionFieldGetEvalMode(out_fields[0], &out_mode);
210     CeedChkBackend(ierr);
211 
212     if (in_mode == CEED_EVAL_NONE && out_mode == CEED_EVAL_NONE) {
213       impl->is_identity_restr_op = true;
214     } else {
215       ierr = CeedVectorDestroy(&impl->q_vecs_out[0]); CeedChkBackend(ierr);
216       impl->q_vecs_out[0] = impl->q_vecs_in[0];
217       ierr = CeedVectorAddReference(impl->q_vecs_in[0]); CeedChkBackend(ierr);
218     }
219   }
220 
221   ierr = CeedOperatorSetSetupDone(op); CeedChkBackend(ierr);
222 
223   return CEED_ERROR_SUCCESS;
224 }
225 
226 //------------------------------------------------------------------------------
227 // Setup Input Fields
228 //------------------------------------------------------------------------------
229 static inline int CeedOperatorSetupInputs_Opt(CeedInt num_input_fields,
230     CeedQFunctionField *qf_input_fields, CeedOperatorField *op_input_fields,
231     CeedVector in_vec, CeedScalar *e_data[2*CEED_FIELD_MAX], CeedOperator_Opt *impl,
232     CeedRequest *request) {
233   CeedInt ierr;
234   CeedEvalMode eval_mode;
235   CeedVector vec;
236   uint64_t state;
237 
238   for (CeedInt i=0; i<num_input_fields; i++) {
239     ierr = CeedQFunctionFieldGetEvalMode(qf_input_fields[i], &eval_mode);
240     CeedChkBackend(ierr);
241     if (eval_mode == CEED_EVAL_WEIGHT) { // Skip
242     } else {
243       // Get input vector
244       ierr = CeedOperatorFieldGetVector(op_input_fields[i], &vec);
245       CeedChkBackend(ierr);
246       if (vec != CEED_VECTOR_ACTIVE) {
247         // Restrict
248         ierr = CeedVectorGetState(vec, &state); CeedChkBackend(ierr);
249         if (state != impl->input_states[i]) {
250           ierr = CeedElemRestrictionApply(impl->blk_restr[i], CEED_NOTRANSPOSE,
251                                           vec, impl->e_vecs_full[i], request);
252           CeedChkBackend(ierr);
253           impl->input_states[i] = state;
254         }
255         // Get evec
256         ierr = CeedVectorGetArrayRead(impl->e_vecs_full[i], CEED_MEM_HOST,
257                                       (const CeedScalar **) &e_data[i]);
258         CeedChkBackend(ierr);
259       } else {
260         // Set Qvec for CEED_EVAL_NONE
261         if (eval_mode == CEED_EVAL_NONE) {
262           ierr = CeedVectorGetArrayRead(impl->e_vecs_in[i], CEED_MEM_HOST,
263                                         (const CeedScalar **)&e_data[i]);
264           CeedChkBackend(ierr);
265           ierr = CeedVectorSetArray(impl->q_vecs_in[i], CEED_MEM_HOST,
266                                     CEED_USE_POINTER, e_data[i]); CeedChkBackend(ierr);
267           ierr = CeedVectorRestoreArrayRead(impl->e_vecs_in[i],
268                                             (const CeedScalar **)&e_data[i]);
269           CeedChkBackend(ierr);
270         }
271       }
272     }
273   }
274   return CEED_ERROR_SUCCESS;
275 }
276 
277 //------------------------------------------------------------------------------
278 // Input Basis Action
279 //------------------------------------------------------------------------------
280 static inline int CeedOperatorInputBasis_Opt(CeedInt e, CeedInt Q,
281     CeedQFunctionField *qf_input_fields, CeedOperatorField *op_input_fields,
282     CeedInt num_input_fields, CeedInt blk_size, CeedVector in_vec, bool skip_active,
283     CeedScalar *e_data[2*CEED_FIELD_MAX], CeedOperator_Opt *impl,
284     CeedRequest *request) {
285   CeedInt ierr;
286   CeedInt dim, elem_size, size;
287   CeedElemRestriction elem_restr;
288   CeedEvalMode eval_mode;
289   CeedBasis basis;
290   CeedVector vec;
291 
292   for (CeedInt i=0; i<num_input_fields; i++) {
293     ierr = CeedOperatorFieldGetVector(op_input_fields[i], &vec);
294     CeedChkBackend(ierr);
295     // Skip active input
296     if (skip_active) {
297       if (vec == CEED_VECTOR_ACTIVE)
298         continue;
299     }
300 
301     CeedInt active_in = 0;
302     // Get elem_size, eval_mode, size
303     ierr = CeedOperatorFieldGetElemRestriction(op_input_fields[i], &elem_restr);
304     CeedChkBackend(ierr);
305     ierr = CeedElemRestrictionGetElementSize(elem_restr, &elem_size);
306     CeedChkBackend(ierr);
307     ierr = CeedQFunctionFieldGetEvalMode(qf_input_fields[i], &eval_mode);
308     CeedChkBackend(ierr);
309     ierr = CeedQFunctionFieldGetSize(qf_input_fields[i], &size);
310     CeedChkBackend(ierr);
311     // Restrict block active input
312     if (vec == CEED_VECTOR_ACTIVE) {
313       ierr = CeedElemRestrictionApplyBlock(impl->blk_restr[i], e/blk_size,
314                                            CEED_NOTRANSPOSE, in_vec,
315                                            impl->e_vecs_in[i], request);
316       CeedChkBackend(ierr);
317       active_in = 1;
318     }
319     // Basis action
320     switch(eval_mode) {
321     case CEED_EVAL_NONE:
322       if (!active_in) {
323         ierr = CeedVectorSetArray(impl->q_vecs_in[i], CEED_MEM_HOST,
324                                   CEED_USE_POINTER, &e_data[i][e*Q*size]);
325         CeedChkBackend(ierr);
326       }
327       break;
328     case CEED_EVAL_INTERP:
329       ierr = CeedOperatorFieldGetBasis(op_input_fields[i], &basis);
330       CeedChkBackend(ierr);
331       if (!active_in) {
332         ierr = CeedVectorSetArray(impl->e_vecs_in[i], CEED_MEM_HOST,
333                                   CEED_USE_POINTER, &e_data[i][e*elem_size*size]);
334         CeedChkBackend(ierr);
335       }
336       ierr = CeedBasisApply(basis, blk_size, CEED_NOTRANSPOSE,
337                             CEED_EVAL_INTERP, impl->e_vecs_in[i],
338                             impl->q_vecs_in[i]); CeedChkBackend(ierr);
339       break;
340     case CEED_EVAL_GRAD:
341       ierr = CeedOperatorFieldGetBasis(op_input_fields[i], &basis);
342       CeedChkBackend(ierr);
343       if (!active_in) {
344         ierr = CeedBasisGetDimension(basis, &dim); CeedChkBackend(ierr);
345         ierr = CeedVectorSetArray(impl->e_vecs_in[i], CEED_MEM_HOST,
346                                   CEED_USE_POINTER,
347                                   &e_data[i][e*elem_size*size/dim]);
348         CeedChkBackend(ierr);
349       }
350       ierr = CeedBasisApply(basis, blk_size, CEED_NOTRANSPOSE,
351                             CEED_EVAL_GRAD, impl->e_vecs_in[i],
352                             impl->q_vecs_in[i]); CeedChkBackend(ierr);
353       break;
354     case CEED_EVAL_WEIGHT:
355       break;  // No action
356     // LCOV_EXCL_START
357     case CEED_EVAL_DIV:
358     case CEED_EVAL_CURL: {
359       ierr = CeedOperatorFieldGetBasis(op_input_fields[i], &basis);
360       CeedChkBackend(ierr);
361       Ceed ceed;
362       ierr = CeedBasisGetCeed(basis, &ceed); CeedChkBackend(ierr);
363       return CeedError(ceed, CEED_ERROR_BACKEND,
364                        "Ceed evaluation mode not implemented");
365       // LCOV_EXCL_STOP
366     }
367     }
368   }
369   return CEED_ERROR_SUCCESS;
370 }
371 
372 //------------------------------------------------------------------------------
373 // Output Basis Action
374 //------------------------------------------------------------------------------
375 static inline int CeedOperatorOutputBasis_Opt(CeedInt e, CeedInt Q,
376     CeedQFunctionField *qf_output_fields, CeedOperatorField *op_output_fields,
377     CeedInt blk_size, CeedInt num_input_fields, CeedInt num_output_fields,
378     CeedOperator op, CeedVector out_vec, CeedOperator_Opt *impl,
379     CeedRequest *request) {
380   CeedInt ierr;
381   CeedElemRestriction elem_restr;
382   CeedEvalMode eval_mode;
383   CeedBasis basis;
384   CeedVector vec;
385 
386   for (CeedInt i=0; i<num_output_fields; i++) {
387     // Get elem_size, eval_mode, size
388     ierr = CeedOperatorFieldGetElemRestriction(op_output_fields[i], &elem_restr);
389     CeedChkBackend(ierr);
390     ierr = CeedQFunctionFieldGetEvalMode(qf_output_fields[i], &eval_mode);
391     CeedChkBackend(ierr);
392     // Basis action
393     switch(eval_mode) {
394     case CEED_EVAL_NONE:
395       break; // No action
396     case CEED_EVAL_INTERP:
397       ierr = CeedOperatorFieldGetBasis(op_output_fields[i], &basis);
398       CeedChkBackend(ierr);
399       ierr = CeedBasisApply(basis, blk_size, CEED_TRANSPOSE,
400                             CEED_EVAL_INTERP, impl->q_vecs_out[i],
401                             impl->e_vecs_out[i]); CeedChkBackend(ierr);
402       break;
403     case CEED_EVAL_GRAD:
404       ierr = CeedOperatorFieldGetBasis(op_output_fields[i], &basis);
405       CeedChkBackend(ierr);
406       ierr = CeedBasisApply(basis, blk_size, CEED_TRANSPOSE,
407                             CEED_EVAL_GRAD, impl->q_vecs_out[i],
408                             impl->e_vecs_out[i]); CeedChkBackend(ierr);
409       break;
410     // LCOV_EXCL_START
411     case CEED_EVAL_WEIGHT: {
412       Ceed ceed;
413       ierr = CeedOperatorGetCeed(op, &ceed); CeedChkBackend(ierr);
414       return CeedError(ceed, CEED_ERROR_BACKEND,
415                        "CEED_EVAL_WEIGHT cannot be an output "
416                        "evaluation mode");
417     }
418     case CEED_EVAL_DIV:
419     case CEED_EVAL_CURL: {
420       Ceed ceed;
421       ierr = CeedOperatorGetCeed(op, &ceed); CeedChkBackend(ierr);
422       return CeedError(ceed, CEED_ERROR_BACKEND,
423                        "Ceed evaluation mode not implemented");
424       // LCOV_EXCL_STOP
425     }
426     }
427     // Restrict output block
428     // Get output vector
429     ierr = CeedOperatorFieldGetVector(op_output_fields[i], &vec);
430     CeedChkBackend(ierr);
431     if (vec == CEED_VECTOR_ACTIVE)
432       vec = out_vec;
433     // Restrict
434     ierr = CeedElemRestrictionApplyBlock(impl->blk_restr[i+impl->num_inputs],
435                                          e/blk_size, CEED_TRANSPOSE,
436                                          impl->e_vecs_out[i], vec, request);
437     CeedChkBackend(ierr);
438   }
439   return CEED_ERROR_SUCCESS;
440 }
441 
442 //------------------------------------------------------------------------------
443 // Restore Input Vectors
444 //------------------------------------------------------------------------------
445 static inline int CeedOperatorRestoreInputs_Opt(CeedInt num_input_fields,
446     CeedQFunctionField *qf_input_fields, CeedOperatorField *op_input_fields,
447     CeedScalar *e_data[2*CEED_FIELD_MAX], CeedOperator_Opt *impl) {
448   CeedInt ierr;
449 
450   for (CeedInt i=0; i<num_input_fields; i++) {
451     CeedEvalMode eval_mode;
452     CeedVector vec;
453     ierr = CeedQFunctionFieldGetEvalMode(qf_input_fields[i], &eval_mode);
454     CeedChkBackend(ierr);
455     ierr = CeedOperatorFieldGetVector(op_input_fields[i], &vec);
456     CeedChkBackend(ierr);
457     if (eval_mode != CEED_EVAL_WEIGHT && vec != CEED_VECTOR_ACTIVE) {
458       ierr = CeedVectorRestoreArrayRead(impl->e_vecs_full[i],
459                                         (const CeedScalar **) &e_data[i]);
460       CeedChkBackend(ierr);
461     }
462   }
463   return CEED_ERROR_SUCCESS;
464 }
465 
466 //------------------------------------------------------------------------------
467 // Operator Apply
468 //------------------------------------------------------------------------------
469 static int CeedOperatorApplyAdd_Opt(CeedOperator op, CeedVector in_vec,
470                                     CeedVector out_vec, CeedRequest *request) {
471   int ierr;
472   Ceed ceed;
473   ierr = CeedOperatorGetCeed(op, &ceed); CeedChkBackend(ierr);
474   Ceed_Opt *ceed_impl;
475   ierr = CeedGetData(ceed, &ceed_impl); CeedChkBackend(ierr);
476   CeedInt blk_size = ceed_impl->blk_size;
477   CeedOperator_Opt *impl;
478   ierr = CeedOperatorGetData(op, &impl); CeedChkBackend(ierr);
479   CeedInt Q, num_input_fields, num_output_fields, num_elem;
480   ierr = CeedOperatorGetNumElements(op, &num_elem); CeedChkBackend(ierr);
481   ierr = CeedOperatorGetNumQuadraturePoints(op, &Q); CeedChkBackend(ierr);
482   CeedInt num_blks = (num_elem/blk_size) + !!(num_elem%blk_size);
483   CeedQFunction qf;
484   ierr = CeedOperatorGetQFunction(op, &qf); CeedChkBackend(ierr);
485   CeedOperatorField *op_input_fields, *op_output_fields;
486   ierr = CeedOperatorGetFields(op, &num_input_fields, &op_input_fields,
487                                &num_output_fields, &op_output_fields);
488   CeedChkBackend(ierr);
489   CeedQFunctionField *qf_input_fields, *qf_output_fields;
490   ierr = CeedQFunctionGetFields(qf, NULL, &qf_input_fields, NULL,
491                                 &qf_output_fields);
492   CeedChkBackend(ierr);
493   CeedEvalMode eval_mode;
494   CeedScalar *e_data[2*CEED_FIELD_MAX] = {0};
495 
496   // Setup
497   ierr = CeedOperatorSetup_Opt(op); CeedChkBackend(ierr);
498 
499   // Restriction only operator
500   if (impl->is_identity_restr_op) {
501     for (CeedInt b=0; b<num_blks; b++) {
502       ierr = CeedElemRestrictionApplyBlock(impl->blk_restr[0], b, CEED_NOTRANSPOSE,
503                                            in_vec, impl->e_vecs_in[0], request); CeedChkBackend(ierr);
504       ierr = CeedElemRestrictionApplyBlock(impl->blk_restr[1], b, CEED_TRANSPOSE,
505                                            impl->e_vecs_in[0], out_vec, request); CeedChkBackend(ierr);
506     }
507     return CEED_ERROR_SUCCESS;
508   }
509 
510   // Input Evecs and Restriction
511   ierr = CeedOperatorSetupInputs_Opt(num_input_fields, qf_input_fields,
512                                      op_input_fields, in_vec, e_data,
513                                      impl, request); CeedChkBackend(ierr);
514 
515   // Output Lvecs, Evecs, and Qvecs
516   for (CeedInt i=0; i<num_output_fields; i++) {
517     // Set Qvec if needed
518     ierr = CeedQFunctionFieldGetEvalMode(qf_output_fields[i], &eval_mode);
519     CeedChkBackend(ierr);
520     if (eval_mode == CEED_EVAL_NONE) {
521       // Set qvec to single block evec
522       ierr = CeedVectorGetArrayWrite(impl->e_vecs_out[i], CEED_MEM_HOST,
523                                      &e_data[i + num_input_fields]);
524       CeedChkBackend(ierr);
525       ierr = CeedVectorSetArray(impl->q_vecs_out[i], CEED_MEM_HOST,
526                                 CEED_USE_POINTER, e_data[i + num_input_fields]);
527       CeedChkBackend(ierr);
528       ierr = CeedVectorRestoreArray(impl->e_vecs_out[i],
529                                     &e_data[i + num_input_fields]);
530       CeedChkBackend(ierr);
531     }
532   }
533 
534   // Loop through elements
535   for (CeedInt e=0; e<num_blks*blk_size; e+=blk_size) {
536     // Input basis apply
537     ierr = CeedOperatorInputBasis_Opt(e, Q, qf_input_fields, op_input_fields,
538                                       num_input_fields, blk_size, in_vec, false,
539                                       e_data, impl, request); CeedChkBackend(ierr);
540 
541     // Q function
542     if (!impl->is_identity_qf) {
543       ierr = CeedQFunctionApply(qf, Q*blk_size, impl->q_vecs_in, impl->q_vecs_out);
544       CeedChkBackend(ierr);
545     }
546 
547     // Output basis apply and restrict
548     ierr = CeedOperatorOutputBasis_Opt(e, Q, qf_output_fields, op_output_fields,
549                                        blk_size, num_input_fields, num_output_fields,
550                                        op, out_vec, impl, request);
551     CeedChkBackend(ierr);
552   }
553 
554   // Restore input arrays
555   ierr = CeedOperatorRestoreInputs_Opt(num_input_fields, qf_input_fields,
556                                        op_input_fields, e_data, impl);
557   CeedChkBackend(ierr);
558 
559   return CEED_ERROR_SUCCESS;
560 }
561 
562 //------------------------------------------------------------------------------
563 // Core code for linear QFunction assembly
564 //------------------------------------------------------------------------------
565 static inline int CeedOperatorLinearAssembleQFunctionCore_Opt(CeedOperator op,
566     bool build_objects, CeedVector *assembled, CeedElemRestriction *rstr,
567     CeedRequest *request) {
568   int ierr;
569   Ceed ceed;
570   ierr = CeedOperatorGetCeed(op, &ceed); CeedChkBackend(ierr);
571   Ceed_Opt *ceed_impl;
572   ierr = CeedGetData(ceed, &ceed_impl); CeedChkBackend(ierr);
573   const CeedInt blk_size = ceed_impl->blk_size;
574   CeedOperator_Opt *impl;
575   ierr = CeedOperatorGetData(op, &impl); CeedChkBackend(ierr);
576   CeedInt Q, num_input_fields, num_output_fields, num_elem, size;
577   ierr = CeedOperatorGetNumElements(op, &num_elem); CeedChkBackend(ierr);
578   ierr = CeedOperatorGetNumQuadraturePoints(op, &Q); CeedChkBackend(ierr);
579   CeedInt num_blks = (num_elem/blk_size) + !!(num_elem%blk_size);
580   CeedQFunction qf;
581   ierr = CeedOperatorGetQFunction(op, &qf); CeedChkBackend(ierr);
582   CeedOperatorField *op_input_fields, *op_output_fields;
583   ierr = CeedOperatorGetFields(op, &num_input_fields, &op_input_fields,
584                                &num_output_fields, &op_output_fields);
585   CeedChkBackend(ierr);
586   CeedQFunctionField *qf_input_fields, *qf_output_fields;
587   ierr = CeedQFunctionGetFields(qf, NULL, &qf_input_fields, NULL,
588                                 &qf_output_fields);
589   CeedChkBackend(ierr);
590   CeedVector vec, l_vec = impl->qf_l_vec;
591   CeedInt num_active_in = impl->num_active_in,
592           num_active_out = impl->num_active_out;
593   CeedVector *active_in = impl->qf_active_in;
594   CeedScalar *a, *tmp;
595   CeedScalar *e_data[2*CEED_FIELD_MAX] = {0};
596 
597   // Setup
598   ierr = CeedOperatorSetup_Opt(op); CeedChkBackend(ierr);
599 
600   // Check for identity
601   if (impl->is_identity_qf)
602     // LCOV_EXCL_START
603     return CeedError(ceed, CEED_ERROR_BACKEND,
604                      "Assembling identity qfunctions not supported");
605   // LCOV_EXCL_STOP
606 
607   // Input Evecs and Restriction
608   ierr = CeedOperatorSetupInputs_Opt(num_input_fields, qf_input_fields,
609                                      op_input_fields, NULL, e_data,
610                                      impl, request); CeedChkBackend(ierr);
611 
612   // Count number of active input fields
613   if (!num_active_in) {
614     for (CeedInt i=0; i<num_input_fields; i++) {
615       // Get input vector
616       ierr = CeedOperatorFieldGetVector(op_input_fields[i], &vec);
617       CeedChkBackend(ierr);
618       // Check if active input
619       if (vec == CEED_VECTOR_ACTIVE) {
620         ierr = CeedQFunctionFieldGetSize(qf_input_fields[i], &size);
621         CeedChkBackend(ierr);
622         ierr = CeedVectorSetValue(impl->q_vecs_in[i], 0.0); CeedChkBackend(ierr);
623         ierr = CeedVectorGetArray(impl->q_vecs_in[i], CEED_MEM_HOST, &tmp);
624         CeedChkBackend(ierr);
625         ierr = CeedRealloc(num_active_in + size, &active_in); CeedChkBackend(ierr);
626         for (CeedInt field=0; field<size; field++) {
627           ierr = CeedVectorCreate(ceed, Q*blk_size, &active_in[num_active_in+field]);
628           CeedChkBackend(ierr);
629           ierr = CeedVectorSetArray(active_in[num_active_in+field], CEED_MEM_HOST,
630                                     CEED_USE_POINTER, &tmp[field*Q*blk_size]);
631           CeedChkBackend(ierr);
632         }
633         num_active_in += size;
634         ierr = CeedVectorRestoreArray(impl->q_vecs_in[i], &tmp); CeedChkBackend(ierr);
635       }
636     }
637     impl->num_active_in = num_active_in;
638     impl->qf_active_in = active_in;
639   }
640 
641   // Count number of active output fields
642   if (!num_active_out) {
643     for (CeedInt i=0; i<num_output_fields; i++) {
644       // Get output vector
645       ierr = CeedOperatorFieldGetVector(op_output_fields[i], &vec);
646       CeedChkBackend(ierr);
647       // Check if active output
648       if (vec == CEED_VECTOR_ACTIVE) {
649         ierr = CeedQFunctionFieldGetSize(qf_output_fields[i], &size);
650         CeedChkBackend(ierr);
651         num_active_out += size;
652       }
653     }
654     impl->num_active_out = num_active_out;
655   }
656 
657   // Check sizes
658   if (!num_active_in || !num_active_out)
659     // LCOV_EXCL_START
660     return CeedError(ceed, CEED_ERROR_BACKEND,
661                      "Cannot assemble QFunction without active inputs "
662                      "and outputs");
663   // LCOV_EXCL_STOP
664 
665   // Setup l_vec
666   if (!l_vec) {
667     ierr = CeedVectorCreate(ceed, num_blks*blk_size*Q*num_active_in*num_active_out,
668                             &l_vec); CeedChkBackend(ierr);
669     ierr = CeedVectorSetValue(l_vec, 0.0); CeedChkBackend(ierr);
670     impl->qf_l_vec = l_vec;
671   }
672   ierr = CeedVectorGetArray(l_vec, CEED_MEM_HOST, &a); CeedChkBackend(ierr);
673 
674   // Build objects if needed
675   CeedInt strides[3] = {1, Q, num_active_in *num_active_out*Q};
676   if (build_objects) {
677     // Create output restriction
678     ierr = CeedElemRestrictionCreateStrided(ceed, num_elem, Q,
679                                             num_active_in*num_active_out,
680                                             num_active_in*num_active_out*num_elem*Q,
681                                             strides, rstr); CeedChkBackend(ierr);
682     // Create assembled vector
683     ierr = CeedVectorCreate(ceed, num_elem*Q*num_active_in*num_active_out,
684                             assembled); CeedChkBackend(ierr);
685   }
686 
687   // Loop through elements
688   for (CeedInt e=0; e<num_blks*blk_size; e+=blk_size) {
689     // Input basis apply
690     ierr = CeedOperatorInputBasis_Opt(e, Q, qf_input_fields, op_input_fields,
691                                       num_input_fields, blk_size, NULL, true,
692                                       e_data, impl, request); CeedChkBackend(ierr);
693 
694     // Assemble QFunction
695     for (CeedInt in=0; in<num_active_in; in++) {
696       // Set Inputs
697       ierr = CeedVectorSetValue(active_in[in], 1.0); CeedChkBackend(ierr);
698       if (num_active_in > 1) {
699         ierr = CeedVectorSetValue(active_in[(in+num_active_in-1)%num_active_in],
700                                   0.0); CeedChkBackend(ierr);
701       }
702       // Set Outputs
703       for (CeedInt out=0; out<num_output_fields; out++) {
704         // Get output vector
705         ierr = CeedOperatorFieldGetVector(op_output_fields[out], &vec);
706         CeedChkBackend(ierr);
707         // Check if active output
708         if (vec == CEED_VECTOR_ACTIVE) {
709           CeedVectorSetArray(impl->q_vecs_out[out], CEED_MEM_HOST,
710                              CEED_USE_POINTER, a); CeedChkBackend(ierr);
711           ierr = CeedQFunctionFieldGetSize(qf_output_fields[out], &size);
712           CeedChkBackend(ierr);
713           a += size*Q*blk_size; // Advance the pointer by the size of the output
714         }
715       }
716       // Apply QFunction
717       ierr = CeedQFunctionApply(qf, Q*blk_size, impl->q_vecs_in, impl->q_vecs_out);
718       CeedChkBackend(ierr);
719     }
720   }
721 
722   // Un-set output Qvecs to prevent accidental overwrite of Assembled
723   for (CeedInt out=0; out<num_output_fields; out++) {
724     // Get output vector
725     ierr = CeedOperatorFieldGetVector(op_output_fields[out], &vec);
726     CeedChkBackend(ierr);
727     // Check if active output
728     if (vec == CEED_VECTOR_ACTIVE) {
729       CeedVectorSetArray(impl->q_vecs_out[out], CEED_MEM_HOST, CEED_COPY_VALUES,
730                          NULL); CeedChkBackend(ierr);
731     }
732   }
733 
734   // Restore input arrays
735   ierr = CeedOperatorRestoreInputs_Opt(num_input_fields, qf_input_fields,
736                                        op_input_fields, e_data, impl);
737   CeedChkBackend(ierr);
738 
739   // Output blocked restriction
740   ierr = CeedVectorRestoreArray(l_vec, &a); CeedChkBackend(ierr);
741   ierr = CeedVectorSetValue(*assembled, 0.0); CeedChkBackend(ierr);
742   CeedElemRestriction blk_rstr = impl->qf_blk_rstr;
743   if (!blk_rstr) {
744     ierr = CeedElemRestrictionCreateBlockedStrided(ceed, num_elem, Q, blk_size,
745            num_active_in*num_active_out, num_active_in*num_active_out*num_elem*Q,
746            strides, &blk_rstr); CeedChkBackend(ierr);
747     impl->qf_blk_rstr = blk_rstr;
748   }
749   ierr = CeedElemRestrictionApply(blk_rstr, CEED_TRANSPOSE, l_vec, *assembled,
750                                   request); CeedChkBackend(ierr);
751 
752   return CEED_ERROR_SUCCESS;
753 }
754 
755 //------------------------------------------------------------------------------
756 // Assemble Linear QFunction
757 //------------------------------------------------------------------------------
758 static int CeedOperatorLinearAssembleQFunction_Opt(CeedOperator op,
759     CeedVector *assembled, CeedElemRestriction *rstr, CeedRequest *request) {
760   return CeedOperatorLinearAssembleQFunctionCore_Opt(op, true, assembled, rstr,
761          request);
762 }
763 
764 //------------------------------------------------------------------------------
765 // Update Assembled Linear QFunction
766 //------------------------------------------------------------------------------
767 static int CeedOperatorLinearAssembleQFunctionUpdate_Opt(CeedOperator op,
768     CeedVector assembled, CeedElemRestriction rstr, CeedRequest *request) {
769   return CeedOperatorLinearAssembleQFunctionCore_Opt(op, false, &assembled,
770          &rstr, request);
771 }
772 
773 //------------------------------------------------------------------------------
774 // Operator Destroy
775 //------------------------------------------------------------------------------
776 static int CeedOperatorDestroy_Opt(CeedOperator op) {
777   int ierr;
778   CeedOperator_Opt *impl;
779   ierr = CeedOperatorGetData(op, &impl); CeedChkBackend(ierr);
780 
781   for (CeedInt i=0; i<impl->num_inputs+impl->num_outputs; i++) {
782     ierr = CeedElemRestrictionDestroy(&impl->blk_restr[i]); CeedChkBackend(ierr);
783     ierr = CeedVectorDestroy(&impl->e_vecs_full[i]); CeedChkBackend(ierr);
784   }
785   ierr = CeedFree(&impl->blk_restr); CeedChkBackend(ierr);
786   ierr = CeedFree(&impl->e_vecs_full); CeedChkBackend(ierr);
787   ierr = CeedFree(&impl->input_states); CeedChkBackend(ierr);
788 
789   for (CeedInt i=0; i<impl->num_inputs; i++) {
790     ierr = CeedVectorDestroy(&impl->e_vecs_in[i]); CeedChkBackend(ierr);
791     ierr = CeedVectorDestroy(&impl->q_vecs_in[i]); CeedChkBackend(ierr);
792   }
793   ierr = CeedFree(&impl->e_vecs_in); CeedChkBackend(ierr);
794   ierr = CeedFree(&impl->q_vecs_in); CeedChkBackend(ierr);
795 
796   for (CeedInt i=0; i<impl->num_outputs; i++) {
797     ierr = CeedVectorDestroy(&impl->e_vecs_out[i]); CeedChkBackend(ierr);
798     ierr = CeedVectorDestroy(&impl->q_vecs_out[i]); CeedChkBackend(ierr);
799   }
800   ierr = CeedFree(&impl->e_vecs_out); CeedChkBackend(ierr);
801   ierr = CeedFree(&impl->q_vecs_out); CeedChkBackend(ierr);
802 
803   // QFunction assembly data
804   for (CeedInt i=0; i<impl->num_active_in; i++) {
805     ierr = CeedVectorDestroy(&impl->qf_active_in[i]); CeedChkBackend(ierr);
806   }
807   ierr = CeedFree(&impl->qf_active_in); CeedChkBackend(ierr);
808   ierr = CeedVectorDestroy(&impl->qf_l_vec); CeedChkBackend(ierr);
809   ierr = CeedElemRestrictionDestroy(&impl->qf_blk_rstr); CeedChkBackend(ierr);
810 
811   ierr = CeedFree(&impl); CeedChkBackend(ierr);
812   return CEED_ERROR_SUCCESS;
813 }
814 
815 //------------------------------------------------------------------------------
816 // Operator Create
817 //------------------------------------------------------------------------------
818 int CeedOperatorCreate_Opt(CeedOperator op) {
819   int ierr;
820   Ceed ceed;
821   ierr = CeedOperatorGetCeed(op, &ceed); CeedChkBackend(ierr);
822   Ceed_Opt *ceed_impl;
823   ierr = CeedGetData(ceed, &ceed_impl); CeedChkBackend(ierr);
824   CeedInt blk_size = ceed_impl->blk_size;
825   CeedOperator_Opt *impl;
826 
827   ierr = CeedCalloc(1, &impl); CeedChkBackend(ierr);
828   ierr = CeedOperatorSetData(op, impl); CeedChkBackend(ierr);
829 
830   if (blk_size != 1 && blk_size != 8)
831     // LCOV_EXCL_START
832     return CeedError(ceed, CEED_ERROR_BACKEND,
833                      "Opt backend cannot use blocksize: %d", blk_size);
834   // LCOV_EXCL_STOP
835 
836   ierr = CeedSetBackendFunction(ceed, "Operator", op, "LinearAssembleQFunction",
837                                 CeedOperatorLinearAssembleQFunction_Opt);
838   CeedChkBackend(ierr);
839   ierr = CeedSetBackendFunction(ceed, "Operator", op,
840                                 "LinearAssembleQFunctionUpdate",
841                                 CeedOperatorLinearAssembleQFunctionUpdate_Opt);
842   CeedChkBackend(ierr);
843   ierr = CeedSetBackendFunction(ceed, "Operator", op, "ApplyAdd",
844                                 CeedOperatorApplyAdd_Opt); CeedChkBackend(ierr);
845   ierr = CeedSetBackendFunction(ceed, "Operator", op, "Destroy",
846                                 CeedOperatorDestroy_Opt); CeedChkBackend(ierr);
847   return CEED_ERROR_SUCCESS;
848 }
849 //------------------------------------------------------------------------------
850