xref: /libCEED/rust/libceed-sys/c-src/backends/opt/ceed-opt-operator.c (revision d264344313546611a0e282df1f09990e3ff407ce)
189c6efa4Sjeremylt // Copyright (c) 2017-2018, Lawrence Livermore National Security, LLC.
289c6efa4Sjeremylt // Produced at the Lawrence Livermore National Laboratory. LLNL-CODE-734707.
389c6efa4Sjeremylt // All Rights reserved. See files LICENSE and NOTICE for details.
489c6efa4Sjeremylt //
589c6efa4Sjeremylt // This file is part of CEED, a collection of benchmarks, miniapps, software
689c6efa4Sjeremylt // libraries and APIs for efficient high-order finite element and spectral
789c6efa4Sjeremylt // element discretizations for exascale applications. For more information and
889c6efa4Sjeremylt // source code availability see http://github.com/ceed.
989c6efa4Sjeremylt //
1089c6efa4Sjeremylt // The CEED research is supported by the Exascale Computing Project 17-SC-20-SC,
1189c6efa4Sjeremylt // a collaborative effort of two U.S. Department of Energy organizations (Office
1289c6efa4Sjeremylt // of Science and the National Nuclear Security Administration) responsible for
1389c6efa4Sjeremylt // the planning and preparation of a capable exascale ecosystem, including
1489c6efa4Sjeremylt // software, applications, hardware, advanced system engineering and early
1589c6efa4Sjeremylt // testbed platforms, in support of the nation's exascale computing imperative.
1689c6efa4Sjeremylt 
17ec3da8bcSJed Brown #include <ceed/ceed.h>
18ec3da8bcSJed Brown #include <ceed/backend.h>
193d576824SJeremy L Thompson #include <stdbool.h>
203d576824SJeremy L Thompson #include <stdint.h>
2189c6efa4Sjeremylt #include <string.h>
2289c6efa4Sjeremylt #include "ceed-opt.h"
2389c6efa4Sjeremylt 
24f10650afSjeremylt //------------------------------------------------------------------------------
25f10650afSjeremylt // Setup Input/Output Fields
26f10650afSjeremylt //------------------------------------------------------------------------------
2789c6efa4Sjeremylt static int CeedOperatorSetupFields_Opt(CeedQFunction qf, CeedOperator op,
284fc1f125SJeremy L Thompson                                        bool is_input, const CeedInt blk_size,
29d1d35e2fSjeremylt                                        CeedElemRestriction *blk_restr,
304fc1f125SJeremy L Thompson                                        CeedVector *e_vecs_full, CeedVector *e_vecs,
31d1d35e2fSjeremylt                                        CeedVector *q_vecs, CeedInt start_e,
32d1d35e2fSjeremylt                                        CeedInt num_fields, CeedInt Q) {
33ebbcc8a3SJeremy L Thompson   CeedInt ierr, num_comp, size, P;
34*d2643443SJeremy L Thompson   CeedSize e_size, q_size;
3589c6efa4Sjeremylt   Ceed ceed;
36e15f9bd0SJeremy L Thompson   ierr = CeedOperatorGetCeed(op, &ceed); CeedChkBackend(ierr);
3789c6efa4Sjeremylt   CeedBasis basis;
3889c6efa4Sjeremylt   CeedElemRestriction r;
39d1d35e2fSjeremylt   CeedOperatorField *op_fields;
40d1d35e2fSjeremylt   CeedQFunctionField *qf_fields;
414fc1f125SJeremy L Thompson   if (is_input) {
427e7773b5SJeremy L Thompson     ierr = CeedOperatorGetFields(op, NULL, &op_fields, NULL, NULL);
43e15f9bd0SJeremy L Thompson     CeedChkBackend(ierr);
447e7773b5SJeremy L Thompson     ierr = CeedQFunctionGetFields(qf, NULL, &qf_fields, NULL, NULL);
45e15f9bd0SJeremy L Thompson     CeedChkBackend(ierr);
464fc1f125SJeremy L Thompson   } else {
474fc1f125SJeremy L Thompson     ierr = CeedOperatorGetFields(op, NULL, NULL, NULL,&op_fields);
484fc1f125SJeremy L Thompson     CeedChkBackend(ierr);
494fc1f125SJeremy L Thompson     ierr = CeedQFunctionGetFields(qf, NULL, NULL, NULL, &qf_fields);
504fc1f125SJeremy L Thompson     CeedChkBackend(ierr);
5189c6efa4Sjeremylt   }
5289c6efa4Sjeremylt 
5389c6efa4Sjeremylt   // Loop over fields
54d1d35e2fSjeremylt   for (CeedInt i=0; i<num_fields; i++) {
55d1d35e2fSjeremylt     CeedEvalMode eval_mode;
56d1d35e2fSjeremylt     ierr = CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode);
57d1d35e2fSjeremylt     CeedChkBackend(ierr);
5889c6efa4Sjeremylt 
59d1d35e2fSjeremylt     if (eval_mode != CEED_EVAL_WEIGHT) {
60d1d35e2fSjeremylt       ierr = CeedOperatorFieldGetElemRestriction(op_fields[i], &r);
61e15f9bd0SJeremy L Thompson       CeedChkBackend(ierr);
6289c6efa4Sjeremylt       Ceed ceed;
63e15f9bd0SJeremy L Thompson       ierr = CeedElemRestrictionGetCeed(r, &ceed); CeedChkBackend(ierr);
64e79b91d9SJeremy L Thompson       CeedSize l_size;
65e79b91d9SJeremy L Thompson       CeedInt num_elem, elem_size, comp_stride;
66d1d35e2fSjeremylt       ierr = CeedElemRestrictionGetNumElements(r, &num_elem); CeedChkBackend(ierr);
67d1d35e2fSjeremylt       ierr = CeedElemRestrictionGetElementSize(r, &elem_size); CeedChkBackend(ierr);
68d1d35e2fSjeremylt       ierr = CeedElemRestrictionGetLVectorSize(r, &l_size); CeedChkBackend(ierr);
69d1d35e2fSjeremylt       ierr = CeedElemRestrictionGetNumComponents(r, &num_comp); CeedChkBackend(ierr);
70bd33150aSjeremylt 
713ac43b2cSJeremy L Thompson       bool strided;
72e15f9bd0SJeremy L Thompson       ierr = CeedElemRestrictionIsStrided(r, &strided); CeedChkBackend(ierr);
733ac43b2cSJeremy L Thompson       if (strided) {
743ac43b2cSJeremy L Thompson         CeedInt strides[3];
75e15f9bd0SJeremy L Thompson         ierr = CeedElemRestrictionGetStrides(r, &strides); CeedChkBackend(ierr);
76d1d35e2fSjeremylt         ierr = CeedElemRestrictionCreateBlockedStrided(ceed, num_elem, elem_size,
77d1d35e2fSjeremylt                blk_size, num_comp, l_size, strides, &blk_restr[i+start_e]);
78e15f9bd0SJeremy L Thompson         CeedChkBackend(ierr);
793ac43b2cSJeremy L Thompson       } else {
80bd33150aSjeremylt         const CeedInt *offsets = NULL;
81bd33150aSjeremylt         ierr = CeedElemRestrictionGetOffsets(r, CEED_MEM_HOST, &offsets);
82e15f9bd0SJeremy L Thompson         CeedChkBackend(ierr);
83d1d35e2fSjeremylt         ierr = CeedElemRestrictionGetCompStride(r, &comp_stride); CeedChkBackend(ierr);
84d1d35e2fSjeremylt         ierr = CeedElemRestrictionCreateBlocked(ceed, num_elem, elem_size,
85d1d35e2fSjeremylt                                                 blk_size, num_comp, comp_stride,
86d1d35e2fSjeremylt                                                 l_size, CEED_MEM_HOST,
87bd33150aSjeremylt                                                 CEED_COPY_VALUES, offsets,
88d1d35e2fSjeremylt                                                 &blk_restr[i+start_e]);
89e15f9bd0SJeremy L Thompson         CeedChkBackend(ierr);
90e15f9bd0SJeremy L Thompson         ierr = CeedElemRestrictionRestoreOffsets(r, &offsets); CeedChkBackend(ierr);
913ac43b2cSJeremy L Thompson       }
92d1d35e2fSjeremylt       ierr = CeedElemRestrictionCreateVector(blk_restr[i+start_e], NULL,
934fc1f125SJeremy L Thompson                                              &e_vecs_full[i+start_e]);
94e15f9bd0SJeremy L Thompson       CeedChkBackend(ierr);
9589c6efa4Sjeremylt     }
9689c6efa4Sjeremylt 
97d1d35e2fSjeremylt     switch(eval_mode) {
9889c6efa4Sjeremylt     case CEED_EVAL_NONE:
99d1d35e2fSjeremylt       ierr = CeedQFunctionFieldGetSize(qf_fields[i], &size); CeedChkBackend(ierr);
100*d2643443SJeremy L Thompson       e_size = (CeedSize)Q*size*blk_size;
101*d2643443SJeremy L Thompson       ierr = CeedVectorCreate(ceed, e_size, &e_vecs[i]); CeedChkBackend(ierr);
102*d2643443SJeremy L Thompson       q_size = (CeedSize)Q*size*blk_size;
103*d2643443SJeremy L Thompson       ierr = CeedVectorCreate(ceed, q_size, &q_vecs[i]); CeedChkBackend(ierr);
10489c6efa4Sjeremylt       break;
10589c6efa4Sjeremylt     case CEED_EVAL_INTERP:
10689c6efa4Sjeremylt     case CEED_EVAL_GRAD:
107d1d35e2fSjeremylt       ierr = CeedOperatorFieldGetBasis(op_fields[i], &basis); CeedChkBackend(ierr);
108d1d35e2fSjeremylt       ierr = CeedQFunctionFieldGetSize(qf_fields[i], &size); CeedChkBackend(ierr);
109ebbcc8a3SJeremy L Thompson       ierr = CeedBasisGetNumNodes(basis, &P); CeedChkBackend(ierr);
110ebbcc8a3SJeremy L Thompson       ierr = CeedBasisGetNumComponents(basis, &num_comp); CeedChkBackend(ierr);
111*d2643443SJeremy L Thompson       e_size = (CeedSize)P*num_comp*blk_size;
112*d2643443SJeremy L Thompson       ierr = CeedVectorCreate(ceed, e_size, &e_vecs[i]); CeedChkBackend(ierr);
113*d2643443SJeremy L Thompson       q_size = (CeedSize)Q*size*blk_size;
114*d2643443SJeremy L Thompson       ierr = CeedVectorCreate(ceed, q_size, &q_vecs[i]); CeedChkBackend(ierr);
11589c6efa4Sjeremylt       break;
11689c6efa4Sjeremylt     case CEED_EVAL_WEIGHT: // Only on input fields
117d1d35e2fSjeremylt       ierr = CeedOperatorFieldGetBasis(op_fields[i], &basis); CeedChkBackend(ierr);
118*d2643443SJeremy L Thompson       q_size = (CeedSize)Q*blk_size;
119*d2643443SJeremy L Thompson       ierr = CeedVectorCreate(ceed, q_size, &q_vecs[i]); CeedChkBackend(ierr);
120d1d35e2fSjeremylt       ierr = CeedBasisApply(basis, blk_size, CEED_NOTRANSPOSE,
121d1d35e2fSjeremylt                             CEED_EVAL_WEIGHT, CEED_VECTOR_NONE, q_vecs[i]);
122e15f9bd0SJeremy L Thompson       CeedChkBackend(ierr);
12389c6efa4Sjeremylt 
12489c6efa4Sjeremylt       break;
12589c6efa4Sjeremylt     case CEED_EVAL_DIV:
1265f67fadeSJeremy L Thompson       break; // Not implemented
12789c6efa4Sjeremylt     case CEED_EVAL_CURL:
1285f67fadeSJeremy L Thompson       break; // Not implemented
12989c6efa4Sjeremylt     }
1309c774eddSJeremy L Thompson     if (is_input && !!e_vecs[i]) {
1319c774eddSJeremy L Thompson       ierr = CeedVectorSetArray(e_vecs[i], CEED_MEM_HOST,
1329c774eddSJeremy L Thompson                                 CEED_COPY_VALUES, NULL); CeedChkBackend(ierr);
1339c774eddSJeremy L Thompson     }
13489c6efa4Sjeremylt   }
135e15f9bd0SJeremy L Thompson   return CEED_ERROR_SUCCESS;
13689c6efa4Sjeremylt }
13789c6efa4Sjeremylt 
138f10650afSjeremylt //------------------------------------------------------------------------------
139f10650afSjeremylt // Setup Operator
140f10650afSjeremylt //------------------------------------------------------------------------------
14189c6efa4Sjeremylt static int CeedOperatorSetup_Opt(CeedOperator op) {
14289c6efa4Sjeremylt   int ierr;
1438c1105f8SJeremy L Thompson   bool is_setup_done;
1448c1105f8SJeremy L Thompson   ierr = CeedOperatorIsSetupDone(op, &is_setup_done); CeedChkBackend(ierr);
1458c1105f8SJeremy L Thompson   if (is_setup_done) return CEED_ERROR_SUCCESS;
14689c6efa4Sjeremylt   Ceed ceed;
147e15f9bd0SJeremy L Thompson   ierr = CeedOperatorGetCeed(op, &ceed); CeedChkBackend(ierr);
148d1d35e2fSjeremylt   Ceed_Opt *ceed_impl;
149d1d35e2fSjeremylt   ierr = CeedGetData(ceed, &ceed_impl); CeedChkBackend(ierr);
150d1d35e2fSjeremylt   const CeedInt blk_size = ceed_impl->blk_size;
15189c6efa4Sjeremylt   CeedOperator_Opt *impl;
152e15f9bd0SJeremy L Thompson   ierr = CeedOperatorGetData(op, &impl); CeedChkBackend(ierr);
15389c6efa4Sjeremylt   CeedQFunction qf;
154e15f9bd0SJeremy L Thompson   ierr = CeedOperatorGetQFunction(op, &qf); CeedChkBackend(ierr);
155d1d35e2fSjeremylt   CeedInt Q, num_input_fields, num_output_fields;
156e15f9bd0SJeremy L Thompson   ierr = CeedOperatorGetNumQuadraturePoints(op, &Q); CeedChkBackend(ierr);
1570b454692Sjeremylt   ierr = CeedQFunctionIsIdentity(qf, &impl->is_identity_qf); CeedChkBackend(ierr);
158d1d35e2fSjeremylt   CeedOperatorField *op_input_fields, *op_output_fields;
1597e7773b5SJeremy L Thompson   ierr = CeedOperatorGetFields(op, &num_input_fields, &op_input_fields,
1607e7773b5SJeremy L Thompson                                &num_output_fields, &op_output_fields);
161e15f9bd0SJeremy L Thompson   CeedChkBackend(ierr);
162d1d35e2fSjeremylt   CeedQFunctionField *qf_input_fields, *qf_output_fields;
1637e7773b5SJeremy L Thompson   ierr = CeedQFunctionGetFields(qf, NULL, &qf_input_fields, NULL,
1647e7773b5SJeremy L Thompson                                 &qf_output_fields);
165e15f9bd0SJeremy L Thompson   CeedChkBackend(ierr);
16689c6efa4Sjeremylt 
16789c6efa4Sjeremylt   // Allocate
168d1d35e2fSjeremylt   ierr = CeedCalloc(num_input_fields + num_output_fields, &impl->blk_restr);
169e15f9bd0SJeremy L Thompson   CeedChkBackend(ierr);
1704fc1f125SJeremy L Thompson   ierr = CeedCalloc(num_input_fields + num_output_fields, &impl->e_vecs_full);
171e15f9bd0SJeremy L Thompson   CeedChkBackend(ierr);
17289c6efa4Sjeremylt 
1734fc1f125SJeremy L Thompson   ierr = CeedCalloc(CEED_FIELD_MAX, &impl->input_states); CeedChkBackend(ierr);
174bf4cb664SJeremy L Thompson   ierr = CeedCalloc(CEED_FIELD_MAX, &impl->e_vecs_in); CeedChkBackend(ierr);
175bf4cb664SJeremy L Thompson   ierr = CeedCalloc(CEED_FIELD_MAX, &impl->e_vecs_out); CeedChkBackend(ierr);
176bf4cb664SJeremy L Thompson   ierr = CeedCalloc(CEED_FIELD_MAX, &impl->q_vecs_in); CeedChkBackend(ierr);
177bf4cb664SJeremy L Thompson   ierr = CeedCalloc(CEED_FIELD_MAX, &impl->q_vecs_out); CeedChkBackend(ierr);
17889c6efa4Sjeremylt 
1794fc1f125SJeremy L Thompson   impl->num_inputs = num_input_fields;
1804fc1f125SJeremy L Thompson   impl->num_outputs = num_output_fields;
18189c6efa4Sjeremylt 
18289c6efa4Sjeremylt   // Set up infield and outfield pointer arrays
18389c6efa4Sjeremylt   // Infields
1844fc1f125SJeremy L Thompson   ierr = CeedOperatorSetupFields_Opt(qf, op, true, blk_size, impl->blk_restr,
1854fc1f125SJeremy L Thompson                                      impl->e_vecs_full, impl->e_vecs_in,
186d1d35e2fSjeremylt                                      impl->q_vecs_in, 0, num_input_fields, Q);
187e15f9bd0SJeremy L Thompson   CeedChkBackend(ierr);
18889c6efa4Sjeremylt   // Outfields
1894fc1f125SJeremy L Thompson   ierr = CeedOperatorSetupFields_Opt(qf, op, false, blk_size, impl->blk_restr,
1904fc1f125SJeremy L Thompson                                      impl->e_vecs_full, impl->e_vecs_out,
191d1d35e2fSjeremylt                                      impl->q_vecs_out, num_input_fields,
192d1d35e2fSjeremylt                                      num_output_fields, Q);
193e15f9bd0SJeremy L Thompson   CeedChkBackend(ierr);
19489c6efa4Sjeremylt 
19516911fdaSjeremylt   // Identity QFunctions
1960b454692Sjeremylt   if (impl->is_identity_qf) {
197d1d35e2fSjeremylt     CeedEvalMode in_mode, out_mode;
198d1d35e2fSjeremylt     CeedQFunctionField *in_fields, *out_fields;
1997e7773b5SJeremy L Thompson     ierr = CeedQFunctionGetFields(qf, NULL, &in_fields, NULL, &out_fields);
200e15f9bd0SJeremy L Thompson     CeedChkBackend(ierr);
2010b454692Sjeremylt     ierr = CeedQFunctionFieldGetEvalMode(in_fields[0], &in_mode);
202d1d35e2fSjeremylt     CeedChkBackend(ierr);
2030b454692Sjeremylt     ierr = CeedQFunctionFieldGetEvalMode(out_fields[0], &out_mode);
204d1d35e2fSjeremylt     CeedChkBackend(ierr);
205d1d35e2fSjeremylt 
2060b454692Sjeremylt     if (in_mode == CEED_EVAL_NONE && out_mode == CEED_EVAL_NONE) {
2070b454692Sjeremylt       impl->is_identity_restr_op = true;
2080b454692Sjeremylt     } else {
2090b454692Sjeremylt       ierr = CeedVectorDestroy(&impl->q_vecs_out[0]); CeedChkBackend(ierr);
2100b454692Sjeremylt       impl->q_vecs_out[0] = impl->q_vecs_in[0];
2110b454692Sjeremylt       ierr = CeedVectorAddReference(impl->q_vecs_in[0]); CeedChkBackend(ierr);
21216911fdaSjeremylt     }
21316911fdaSjeremylt   }
21416911fdaSjeremylt 
215e15f9bd0SJeremy L Thompson   ierr = CeedOperatorSetSetupDone(op); CeedChkBackend(ierr);
21689c6efa4Sjeremylt 
217e15f9bd0SJeremy L Thompson   return CEED_ERROR_SUCCESS;
21889c6efa4Sjeremylt }
21989c6efa4Sjeremylt 
220f10650afSjeremylt //------------------------------------------------------------------------------
221f10650afSjeremylt // Setup Input Fields
222f10650afSjeremylt //------------------------------------------------------------------------------
223d1d35e2fSjeremylt static inline int CeedOperatorSetupInputs_Opt(CeedInt num_input_fields,
224d1d35e2fSjeremylt     CeedQFunctionField *qf_input_fields, CeedOperatorField *op_input_fields,
225a0162de9SJeremy L Thompson     CeedVector in_vec, CeedScalar *e_data[2*CEED_FIELD_MAX], CeedOperator_Opt *impl,
226a0162de9SJeremy L Thompson     CeedRequest *request) {
2271d102b48SJeremy L Thompson   CeedInt ierr;
228d1d35e2fSjeremylt   CeedEvalMode eval_mode;
22989c6efa4Sjeremylt   CeedVector vec;
23089c6efa4Sjeremylt   uint64_t state;
23189c6efa4Sjeremylt 
232d1d35e2fSjeremylt   for (CeedInt i=0; i<num_input_fields; i++) {
233d1d35e2fSjeremylt     ierr = CeedQFunctionFieldGetEvalMode(qf_input_fields[i], &eval_mode);
234e15f9bd0SJeremy L Thompson     CeedChkBackend(ierr);
235d1d35e2fSjeremylt     if (eval_mode == CEED_EVAL_WEIGHT) { // Skip
23689c6efa4Sjeremylt     } else {
23789c6efa4Sjeremylt       // Get input vector
238d1d35e2fSjeremylt       ierr = CeedOperatorFieldGetVector(op_input_fields[i], &vec);
239d1d35e2fSjeremylt       CeedChkBackend(ierr);
24089c6efa4Sjeremylt       if (vec != CEED_VECTOR_ACTIVE) {
24189c6efa4Sjeremylt         // Restrict
242e15f9bd0SJeremy L Thompson         ierr = CeedVectorGetState(vec, &state); CeedChkBackend(ierr);
2434fc1f125SJeremy L Thompson         if (state != impl->input_states[i]) {
244d1d35e2fSjeremylt           ierr = CeedElemRestrictionApply(impl->blk_restr[i], CEED_NOTRANSPOSE,
2454fc1f125SJeremy L Thompson                                           vec, impl->e_vecs_full[i], request);
246e15f9bd0SJeremy L Thompson           CeedChkBackend(ierr);
2474fc1f125SJeremy L Thompson           impl->input_states[i] = state;
24889c6efa4Sjeremylt         }
24989c6efa4Sjeremylt         // Get evec
2504fc1f125SJeremy L Thompson         ierr = CeedVectorGetArrayRead(impl->e_vecs_full[i], CEED_MEM_HOST,
251a0162de9SJeremy L Thompson                                       (const CeedScalar **) &e_data[i]);
252e15f9bd0SJeremy L Thompson         CeedChkBackend(ierr);
2539c774eddSJeremy L Thompson       } else {
2549c774eddSJeremy L Thompson         // Set Qvec for CEED_EVAL_NONE
2559c774eddSJeremy L Thompson         if (eval_mode == CEED_EVAL_NONE) {
2569c774eddSJeremy L Thompson           ierr = CeedVectorGetArrayRead(impl->e_vecs_in[i], CEED_MEM_HOST,
2579c774eddSJeremy L Thompson                                         (const CeedScalar **)&e_data[i]);
2589c774eddSJeremy L Thompson           CeedChkBackend(ierr);
2599c774eddSJeremy L Thompson           ierr = CeedVectorSetArray(impl->q_vecs_in[i], CEED_MEM_HOST,
2609c774eddSJeremy L Thompson                                     CEED_USE_POINTER, e_data[i]); CeedChkBackend(ierr);
2619c774eddSJeremy L Thompson           ierr = CeedVectorRestoreArrayRead(impl->e_vecs_in[i],
2629c774eddSJeremy L Thompson                                             (const CeedScalar **)&e_data[i]);
2639c774eddSJeremy L Thompson           CeedChkBackend(ierr);
2649c774eddSJeremy L Thompson         }
2659c774eddSJeremy L Thompson       }
26689c6efa4Sjeremylt     }
26789c6efa4Sjeremylt   }
268e15f9bd0SJeremy L Thompson   return CEED_ERROR_SUCCESS;
2691d102b48SJeremy L Thompson }
27089c6efa4Sjeremylt 
271f10650afSjeremylt //------------------------------------------------------------------------------
272f10650afSjeremylt // Input Basis Action
273f10650afSjeremylt //------------------------------------------------------------------------------
2741d102b48SJeremy L Thompson static inline int CeedOperatorInputBasis_Opt(CeedInt e, CeedInt Q,
275d1d35e2fSjeremylt     CeedQFunctionField *qf_input_fields, CeedOperatorField *op_input_fields,
276d1d35e2fSjeremylt     CeedInt num_input_fields, CeedInt blk_size, CeedVector in_vec, bool skip_active,
277a0162de9SJeremy L Thompson     CeedScalar *e_data[2*CEED_FIELD_MAX], CeedOperator_Opt *impl,
278a0162de9SJeremy L Thompson     CeedRequest *request) {
2791d102b48SJeremy L Thompson   CeedInt ierr;
280d1d35e2fSjeremylt   CeedInt dim, elem_size, size;
281d1d35e2fSjeremylt   CeedElemRestriction elem_restr;
282d1d35e2fSjeremylt   CeedEvalMode eval_mode;
2831d102b48SJeremy L Thompson   CeedBasis basis;
2841d102b48SJeremy L Thompson   CeedVector vec;
28589c6efa4Sjeremylt 
286d1d35e2fSjeremylt   for (CeedInt i=0; i<num_input_fields; i++) {
287d1d35e2fSjeremylt     ierr = CeedOperatorFieldGetVector(op_input_fields[i], &vec);
288d1d35e2fSjeremylt     CeedChkBackend(ierr);
2891d102b48SJeremy L Thompson     // Skip active input
290d1d35e2fSjeremylt     if (skip_active) {
2911d102b48SJeremy L Thompson       if (vec == CEED_VECTOR_ACTIVE)
2921d102b48SJeremy L Thompson         continue;
2931d102b48SJeremy L Thompson     }
2941d102b48SJeremy L Thompson 
295d1d35e2fSjeremylt     CeedInt active_in = 0;
296d1d35e2fSjeremylt     // Get elem_size, eval_mode, size
297d1d35e2fSjeremylt     ierr = CeedOperatorFieldGetElemRestriction(op_input_fields[i], &elem_restr);
298e15f9bd0SJeremy L Thompson     CeedChkBackend(ierr);
299d1d35e2fSjeremylt     ierr = CeedElemRestrictionGetElementSize(elem_restr, &elem_size);
300e15f9bd0SJeremy L Thompson     CeedChkBackend(ierr);
301d1d35e2fSjeremylt     ierr = CeedQFunctionFieldGetEvalMode(qf_input_fields[i], &eval_mode);
302e15f9bd0SJeremy L Thompson     CeedChkBackend(ierr);
303d1d35e2fSjeremylt     ierr = CeedQFunctionFieldGetSize(qf_input_fields[i], &size);
304d1d35e2fSjeremylt     CeedChkBackend(ierr);
30589c6efa4Sjeremylt     // Restrict block active input
30689c6efa4Sjeremylt     if (vec == CEED_VECTOR_ACTIVE) {
307d1d35e2fSjeremylt       ierr = CeedElemRestrictionApplyBlock(impl->blk_restr[i], e/blk_size,
308d1d35e2fSjeremylt                                            CEED_NOTRANSPOSE, in_vec,
309d1d35e2fSjeremylt                                            impl->e_vecs_in[i], request);
310e15f9bd0SJeremy L Thompson       CeedChkBackend(ierr);
311d1d35e2fSjeremylt       active_in = 1;
31289c6efa4Sjeremylt     }
31389c6efa4Sjeremylt     // Basis action
314d1d35e2fSjeremylt     switch(eval_mode) {
31589c6efa4Sjeremylt     case CEED_EVAL_NONE:
316d1d35e2fSjeremylt       if (!active_in) {
317d1d35e2fSjeremylt         ierr = CeedVectorSetArray(impl->q_vecs_in[i], CEED_MEM_HOST,
318a0162de9SJeremy L Thompson                                   CEED_USE_POINTER, &e_data[i][e*Q*size]);
319a0162de9SJeremy L Thompson         CeedChkBackend(ierr);
32089c6efa4Sjeremylt       }
32189c6efa4Sjeremylt       break;
32289c6efa4Sjeremylt     case CEED_EVAL_INTERP:
323d1d35e2fSjeremylt       ierr = CeedOperatorFieldGetBasis(op_input_fields[i], &basis);
324e15f9bd0SJeremy L Thompson       CeedChkBackend(ierr);
325d1d35e2fSjeremylt       if (!active_in) {
326d1d35e2fSjeremylt         ierr = CeedVectorSetArray(impl->e_vecs_in[i], CEED_MEM_HOST,
327a0162de9SJeremy L Thompson                                   CEED_USE_POINTER, &e_data[i][e*elem_size*size]);
328e15f9bd0SJeremy L Thompson         CeedChkBackend(ierr);
32989c6efa4Sjeremylt       }
330d1d35e2fSjeremylt       ierr = CeedBasisApply(basis, blk_size, CEED_NOTRANSPOSE,
331d1d35e2fSjeremylt                             CEED_EVAL_INTERP, impl->e_vecs_in[i],
332d1d35e2fSjeremylt                             impl->q_vecs_in[i]); CeedChkBackend(ierr);
33389c6efa4Sjeremylt       break;
33489c6efa4Sjeremylt     case CEED_EVAL_GRAD:
335d1d35e2fSjeremylt       ierr = CeedOperatorFieldGetBasis(op_input_fields[i], &basis);
336e15f9bd0SJeremy L Thompson       CeedChkBackend(ierr);
337d1d35e2fSjeremylt       if (!active_in) {
338e15f9bd0SJeremy L Thompson         ierr = CeedBasisGetDimension(basis, &dim); CeedChkBackend(ierr);
339d1d35e2fSjeremylt         ierr = CeedVectorSetArray(impl->e_vecs_in[i], CEED_MEM_HOST,
34089c6efa4Sjeremylt                                   CEED_USE_POINTER,
341a0162de9SJeremy L Thompson                                   &e_data[i][e*elem_size*size/dim]);
342e15f9bd0SJeremy L Thompson         CeedChkBackend(ierr);
34389c6efa4Sjeremylt       }
344d1d35e2fSjeremylt       ierr = CeedBasisApply(basis, blk_size, CEED_NOTRANSPOSE,
345d1d35e2fSjeremylt                             CEED_EVAL_GRAD, impl->e_vecs_in[i],
346d1d35e2fSjeremylt                             impl->q_vecs_in[i]); CeedChkBackend(ierr);
34789c6efa4Sjeremylt       break;
34889c6efa4Sjeremylt     case CEED_EVAL_WEIGHT:
34989c6efa4Sjeremylt       break;  // No action
350bbfacfcdSjeremylt     // LCOV_EXCL_START
35189c6efa4Sjeremylt     case CEED_EVAL_DIV:
3521d102b48SJeremy L Thompson     case CEED_EVAL_CURL: {
353d1d35e2fSjeremylt       ierr = CeedOperatorFieldGetBasis(op_input_fields[i], &basis);
354e15f9bd0SJeremy L Thompson       CeedChkBackend(ierr);
3551d102b48SJeremy L Thompson       Ceed ceed;
356e15f9bd0SJeremy L Thompson       ierr = CeedBasisGetCeed(basis, &ceed); CeedChkBackend(ierr);
357e15f9bd0SJeremy L Thompson       return CeedError(ceed, CEED_ERROR_BACKEND,
358e15f9bd0SJeremy L Thompson                        "Ceed evaluation mode not implemented");
359bbfacfcdSjeremylt       // LCOV_EXCL_STOP
3601d102b48SJeremy L Thompson     }
3611d102b48SJeremy L Thompson     }
3621d102b48SJeremy L Thompson   }
363e15f9bd0SJeremy L Thompson   return CEED_ERROR_SUCCESS;
3641d102b48SJeremy L Thompson }
36589c6efa4Sjeremylt 
366f10650afSjeremylt //------------------------------------------------------------------------------
367f10650afSjeremylt // Output Basis Action
368f10650afSjeremylt //------------------------------------------------------------------------------
3691d102b48SJeremy L Thompson static inline int CeedOperatorOutputBasis_Opt(CeedInt e, CeedInt Q,
370d1d35e2fSjeremylt     CeedQFunctionField *qf_output_fields, CeedOperatorField *op_output_fields,
371d1d35e2fSjeremylt     CeedInt blk_size, CeedInt num_input_fields, CeedInt num_output_fields,
372d1d35e2fSjeremylt     CeedOperator op, CeedVector out_vec, CeedOperator_Opt *impl,
3731d102b48SJeremy L Thompson     CeedRequest *request) {
3741d102b48SJeremy L Thompson   CeedInt ierr;
375d1d35e2fSjeremylt   CeedElemRestriction elem_restr;
376d1d35e2fSjeremylt   CeedEvalMode eval_mode;
3771d102b48SJeremy L Thompson   CeedBasis basis;
3781d102b48SJeremy L Thompson   CeedVector vec;
3791d102b48SJeremy L Thompson 
380d1d35e2fSjeremylt   for (CeedInt i=0; i<num_output_fields; i++) {
381d1d35e2fSjeremylt     // Get elem_size, eval_mode, size
382d1d35e2fSjeremylt     ierr = CeedOperatorFieldGetElemRestriction(op_output_fields[i], &elem_restr);
383e15f9bd0SJeremy L Thompson     CeedChkBackend(ierr);
384d1d35e2fSjeremylt     ierr = CeedQFunctionFieldGetEvalMode(qf_output_fields[i], &eval_mode);
385e15f9bd0SJeremy L Thompson     CeedChkBackend(ierr);
38689c6efa4Sjeremylt     // Basis action
387d1d35e2fSjeremylt     switch(eval_mode) {
38889c6efa4Sjeremylt     case CEED_EVAL_NONE:
38989c6efa4Sjeremylt       break; // No action
39089c6efa4Sjeremylt     case CEED_EVAL_INTERP:
391d1d35e2fSjeremylt       ierr = CeedOperatorFieldGetBasis(op_output_fields[i], &basis);
392e15f9bd0SJeremy L Thompson       CeedChkBackend(ierr);
393d1d35e2fSjeremylt       ierr = CeedBasisApply(basis, blk_size, CEED_TRANSPOSE,
394d1d35e2fSjeremylt                             CEED_EVAL_INTERP, impl->q_vecs_out[i],
395d1d35e2fSjeremylt                             impl->e_vecs_out[i]); CeedChkBackend(ierr);
39689c6efa4Sjeremylt       break;
39789c6efa4Sjeremylt     case CEED_EVAL_GRAD:
398d1d35e2fSjeremylt       ierr = CeedOperatorFieldGetBasis(op_output_fields[i], &basis);
399e15f9bd0SJeremy L Thompson       CeedChkBackend(ierr);
400d1d35e2fSjeremylt       ierr = CeedBasisApply(basis, blk_size, CEED_TRANSPOSE,
401d1d35e2fSjeremylt                             CEED_EVAL_GRAD, impl->q_vecs_out[i],
402d1d35e2fSjeremylt                             impl->e_vecs_out[i]); CeedChkBackend(ierr);
40389c6efa4Sjeremylt       break;
404c042f62fSJeremy L Thompson     // LCOV_EXCL_START
405bbfacfcdSjeremylt     case CEED_EVAL_WEIGHT: {
40689c6efa4Sjeremylt       Ceed ceed;
407e15f9bd0SJeremy L Thompson       ierr = CeedOperatorGetCeed(op, &ceed); CeedChkBackend(ierr);
408e15f9bd0SJeremy L Thompson       return CeedError(ceed, CEED_ERROR_BACKEND,
409e15f9bd0SJeremy L Thompson                        "CEED_EVAL_WEIGHT cannot be an output "
4101d102b48SJeremy L Thompson                        "evaluation mode");
41189c6efa4Sjeremylt     }
41289c6efa4Sjeremylt     case CEED_EVAL_DIV:
4131d102b48SJeremy L Thompson     case CEED_EVAL_CURL: {
4141d102b48SJeremy L Thompson       Ceed ceed;
415e15f9bd0SJeremy L Thompson       ierr = CeedOperatorGetCeed(op, &ceed); CeedChkBackend(ierr);
416e15f9bd0SJeremy L Thompson       return CeedError(ceed, CEED_ERROR_BACKEND,
417e15f9bd0SJeremy L Thompson                        "Ceed evaluation mode not implemented");
418bbfacfcdSjeremylt       // LCOV_EXCL_STOP
4191d102b48SJeremy L Thompson     }
42089c6efa4Sjeremylt     }
42189c6efa4Sjeremylt     // Restrict output block
42289c6efa4Sjeremylt     // Get output vector
423d1d35e2fSjeremylt     ierr = CeedOperatorFieldGetVector(op_output_fields[i], &vec);
424e15f9bd0SJeremy L Thompson     CeedChkBackend(ierr);
42589c6efa4Sjeremylt     if (vec == CEED_VECTOR_ACTIVE)
426d1d35e2fSjeremylt       vec = out_vec;
42789c6efa4Sjeremylt     // Restrict
4284fc1f125SJeremy L Thompson     ierr = CeedElemRestrictionApplyBlock(impl->blk_restr[i+impl->num_inputs],
429d1d35e2fSjeremylt                                          e/blk_size, CEED_TRANSPOSE,
430d1d35e2fSjeremylt                                          impl->e_vecs_out[i], vec, request);
431e15f9bd0SJeremy L Thompson     CeedChkBackend(ierr);
43289c6efa4Sjeremylt   }
433e15f9bd0SJeremy L Thompson   return CEED_ERROR_SUCCESS;
43489c6efa4Sjeremylt }
43589c6efa4Sjeremylt 
436f10650afSjeremylt //------------------------------------------------------------------------------
437f10650afSjeremylt // Restore Input Vectors
438f10650afSjeremylt //------------------------------------------------------------------------------
439d1d35e2fSjeremylt static inline int CeedOperatorRestoreInputs_Opt(CeedInt num_input_fields,
440d1d35e2fSjeremylt     CeedQFunctionField *qf_input_fields, CeedOperatorField *op_input_fields,
441a0162de9SJeremy L Thompson     CeedScalar *e_data[2*CEED_FIELD_MAX], CeedOperator_Opt *impl) {
4421d102b48SJeremy L Thompson   CeedInt ierr;
4431d102b48SJeremy L Thompson 
444d1d35e2fSjeremylt   for (CeedInt i=0; i<num_input_fields; i++) {
4459c774eddSJeremy L Thompson     CeedEvalMode eval_mode;
4469c774eddSJeremy L Thompson     CeedVector vec;
447d1d35e2fSjeremylt     ierr = CeedQFunctionFieldGetEvalMode(qf_input_fields[i], &eval_mode);
448e15f9bd0SJeremy L Thompson     CeedChkBackend(ierr);
4499c774eddSJeremy L Thompson     ierr = CeedOperatorFieldGetVector(op_input_fields[i], &vec);
4509c774eddSJeremy L Thompson     CeedChkBackend(ierr);
4519c774eddSJeremy L Thompson     if (eval_mode != CEED_EVAL_WEIGHT && vec != CEED_VECTOR_ACTIVE) {
4524fc1f125SJeremy L Thompson       ierr = CeedVectorRestoreArrayRead(impl->e_vecs_full[i],
453a0162de9SJeremy L Thompson                                         (const CeedScalar **) &e_data[i]);
454e15f9bd0SJeremy L Thompson       CeedChkBackend(ierr);
45589c6efa4Sjeremylt     }
45689c6efa4Sjeremylt   }
457e15f9bd0SJeremy L Thompson   return CEED_ERROR_SUCCESS;
4581d102b48SJeremy L Thompson }
4591d102b48SJeremy L Thompson 
460f10650afSjeremylt //------------------------------------------------------------------------------
461f10650afSjeremylt // Operator Apply
462f10650afSjeremylt //------------------------------------------------------------------------------
463d1d35e2fSjeremylt static int CeedOperatorApplyAdd_Opt(CeedOperator op, CeedVector in_vec,
464d1d35e2fSjeremylt                                     CeedVector out_vec, CeedRequest *request) {
4651d102b48SJeremy L Thompson   int ierr;
4661d102b48SJeremy L Thompson   Ceed ceed;
467e15f9bd0SJeremy L Thompson   ierr = CeedOperatorGetCeed(op, &ceed); CeedChkBackend(ierr);
468d1d35e2fSjeremylt   Ceed_Opt *ceed_impl;
469d1d35e2fSjeremylt   ierr = CeedGetData(ceed, &ceed_impl); CeedChkBackend(ierr);
470d1d35e2fSjeremylt   CeedInt blk_size = ceed_impl->blk_size;
4711d102b48SJeremy L Thompson   CeedOperator_Opt *impl;
472e15f9bd0SJeremy L Thompson   ierr = CeedOperatorGetData(op, &impl); CeedChkBackend(ierr);
473d1d35e2fSjeremylt   CeedInt Q, num_input_fields, num_output_fields, num_elem;
474d1d35e2fSjeremylt   ierr = CeedOperatorGetNumElements(op, &num_elem); CeedChkBackend(ierr);
475e15f9bd0SJeremy L Thompson   ierr = CeedOperatorGetNumQuadraturePoints(op, &Q); CeedChkBackend(ierr);
476d1d35e2fSjeremylt   CeedInt num_blks = (num_elem/blk_size) + !!(num_elem%blk_size);
4771d102b48SJeremy L Thompson   CeedQFunction qf;
478e15f9bd0SJeremy L Thompson   ierr = CeedOperatorGetQFunction(op, &qf); CeedChkBackend(ierr);
479d1d35e2fSjeremylt   CeedOperatorField *op_input_fields, *op_output_fields;
4807e7773b5SJeremy L Thompson   ierr = CeedOperatorGetFields(op, &num_input_fields, &op_input_fields,
4817e7773b5SJeremy L Thompson                                &num_output_fields, &op_output_fields);
482e15f9bd0SJeremy L Thompson   CeedChkBackend(ierr);
483d1d35e2fSjeremylt   CeedQFunctionField *qf_input_fields, *qf_output_fields;
4847e7773b5SJeremy L Thompson   ierr = CeedQFunctionGetFields(qf, NULL, &qf_input_fields, NULL,
4857e7773b5SJeremy L Thompson                                 &qf_output_fields);
486e15f9bd0SJeremy L Thompson   CeedChkBackend(ierr);
487d1d35e2fSjeremylt   CeedEvalMode eval_mode;
488a0162de9SJeremy L Thompson   CeedScalar *e_data[2*CEED_FIELD_MAX] = {0};
4891d102b48SJeremy L Thompson 
4901d102b48SJeremy L Thompson   // Setup
491e15f9bd0SJeremy L Thompson   ierr = CeedOperatorSetup_Opt(op); CeedChkBackend(ierr);
4921d102b48SJeremy L Thompson 
4930b454692Sjeremylt   // Restriction only operator
4940b454692Sjeremylt   if (impl->is_identity_restr_op) {
4950b454692Sjeremylt     for (CeedInt b=0; b<num_blks; b++) {
4960b454692Sjeremylt       ierr = CeedElemRestrictionApplyBlock(impl->blk_restr[0], b, CEED_NOTRANSPOSE,
4970b454692Sjeremylt                                            in_vec, impl->e_vecs_in[0], request); CeedChkBackend(ierr);
4980b454692Sjeremylt       ierr = CeedElemRestrictionApplyBlock(impl->blk_restr[1], b, CEED_TRANSPOSE,
4990b454692Sjeremylt                                            impl->e_vecs_in[0], out_vec, request); CeedChkBackend(ierr);
5000b454692Sjeremylt     }
5010b454692Sjeremylt     return CEED_ERROR_SUCCESS;
5020b454692Sjeremylt   }
5030b454692Sjeremylt 
5041d102b48SJeremy L Thompson   // Input Evecs and Restriction
505d1d35e2fSjeremylt   ierr = CeedOperatorSetupInputs_Opt(num_input_fields, qf_input_fields,
506a0162de9SJeremy L Thompson                                      op_input_fields, in_vec, e_data,
507a0162de9SJeremy L Thompson                                      impl, request); CeedChkBackend(ierr);
5081d102b48SJeremy L Thompson 
5091d102b48SJeremy L Thompson   // Output Lvecs, Evecs, and Qvecs
510d1d35e2fSjeremylt   for (CeedInt i=0; i<num_output_fields; i++) {
5111d102b48SJeremy L Thompson     // Set Qvec if needed
512d1d35e2fSjeremylt     ierr = CeedQFunctionFieldGetEvalMode(qf_output_fields[i], &eval_mode);
513e15f9bd0SJeremy L Thompson     CeedChkBackend(ierr);
514d1d35e2fSjeremylt     if (eval_mode == CEED_EVAL_NONE) {
5151d102b48SJeremy L Thompson       // Set qvec to single block evec
5169c774eddSJeremy L Thompson       ierr = CeedVectorGetArrayWrite(impl->e_vecs_out[i], CEED_MEM_HOST,
517a0162de9SJeremy L Thompson                                      &e_data[i + num_input_fields]);
518e15f9bd0SJeremy L Thompson       CeedChkBackend(ierr);
519d1d35e2fSjeremylt       ierr = CeedVectorSetArray(impl->q_vecs_out[i], CEED_MEM_HOST,
520a0162de9SJeremy L Thompson                                 CEED_USE_POINTER, e_data[i + num_input_fields]);
521a0162de9SJeremy L Thompson       CeedChkBackend(ierr);
522d1d35e2fSjeremylt       ierr = CeedVectorRestoreArray(impl->e_vecs_out[i],
523a0162de9SJeremy L Thompson                                     &e_data[i + num_input_fields]);
524e15f9bd0SJeremy L Thompson       CeedChkBackend(ierr);
5251d102b48SJeremy L Thompson     }
5261d102b48SJeremy L Thompson   }
5271d102b48SJeremy L Thompson 
5281d102b48SJeremy L Thompson   // Loop through elements
529d1d35e2fSjeremylt   for (CeedInt e=0; e<num_blks*blk_size; e+=blk_size) {
5301d102b48SJeremy L Thompson     // Input basis apply
531d1d35e2fSjeremylt     ierr = CeedOperatorInputBasis_Opt(e, Q, qf_input_fields, op_input_fields,
532d1d35e2fSjeremylt                                       num_input_fields, blk_size, in_vec, false,
533a0162de9SJeremy L Thompson                                       e_data, impl, request); CeedChkBackend(ierr);
5341d102b48SJeremy L Thompson 
5351d102b48SJeremy L Thompson     // Q function
5360b454692Sjeremylt     if (!impl->is_identity_qf) {
537d1d35e2fSjeremylt       ierr = CeedQFunctionApply(qf, Q*blk_size, impl->q_vecs_in, impl->q_vecs_out);
538e15f9bd0SJeremy L Thompson       CeedChkBackend(ierr);
53916911fdaSjeremylt     }
5401d102b48SJeremy L Thompson 
5411d102b48SJeremy L Thompson     // Output basis apply and restrict
542d1d35e2fSjeremylt     ierr = CeedOperatorOutputBasis_Opt(e, Q, qf_output_fields, op_output_fields,
543d1d35e2fSjeremylt                                        blk_size, num_input_fields, num_output_fields,
544d1d35e2fSjeremylt                                        op, out_vec, impl, request);
545e15f9bd0SJeremy L Thompson     CeedChkBackend(ierr);
5461d102b48SJeremy L Thompson   }
5471d102b48SJeremy L Thompson 
5481d102b48SJeremy L Thompson   // Restore input arrays
549d1d35e2fSjeremylt   ierr = CeedOperatorRestoreInputs_Opt(num_input_fields, qf_input_fields,
550a0162de9SJeremy L Thompson                                        op_input_fields, e_data, impl);
551e15f9bd0SJeremy L Thompson   CeedChkBackend(ierr);
55289c6efa4Sjeremylt 
553e15f9bd0SJeremy L Thompson   return CEED_ERROR_SUCCESS;
55489c6efa4Sjeremylt }
55589c6efa4Sjeremylt 
556f10650afSjeremylt //------------------------------------------------------------------------------
55770a7ffb3SJeremy L Thompson // Core code for linear QFunction assembly
558f10650afSjeremylt //------------------------------------------------------------------------------
55970a7ffb3SJeremy L Thompson static inline int CeedOperatorLinearAssembleQFunctionCore_Opt(CeedOperator op,
56070a7ffb3SJeremy L Thompson     bool build_objects, CeedVector *assembled, CeedElemRestriction *rstr,
56170a7ffb3SJeremy L Thompson     CeedRequest *request) {
5621d102b48SJeremy L Thompson   int ierr;
5631d102b48SJeremy L Thompson   Ceed ceed;
564e15f9bd0SJeremy L Thompson   ierr = CeedOperatorGetCeed(op, &ceed); CeedChkBackend(ierr);
565d1d35e2fSjeremylt   Ceed_Opt *ceed_impl;
566d1d35e2fSjeremylt   ierr = CeedGetData(ceed, &ceed_impl); CeedChkBackend(ierr);
567d1d35e2fSjeremylt   const CeedInt blk_size = ceed_impl->blk_size;
568*d2643443SJeremy L Thompson   CeedSize q_size;
5691d102b48SJeremy L Thompson   CeedOperator_Opt *impl;
570e15f9bd0SJeremy L Thompson   ierr = CeedOperatorGetData(op, &impl); CeedChkBackend(ierr);
571d1d35e2fSjeremylt   CeedInt Q, num_input_fields, num_output_fields, num_elem, size;
572d1d35e2fSjeremylt   ierr = CeedOperatorGetNumElements(op, &num_elem); CeedChkBackend(ierr);
573e15f9bd0SJeremy L Thompson   ierr = CeedOperatorGetNumQuadraturePoints(op, &Q); CeedChkBackend(ierr);
574d1d35e2fSjeremylt   CeedInt num_blks = (num_elem/blk_size) + !!(num_elem%blk_size);
5751d102b48SJeremy L Thompson   CeedQFunction qf;
576e15f9bd0SJeremy L Thompson   ierr = CeedOperatorGetQFunction(op, &qf); CeedChkBackend(ierr);
577d1d35e2fSjeremylt   CeedOperatorField *op_input_fields, *op_output_fields;
5787e7773b5SJeremy L Thompson   ierr = CeedOperatorGetFields(op, &num_input_fields, &op_input_fields,
5797e7773b5SJeremy L Thompson                                &num_output_fields, &op_output_fields);
580e15f9bd0SJeremy L Thompson   CeedChkBackend(ierr);
581d1d35e2fSjeremylt   CeedQFunctionField *qf_input_fields, *qf_output_fields;
5827e7773b5SJeremy L Thompson   ierr = CeedQFunctionGetFields(qf, NULL, &qf_input_fields, NULL,
5837e7773b5SJeremy L Thompson                                 &qf_output_fields);
584e15f9bd0SJeremy L Thompson   CeedChkBackend(ierr);
5854fc1f125SJeremy L Thompson   CeedVector vec, l_vec = impl->qf_l_vec;
5864fc1f125SJeremy L Thompson   CeedInt num_active_in = impl->num_active_in,
5874fc1f125SJeremy L Thompson           num_active_out = impl->num_active_out;
588bb219a0fSJeremy L Thompson   CeedVector *active_in = impl->qf_active_in;
5891d102b48SJeremy L Thompson   CeedScalar *a, *tmp;
590a0162de9SJeremy L Thompson   CeedScalar *e_data[2*CEED_FIELD_MAX] = {0};
5911d102b48SJeremy L Thompson 
5921d102b48SJeremy L Thompson   // Setup
593e15f9bd0SJeremy L Thompson   ierr = CeedOperatorSetup_Opt(op); CeedChkBackend(ierr);
5941d102b48SJeremy L Thompson 
59516911fdaSjeremylt   // Check for identity
5960b454692Sjeremylt   if (impl->is_identity_qf)
59716911fdaSjeremylt     // LCOV_EXCL_START
598e15f9bd0SJeremy L Thompson     return CeedError(ceed, CEED_ERROR_BACKEND,
599e15f9bd0SJeremy L Thompson                      "Assembling identity qfunctions not supported");
60016911fdaSjeremylt   // LCOV_EXCL_STOP
60116911fdaSjeremylt 
6021d102b48SJeremy L Thompson   // Input Evecs and Restriction
603d1d35e2fSjeremylt   ierr = CeedOperatorSetupInputs_Opt(num_input_fields, qf_input_fields,
604a0162de9SJeremy L Thompson                                      op_input_fields, NULL, e_data,
605a0162de9SJeremy L Thompson                                      impl, request); CeedChkBackend(ierr);
6061d102b48SJeremy L Thompson 
6071d102b48SJeremy L Thompson   // Count number of active input fields
608bb219a0fSJeremy L Thompson   if (!num_active_in) {
609d1d35e2fSjeremylt     for (CeedInt i=0; i<num_input_fields; i++) {
6101d102b48SJeremy L Thompson       // Get input vector
611d1d35e2fSjeremylt       ierr = CeedOperatorFieldGetVector(op_input_fields[i], &vec);
612d1d35e2fSjeremylt       CeedChkBackend(ierr);
6131d102b48SJeremy L Thompson       // Check if active input
6141d102b48SJeremy L Thompson       if (vec == CEED_VECTOR_ACTIVE) {
615d1d35e2fSjeremylt         ierr = CeedQFunctionFieldGetSize(qf_input_fields[i], &size);
616e15f9bd0SJeremy L Thompson         CeedChkBackend(ierr);
617d1d35e2fSjeremylt         ierr = CeedVectorSetValue(impl->q_vecs_in[i], 0.0); CeedChkBackend(ierr);
618d1d35e2fSjeremylt         ierr = CeedVectorGetArray(impl->q_vecs_in[i], CEED_MEM_HOST, &tmp);
619d1d35e2fSjeremylt         CeedChkBackend(ierr);
620d1d35e2fSjeremylt         ierr = CeedRealloc(num_active_in + size, &active_in); CeedChkBackend(ierr);
6211d102b48SJeremy L Thompson         for (CeedInt field=0; field<size; field++) {
622*d2643443SJeremy L Thompson           q_size = (CeedSize)Q*blk_size;
623*d2643443SJeremy L Thompson           ierr = CeedVectorCreate(ceed, q_size, &active_in[num_active_in+field]);
624e15f9bd0SJeremy L Thompson           CeedChkBackend(ierr);
625d1d35e2fSjeremylt           ierr = CeedVectorSetArray(active_in[num_active_in+field], CEED_MEM_HOST,
626d1d35e2fSjeremylt                                     CEED_USE_POINTER, &tmp[field*Q*blk_size]);
627e15f9bd0SJeremy L Thompson           CeedChkBackend(ierr);
6281d102b48SJeremy L Thompson         }
629d1d35e2fSjeremylt         num_active_in += size;
630d1d35e2fSjeremylt         ierr = CeedVectorRestoreArray(impl->q_vecs_in[i], &tmp); CeedChkBackend(ierr);
6311d102b48SJeremy L Thompson       }
63289c6efa4Sjeremylt     }
6334fc1f125SJeremy L Thompson     impl->num_active_in = num_active_in;
634bb219a0fSJeremy L Thompson     impl->qf_active_in = active_in;
635bb219a0fSJeremy L Thompson   }
63689c6efa4Sjeremylt 
6371d102b48SJeremy L Thompson   // Count number of active output fields
638bb219a0fSJeremy L Thompson   if (!num_active_out) {
639d1d35e2fSjeremylt     for (CeedInt i=0; i<num_output_fields; i++) {
6401d102b48SJeremy L Thompson       // Get output vector
641d1d35e2fSjeremylt       ierr = CeedOperatorFieldGetVector(op_output_fields[i], &vec);
642e15f9bd0SJeremy L Thompson       CeedChkBackend(ierr);
6431d102b48SJeremy L Thompson       // Check if active output
6441d102b48SJeremy L Thompson       if (vec == CEED_VECTOR_ACTIVE) {
645d1d35e2fSjeremylt         ierr = CeedQFunctionFieldGetSize(qf_output_fields[i], &size);
646e15f9bd0SJeremy L Thompson         CeedChkBackend(ierr);
647d1d35e2fSjeremylt         num_active_out += size;
6481d102b48SJeremy L Thompson       }
6491d102b48SJeremy L Thompson     }
6504fc1f125SJeremy L Thompson     impl->num_active_out = num_active_out;
651bb219a0fSJeremy L Thompson   }
6521d102b48SJeremy L Thompson 
6531d102b48SJeremy L Thompson   // Check sizes
654d1d35e2fSjeremylt   if (!num_active_in || !num_active_out)
6551d102b48SJeremy L Thompson     // LCOV_EXCL_START
656e15f9bd0SJeremy L Thompson     return CeedError(ceed, CEED_ERROR_BACKEND,
657e15f9bd0SJeremy L Thompson                      "Cannot assemble QFunction without active inputs "
6581d102b48SJeremy L Thompson                      "and outputs");
6591d102b48SJeremy L Thompson   // LCOV_EXCL_STOP
6601d102b48SJeremy L Thompson 
6614fc1f125SJeremy L Thompson   // Setup l_vec
6624fc1f125SJeremy L Thompson   if (!l_vec) {
663*d2643443SJeremy L Thompson     CeedSize l_size = (CeedSize)blk_size*Q*num_active_in*num_active_out;
664*d2643443SJeremy L Thompson     ierr = CeedVectorCreate(ceed, l_size, &l_vec); CeedChkBackend(ierr);
6659c774eddSJeremy L Thompson     ierr = CeedVectorSetValue(l_vec, 0.0); CeedChkBackend(ierr);
6664fc1f125SJeremy L Thompson     impl->qf_l_vec = l_vec;
667bb219a0fSJeremy L Thompson   }
6681d102b48SJeremy L Thompson 
66970a7ffb3SJeremy L Thompson   // Build objects if needed
670d1d35e2fSjeremylt   CeedInt strides[3] = {1, Q, num_active_in *num_active_out*Q};
67170a7ffb3SJeremy L Thompson   if (build_objects) {
67270a7ffb3SJeremy L Thompson     // Create output restriction
673d1d35e2fSjeremylt     ierr = CeedElemRestrictionCreateStrided(ceed, num_elem, Q,
674d1d35e2fSjeremylt                                             num_active_in*num_active_out,
675d1d35e2fSjeremylt                                             num_active_in*num_active_out*num_elem*Q,
676e15f9bd0SJeremy L Thompson                                             strides, rstr); CeedChkBackend(ierr);
6771d102b48SJeremy L Thompson     // Create assembled vector
678*d2643443SJeremy L Thompson     CeedSize l_size = (CeedSize)num_elem*Q*num_active_in*num_active_out;
679*d2643443SJeremy L Thompson     ierr = CeedVectorCreate(ceed, l_size, assembled); CeedChkBackend(ierr);
68070a7ffb3SJeremy L Thompson   }
6811d102b48SJeremy L Thompson 
68216e5f7d7SJeremy L Thompson   // Output blocked restriction
68316e5f7d7SJeremy L Thompson   CeedElemRestriction blk_rstr = impl->qf_blk_rstr;
68416e5f7d7SJeremy L Thompson   if (!blk_rstr) {
68516e5f7d7SJeremy L Thompson     ierr = CeedElemRestrictionCreateBlockedStrided(ceed, num_elem, Q, blk_size,
68616e5f7d7SJeremy L Thompson            num_active_in*num_active_out, num_active_in*num_active_out*num_elem*Q,
68716e5f7d7SJeremy L Thompson            strides, &blk_rstr); CeedChkBackend(ierr);
68816e5f7d7SJeremy L Thompson     impl->qf_blk_rstr = blk_rstr;
68916e5f7d7SJeremy L Thompson   }
69016e5f7d7SJeremy L Thompson 
6911d102b48SJeremy L Thompson   // Loop through elements
69216e5f7d7SJeremy L Thompson   ierr = CeedVectorSetValue(*assembled, 0.0); CeedChkBackend(ierr);
693d1d35e2fSjeremylt   for (CeedInt e=0; e<num_blks*blk_size; e+=blk_size) {
69416e5f7d7SJeremy L Thompson     ierr = CeedVectorGetArray(l_vec, CEED_MEM_HOST, &a); CeedChkBackend(ierr);
69516e5f7d7SJeremy L Thompson 
6961d102b48SJeremy L Thompson     // Input basis apply
697d1d35e2fSjeremylt     ierr = CeedOperatorInputBasis_Opt(e, Q, qf_input_fields, op_input_fields,
698d1d35e2fSjeremylt                                       num_input_fields, blk_size, NULL, true,
699a0162de9SJeremy L Thompson                                       e_data, impl, request); CeedChkBackend(ierr);
7001d102b48SJeremy L Thompson 
7011d102b48SJeremy L Thompson     // Assemble QFunction
702d1d35e2fSjeremylt     for (CeedInt in=0; in<num_active_in; in++) {
7031d102b48SJeremy L Thompson       // Set Inputs
704d1d35e2fSjeremylt       ierr = CeedVectorSetValue(active_in[in], 1.0); CeedChkBackend(ierr);
705d1d35e2fSjeremylt       if (num_active_in > 1) {
706d1d35e2fSjeremylt         ierr = CeedVectorSetValue(active_in[(in+num_active_in-1)%num_active_in],
707e15f9bd0SJeremy L Thompson                                   0.0); CeedChkBackend(ierr);
70842ea3801Sjeremylt       }
7091d102b48SJeremy L Thompson       // Set Outputs
710d1d35e2fSjeremylt       for (CeedInt out=0; out<num_output_fields; out++) {
7111d102b48SJeremy L Thompson         // Get output vector
712d1d35e2fSjeremylt         ierr = CeedOperatorFieldGetVector(op_output_fields[out], &vec);
713e15f9bd0SJeremy L Thompson         CeedChkBackend(ierr);
7141d102b48SJeremy L Thompson         // Check if active output
7151d102b48SJeremy L Thompson         if (vec == CEED_VECTOR_ACTIVE) {
716d1d35e2fSjeremylt           CeedVectorSetArray(impl->q_vecs_out[out], CEED_MEM_HOST,
717e15f9bd0SJeremy L Thompson                              CEED_USE_POINTER, a); CeedChkBackend(ierr);
718d1d35e2fSjeremylt           ierr = CeedQFunctionFieldGetSize(qf_output_fields[out], &size);
719e15f9bd0SJeremy L Thompson           CeedChkBackend(ierr);
720d1d35e2fSjeremylt           a += size*Q*blk_size; // Advance the pointer by the size of the output
7211d102b48SJeremy L Thompson         }
7221d102b48SJeremy L Thompson       }
7231d102b48SJeremy L Thompson       // Apply QFunction
724d1d35e2fSjeremylt       ierr = CeedQFunctionApply(qf, Q*blk_size, impl->q_vecs_in, impl->q_vecs_out);
725e15f9bd0SJeremy L Thompson       CeedChkBackend(ierr);
7261d102b48SJeremy L Thompson     }
72716e5f7d7SJeremy L Thompson 
72816e5f7d7SJeremy L Thompson     // Assemble into assembled vector
72916e5f7d7SJeremy L Thompson     ierr = CeedVectorRestoreArray(l_vec, &a); CeedChkBackend(ierr);
73016e5f7d7SJeremy L Thompson     ierr = CeedElemRestrictionApplyBlock(blk_rstr, e/blk_size, CEED_TRANSPOSE,
73116e5f7d7SJeremy L Thompson                                          l_vec, *assembled, request); CeedChkBackend(ierr);
7321d102b48SJeremy L Thompson   }
7331d102b48SJeremy L Thompson 
7341d102b48SJeremy L Thompson   // Un-set output Qvecs to prevent accidental overwrite of Assembled
735d1d35e2fSjeremylt   for (CeedInt out=0; out<num_output_fields; out++) {
7361d102b48SJeremy L Thompson     // Get output vector
737d1d35e2fSjeremylt     ierr = CeedOperatorFieldGetVector(op_output_fields[out], &vec);
738e15f9bd0SJeremy L Thompson     CeedChkBackend(ierr);
7391d102b48SJeremy L Thompson     // Check if active output
7401d102b48SJeremy L Thompson     if (vec == CEED_VECTOR_ACTIVE) {
741d1d35e2fSjeremylt       CeedVectorSetArray(impl->q_vecs_out[out], CEED_MEM_HOST, CEED_COPY_VALUES,
742e15f9bd0SJeremy L Thompson                          NULL); CeedChkBackend(ierr);
7431d102b48SJeremy L Thompson     }
7441d102b48SJeremy L Thompson   }
7451d102b48SJeremy L Thompson 
7461d102b48SJeremy L Thompson   // Restore input arrays
747d1d35e2fSjeremylt   ierr = CeedOperatorRestoreInputs_Opt(num_input_fields, qf_input_fields,
748a0162de9SJeremy L Thompson                                        op_input_fields, e_data, impl);
749e15f9bd0SJeremy L Thompson   CeedChkBackend(ierr);
7501d102b48SJeremy L Thompson 
751e15f9bd0SJeremy L Thompson   return CEED_ERROR_SUCCESS;
75289c6efa4Sjeremylt }
75389c6efa4Sjeremylt 
754f10650afSjeremylt //------------------------------------------------------------------------------
75570a7ffb3SJeremy L Thompson // Assemble Linear QFunction
75670a7ffb3SJeremy L Thompson //------------------------------------------------------------------------------
75770a7ffb3SJeremy L Thompson static int CeedOperatorLinearAssembleQFunction_Opt(CeedOperator op,
75870a7ffb3SJeremy L Thompson     CeedVector *assembled, CeedElemRestriction *rstr, CeedRequest *request) {
75970a7ffb3SJeremy L Thompson   return CeedOperatorLinearAssembleQFunctionCore_Opt(op, true, assembled, rstr,
76070a7ffb3SJeremy L Thompson          request);
76170a7ffb3SJeremy L Thompson }
76270a7ffb3SJeremy L Thompson 
76370a7ffb3SJeremy L Thompson //------------------------------------------------------------------------------
76470a7ffb3SJeremy L Thompson // Update Assembled Linear QFunction
76570a7ffb3SJeremy L Thompson //------------------------------------------------------------------------------
76670a7ffb3SJeremy L Thompson static int CeedOperatorLinearAssembleQFunctionUpdate_Opt(CeedOperator op,
76770a7ffb3SJeremy L Thompson     CeedVector assembled, CeedElemRestriction rstr, CeedRequest *request) {
76870a7ffb3SJeremy L Thompson   return CeedOperatorLinearAssembleQFunctionCore_Opt(op, false, &assembled,
76970a7ffb3SJeremy L Thompson          &rstr, request);
77070a7ffb3SJeremy L Thompson }
77170a7ffb3SJeremy L Thompson 
77270a7ffb3SJeremy L Thompson //------------------------------------------------------------------------------
773f10650afSjeremylt // Operator Destroy
774f10650afSjeremylt //------------------------------------------------------------------------------
775f10650afSjeremylt static int CeedOperatorDestroy_Opt(CeedOperator op) {
776f10650afSjeremylt   int ierr;
777f10650afSjeremylt   CeedOperator_Opt *impl;
778e15f9bd0SJeremy L Thompson   ierr = CeedOperatorGetData(op, &impl); CeedChkBackend(ierr);
779f10650afSjeremylt 
7804fc1f125SJeremy L Thompson   for (CeedInt i=0; i<impl->num_inputs+impl->num_outputs; i++) {
781d1d35e2fSjeremylt     ierr = CeedElemRestrictionDestroy(&impl->blk_restr[i]); CeedChkBackend(ierr);
7824fc1f125SJeremy L Thompson     ierr = CeedVectorDestroy(&impl->e_vecs_full[i]); CeedChkBackend(ierr);
783f10650afSjeremylt   }
784d1d35e2fSjeremylt   ierr = CeedFree(&impl->blk_restr); CeedChkBackend(ierr);
7854fc1f125SJeremy L Thompson   ierr = CeedFree(&impl->e_vecs_full); CeedChkBackend(ierr);
7864fc1f125SJeremy L Thompson   ierr = CeedFree(&impl->input_states); CeedChkBackend(ierr);
787f10650afSjeremylt 
7884fc1f125SJeremy L Thompson   for (CeedInt i=0; i<impl->num_inputs; i++) {
789d1d35e2fSjeremylt     ierr = CeedVectorDestroy(&impl->e_vecs_in[i]); CeedChkBackend(ierr);
790d1d35e2fSjeremylt     ierr = CeedVectorDestroy(&impl->q_vecs_in[i]); CeedChkBackend(ierr);
791f10650afSjeremylt   }
792d1d35e2fSjeremylt   ierr = CeedFree(&impl->e_vecs_in); CeedChkBackend(ierr);
793d1d35e2fSjeremylt   ierr = CeedFree(&impl->q_vecs_in); CeedChkBackend(ierr);
794f10650afSjeremylt 
7954fc1f125SJeremy L Thompson   for (CeedInt i=0; i<impl->num_outputs; i++) {
796d1d35e2fSjeremylt     ierr = CeedVectorDestroy(&impl->e_vecs_out[i]); CeedChkBackend(ierr);
797d1d35e2fSjeremylt     ierr = CeedVectorDestroy(&impl->q_vecs_out[i]); CeedChkBackend(ierr);
798f10650afSjeremylt   }
799d1d35e2fSjeremylt   ierr = CeedFree(&impl->e_vecs_out); CeedChkBackend(ierr);
800d1d35e2fSjeremylt   ierr = CeedFree(&impl->q_vecs_out); CeedChkBackend(ierr);
801f10650afSjeremylt 
802bb219a0fSJeremy L Thompson   // QFunction assembly data
8034fc1f125SJeremy L Thompson   for (CeedInt i=0; i<impl->num_active_in; i++) {
804bb219a0fSJeremy L Thompson     ierr = CeedVectorDestroy(&impl->qf_active_in[i]); CeedChkBackend(ierr);
805bb219a0fSJeremy L Thompson   }
806bb219a0fSJeremy L Thompson   ierr = CeedFree(&impl->qf_active_in); CeedChkBackend(ierr);
8074fc1f125SJeremy L Thompson   ierr = CeedVectorDestroy(&impl->qf_l_vec); CeedChkBackend(ierr);
808bb219a0fSJeremy L Thompson   ierr = CeedElemRestrictionDestroy(&impl->qf_blk_rstr); CeedChkBackend(ierr);
809bb219a0fSJeremy L Thompson 
810e15f9bd0SJeremy L Thompson   ierr = CeedFree(&impl); CeedChkBackend(ierr);
811e15f9bd0SJeremy L Thompson   return CEED_ERROR_SUCCESS;
812f10650afSjeremylt }
813f10650afSjeremylt 
814f10650afSjeremylt //------------------------------------------------------------------------------
815f10650afSjeremylt // Operator Create
816f10650afSjeremylt //------------------------------------------------------------------------------
81789c6efa4Sjeremylt int CeedOperatorCreate_Opt(CeedOperator op) {
81889c6efa4Sjeremylt   int ierr;
81989c6efa4Sjeremylt   Ceed ceed;
820e15f9bd0SJeremy L Thompson   ierr = CeedOperatorGetCeed(op, &ceed); CeedChkBackend(ierr);
821d1d35e2fSjeremylt   Ceed_Opt *ceed_impl;
822d1d35e2fSjeremylt   ierr = CeedGetData(ceed, &ceed_impl); CeedChkBackend(ierr);
823d1d35e2fSjeremylt   CeedInt blk_size = ceed_impl->blk_size;
82489c6efa4Sjeremylt   CeedOperator_Opt *impl;
82589c6efa4Sjeremylt 
826e15f9bd0SJeremy L Thompson   ierr = CeedCalloc(1, &impl); CeedChkBackend(ierr);
827e15f9bd0SJeremy L Thompson   ierr = CeedOperatorSetData(op, impl); CeedChkBackend(ierr);
82889c6efa4Sjeremylt 
829d1d35e2fSjeremylt   if (blk_size != 1 && blk_size != 8)
83082946b17Sjeremylt     // LCOV_EXCL_START
831e15f9bd0SJeremy L Thompson     return CeedError(ceed, CEED_ERROR_BACKEND,
832d1d35e2fSjeremylt                      "Opt backend cannot use blocksize: %d", blk_size);
83382946b17Sjeremylt   // LCOV_EXCL_STOP
83482946b17Sjeremylt 
83580ac2e43SJeremy L Thompson   ierr = CeedSetBackendFunction(ceed, "Operator", op, "LinearAssembleQFunction",
83680ac2e43SJeremy L Thompson                                 CeedOperatorLinearAssembleQFunction_Opt);
837e15f9bd0SJeremy L Thompson   CeedChkBackend(ierr);
83870a7ffb3SJeremy L Thompson   ierr = CeedSetBackendFunction(ceed, "Operator", op,
83970a7ffb3SJeremy L Thompson                                 "LinearAssembleQFunctionUpdate",
84070a7ffb3SJeremy L Thompson                                 CeedOperatorLinearAssembleQFunctionUpdate_Opt);
84170a7ffb3SJeremy L Thompson   CeedChkBackend(ierr);
842cae8b89aSjeremylt   ierr = CeedSetBackendFunction(ceed, "Operator", op, "ApplyAdd",
843e15f9bd0SJeremy L Thompson                                 CeedOperatorApplyAdd_Opt); CeedChkBackend(ierr);
84489c6efa4Sjeremylt   ierr = CeedSetBackendFunction(ceed, "Operator", op, "Destroy",
845e15f9bd0SJeremy L Thompson                                 CeedOperatorDestroy_Opt); CeedChkBackend(ierr);
846e15f9bd0SJeremy L Thompson   return CEED_ERROR_SUCCESS;
84789c6efa4Sjeremylt }
848f10650afSjeremylt //------------------------------------------------------------------------------
849