xref: /libCEED/backends/opt/ceed-opt-operator.c (revision bf4cb66493dbcc06b8d25c9c91cf89fe1cab7c9b)
189c6efa4Sjeremylt // Copyright (c) 2017-2018, Lawrence Livermore National Security, LLC.
289c6efa4Sjeremylt // Produced at the Lawrence Livermore National Laboratory. LLNL-CODE-734707.
389c6efa4Sjeremylt // All Rights reserved. See files LICENSE and NOTICE for details.
489c6efa4Sjeremylt //
589c6efa4Sjeremylt // This file is part of CEED, a collection of benchmarks, miniapps, software
689c6efa4Sjeremylt // libraries and APIs for efficient high-order finite element and spectral
789c6efa4Sjeremylt // element discretizations for exascale applications. For more information and
889c6efa4Sjeremylt // source code availability see http://github.com/ceed.
989c6efa4Sjeremylt //
1089c6efa4Sjeremylt // The CEED research is supported by the Exascale Computing Project 17-SC-20-SC,
1189c6efa4Sjeremylt // a collaborative effort of two U.S. Department of Energy organizations (Office
1289c6efa4Sjeremylt // of Science and the National Nuclear Security Administration) responsible for
1389c6efa4Sjeremylt // the planning and preparation of a capable exascale ecosystem, including
1489c6efa4Sjeremylt // software, applications, hardware, advanced system engineering and early
1589c6efa4Sjeremylt // testbed platforms, in support of the nation's exascale computing imperative.
1689c6efa4Sjeremylt 
17ec3da8bcSJed Brown #include <ceed/ceed.h>
18ec3da8bcSJed Brown #include <ceed/backend.h>
193d576824SJeremy L Thompson #include <stdbool.h>
203d576824SJeremy L Thompson #include <stdint.h>
2189c6efa4Sjeremylt #include <string.h>
2289c6efa4Sjeremylt #include "ceed-opt.h"
2389c6efa4Sjeremylt 
24f10650afSjeremylt //------------------------------------------------------------------------------
25f10650afSjeremylt // Setup Input/Output Fields
26f10650afSjeremylt //------------------------------------------------------------------------------
2789c6efa4Sjeremylt static int CeedOperatorSetupFields_Opt(CeedQFunction qf, CeedOperator op,
28d1d35e2fSjeremylt                                        bool inOrOut, const CeedInt blk_size,
29d1d35e2fSjeremylt                                        CeedElemRestriction *blk_restr,
30d1d35e2fSjeremylt                                        CeedVector *full_evecs, CeedVector *e_vecs,
31d1d35e2fSjeremylt                                        CeedVector *q_vecs, CeedInt start_e,
32d1d35e2fSjeremylt                                        CeedInt num_fields, CeedInt Q) {
33d1d35e2fSjeremylt   CeedInt dim, ierr, num_comp, size, P;
3489c6efa4Sjeremylt   Ceed ceed;
35e15f9bd0SJeremy L Thompson   ierr = CeedOperatorGetCeed(op, &ceed); CeedChkBackend(ierr);
3689c6efa4Sjeremylt   CeedBasis basis;
3789c6efa4Sjeremylt   CeedElemRestriction r;
38d1d35e2fSjeremylt   CeedOperatorField *op_fields;
39d1d35e2fSjeremylt   CeedQFunctionField *qf_fields;
4089c6efa4Sjeremylt   if (inOrOut) {
417e7773b5SJeremy L Thompson     ierr = CeedOperatorGetFields(op, NULL, NULL, NULL,&op_fields);
42e15f9bd0SJeremy L Thompson     CeedChkBackend(ierr);
437e7773b5SJeremy L Thompson     ierr = CeedQFunctionGetFields(qf, NULL, NULL, NULL, &qf_fields);
44e15f9bd0SJeremy L Thompson     CeedChkBackend(ierr);
4589c6efa4Sjeremylt   } else {
467e7773b5SJeremy L Thompson     ierr = CeedOperatorGetFields(op, NULL, &op_fields, NULL, NULL);
47e15f9bd0SJeremy L Thompson     CeedChkBackend(ierr);
487e7773b5SJeremy L Thompson     ierr = CeedQFunctionGetFields(qf, NULL, &qf_fields, NULL, NULL);
49e15f9bd0SJeremy L Thompson     CeedChkBackend(ierr);
5089c6efa4Sjeremylt   }
5189c6efa4Sjeremylt 
5289c6efa4Sjeremylt   // Loop over fields
53d1d35e2fSjeremylt   for (CeedInt i=0; i<num_fields; i++) {
54d1d35e2fSjeremylt     CeedEvalMode eval_mode;
55d1d35e2fSjeremylt     ierr = CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode);
56d1d35e2fSjeremylt     CeedChkBackend(ierr);
5789c6efa4Sjeremylt 
58d1d35e2fSjeremylt     if (eval_mode != CEED_EVAL_WEIGHT) {
59d1d35e2fSjeremylt       ierr = CeedOperatorFieldGetElemRestriction(op_fields[i], &r);
60e15f9bd0SJeremy L Thompson       CeedChkBackend(ierr);
6189c6efa4Sjeremylt       Ceed ceed;
62e15f9bd0SJeremy L Thompson       ierr = CeedElemRestrictionGetCeed(r, &ceed); CeedChkBackend(ierr);
63d1d35e2fSjeremylt       CeedInt num_elem, elem_size, l_size, comp_stride;
64d1d35e2fSjeremylt       ierr = CeedElemRestrictionGetNumElements(r, &num_elem); CeedChkBackend(ierr);
65d1d35e2fSjeremylt       ierr = CeedElemRestrictionGetElementSize(r, &elem_size); CeedChkBackend(ierr);
66d1d35e2fSjeremylt       ierr = CeedElemRestrictionGetLVectorSize(r, &l_size); CeedChkBackend(ierr);
67d1d35e2fSjeremylt       ierr = CeedElemRestrictionGetNumComponents(r, &num_comp); CeedChkBackend(ierr);
68bd33150aSjeremylt 
693ac43b2cSJeremy L Thompson       bool strided;
70e15f9bd0SJeremy L Thompson       ierr = CeedElemRestrictionIsStrided(r, &strided); CeedChkBackend(ierr);
713ac43b2cSJeremy L Thompson       if (strided) {
723ac43b2cSJeremy L Thompson         CeedInt strides[3];
73e15f9bd0SJeremy L Thompson         ierr = CeedElemRestrictionGetStrides(r, &strides); CeedChkBackend(ierr);
74d1d35e2fSjeremylt         ierr = CeedElemRestrictionCreateBlockedStrided(ceed, num_elem, elem_size,
75d1d35e2fSjeremylt                blk_size, num_comp, l_size, strides, &blk_restr[i+start_e]);
76e15f9bd0SJeremy L Thompson         CeedChkBackend(ierr);
773ac43b2cSJeremy L Thompson       } else {
78bd33150aSjeremylt         const CeedInt *offsets = NULL;
79bd33150aSjeremylt         ierr = CeedElemRestrictionGetOffsets(r, CEED_MEM_HOST, &offsets);
80e15f9bd0SJeremy L Thompson         CeedChkBackend(ierr);
81d1d35e2fSjeremylt         ierr = CeedElemRestrictionGetCompStride(r, &comp_stride); CeedChkBackend(ierr);
82d1d35e2fSjeremylt         ierr = CeedElemRestrictionCreateBlocked(ceed, num_elem, elem_size,
83d1d35e2fSjeremylt                                                 blk_size, num_comp, comp_stride,
84d1d35e2fSjeremylt                                                 l_size, CEED_MEM_HOST,
85bd33150aSjeremylt                                                 CEED_COPY_VALUES, offsets,
86d1d35e2fSjeremylt                                                 &blk_restr[i+start_e]);
87e15f9bd0SJeremy L Thompson         CeedChkBackend(ierr);
88e15f9bd0SJeremy L Thompson         ierr = CeedElemRestrictionRestoreOffsets(r, &offsets); CeedChkBackend(ierr);
893ac43b2cSJeremy L Thompson       }
90d1d35e2fSjeremylt       ierr = CeedElemRestrictionCreateVector(blk_restr[i+start_e], NULL,
91d1d35e2fSjeremylt                                              &full_evecs[i+start_e]);
92e15f9bd0SJeremy L Thompson       CeedChkBackend(ierr);
9389c6efa4Sjeremylt     }
9489c6efa4Sjeremylt 
95d1d35e2fSjeremylt     switch(eval_mode) {
9689c6efa4Sjeremylt     case CEED_EVAL_NONE:
97d1d35e2fSjeremylt       ierr = CeedQFunctionFieldGetSize(qf_fields[i], &size); CeedChkBackend(ierr);
98d1d35e2fSjeremylt       ierr = CeedVectorCreate(ceed, Q*size*blk_size, &e_vecs[i]);
99d1d35e2fSjeremylt       CeedChkBackend(ierr);
100d1d35e2fSjeremylt       ierr = CeedVectorCreate(ceed, Q*size*blk_size, &q_vecs[i]);
101d1d35e2fSjeremylt       CeedChkBackend(ierr);
10289c6efa4Sjeremylt       break;
10389c6efa4Sjeremylt     case CEED_EVAL_INTERP:
104d1d35e2fSjeremylt       ierr = CeedQFunctionFieldGetSize(qf_fields[i], &size); CeedChkBackend(ierr);
10589c6efa4Sjeremylt       ierr = CeedElemRestrictionGetElementSize(r, &P);
106e15f9bd0SJeremy L Thompson       CeedChkBackend(ierr);
107d1d35e2fSjeremylt       ierr = CeedVectorCreate(ceed, P*size*blk_size, &e_vecs[i]);
108d1d35e2fSjeremylt       CeedChkBackend(ierr);
109d1d35e2fSjeremylt       ierr = CeedVectorCreate(ceed, Q*size*blk_size, &q_vecs[i]);
110d1d35e2fSjeremylt       CeedChkBackend(ierr);
11189c6efa4Sjeremylt       break;
11289c6efa4Sjeremylt     case CEED_EVAL_GRAD:
113d1d35e2fSjeremylt       ierr = CeedOperatorFieldGetBasis(op_fields[i], &basis); CeedChkBackend(ierr);
114d1d35e2fSjeremylt       ierr = CeedQFunctionFieldGetSize(qf_fields[i], &size); CeedChkBackend(ierr);
115e15f9bd0SJeremy L Thompson       ierr = CeedBasisGetDimension(basis, &dim); CeedChkBackend(ierr);
11689c6efa4Sjeremylt       ierr = CeedElemRestrictionGetElementSize(r, &P);
117e15f9bd0SJeremy L Thompson       CeedChkBackend(ierr);
118d1d35e2fSjeremylt       ierr = CeedVectorCreate(ceed, P*size/dim*blk_size, &e_vecs[i]);
119e15f9bd0SJeremy L Thompson       CeedChkBackend(ierr);
120d1d35e2fSjeremylt       ierr = CeedVectorCreate(ceed, Q*size*blk_size, &q_vecs[i]);
121d1d35e2fSjeremylt       CeedChkBackend(ierr);
12289c6efa4Sjeremylt       break;
12389c6efa4Sjeremylt     case CEED_EVAL_WEIGHT: // Only on input fields
124d1d35e2fSjeremylt       ierr = CeedOperatorFieldGetBasis(op_fields[i], &basis); CeedChkBackend(ierr);
125d1d35e2fSjeremylt       ierr = CeedVectorCreate(ceed, Q*blk_size, &q_vecs[i]); CeedChkBackend(ierr);
126d1d35e2fSjeremylt       ierr = CeedBasisApply(basis, blk_size, CEED_NOTRANSPOSE,
127d1d35e2fSjeremylt                             CEED_EVAL_WEIGHT, CEED_VECTOR_NONE, q_vecs[i]);
128e15f9bd0SJeremy L Thompson       CeedChkBackend(ierr);
12989c6efa4Sjeremylt 
13089c6efa4Sjeremylt       break;
13189c6efa4Sjeremylt     case CEED_EVAL_DIV:
1325f67fadeSJeremy L Thompson       break; // Not implemented
13389c6efa4Sjeremylt     case CEED_EVAL_CURL:
1345f67fadeSJeremy L Thompson       break; // Not implemented
13589c6efa4Sjeremylt     }
13689c6efa4Sjeremylt   }
137e15f9bd0SJeremy L Thompson   return CEED_ERROR_SUCCESS;
13889c6efa4Sjeremylt }
13989c6efa4Sjeremylt 
140f10650afSjeremylt //------------------------------------------------------------------------------
141f10650afSjeremylt // Setup Operator
142f10650afSjeremylt //------------------------------------------------------------------------------
14389c6efa4Sjeremylt static int CeedOperatorSetup_Opt(CeedOperator op) {
14489c6efa4Sjeremylt   int ierr;
145d1d35e2fSjeremylt   bool setup_done;
146d1d35e2fSjeremylt   ierr = CeedOperatorIsSetupDone(op, &setup_done); CeedChkBackend(ierr);
147d1d35e2fSjeremylt   if (setup_done) return CEED_ERROR_SUCCESS;
14889c6efa4Sjeremylt   Ceed ceed;
149e15f9bd0SJeremy L Thompson   ierr = CeedOperatorGetCeed(op, &ceed); CeedChkBackend(ierr);
150d1d35e2fSjeremylt   Ceed_Opt *ceed_impl;
151d1d35e2fSjeremylt   ierr = CeedGetData(ceed, &ceed_impl); CeedChkBackend(ierr);
152d1d35e2fSjeremylt   const CeedInt blk_size = ceed_impl->blk_size;
15389c6efa4Sjeremylt   CeedOperator_Opt *impl;
154e15f9bd0SJeremy L Thompson   ierr = CeedOperatorGetData(op, &impl); CeedChkBackend(ierr);
15589c6efa4Sjeremylt   CeedQFunction qf;
156e15f9bd0SJeremy L Thompson   ierr = CeedOperatorGetQFunction(op, &qf); CeedChkBackend(ierr);
157d1d35e2fSjeremylt   CeedInt Q, num_input_fields, num_output_fields;
158e15f9bd0SJeremy L Thompson   ierr = CeedOperatorGetNumQuadraturePoints(op, &Q); CeedChkBackend(ierr);
1590b454692Sjeremylt   ierr = CeedQFunctionIsIdentity(qf, &impl->is_identity_qf); CeedChkBackend(ierr);
160d1d35e2fSjeremylt   CeedOperatorField *op_input_fields, *op_output_fields;
1617e7773b5SJeremy L Thompson   ierr = CeedOperatorGetFields(op, &num_input_fields, &op_input_fields,
1627e7773b5SJeremy L Thompson                                &num_output_fields, &op_output_fields);
163e15f9bd0SJeremy L Thompson   CeedChkBackend(ierr);
164d1d35e2fSjeremylt   CeedQFunctionField *qf_input_fields, *qf_output_fields;
1657e7773b5SJeremy L Thompson   ierr = CeedQFunctionGetFields(qf, NULL, &qf_input_fields, NULL,
1667e7773b5SJeremy L Thompson                                 &qf_output_fields);
167e15f9bd0SJeremy L Thompson   CeedChkBackend(ierr);
16889c6efa4Sjeremylt 
16989c6efa4Sjeremylt   // Allocate
170d1d35e2fSjeremylt   ierr = CeedCalloc(num_input_fields + num_output_fields, &impl->blk_restr);
171e15f9bd0SJeremy L Thompson   CeedChkBackend(ierr);
172d1d35e2fSjeremylt   ierr = CeedCalloc(num_input_fields + num_output_fields, &impl->e_vecs);
173e15f9bd0SJeremy L Thompson   CeedChkBackend(ierr);
174d1d35e2fSjeremylt   ierr = CeedCalloc(num_input_fields + num_output_fields, &impl->e_data);
175e15f9bd0SJeremy L Thompson   CeedChkBackend(ierr);
17689c6efa4Sjeremylt 
177*bf4cb664SJeremy L Thompson   ierr = CeedCalloc(CEED_FIELD_MAX, &impl->input_state); CeedChkBackend(ierr);
178*bf4cb664SJeremy L Thompson   ierr = CeedCalloc(CEED_FIELD_MAX, &impl->e_vecs_in); CeedChkBackend(ierr);
179*bf4cb664SJeremy L Thompson   ierr = CeedCalloc(CEED_FIELD_MAX, &impl->e_vecs_out); CeedChkBackend(ierr);
180*bf4cb664SJeremy L Thompson   ierr = CeedCalloc(CEED_FIELD_MAX, &impl->q_vecs_in); CeedChkBackend(ierr);
181*bf4cb664SJeremy L Thompson   ierr = CeedCalloc(CEED_FIELD_MAX, &impl->q_vecs_out); CeedChkBackend(ierr);
18289c6efa4Sjeremylt 
183d1d35e2fSjeremylt   impl->num_e_vecs_in = num_input_fields;
184d1d35e2fSjeremylt   impl->num_e_vecs_out = num_output_fields;
18589c6efa4Sjeremylt 
18689c6efa4Sjeremylt   // Set up infield and outfield pointer arrays
18789c6efa4Sjeremylt   // Infields
188d1d35e2fSjeremylt   ierr = CeedOperatorSetupFields_Opt(qf, op, 0, blk_size, impl->blk_restr,
189d1d35e2fSjeremylt                                      impl->e_vecs, impl->e_vecs_in,
190d1d35e2fSjeremylt                                      impl->q_vecs_in, 0, num_input_fields, Q);
191e15f9bd0SJeremy L Thompson   CeedChkBackend(ierr);
19289c6efa4Sjeremylt   // Outfields
193d1d35e2fSjeremylt   ierr = CeedOperatorSetupFields_Opt(qf, op, 1, blk_size, impl->blk_restr,
194d1d35e2fSjeremylt                                      impl->e_vecs, impl->e_vecs_out,
195d1d35e2fSjeremylt                                      impl->q_vecs_out, num_input_fields,
196d1d35e2fSjeremylt                                      num_output_fields, Q);
197e15f9bd0SJeremy L Thompson   CeedChkBackend(ierr);
19889c6efa4Sjeremylt 
19916911fdaSjeremylt   // Identity QFunctions
2000b454692Sjeremylt   if (impl->is_identity_qf) {
201d1d35e2fSjeremylt     CeedEvalMode in_mode, out_mode;
202d1d35e2fSjeremylt     CeedQFunctionField *in_fields, *out_fields;
2037e7773b5SJeremy L Thompson     ierr = CeedQFunctionGetFields(qf, NULL, &in_fields, NULL, &out_fields);
204e15f9bd0SJeremy L Thompson     CeedChkBackend(ierr);
2050b454692Sjeremylt     ierr = CeedQFunctionFieldGetEvalMode(in_fields[0], &in_mode);
206d1d35e2fSjeremylt     CeedChkBackend(ierr);
2070b454692Sjeremylt     ierr = CeedQFunctionFieldGetEvalMode(out_fields[0], &out_mode);
208d1d35e2fSjeremylt     CeedChkBackend(ierr);
209d1d35e2fSjeremylt 
2100b454692Sjeremylt     if (in_mode == CEED_EVAL_NONE && out_mode == CEED_EVAL_NONE) {
2110b454692Sjeremylt       impl->is_identity_restr_op = true;
2120b454692Sjeremylt     } else {
2130b454692Sjeremylt       ierr = CeedVectorDestroy(&impl->q_vecs_out[0]); CeedChkBackend(ierr);
2140b454692Sjeremylt       impl->q_vecs_out[0] = impl->q_vecs_in[0];
2150b454692Sjeremylt       ierr = CeedVectorAddReference(impl->q_vecs_in[0]); CeedChkBackend(ierr);
21616911fdaSjeremylt     }
21716911fdaSjeremylt   }
21816911fdaSjeremylt 
219e15f9bd0SJeremy L Thompson   ierr = CeedOperatorSetSetupDone(op); CeedChkBackend(ierr);
22089c6efa4Sjeremylt 
221e15f9bd0SJeremy L Thompson   return CEED_ERROR_SUCCESS;
22289c6efa4Sjeremylt }
22389c6efa4Sjeremylt 
224f10650afSjeremylt //------------------------------------------------------------------------------
225f10650afSjeremylt // Setup Input Fields
226f10650afSjeremylt //------------------------------------------------------------------------------
227d1d35e2fSjeremylt static inline int CeedOperatorSetupInputs_Opt(CeedInt num_input_fields,
228d1d35e2fSjeremylt     CeedQFunctionField *qf_input_fields, CeedOperatorField *op_input_fields,
229d1d35e2fSjeremylt     CeedVector in_vec, CeedOperator_Opt *impl, CeedRequest *request) {
2301d102b48SJeremy L Thompson   CeedInt ierr;
231d1d35e2fSjeremylt   CeedEvalMode eval_mode;
23289c6efa4Sjeremylt   CeedVector vec;
23389c6efa4Sjeremylt   uint64_t state;
23489c6efa4Sjeremylt 
235d1d35e2fSjeremylt   for (CeedInt i=0; i<num_input_fields; i++) {
236d1d35e2fSjeremylt     ierr = CeedQFunctionFieldGetEvalMode(qf_input_fields[i], &eval_mode);
237e15f9bd0SJeremy L Thompson     CeedChkBackend(ierr);
238d1d35e2fSjeremylt     if (eval_mode == CEED_EVAL_WEIGHT) { // Skip
23989c6efa4Sjeremylt     } else {
24089c6efa4Sjeremylt       // Get input vector
241d1d35e2fSjeremylt       ierr = CeedOperatorFieldGetVector(op_input_fields[i], &vec);
242d1d35e2fSjeremylt       CeedChkBackend(ierr);
24389c6efa4Sjeremylt       if (vec != CEED_VECTOR_ACTIVE) {
24489c6efa4Sjeremylt         // Restrict
245e15f9bd0SJeremy L Thompson         ierr = CeedVectorGetState(vec, &state); CeedChkBackend(ierr);
246d1d35e2fSjeremylt         if (state != impl->input_state[i]) {
247d1d35e2fSjeremylt           ierr = CeedElemRestrictionApply(impl->blk_restr[i], CEED_NOTRANSPOSE,
248d1d35e2fSjeremylt                                           vec, impl->e_vecs[i], request);
249e15f9bd0SJeremy L Thompson           CeedChkBackend(ierr);
250d1d35e2fSjeremylt           impl->input_state[i] = state;
25189c6efa4Sjeremylt         }
25289c6efa4Sjeremylt       } else {
25389c6efa4Sjeremylt         // Set Qvec for CEED_EVAL_NONE
254d1d35e2fSjeremylt         if (eval_mode == CEED_EVAL_NONE) {
255d1d35e2fSjeremylt           ierr = CeedVectorGetArray(impl->e_vecs_in[i], CEED_MEM_HOST,
256d1d35e2fSjeremylt                                     &impl->e_data[i]); CeedChkBackend(ierr);
257d1d35e2fSjeremylt           ierr = CeedVectorSetArray(impl->q_vecs_in[i], CEED_MEM_HOST,
25889c6efa4Sjeremylt                                     CEED_USE_POINTER,
259d1d35e2fSjeremylt                                     impl->e_data[i]); CeedChkBackend(ierr);
260d1d35e2fSjeremylt           ierr = CeedVectorRestoreArray(impl->e_vecs_in[i],
261d1d35e2fSjeremylt                                         &impl->e_data[i]); CeedChkBackend(ierr);
26289c6efa4Sjeremylt         }
26389c6efa4Sjeremylt       }
26489c6efa4Sjeremylt       // Get evec
265d1d35e2fSjeremylt       ierr = CeedVectorGetArrayRead(impl->e_vecs[i], CEED_MEM_HOST,
266d1d35e2fSjeremylt                                     (const CeedScalar **) &impl->e_data[i]);
267e15f9bd0SJeremy L Thompson       CeedChkBackend(ierr);
26889c6efa4Sjeremylt     }
26989c6efa4Sjeremylt   }
270e15f9bd0SJeremy L Thompson   return CEED_ERROR_SUCCESS;
2711d102b48SJeremy L Thompson }
27289c6efa4Sjeremylt 
273f10650afSjeremylt //------------------------------------------------------------------------------
274f10650afSjeremylt // Input Basis Action
275f10650afSjeremylt //------------------------------------------------------------------------------
2761d102b48SJeremy L Thompson static inline int CeedOperatorInputBasis_Opt(CeedInt e, CeedInt Q,
277d1d35e2fSjeremylt     CeedQFunctionField *qf_input_fields, CeedOperatorField *op_input_fields,
278d1d35e2fSjeremylt     CeedInt num_input_fields, CeedInt blk_size, CeedVector in_vec, bool skip_active,
2791d102b48SJeremy L Thompson     CeedOperator_Opt *impl, CeedRequest *request) {
2801d102b48SJeremy L Thompson   CeedInt ierr;
281d1d35e2fSjeremylt   CeedInt dim, elem_size, size;
282d1d35e2fSjeremylt   CeedElemRestriction elem_restr;
283d1d35e2fSjeremylt   CeedEvalMode eval_mode;
2841d102b48SJeremy L Thompson   CeedBasis basis;
2851d102b48SJeremy L Thompson   CeedVector vec;
28689c6efa4Sjeremylt 
287d1d35e2fSjeremylt   for (CeedInt i=0; i<num_input_fields; i++) {
288d1d35e2fSjeremylt     ierr = CeedOperatorFieldGetVector(op_input_fields[i], &vec);
289d1d35e2fSjeremylt     CeedChkBackend(ierr);
2901d102b48SJeremy L Thompson     // Skip active input
291d1d35e2fSjeremylt     if (skip_active) {
2921d102b48SJeremy L Thompson       if (vec == CEED_VECTOR_ACTIVE)
2931d102b48SJeremy L Thompson         continue;
2941d102b48SJeremy L Thompson     }
2951d102b48SJeremy L Thompson 
296d1d35e2fSjeremylt     CeedInt active_in = 0;
297d1d35e2fSjeremylt     // Get elem_size, eval_mode, size
298d1d35e2fSjeremylt     ierr = CeedOperatorFieldGetElemRestriction(op_input_fields[i], &elem_restr);
299e15f9bd0SJeremy L Thompson     CeedChkBackend(ierr);
300d1d35e2fSjeremylt     ierr = CeedElemRestrictionGetElementSize(elem_restr, &elem_size);
301e15f9bd0SJeremy L Thompson     CeedChkBackend(ierr);
302d1d35e2fSjeremylt     ierr = CeedQFunctionFieldGetEvalMode(qf_input_fields[i], &eval_mode);
303e15f9bd0SJeremy L Thompson     CeedChkBackend(ierr);
304d1d35e2fSjeremylt     ierr = CeedQFunctionFieldGetSize(qf_input_fields[i], &size);
305d1d35e2fSjeremylt     CeedChkBackend(ierr);
30689c6efa4Sjeremylt     // Restrict block active input
30789c6efa4Sjeremylt     if (vec == CEED_VECTOR_ACTIVE) {
308d1d35e2fSjeremylt       ierr = CeedElemRestrictionApplyBlock(impl->blk_restr[i], e/blk_size,
309d1d35e2fSjeremylt                                            CEED_NOTRANSPOSE, in_vec,
310d1d35e2fSjeremylt                                            impl->e_vecs_in[i], request);
311e15f9bd0SJeremy L Thompson       CeedChkBackend(ierr);
312d1d35e2fSjeremylt       active_in = 1;
31389c6efa4Sjeremylt     }
31489c6efa4Sjeremylt     // Basis action
315d1d35e2fSjeremylt     switch(eval_mode) {
31689c6efa4Sjeremylt     case CEED_EVAL_NONE:
317d1d35e2fSjeremylt       if (!active_in) {
318d1d35e2fSjeremylt         ierr = CeedVectorSetArray(impl->q_vecs_in[i], CEED_MEM_HOST,
31989c6efa4Sjeremylt                                   CEED_USE_POINTER,
320d1d35e2fSjeremylt                                   &impl->e_data[i][e*Q*size]); CeedChkBackend(ierr);
32189c6efa4Sjeremylt       }
32289c6efa4Sjeremylt       break;
32389c6efa4Sjeremylt     case CEED_EVAL_INTERP:
324d1d35e2fSjeremylt       ierr = CeedOperatorFieldGetBasis(op_input_fields[i], &basis);
325e15f9bd0SJeremy L Thompson       CeedChkBackend(ierr);
326d1d35e2fSjeremylt       if (!active_in) {
327d1d35e2fSjeremylt         ierr = CeedVectorSetArray(impl->e_vecs_in[i], CEED_MEM_HOST,
32889c6efa4Sjeremylt                                   CEED_USE_POINTER,
329d1d35e2fSjeremylt                                   &impl->e_data[i][e*elem_size*size]);
330e15f9bd0SJeremy L Thompson         CeedChkBackend(ierr);
33189c6efa4Sjeremylt       }
332d1d35e2fSjeremylt       ierr = CeedBasisApply(basis, blk_size, CEED_NOTRANSPOSE,
333d1d35e2fSjeremylt                             CEED_EVAL_INTERP, impl->e_vecs_in[i],
334d1d35e2fSjeremylt                             impl->q_vecs_in[i]); CeedChkBackend(ierr);
33589c6efa4Sjeremylt       break;
33689c6efa4Sjeremylt     case CEED_EVAL_GRAD:
337d1d35e2fSjeremylt       ierr = CeedOperatorFieldGetBasis(op_input_fields[i], &basis);
338e15f9bd0SJeremy L Thompson       CeedChkBackend(ierr);
339d1d35e2fSjeremylt       if (!active_in) {
340e15f9bd0SJeremy L Thompson         ierr = CeedBasisGetDimension(basis, &dim); CeedChkBackend(ierr);
341d1d35e2fSjeremylt         ierr = CeedVectorSetArray(impl->e_vecs_in[i], CEED_MEM_HOST,
34289c6efa4Sjeremylt                                   CEED_USE_POINTER,
343d1d35e2fSjeremylt                                   &impl->e_data[i][e*elem_size*size/dim]);
344e15f9bd0SJeremy L Thompson         CeedChkBackend(ierr);
34589c6efa4Sjeremylt       }
346d1d35e2fSjeremylt       ierr = CeedBasisApply(basis, blk_size, CEED_NOTRANSPOSE,
347d1d35e2fSjeremylt                             CEED_EVAL_GRAD, impl->e_vecs_in[i],
348d1d35e2fSjeremylt                             impl->q_vecs_in[i]); CeedChkBackend(ierr);
34989c6efa4Sjeremylt       break;
35089c6efa4Sjeremylt     case CEED_EVAL_WEIGHT:
35189c6efa4Sjeremylt       break;  // No action
352bbfacfcdSjeremylt     // LCOV_EXCL_START
35389c6efa4Sjeremylt     case CEED_EVAL_DIV:
3541d102b48SJeremy L Thompson     case CEED_EVAL_CURL: {
355d1d35e2fSjeremylt       ierr = CeedOperatorFieldGetBasis(op_input_fields[i], &basis);
356e15f9bd0SJeremy L Thompson       CeedChkBackend(ierr);
3571d102b48SJeremy L Thompson       Ceed ceed;
358e15f9bd0SJeremy L Thompson       ierr = CeedBasisGetCeed(basis, &ceed); CeedChkBackend(ierr);
359e15f9bd0SJeremy L Thompson       return CeedError(ceed, CEED_ERROR_BACKEND,
360e15f9bd0SJeremy L Thompson                        "Ceed evaluation mode not implemented");
361bbfacfcdSjeremylt       // LCOV_EXCL_STOP
3621d102b48SJeremy L Thompson     }
3631d102b48SJeremy L Thompson     }
3641d102b48SJeremy L Thompson   }
365e15f9bd0SJeremy L Thompson   return CEED_ERROR_SUCCESS;
3661d102b48SJeremy L Thompson }
36789c6efa4Sjeremylt 
368f10650afSjeremylt //------------------------------------------------------------------------------
369f10650afSjeremylt // Output Basis Action
370f10650afSjeremylt //------------------------------------------------------------------------------
3711d102b48SJeremy L Thompson static inline int CeedOperatorOutputBasis_Opt(CeedInt e, CeedInt Q,
372d1d35e2fSjeremylt     CeedQFunctionField *qf_output_fields, CeedOperatorField *op_output_fields,
373d1d35e2fSjeremylt     CeedInt blk_size, CeedInt num_input_fields, CeedInt num_output_fields,
374d1d35e2fSjeremylt     CeedOperator op, CeedVector out_vec, CeedOperator_Opt *impl,
3751d102b48SJeremy L Thompson     CeedRequest *request) {
3761d102b48SJeremy L Thompson   CeedInt ierr;
377d1d35e2fSjeremylt   CeedElemRestriction elem_restr;
378d1d35e2fSjeremylt   CeedEvalMode eval_mode;
3791d102b48SJeremy L Thompson   CeedBasis basis;
3801d102b48SJeremy L Thompson   CeedVector vec;
3811d102b48SJeremy L Thompson 
382d1d35e2fSjeremylt   for (CeedInt i=0; i<num_output_fields; i++) {
383d1d35e2fSjeremylt     // Get elem_size, eval_mode, size
384d1d35e2fSjeremylt     ierr = CeedOperatorFieldGetElemRestriction(op_output_fields[i], &elem_restr);
385e15f9bd0SJeremy L Thompson     CeedChkBackend(ierr);
386d1d35e2fSjeremylt     ierr = CeedQFunctionFieldGetEvalMode(qf_output_fields[i], &eval_mode);
387e15f9bd0SJeremy L Thompson     CeedChkBackend(ierr);
38889c6efa4Sjeremylt     // Basis action
389d1d35e2fSjeremylt     switch(eval_mode) {
39089c6efa4Sjeremylt     case CEED_EVAL_NONE:
39189c6efa4Sjeremylt       break; // No action
39289c6efa4Sjeremylt     case CEED_EVAL_INTERP:
393d1d35e2fSjeremylt       ierr = CeedOperatorFieldGetBasis(op_output_fields[i], &basis);
394e15f9bd0SJeremy L Thompson       CeedChkBackend(ierr);
395d1d35e2fSjeremylt       ierr = CeedBasisApply(basis, blk_size, CEED_TRANSPOSE,
396d1d35e2fSjeremylt                             CEED_EVAL_INTERP, impl->q_vecs_out[i],
397d1d35e2fSjeremylt                             impl->e_vecs_out[i]); CeedChkBackend(ierr);
39889c6efa4Sjeremylt       break;
39989c6efa4Sjeremylt     case CEED_EVAL_GRAD:
400d1d35e2fSjeremylt       ierr = CeedOperatorFieldGetBasis(op_output_fields[i], &basis);
401e15f9bd0SJeremy L Thompson       CeedChkBackend(ierr);
402d1d35e2fSjeremylt       ierr = CeedBasisApply(basis, blk_size, CEED_TRANSPOSE,
403d1d35e2fSjeremylt                             CEED_EVAL_GRAD, impl->q_vecs_out[i],
404d1d35e2fSjeremylt                             impl->e_vecs_out[i]); CeedChkBackend(ierr);
40589c6efa4Sjeremylt       break;
406c042f62fSJeremy L Thompson     // LCOV_EXCL_START
407bbfacfcdSjeremylt     case CEED_EVAL_WEIGHT: {
40889c6efa4Sjeremylt       Ceed ceed;
409e15f9bd0SJeremy L Thompson       ierr = CeedOperatorGetCeed(op, &ceed); CeedChkBackend(ierr);
410e15f9bd0SJeremy L Thompson       return CeedError(ceed, CEED_ERROR_BACKEND,
411e15f9bd0SJeremy L Thompson                        "CEED_EVAL_WEIGHT cannot be an output "
4121d102b48SJeremy L Thompson                        "evaluation mode");
41389c6efa4Sjeremylt     }
41489c6efa4Sjeremylt     case CEED_EVAL_DIV:
4151d102b48SJeremy L Thompson     case CEED_EVAL_CURL: {
4161d102b48SJeremy L Thompson       Ceed ceed;
417e15f9bd0SJeremy L Thompson       ierr = CeedOperatorGetCeed(op, &ceed); CeedChkBackend(ierr);
418e15f9bd0SJeremy L Thompson       return CeedError(ceed, CEED_ERROR_BACKEND,
419e15f9bd0SJeremy L Thompson                        "Ceed evaluation mode not implemented");
420bbfacfcdSjeremylt       // LCOV_EXCL_STOP
4211d102b48SJeremy L Thompson     }
42289c6efa4Sjeremylt     }
42389c6efa4Sjeremylt     // Restrict output block
42489c6efa4Sjeremylt     // Get output vector
425d1d35e2fSjeremylt     ierr = CeedOperatorFieldGetVector(op_output_fields[i], &vec);
426e15f9bd0SJeremy L Thompson     CeedChkBackend(ierr);
42789c6efa4Sjeremylt     if (vec == CEED_VECTOR_ACTIVE)
428d1d35e2fSjeremylt       vec = out_vec;
42989c6efa4Sjeremylt     // Restrict
430d1d35e2fSjeremylt     ierr = CeedElemRestrictionApplyBlock(impl->blk_restr[i+impl->num_e_vecs_in],
431d1d35e2fSjeremylt                                          e/blk_size, CEED_TRANSPOSE,
432d1d35e2fSjeremylt                                          impl->e_vecs_out[i], vec, request);
433e15f9bd0SJeremy L Thompson     CeedChkBackend(ierr);
43489c6efa4Sjeremylt   }
435e15f9bd0SJeremy L Thompson   return CEED_ERROR_SUCCESS;
43689c6efa4Sjeremylt }
43789c6efa4Sjeremylt 
438f10650afSjeremylt //------------------------------------------------------------------------------
439f10650afSjeremylt // Restore Input Vectors
440f10650afSjeremylt //------------------------------------------------------------------------------
441d1d35e2fSjeremylt static inline int CeedOperatorRestoreInputs_Opt(CeedInt num_input_fields,
442d1d35e2fSjeremylt     CeedQFunctionField *qf_input_fields, CeedOperatorField *op_input_fields,
443187168c7SJeremy L Thompson     CeedOperator_Opt *impl) {
4441d102b48SJeremy L Thompson   CeedInt ierr;
445d1d35e2fSjeremylt   CeedEvalMode eval_mode;
4461d102b48SJeremy L Thompson 
447d1d35e2fSjeremylt   for (CeedInt i=0; i<num_input_fields; i++) {
448d1d35e2fSjeremylt     ierr = CeedQFunctionFieldGetEvalMode(qf_input_fields[i], &eval_mode);
449e15f9bd0SJeremy L Thompson     CeedChkBackend(ierr);
450d1d35e2fSjeremylt     if (eval_mode == CEED_EVAL_WEIGHT) { // Skip
45189c6efa4Sjeremylt     } else {
452d1d35e2fSjeremylt       ierr = CeedVectorRestoreArrayRead(impl->e_vecs[i],
453d1d35e2fSjeremylt                                         (const CeedScalar **) &impl->e_data[i]);
454e15f9bd0SJeremy L Thompson       CeedChkBackend(ierr);
45589c6efa4Sjeremylt     }
45689c6efa4Sjeremylt   }
457e15f9bd0SJeremy L Thompson   return CEED_ERROR_SUCCESS;
4581d102b48SJeremy L Thompson }
4591d102b48SJeremy L Thompson 
460f10650afSjeremylt //------------------------------------------------------------------------------
461f10650afSjeremylt // Operator Apply
462f10650afSjeremylt //------------------------------------------------------------------------------
463d1d35e2fSjeremylt static int CeedOperatorApplyAdd_Opt(CeedOperator op, CeedVector in_vec,
464d1d35e2fSjeremylt                                     CeedVector out_vec, CeedRequest *request) {
4651d102b48SJeremy L Thompson   int ierr;
4661d102b48SJeremy L Thompson   Ceed ceed;
467e15f9bd0SJeremy L Thompson   ierr = CeedOperatorGetCeed(op, &ceed); CeedChkBackend(ierr);
468d1d35e2fSjeremylt   Ceed_Opt *ceed_impl;
469d1d35e2fSjeremylt   ierr = CeedGetData(ceed, &ceed_impl); CeedChkBackend(ierr);
470d1d35e2fSjeremylt   CeedInt blk_size = ceed_impl->blk_size;
4711d102b48SJeremy L Thompson   CeedOperator_Opt *impl;
472e15f9bd0SJeremy L Thompson   ierr = CeedOperatorGetData(op, &impl); CeedChkBackend(ierr);
473d1d35e2fSjeremylt   CeedInt Q, num_input_fields, num_output_fields, num_elem;
474d1d35e2fSjeremylt   ierr = CeedOperatorGetNumElements(op, &num_elem); CeedChkBackend(ierr);
475e15f9bd0SJeremy L Thompson   ierr = CeedOperatorGetNumQuadraturePoints(op, &Q); CeedChkBackend(ierr);
476d1d35e2fSjeremylt   CeedInt num_blks = (num_elem/blk_size) + !!(num_elem%blk_size);
4771d102b48SJeremy L Thompson   CeedQFunction qf;
478e15f9bd0SJeremy L Thompson   ierr = CeedOperatorGetQFunction(op, &qf); CeedChkBackend(ierr);
479d1d35e2fSjeremylt   CeedOperatorField *op_input_fields, *op_output_fields;
4807e7773b5SJeremy L Thompson   ierr = CeedOperatorGetFields(op, &num_input_fields, &op_input_fields,
4817e7773b5SJeremy L Thompson                                &num_output_fields, &op_output_fields);
482e15f9bd0SJeremy L Thompson   CeedChkBackend(ierr);
483d1d35e2fSjeremylt   CeedQFunctionField *qf_input_fields, *qf_output_fields;
4847e7773b5SJeremy L Thompson   ierr = CeedQFunctionGetFields(qf, NULL, &qf_input_fields, NULL,
4857e7773b5SJeremy L Thompson                                 &qf_output_fields);
486e15f9bd0SJeremy L Thompson   CeedChkBackend(ierr);
487d1d35e2fSjeremylt   CeedEvalMode eval_mode;
4881d102b48SJeremy L Thompson 
4891d102b48SJeremy L Thompson   // Setup
490e15f9bd0SJeremy L Thompson   ierr = CeedOperatorSetup_Opt(op); CeedChkBackend(ierr);
4911d102b48SJeremy L Thompson 
4920b454692Sjeremylt   // Restriction only operator
4930b454692Sjeremylt   if (impl->is_identity_restr_op) {
4940b454692Sjeremylt     for (CeedInt b=0; b<num_blks; b++) {
4950b454692Sjeremylt       ierr = CeedElemRestrictionApplyBlock(impl->blk_restr[0], b, CEED_NOTRANSPOSE,
4960b454692Sjeremylt                                            in_vec, impl->e_vecs_in[0], request); CeedChkBackend(ierr);
4970b454692Sjeremylt       ierr = CeedElemRestrictionApplyBlock(impl->blk_restr[1], b, CEED_TRANSPOSE,
4980b454692Sjeremylt                                            impl->e_vecs_in[0], out_vec, request); CeedChkBackend(ierr);
4990b454692Sjeremylt     }
5000b454692Sjeremylt     return CEED_ERROR_SUCCESS;
5010b454692Sjeremylt   }
5020b454692Sjeremylt 
5031d102b48SJeremy L Thompson   // Input Evecs and Restriction
504d1d35e2fSjeremylt   ierr = CeedOperatorSetupInputs_Opt(num_input_fields, qf_input_fields,
505d1d35e2fSjeremylt                                      op_input_fields, in_vec, impl, request);
506e15f9bd0SJeremy L Thompson   CeedChkBackend(ierr);
5071d102b48SJeremy L Thompson 
5081d102b48SJeremy L Thompson   // Output Lvecs, Evecs, and Qvecs
509d1d35e2fSjeremylt   for (CeedInt i=0; i<num_output_fields; i++) {
5101d102b48SJeremy L Thompson     // Set Qvec if needed
511d1d35e2fSjeremylt     ierr = CeedQFunctionFieldGetEvalMode(qf_output_fields[i], &eval_mode);
512e15f9bd0SJeremy L Thompson     CeedChkBackend(ierr);
513d1d35e2fSjeremylt     if (eval_mode == CEED_EVAL_NONE) {
5141d102b48SJeremy L Thompson       // Set qvec to single block evec
515d1d35e2fSjeremylt       ierr = CeedVectorGetArray(impl->e_vecs_out[i], CEED_MEM_HOST,
516d1d35e2fSjeremylt                                 &impl->e_data[i + num_input_fields]);
517e15f9bd0SJeremy L Thompson       CeedChkBackend(ierr);
518d1d35e2fSjeremylt       ierr = CeedVectorSetArray(impl->q_vecs_out[i], CEED_MEM_HOST,
519d1d35e2fSjeremylt                                 CEED_USE_POINTER, impl->e_data[i + num_input_fields]); CeedChkBackend(ierr);
520d1d35e2fSjeremylt       ierr = CeedVectorRestoreArray(impl->e_vecs_out[i],
521d1d35e2fSjeremylt                                     &impl->e_data[i + num_input_fields]);
522e15f9bd0SJeremy L Thompson       CeedChkBackend(ierr);
5231d102b48SJeremy L Thompson     }
5241d102b48SJeremy L Thompson   }
5251d102b48SJeremy L Thompson 
5261d102b48SJeremy L Thompson   // Loop through elements
527d1d35e2fSjeremylt   for (CeedInt e=0; e<num_blks*blk_size; e+=blk_size) {
5281d102b48SJeremy L Thompson     // Input basis apply
529d1d35e2fSjeremylt     ierr = CeedOperatorInputBasis_Opt(e, Q, qf_input_fields, op_input_fields,
530d1d35e2fSjeremylt                                       num_input_fields, blk_size, in_vec, false,
531e15f9bd0SJeremy L Thompson                                       impl, request); CeedChkBackend(ierr);
5321d102b48SJeremy L Thompson 
5331d102b48SJeremy L Thompson     // Q function
5340b454692Sjeremylt     if (!impl->is_identity_qf) {
535d1d35e2fSjeremylt       ierr = CeedQFunctionApply(qf, Q*blk_size, impl->q_vecs_in, impl->q_vecs_out);
536e15f9bd0SJeremy L Thompson       CeedChkBackend(ierr);
53716911fdaSjeremylt     }
5381d102b48SJeremy L Thompson 
5391d102b48SJeremy L Thompson     // Output basis apply and restrict
540d1d35e2fSjeremylt     ierr = CeedOperatorOutputBasis_Opt(e, Q, qf_output_fields, op_output_fields,
541d1d35e2fSjeremylt                                        blk_size, num_input_fields, num_output_fields,
542d1d35e2fSjeremylt                                        op, out_vec, impl, request);
543e15f9bd0SJeremy L Thompson     CeedChkBackend(ierr);
5441d102b48SJeremy L Thompson   }
5451d102b48SJeremy L Thompson 
5461d102b48SJeremy L Thompson   // Restore input arrays
547d1d35e2fSjeremylt   ierr = CeedOperatorRestoreInputs_Opt(num_input_fields, qf_input_fields,
548d1d35e2fSjeremylt                                        op_input_fields, impl);
549e15f9bd0SJeremy L Thompson   CeedChkBackend(ierr);
55089c6efa4Sjeremylt 
551e15f9bd0SJeremy L Thompson   return CEED_ERROR_SUCCESS;
55289c6efa4Sjeremylt }
55389c6efa4Sjeremylt 
554f10650afSjeremylt //------------------------------------------------------------------------------
55570a7ffb3SJeremy L Thompson // Core code for linear QFunction assembly
556f10650afSjeremylt //------------------------------------------------------------------------------
55770a7ffb3SJeremy L Thompson static inline int CeedOperatorLinearAssembleQFunctionCore_Opt(CeedOperator op,
55870a7ffb3SJeremy L Thompson     bool build_objects, CeedVector *assembled, CeedElemRestriction *rstr,
55970a7ffb3SJeremy L Thompson     CeedRequest *request) {
5601d102b48SJeremy L Thompson   int ierr;
5611d102b48SJeremy L Thompson   Ceed ceed;
562e15f9bd0SJeremy L Thompson   ierr = CeedOperatorGetCeed(op, &ceed); CeedChkBackend(ierr);
563d1d35e2fSjeremylt   Ceed_Opt *ceed_impl;
564d1d35e2fSjeremylt   ierr = CeedGetData(ceed, &ceed_impl); CeedChkBackend(ierr);
565d1d35e2fSjeremylt   const CeedInt blk_size = ceed_impl->blk_size;
5661d102b48SJeremy L Thompson   CeedOperator_Opt *impl;
567e15f9bd0SJeremy L Thompson   ierr = CeedOperatorGetData(op, &impl); CeedChkBackend(ierr);
568d1d35e2fSjeremylt   CeedInt Q, num_input_fields, num_output_fields, num_elem, size;
569d1d35e2fSjeremylt   ierr = CeedOperatorGetNumElements(op, &num_elem); CeedChkBackend(ierr);
570e15f9bd0SJeremy L Thompson   ierr = CeedOperatorGetNumQuadraturePoints(op, &Q); CeedChkBackend(ierr);
571d1d35e2fSjeremylt   CeedInt num_blks = (num_elem/blk_size) + !!(num_elem%blk_size);
5721d102b48SJeremy L Thompson   CeedQFunction qf;
573e15f9bd0SJeremy L Thompson   ierr = CeedOperatorGetQFunction(op, &qf); CeedChkBackend(ierr);
574d1d35e2fSjeremylt   CeedOperatorField *op_input_fields, *op_output_fields;
5757e7773b5SJeremy L Thompson   ierr = CeedOperatorGetFields(op, &num_input_fields, &op_input_fields,
5767e7773b5SJeremy L Thompson                                &num_output_fields, &op_output_fields);
577e15f9bd0SJeremy L Thompson   CeedChkBackend(ierr);
578d1d35e2fSjeremylt   CeedQFunctionField *qf_input_fields, *qf_output_fields;
5797e7773b5SJeremy L Thompson   ierr = CeedQFunctionGetFields(qf, NULL, &qf_input_fields, NULL,
5807e7773b5SJeremy L Thompson                                 &qf_output_fields);
581e15f9bd0SJeremy L Thompson   CeedChkBackend(ierr);
582bb219a0fSJeremy L Thompson   CeedVector vec, lvec = impl->qf_lvec;
583bb219a0fSJeremy L Thompson   CeedInt num_active_in = impl->qf_num_active_in,
584bb219a0fSJeremy L Thompson           num_active_out = impl->qf_num_active_out;
585bb219a0fSJeremy L Thompson   CeedVector *active_in = impl->qf_active_in;
5861d102b48SJeremy L Thompson   CeedScalar *a, *tmp;
5871d102b48SJeremy L Thompson 
5881d102b48SJeremy L Thompson   // Setup
589e15f9bd0SJeremy L Thompson   ierr = CeedOperatorSetup_Opt(op); CeedChkBackend(ierr);
5901d102b48SJeremy L Thompson 
59116911fdaSjeremylt   // Check for identity
5920b454692Sjeremylt   if (impl->is_identity_qf)
59316911fdaSjeremylt     // LCOV_EXCL_START
594e15f9bd0SJeremy L Thompson     return CeedError(ceed, CEED_ERROR_BACKEND,
595e15f9bd0SJeremy L Thompson                      "Assembling identity qfunctions not supported");
59616911fdaSjeremylt   // LCOV_EXCL_STOP
59716911fdaSjeremylt 
5981d102b48SJeremy L Thompson   // Input Evecs and Restriction
599d1d35e2fSjeremylt   ierr = CeedOperatorSetupInputs_Opt(num_input_fields, qf_input_fields,
600d1d35e2fSjeremylt                                      op_input_fields, NULL, impl, request);
601e15f9bd0SJeremy L Thompson   CeedChkBackend(ierr);
6021d102b48SJeremy L Thompson 
6031d102b48SJeremy L Thompson   // Count number of active input fields
604bb219a0fSJeremy L Thompson   if (!num_active_in) {
605d1d35e2fSjeremylt     for (CeedInt i=0; i<num_input_fields; i++) {
6061d102b48SJeremy L Thompson       // Get input vector
607d1d35e2fSjeremylt       ierr = CeedOperatorFieldGetVector(op_input_fields[i], &vec);
608d1d35e2fSjeremylt       CeedChkBackend(ierr);
6091d102b48SJeremy L Thompson       // Check if active input
6101d102b48SJeremy L Thompson       if (vec == CEED_VECTOR_ACTIVE) {
611d1d35e2fSjeremylt         ierr = CeedQFunctionFieldGetSize(qf_input_fields[i], &size);
612e15f9bd0SJeremy L Thompson         CeedChkBackend(ierr);
613d1d35e2fSjeremylt         ierr = CeedVectorSetValue(impl->q_vecs_in[i], 0.0); CeedChkBackend(ierr);
614d1d35e2fSjeremylt         ierr = CeedVectorGetArray(impl->q_vecs_in[i], CEED_MEM_HOST, &tmp);
615d1d35e2fSjeremylt         CeedChkBackend(ierr);
616d1d35e2fSjeremylt         ierr = CeedRealloc(num_active_in + size, &active_in); CeedChkBackend(ierr);
6171d102b48SJeremy L Thompson         for (CeedInt field=0; field<size; field++) {
618d1d35e2fSjeremylt           ierr = CeedVectorCreate(ceed, Q*blk_size, &active_in[num_active_in+field]);
619e15f9bd0SJeremy L Thompson           CeedChkBackend(ierr);
620d1d35e2fSjeremylt           ierr = CeedVectorSetArray(active_in[num_active_in+field], CEED_MEM_HOST,
621d1d35e2fSjeremylt                                     CEED_USE_POINTER, &tmp[field*Q*blk_size]);
622e15f9bd0SJeremy L Thompson           CeedChkBackend(ierr);
6231d102b48SJeremy L Thompson         }
624d1d35e2fSjeremylt         num_active_in += size;
625d1d35e2fSjeremylt         ierr = CeedVectorRestoreArray(impl->q_vecs_in[i], &tmp); CeedChkBackend(ierr);
6261d102b48SJeremy L Thompson       }
62789c6efa4Sjeremylt     }
628bb219a0fSJeremy L Thompson     impl->qf_num_active_in = num_active_in;
629bb219a0fSJeremy L Thompson     impl->qf_active_in = active_in;
630bb219a0fSJeremy L Thompson   }
63189c6efa4Sjeremylt 
6321d102b48SJeremy L Thompson   // Count number of active output fields
633bb219a0fSJeremy L Thompson   if (!num_active_out) {
634d1d35e2fSjeremylt     for (CeedInt i=0; i<num_output_fields; i++) {
6351d102b48SJeremy L Thompson       // Get output vector
636d1d35e2fSjeremylt       ierr = CeedOperatorFieldGetVector(op_output_fields[i], &vec);
637e15f9bd0SJeremy L Thompson       CeedChkBackend(ierr);
6381d102b48SJeremy L Thompson       // Check if active output
6391d102b48SJeremy L Thompson       if (vec == CEED_VECTOR_ACTIVE) {
640d1d35e2fSjeremylt         ierr = CeedQFunctionFieldGetSize(qf_output_fields[i], &size);
641e15f9bd0SJeremy L Thompson         CeedChkBackend(ierr);
642d1d35e2fSjeremylt         num_active_out += size;
6431d102b48SJeremy L Thompson       }
6441d102b48SJeremy L Thompson     }
645bb219a0fSJeremy L Thompson     impl->qf_num_active_out = num_active_out;
646bb219a0fSJeremy L Thompson   }
6471d102b48SJeremy L Thompson 
6481d102b48SJeremy L Thompson   // Check sizes
649d1d35e2fSjeremylt   if (!num_active_in || !num_active_out)
6501d102b48SJeremy L Thompson     // LCOV_EXCL_START
651e15f9bd0SJeremy L Thompson     return CeedError(ceed, CEED_ERROR_BACKEND,
652e15f9bd0SJeremy L Thompson                      "Cannot assemble QFunction without active inputs "
6531d102b48SJeremy L Thompson                      "and outputs");
6541d102b48SJeremy L Thompson   // LCOV_EXCL_STOP
6551d102b48SJeremy L Thompson 
6561d102b48SJeremy L Thompson   // Setup lvec
657bb219a0fSJeremy L Thompson   if (!lvec) {
658d1d35e2fSjeremylt     ierr = CeedVectorCreate(ceed, num_blks*blk_size*Q*num_active_in*num_active_out,
659e15f9bd0SJeremy L Thompson                             &lvec); CeedChkBackend(ierr);
660bb219a0fSJeremy L Thompson     impl->qf_lvec = lvec;
661bb219a0fSJeremy L Thompson   }
662e15f9bd0SJeremy L Thompson   ierr = CeedVectorGetArray(lvec, CEED_MEM_HOST, &a); CeedChkBackend(ierr);
6631d102b48SJeremy L Thompson 
66470a7ffb3SJeremy L Thompson   // Build objects if needed
665d1d35e2fSjeremylt   CeedInt strides[3] = {1, Q, num_active_in *num_active_out*Q};
66670a7ffb3SJeremy L Thompson   if (build_objects) {
66770a7ffb3SJeremy L Thompson     // Create output restriction
668d1d35e2fSjeremylt     ierr = CeedElemRestrictionCreateStrided(ceed, num_elem, Q,
669d1d35e2fSjeremylt                                             num_active_in*num_active_out,
670d1d35e2fSjeremylt                                             num_active_in*num_active_out*num_elem*Q,
671e15f9bd0SJeremy L Thompson                                             strides, rstr); CeedChkBackend(ierr);
6721d102b48SJeremy L Thompson     // Create assembled vector
673d1d35e2fSjeremylt     ierr = CeedVectorCreate(ceed, num_elem*Q*num_active_in*num_active_out,
674e15f9bd0SJeremy L Thompson                             assembled); CeedChkBackend(ierr);
67570a7ffb3SJeremy L Thompson   }
6761d102b48SJeremy L Thompson 
6771d102b48SJeremy L Thompson   // Loop through elements
678d1d35e2fSjeremylt   for (CeedInt e=0; e<num_blks*blk_size; e+=blk_size) {
6791d102b48SJeremy L Thompson     // Input basis apply
680d1d35e2fSjeremylt     ierr = CeedOperatorInputBasis_Opt(e, Q, qf_input_fields, op_input_fields,
681d1d35e2fSjeremylt                                       num_input_fields, blk_size, NULL, true,
682e15f9bd0SJeremy L Thompson                                       impl, request); CeedChkBackend(ierr);
6831d102b48SJeremy L Thompson 
6841d102b48SJeremy L Thompson     // Assemble QFunction
685d1d35e2fSjeremylt     for (CeedInt in=0; in<num_active_in; in++) {
6861d102b48SJeremy L Thompson       // Set Inputs
687d1d35e2fSjeremylt       ierr = CeedVectorSetValue(active_in[in], 1.0); CeedChkBackend(ierr);
688d1d35e2fSjeremylt       if (num_active_in > 1) {
689d1d35e2fSjeremylt         ierr = CeedVectorSetValue(active_in[(in+num_active_in-1)%num_active_in],
690e15f9bd0SJeremy L Thompson                                   0.0); CeedChkBackend(ierr);
69142ea3801Sjeremylt       }
6921d102b48SJeremy L Thompson       // Set Outputs
693d1d35e2fSjeremylt       for (CeedInt out=0; out<num_output_fields; out++) {
6941d102b48SJeremy L Thompson         // Get output vector
695d1d35e2fSjeremylt         ierr = CeedOperatorFieldGetVector(op_output_fields[out], &vec);
696e15f9bd0SJeremy L Thompson         CeedChkBackend(ierr);
6971d102b48SJeremy L Thompson         // Check if active output
6981d102b48SJeremy L Thompson         if (vec == CEED_VECTOR_ACTIVE) {
699d1d35e2fSjeremylt           CeedVectorSetArray(impl->q_vecs_out[out], CEED_MEM_HOST,
700e15f9bd0SJeremy L Thompson                              CEED_USE_POINTER, a); CeedChkBackend(ierr);
701d1d35e2fSjeremylt           ierr = CeedQFunctionFieldGetSize(qf_output_fields[out], &size);
702e15f9bd0SJeremy L Thompson           CeedChkBackend(ierr);
703d1d35e2fSjeremylt           a += size*Q*blk_size; // Advance the pointer by the size of the output
7041d102b48SJeremy L Thompson         }
7051d102b48SJeremy L Thompson       }
7061d102b48SJeremy L Thompson       // Apply QFunction
707d1d35e2fSjeremylt       ierr = CeedQFunctionApply(qf, Q*blk_size, impl->q_vecs_in, impl->q_vecs_out);
708e15f9bd0SJeremy L Thompson       CeedChkBackend(ierr);
7091d102b48SJeremy L Thompson     }
7101d102b48SJeremy L Thompson   }
7111d102b48SJeremy L Thompson 
7121d102b48SJeremy L Thompson   // Un-set output Qvecs to prevent accidental overwrite of Assembled
713d1d35e2fSjeremylt   for (CeedInt out=0; out<num_output_fields; out++) {
7141d102b48SJeremy L Thompson     // Get output vector
715d1d35e2fSjeremylt     ierr = CeedOperatorFieldGetVector(op_output_fields[out], &vec);
716e15f9bd0SJeremy L Thompson     CeedChkBackend(ierr);
7171d102b48SJeremy L Thompson     // Check if active output
7181d102b48SJeremy L Thompson     if (vec == CEED_VECTOR_ACTIVE) {
719d1d35e2fSjeremylt       CeedVectorSetArray(impl->q_vecs_out[out], CEED_MEM_HOST, CEED_COPY_VALUES,
720e15f9bd0SJeremy L Thompson                          NULL); CeedChkBackend(ierr);
7211d102b48SJeremy L Thompson     }
7221d102b48SJeremy L Thompson   }
7231d102b48SJeremy L Thompson 
7241d102b48SJeremy L Thompson   // Restore input arrays
725d1d35e2fSjeremylt   ierr = CeedOperatorRestoreInputs_Opt(num_input_fields, qf_input_fields,
726d1d35e2fSjeremylt                                        op_input_fields, impl);
727e15f9bd0SJeremy L Thompson   CeedChkBackend(ierr);
7281d102b48SJeremy L Thompson 
7291d102b48SJeremy L Thompson   // Output blocked restriction
730e15f9bd0SJeremy L Thompson   ierr = CeedVectorRestoreArray(lvec, &a); CeedChkBackend(ierr);
731e15f9bd0SJeremy L Thompson   ierr = CeedVectorSetValue(*assembled, 0.0); CeedChkBackend(ierr);
732bb219a0fSJeremy L Thompson   CeedElemRestriction blk_rstr = impl->qf_blk_rstr;
733bb219a0fSJeremy L Thompson   if (!blk_rstr) {
734d1d35e2fSjeremylt     ierr = CeedElemRestrictionCreateBlockedStrided(ceed, num_elem, Q, blk_size,
735d1d35e2fSjeremylt            num_active_in*num_active_out, num_active_in*num_active_out*num_elem*Q,
736d1d35e2fSjeremylt            strides, &blk_rstr); CeedChkBackend(ierr);
737bb219a0fSJeremy L Thompson     impl->qf_blk_rstr = blk_rstr;
738bb219a0fSJeremy L Thompson   }
739d1d35e2fSjeremylt   ierr = CeedElemRestrictionApply(blk_rstr, CEED_TRANSPOSE, lvec, *assembled,
740e15f9bd0SJeremy L Thompson                                   request); CeedChkBackend(ierr);
7411d102b48SJeremy L Thompson 
742e15f9bd0SJeremy L Thompson   return CEED_ERROR_SUCCESS;
74389c6efa4Sjeremylt }
74489c6efa4Sjeremylt 
745f10650afSjeremylt //------------------------------------------------------------------------------
74670a7ffb3SJeremy L Thompson // Assemble Linear QFunction
74770a7ffb3SJeremy L Thompson //------------------------------------------------------------------------------
74870a7ffb3SJeremy L Thompson static int CeedOperatorLinearAssembleQFunction_Opt(CeedOperator op,
74970a7ffb3SJeremy L Thompson     CeedVector *assembled, CeedElemRestriction *rstr, CeedRequest *request) {
75070a7ffb3SJeremy L Thompson   return CeedOperatorLinearAssembleQFunctionCore_Opt(op, true, assembled, rstr,
75170a7ffb3SJeremy L Thompson          request);
75270a7ffb3SJeremy L Thompson }
75370a7ffb3SJeremy L Thompson 
75470a7ffb3SJeremy L Thompson //------------------------------------------------------------------------------
75570a7ffb3SJeremy L Thompson // Update Assembled Linear QFunction
75670a7ffb3SJeremy L Thompson //------------------------------------------------------------------------------
75770a7ffb3SJeremy L Thompson static int CeedOperatorLinearAssembleQFunctionUpdate_Opt(CeedOperator op,
75870a7ffb3SJeremy L Thompson     CeedVector assembled, CeedElemRestriction rstr, CeedRequest *request) {
75970a7ffb3SJeremy L Thompson   return CeedOperatorLinearAssembleQFunctionCore_Opt(op, false, &assembled,
76070a7ffb3SJeremy L Thompson          &rstr, request);
76170a7ffb3SJeremy L Thompson }
76270a7ffb3SJeremy L Thompson 
76370a7ffb3SJeremy L Thompson //------------------------------------------------------------------------------
764f10650afSjeremylt // Operator Destroy
765f10650afSjeremylt //------------------------------------------------------------------------------
766f10650afSjeremylt static int CeedOperatorDestroy_Opt(CeedOperator op) {
767f10650afSjeremylt   int ierr;
768f10650afSjeremylt   CeedOperator_Opt *impl;
769e15f9bd0SJeremy L Thompson   ierr = CeedOperatorGetData(op, &impl); CeedChkBackend(ierr);
770f10650afSjeremylt 
771d1d35e2fSjeremylt   for (CeedInt i=0; i<impl->num_e_vecs_in+impl->num_e_vecs_out; i++) {
772d1d35e2fSjeremylt     ierr = CeedElemRestrictionDestroy(&impl->blk_restr[i]); CeedChkBackend(ierr);
773d1d35e2fSjeremylt     ierr = CeedVectorDestroy(&impl->e_vecs[i]); CeedChkBackend(ierr);
774f10650afSjeremylt   }
775d1d35e2fSjeremylt   ierr = CeedFree(&impl->blk_restr); CeedChkBackend(ierr);
776d1d35e2fSjeremylt   ierr = CeedFree(&impl->e_vecs); CeedChkBackend(ierr);
777d1d35e2fSjeremylt   ierr = CeedFree(&impl->e_data); CeedChkBackend(ierr);
778d1d35e2fSjeremylt   ierr = CeedFree(&impl->input_state); CeedChkBackend(ierr);
779f10650afSjeremylt 
780d1d35e2fSjeremylt   for (CeedInt i=0; i<impl->num_e_vecs_in; i++) {
781d1d35e2fSjeremylt     ierr = CeedVectorDestroy(&impl->e_vecs_in[i]); CeedChkBackend(ierr);
782d1d35e2fSjeremylt     ierr = CeedVectorDestroy(&impl->q_vecs_in[i]); CeedChkBackend(ierr);
783f10650afSjeremylt   }
784d1d35e2fSjeremylt   ierr = CeedFree(&impl->e_vecs_in); CeedChkBackend(ierr);
785d1d35e2fSjeremylt   ierr = CeedFree(&impl->q_vecs_in); CeedChkBackend(ierr);
786f10650afSjeremylt 
787d1d35e2fSjeremylt   for (CeedInt i=0; i<impl->num_e_vecs_out; i++) {
788d1d35e2fSjeremylt     ierr = CeedVectorDestroy(&impl->e_vecs_out[i]); CeedChkBackend(ierr);
789d1d35e2fSjeremylt     ierr = CeedVectorDestroy(&impl->q_vecs_out[i]); CeedChkBackend(ierr);
790f10650afSjeremylt   }
791d1d35e2fSjeremylt   ierr = CeedFree(&impl->e_vecs_out); CeedChkBackend(ierr);
792d1d35e2fSjeremylt   ierr = CeedFree(&impl->q_vecs_out); CeedChkBackend(ierr);
793f10650afSjeremylt 
794bb219a0fSJeremy L Thompson   // QFunction assembly data
795bb219a0fSJeremy L Thompson   for (CeedInt i=0; i<impl->qf_num_active_in; i++) {
796bb219a0fSJeremy L Thompson     ierr = CeedVectorDestroy(&impl->qf_active_in[i]); CeedChkBackend(ierr);
797bb219a0fSJeremy L Thompson   }
798bb219a0fSJeremy L Thompson   ierr = CeedFree(&impl->qf_active_in); CeedChkBackend(ierr);
799bb219a0fSJeremy L Thompson   ierr = CeedVectorDestroy(&impl->qf_lvec); CeedChkBackend(ierr);
800bb219a0fSJeremy L Thompson   ierr = CeedElemRestrictionDestroy(&impl->qf_blk_rstr); CeedChkBackend(ierr);
801bb219a0fSJeremy L Thompson 
802e15f9bd0SJeremy L Thompson   ierr = CeedFree(&impl); CeedChkBackend(ierr);
803e15f9bd0SJeremy L Thompson   return CEED_ERROR_SUCCESS;
804f10650afSjeremylt }
805f10650afSjeremylt 
806f10650afSjeremylt //------------------------------------------------------------------------------
807f10650afSjeremylt // Operator Create
808f10650afSjeremylt //------------------------------------------------------------------------------
80989c6efa4Sjeremylt int CeedOperatorCreate_Opt(CeedOperator op) {
81089c6efa4Sjeremylt   int ierr;
81189c6efa4Sjeremylt   Ceed ceed;
812e15f9bd0SJeremy L Thompson   ierr = CeedOperatorGetCeed(op, &ceed); CeedChkBackend(ierr);
813d1d35e2fSjeremylt   Ceed_Opt *ceed_impl;
814d1d35e2fSjeremylt   ierr = CeedGetData(ceed, &ceed_impl); CeedChkBackend(ierr);
815d1d35e2fSjeremylt   CeedInt blk_size = ceed_impl->blk_size;
81689c6efa4Sjeremylt   CeedOperator_Opt *impl;
81789c6efa4Sjeremylt 
818e15f9bd0SJeremy L Thompson   ierr = CeedCalloc(1, &impl); CeedChkBackend(ierr);
819e15f9bd0SJeremy L Thompson   ierr = CeedOperatorSetData(op, impl); CeedChkBackend(ierr);
82089c6efa4Sjeremylt 
821d1d35e2fSjeremylt   if (blk_size != 1 && blk_size != 8)
82282946b17Sjeremylt     // LCOV_EXCL_START
823e15f9bd0SJeremy L Thompson     return CeedError(ceed, CEED_ERROR_BACKEND,
824d1d35e2fSjeremylt                      "Opt backend cannot use blocksize: %d", blk_size);
82582946b17Sjeremylt   // LCOV_EXCL_STOP
82682946b17Sjeremylt 
82780ac2e43SJeremy L Thompson   ierr = CeedSetBackendFunction(ceed, "Operator", op, "LinearAssembleQFunction",
82880ac2e43SJeremy L Thompson                                 CeedOperatorLinearAssembleQFunction_Opt);
829e15f9bd0SJeremy L Thompson   CeedChkBackend(ierr);
83070a7ffb3SJeremy L Thompson   ierr = CeedSetBackendFunction(ceed, "Operator", op,
83170a7ffb3SJeremy L Thompson                                 "LinearAssembleQFunctionUpdate",
83270a7ffb3SJeremy L Thompson                                 CeedOperatorLinearAssembleQFunctionUpdate_Opt);
83370a7ffb3SJeremy L Thompson   CeedChkBackend(ierr);
834cae8b89aSjeremylt   ierr = CeedSetBackendFunction(ceed, "Operator", op, "ApplyAdd",
835e15f9bd0SJeremy L Thompson                                 CeedOperatorApplyAdd_Opt); CeedChkBackend(ierr);
83689c6efa4Sjeremylt   ierr = CeedSetBackendFunction(ceed, "Operator", op, "Destroy",
837e15f9bd0SJeremy L Thompson                                 CeedOperatorDestroy_Opt); CeedChkBackend(ierr);
838e15f9bd0SJeremy L Thompson   return CEED_ERROR_SUCCESS;
83989c6efa4Sjeremylt }
840f10650afSjeremylt //------------------------------------------------------------------------------
841