xref: /libCEED/backends/opt/ceed-opt-operator.c (revision ebbcc8a346ef79f72cc33926c55f927f02fd96aa)
189c6efa4Sjeremylt // Copyright (c) 2017-2018, Lawrence Livermore National Security, LLC.
289c6efa4Sjeremylt // Produced at the Lawrence Livermore National Laboratory. LLNL-CODE-734707.
389c6efa4Sjeremylt // All Rights reserved. See files LICENSE and NOTICE for details.
489c6efa4Sjeremylt //
589c6efa4Sjeremylt // This file is part of CEED, a collection of benchmarks, miniapps, software
689c6efa4Sjeremylt // libraries and APIs for efficient high-order finite element and spectral
789c6efa4Sjeremylt // element discretizations for exascale applications. For more information and
889c6efa4Sjeremylt // source code availability see http://github.com/ceed.
989c6efa4Sjeremylt //
1089c6efa4Sjeremylt // The CEED research is supported by the Exascale Computing Project 17-SC-20-SC,
1189c6efa4Sjeremylt // a collaborative effort of two U.S. Department of Energy organizations (Office
1289c6efa4Sjeremylt // of Science and the National Nuclear Security Administration) responsible for
1389c6efa4Sjeremylt // the planning and preparation of a capable exascale ecosystem, including
1489c6efa4Sjeremylt // software, applications, hardware, advanced system engineering and early
1589c6efa4Sjeremylt // testbed platforms, in support of the nation's exascale computing imperative.
1689c6efa4Sjeremylt 
17ec3da8bcSJed Brown #include <ceed/ceed.h>
18ec3da8bcSJed Brown #include <ceed/backend.h>
193d576824SJeremy L Thompson #include <stdbool.h>
203d576824SJeremy L Thompson #include <stdint.h>
2189c6efa4Sjeremylt #include <string.h>
2289c6efa4Sjeremylt #include "ceed-opt.h"
2389c6efa4Sjeremylt 
24f10650afSjeremylt //------------------------------------------------------------------------------
25f10650afSjeremylt // Setup Input/Output Fields
26f10650afSjeremylt //------------------------------------------------------------------------------
2789c6efa4Sjeremylt static int CeedOperatorSetupFields_Opt(CeedQFunction qf, CeedOperator op,
284fc1f125SJeremy L Thompson                                        bool is_input, const CeedInt blk_size,
29d1d35e2fSjeremylt                                        CeedElemRestriction *blk_restr,
304fc1f125SJeremy L Thompson                                        CeedVector *e_vecs_full, CeedVector *e_vecs,
31d1d35e2fSjeremylt                                        CeedVector *q_vecs, CeedInt start_e,
32d1d35e2fSjeremylt                                        CeedInt num_fields, CeedInt Q) {
33*ebbcc8a3SJeremy L Thompson   CeedInt ierr, num_comp, size, P;
3489c6efa4Sjeremylt   Ceed ceed;
35e15f9bd0SJeremy L Thompson   ierr = CeedOperatorGetCeed(op, &ceed); CeedChkBackend(ierr);
3689c6efa4Sjeremylt   CeedBasis basis;
3789c6efa4Sjeremylt   CeedElemRestriction r;
38d1d35e2fSjeremylt   CeedOperatorField *op_fields;
39d1d35e2fSjeremylt   CeedQFunctionField *qf_fields;
404fc1f125SJeremy L Thompson   if (is_input) {
417e7773b5SJeremy L Thompson     ierr = CeedOperatorGetFields(op, NULL, &op_fields, NULL, NULL);
42e15f9bd0SJeremy L Thompson     CeedChkBackend(ierr);
437e7773b5SJeremy L Thompson     ierr = CeedQFunctionGetFields(qf, NULL, &qf_fields, NULL, NULL);
44e15f9bd0SJeremy L Thompson     CeedChkBackend(ierr);
454fc1f125SJeremy L Thompson   } else {
464fc1f125SJeremy L Thompson     ierr = CeedOperatorGetFields(op, NULL, NULL, NULL,&op_fields);
474fc1f125SJeremy L Thompson     CeedChkBackend(ierr);
484fc1f125SJeremy L Thompson     ierr = CeedQFunctionGetFields(qf, NULL, NULL, NULL, &qf_fields);
494fc1f125SJeremy L Thompson     CeedChkBackend(ierr);
5089c6efa4Sjeremylt   }
5189c6efa4Sjeremylt 
5289c6efa4Sjeremylt   // Loop over fields
53d1d35e2fSjeremylt   for (CeedInt i=0; i<num_fields; i++) {
54d1d35e2fSjeremylt     CeedEvalMode eval_mode;
55d1d35e2fSjeremylt     ierr = CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode);
56d1d35e2fSjeremylt     CeedChkBackend(ierr);
5789c6efa4Sjeremylt 
58d1d35e2fSjeremylt     if (eval_mode != CEED_EVAL_WEIGHT) {
59d1d35e2fSjeremylt       ierr = CeedOperatorFieldGetElemRestriction(op_fields[i], &r);
60e15f9bd0SJeremy L Thompson       CeedChkBackend(ierr);
6189c6efa4Sjeremylt       Ceed ceed;
62e15f9bd0SJeremy L Thompson       ierr = CeedElemRestrictionGetCeed(r, &ceed); CeedChkBackend(ierr);
63d1d35e2fSjeremylt       CeedInt num_elem, elem_size, l_size, comp_stride;
64d1d35e2fSjeremylt       ierr = CeedElemRestrictionGetNumElements(r, &num_elem); CeedChkBackend(ierr);
65d1d35e2fSjeremylt       ierr = CeedElemRestrictionGetElementSize(r, &elem_size); CeedChkBackend(ierr);
66d1d35e2fSjeremylt       ierr = CeedElemRestrictionGetLVectorSize(r, &l_size); CeedChkBackend(ierr);
67d1d35e2fSjeremylt       ierr = CeedElemRestrictionGetNumComponents(r, &num_comp); CeedChkBackend(ierr);
68bd33150aSjeremylt 
693ac43b2cSJeremy L Thompson       bool strided;
70e15f9bd0SJeremy L Thompson       ierr = CeedElemRestrictionIsStrided(r, &strided); CeedChkBackend(ierr);
713ac43b2cSJeremy L Thompson       if (strided) {
723ac43b2cSJeremy L Thompson         CeedInt strides[3];
73e15f9bd0SJeremy L Thompson         ierr = CeedElemRestrictionGetStrides(r, &strides); CeedChkBackend(ierr);
74d1d35e2fSjeremylt         ierr = CeedElemRestrictionCreateBlockedStrided(ceed, num_elem, elem_size,
75d1d35e2fSjeremylt                blk_size, num_comp, l_size, strides, &blk_restr[i+start_e]);
76e15f9bd0SJeremy L Thompson         CeedChkBackend(ierr);
773ac43b2cSJeremy L Thompson       } else {
78bd33150aSjeremylt         const CeedInt *offsets = NULL;
79bd33150aSjeremylt         ierr = CeedElemRestrictionGetOffsets(r, CEED_MEM_HOST, &offsets);
80e15f9bd0SJeremy L Thompson         CeedChkBackend(ierr);
81d1d35e2fSjeremylt         ierr = CeedElemRestrictionGetCompStride(r, &comp_stride); CeedChkBackend(ierr);
82d1d35e2fSjeremylt         ierr = CeedElemRestrictionCreateBlocked(ceed, num_elem, elem_size,
83d1d35e2fSjeremylt                                                 blk_size, num_comp, comp_stride,
84d1d35e2fSjeremylt                                                 l_size, CEED_MEM_HOST,
85bd33150aSjeremylt                                                 CEED_COPY_VALUES, offsets,
86d1d35e2fSjeremylt                                                 &blk_restr[i+start_e]);
87e15f9bd0SJeremy L Thompson         CeedChkBackend(ierr);
88e15f9bd0SJeremy L Thompson         ierr = CeedElemRestrictionRestoreOffsets(r, &offsets); CeedChkBackend(ierr);
893ac43b2cSJeremy L Thompson       }
90d1d35e2fSjeremylt       ierr = CeedElemRestrictionCreateVector(blk_restr[i+start_e], NULL,
914fc1f125SJeremy L Thompson                                              &e_vecs_full[i+start_e]);
92e15f9bd0SJeremy L Thompson       CeedChkBackend(ierr);
9389c6efa4Sjeremylt     }
9489c6efa4Sjeremylt 
95d1d35e2fSjeremylt     switch(eval_mode) {
9689c6efa4Sjeremylt     case CEED_EVAL_NONE:
97d1d35e2fSjeremylt       ierr = CeedQFunctionFieldGetSize(qf_fields[i], &size); CeedChkBackend(ierr);
98d1d35e2fSjeremylt       ierr = CeedVectorCreate(ceed, Q*size*blk_size, &e_vecs[i]);
99d1d35e2fSjeremylt       CeedChkBackend(ierr);
100d1d35e2fSjeremylt       ierr = CeedVectorCreate(ceed, Q*size*blk_size, &q_vecs[i]);
101d1d35e2fSjeremylt       CeedChkBackend(ierr);
10289c6efa4Sjeremylt       break;
10389c6efa4Sjeremylt     case CEED_EVAL_INTERP:
104*ebbcc8a3SJeremy L Thompson       ierr = CeedOperatorFieldGetBasis(op_fields[i], &basis); CeedChkBackend(ierr);
105d1d35e2fSjeremylt       ierr = CeedQFunctionFieldGetSize(qf_fields[i], &size); CeedChkBackend(ierr);
106*ebbcc8a3SJeremy L Thompson       ierr = CeedBasisGetNumNodes(basis, &P); CeedChkBackend(ierr);
107*ebbcc8a3SJeremy L Thompson       ierr = CeedBasisGetNumComponents(basis, &num_comp); CeedChkBackend(ierr);
108*ebbcc8a3SJeremy L Thompson       ierr = CeedVectorCreate(ceed, P*num_comp*blk_size, &e_vecs[i]);
109d1d35e2fSjeremylt       CeedChkBackend(ierr);
110d1d35e2fSjeremylt       ierr = CeedVectorCreate(ceed, Q*size*blk_size, &q_vecs[i]);
111d1d35e2fSjeremylt       CeedChkBackend(ierr);
11289c6efa4Sjeremylt       break;
11389c6efa4Sjeremylt     case CEED_EVAL_GRAD:
114d1d35e2fSjeremylt       ierr = CeedOperatorFieldGetBasis(op_fields[i], &basis); CeedChkBackend(ierr);
115d1d35e2fSjeremylt       ierr = CeedQFunctionFieldGetSize(qf_fields[i], &size); CeedChkBackend(ierr);
116*ebbcc8a3SJeremy L Thompson       ierr = CeedBasisGetNumNodes(basis, &P); CeedChkBackend(ierr);
117*ebbcc8a3SJeremy L Thompson       ierr = CeedBasisGetNumComponents(basis, &num_comp); CeedChkBackend(ierr);
118*ebbcc8a3SJeremy L Thompson       ierr = CeedVectorCreate(ceed, P*num_comp*blk_size, &e_vecs[i]);
119e15f9bd0SJeremy L Thompson       CeedChkBackend(ierr);
120d1d35e2fSjeremylt       ierr = CeedVectorCreate(ceed, Q*size*blk_size, &q_vecs[i]);
121d1d35e2fSjeremylt       CeedChkBackend(ierr);
12289c6efa4Sjeremylt       break;
12389c6efa4Sjeremylt     case CEED_EVAL_WEIGHT: // Only on input fields
124d1d35e2fSjeremylt       ierr = CeedOperatorFieldGetBasis(op_fields[i], &basis); CeedChkBackend(ierr);
125d1d35e2fSjeremylt       ierr = CeedVectorCreate(ceed, Q*blk_size, &q_vecs[i]); CeedChkBackend(ierr);
126d1d35e2fSjeremylt       ierr = CeedBasisApply(basis, blk_size, CEED_NOTRANSPOSE,
127d1d35e2fSjeremylt                             CEED_EVAL_WEIGHT, CEED_VECTOR_NONE, q_vecs[i]);
128e15f9bd0SJeremy L Thompson       CeedChkBackend(ierr);
12989c6efa4Sjeremylt 
13089c6efa4Sjeremylt       break;
13189c6efa4Sjeremylt     case CEED_EVAL_DIV:
1325f67fadeSJeremy L Thompson       break; // Not implemented
13389c6efa4Sjeremylt     case CEED_EVAL_CURL:
1345f67fadeSJeremy L Thompson       break; // Not implemented
13589c6efa4Sjeremylt     }
1369c774eddSJeremy L Thompson     if (is_input && !!e_vecs[i]) {
1379c774eddSJeremy L Thompson       ierr = CeedVectorSetArray(e_vecs[i], CEED_MEM_HOST,
1389c774eddSJeremy L Thompson                                 CEED_COPY_VALUES, NULL); CeedChkBackend(ierr);
1399c774eddSJeremy L Thompson     }
14089c6efa4Sjeremylt   }
141e15f9bd0SJeremy L Thompson   return CEED_ERROR_SUCCESS;
14289c6efa4Sjeremylt }
14389c6efa4Sjeremylt 
144f10650afSjeremylt //------------------------------------------------------------------------------
145f10650afSjeremylt // Setup Operator
146f10650afSjeremylt //------------------------------------------------------------------------------
14789c6efa4Sjeremylt static int CeedOperatorSetup_Opt(CeedOperator op) {
14889c6efa4Sjeremylt   int ierr;
1498c1105f8SJeremy L Thompson   bool is_setup_done;
1508c1105f8SJeremy L Thompson   ierr = CeedOperatorIsSetupDone(op, &is_setup_done); CeedChkBackend(ierr);
1518c1105f8SJeremy L Thompson   if (is_setup_done) return CEED_ERROR_SUCCESS;
15289c6efa4Sjeremylt   Ceed ceed;
153e15f9bd0SJeremy L Thompson   ierr = CeedOperatorGetCeed(op, &ceed); CeedChkBackend(ierr);
154d1d35e2fSjeremylt   Ceed_Opt *ceed_impl;
155d1d35e2fSjeremylt   ierr = CeedGetData(ceed, &ceed_impl); CeedChkBackend(ierr);
156d1d35e2fSjeremylt   const CeedInt blk_size = ceed_impl->blk_size;
15789c6efa4Sjeremylt   CeedOperator_Opt *impl;
158e15f9bd0SJeremy L Thompson   ierr = CeedOperatorGetData(op, &impl); CeedChkBackend(ierr);
15989c6efa4Sjeremylt   CeedQFunction qf;
160e15f9bd0SJeremy L Thompson   ierr = CeedOperatorGetQFunction(op, &qf); CeedChkBackend(ierr);
161d1d35e2fSjeremylt   CeedInt Q, num_input_fields, num_output_fields;
162e15f9bd0SJeremy L Thompson   ierr = CeedOperatorGetNumQuadraturePoints(op, &Q); CeedChkBackend(ierr);
1630b454692Sjeremylt   ierr = CeedQFunctionIsIdentity(qf, &impl->is_identity_qf); CeedChkBackend(ierr);
164d1d35e2fSjeremylt   CeedOperatorField *op_input_fields, *op_output_fields;
1657e7773b5SJeremy L Thompson   ierr = CeedOperatorGetFields(op, &num_input_fields, &op_input_fields,
1667e7773b5SJeremy L Thompson                                &num_output_fields, &op_output_fields);
167e15f9bd0SJeremy L Thompson   CeedChkBackend(ierr);
168d1d35e2fSjeremylt   CeedQFunctionField *qf_input_fields, *qf_output_fields;
1697e7773b5SJeremy L Thompson   ierr = CeedQFunctionGetFields(qf, NULL, &qf_input_fields, NULL,
1707e7773b5SJeremy L Thompson                                 &qf_output_fields);
171e15f9bd0SJeremy L Thompson   CeedChkBackend(ierr);
17289c6efa4Sjeremylt 
17389c6efa4Sjeremylt   // Allocate
174d1d35e2fSjeremylt   ierr = CeedCalloc(num_input_fields + num_output_fields, &impl->blk_restr);
175e15f9bd0SJeremy L Thompson   CeedChkBackend(ierr);
1764fc1f125SJeremy L Thompson   ierr = CeedCalloc(num_input_fields + num_output_fields, &impl->e_vecs_full);
177e15f9bd0SJeremy L Thompson   CeedChkBackend(ierr);
17889c6efa4Sjeremylt 
1794fc1f125SJeremy L Thompson   ierr = CeedCalloc(CEED_FIELD_MAX, &impl->input_states); CeedChkBackend(ierr);
180bf4cb664SJeremy L Thompson   ierr = CeedCalloc(CEED_FIELD_MAX, &impl->e_vecs_in); CeedChkBackend(ierr);
181bf4cb664SJeremy L Thompson   ierr = CeedCalloc(CEED_FIELD_MAX, &impl->e_vecs_out); CeedChkBackend(ierr);
182bf4cb664SJeremy L Thompson   ierr = CeedCalloc(CEED_FIELD_MAX, &impl->q_vecs_in); CeedChkBackend(ierr);
183bf4cb664SJeremy L Thompson   ierr = CeedCalloc(CEED_FIELD_MAX, &impl->q_vecs_out); CeedChkBackend(ierr);
18489c6efa4Sjeremylt 
1854fc1f125SJeremy L Thompson   impl->num_inputs = num_input_fields;
1864fc1f125SJeremy L Thompson   impl->num_outputs = num_output_fields;
18789c6efa4Sjeremylt 
18889c6efa4Sjeremylt   // Set up infield and outfield pointer arrays
18989c6efa4Sjeremylt   // Infields
1904fc1f125SJeremy L Thompson   ierr = CeedOperatorSetupFields_Opt(qf, op, true, blk_size, impl->blk_restr,
1914fc1f125SJeremy L Thompson                                      impl->e_vecs_full, impl->e_vecs_in,
192d1d35e2fSjeremylt                                      impl->q_vecs_in, 0, num_input_fields, Q);
193e15f9bd0SJeremy L Thompson   CeedChkBackend(ierr);
19489c6efa4Sjeremylt   // Outfields
1954fc1f125SJeremy L Thompson   ierr = CeedOperatorSetupFields_Opt(qf, op, false, blk_size, impl->blk_restr,
1964fc1f125SJeremy L Thompson                                      impl->e_vecs_full, impl->e_vecs_out,
197d1d35e2fSjeremylt                                      impl->q_vecs_out, num_input_fields,
198d1d35e2fSjeremylt                                      num_output_fields, Q);
199e15f9bd0SJeremy L Thompson   CeedChkBackend(ierr);
20089c6efa4Sjeremylt 
20116911fdaSjeremylt   // Identity QFunctions
2020b454692Sjeremylt   if (impl->is_identity_qf) {
203d1d35e2fSjeremylt     CeedEvalMode in_mode, out_mode;
204d1d35e2fSjeremylt     CeedQFunctionField *in_fields, *out_fields;
2057e7773b5SJeremy L Thompson     ierr = CeedQFunctionGetFields(qf, NULL, &in_fields, NULL, &out_fields);
206e15f9bd0SJeremy L Thompson     CeedChkBackend(ierr);
2070b454692Sjeremylt     ierr = CeedQFunctionFieldGetEvalMode(in_fields[0], &in_mode);
208d1d35e2fSjeremylt     CeedChkBackend(ierr);
2090b454692Sjeremylt     ierr = CeedQFunctionFieldGetEvalMode(out_fields[0], &out_mode);
210d1d35e2fSjeremylt     CeedChkBackend(ierr);
211d1d35e2fSjeremylt 
2120b454692Sjeremylt     if (in_mode == CEED_EVAL_NONE && out_mode == CEED_EVAL_NONE) {
2130b454692Sjeremylt       impl->is_identity_restr_op = true;
2140b454692Sjeremylt     } else {
2150b454692Sjeremylt       ierr = CeedVectorDestroy(&impl->q_vecs_out[0]); CeedChkBackend(ierr);
2160b454692Sjeremylt       impl->q_vecs_out[0] = impl->q_vecs_in[0];
2170b454692Sjeremylt       ierr = CeedVectorAddReference(impl->q_vecs_in[0]); CeedChkBackend(ierr);
21816911fdaSjeremylt     }
21916911fdaSjeremylt   }
22016911fdaSjeremylt 
221e15f9bd0SJeremy L Thompson   ierr = CeedOperatorSetSetupDone(op); CeedChkBackend(ierr);
22289c6efa4Sjeremylt 
223e15f9bd0SJeremy L Thompson   return CEED_ERROR_SUCCESS;
22489c6efa4Sjeremylt }
22589c6efa4Sjeremylt 
226f10650afSjeremylt //------------------------------------------------------------------------------
227f10650afSjeremylt // Setup Input Fields
228f10650afSjeremylt //------------------------------------------------------------------------------
229d1d35e2fSjeremylt static inline int CeedOperatorSetupInputs_Opt(CeedInt num_input_fields,
230d1d35e2fSjeremylt     CeedQFunctionField *qf_input_fields, CeedOperatorField *op_input_fields,
231a0162de9SJeremy L Thompson     CeedVector in_vec, CeedScalar *e_data[2*CEED_FIELD_MAX], CeedOperator_Opt *impl,
232a0162de9SJeremy L Thompson     CeedRequest *request) {
2331d102b48SJeremy L Thompson   CeedInt ierr;
234d1d35e2fSjeremylt   CeedEvalMode eval_mode;
23589c6efa4Sjeremylt   CeedVector vec;
23689c6efa4Sjeremylt   uint64_t state;
23789c6efa4Sjeremylt 
238d1d35e2fSjeremylt   for (CeedInt i=0; i<num_input_fields; i++) {
239d1d35e2fSjeremylt     ierr = CeedQFunctionFieldGetEvalMode(qf_input_fields[i], &eval_mode);
240e15f9bd0SJeremy L Thompson     CeedChkBackend(ierr);
241d1d35e2fSjeremylt     if (eval_mode == CEED_EVAL_WEIGHT) { // Skip
24289c6efa4Sjeremylt     } else {
24389c6efa4Sjeremylt       // Get input vector
244d1d35e2fSjeremylt       ierr = CeedOperatorFieldGetVector(op_input_fields[i], &vec);
245d1d35e2fSjeremylt       CeedChkBackend(ierr);
24689c6efa4Sjeremylt       if (vec != CEED_VECTOR_ACTIVE) {
24789c6efa4Sjeremylt         // Restrict
248e15f9bd0SJeremy L Thompson         ierr = CeedVectorGetState(vec, &state); CeedChkBackend(ierr);
2494fc1f125SJeremy L Thompson         if (state != impl->input_states[i]) {
250d1d35e2fSjeremylt           ierr = CeedElemRestrictionApply(impl->blk_restr[i], CEED_NOTRANSPOSE,
2514fc1f125SJeremy L Thompson                                           vec, impl->e_vecs_full[i], request);
252e15f9bd0SJeremy L Thompson           CeedChkBackend(ierr);
2534fc1f125SJeremy L Thompson           impl->input_states[i] = state;
25489c6efa4Sjeremylt         }
25589c6efa4Sjeremylt         // Get evec
2564fc1f125SJeremy L Thompson         ierr = CeedVectorGetArrayRead(impl->e_vecs_full[i], CEED_MEM_HOST,
257a0162de9SJeremy L Thompson                                       (const CeedScalar **) &e_data[i]);
258e15f9bd0SJeremy L Thompson         CeedChkBackend(ierr);
2599c774eddSJeremy L Thompson       } else {
2609c774eddSJeremy L Thompson         // Set Qvec for CEED_EVAL_NONE
2619c774eddSJeremy L Thompson         if (eval_mode == CEED_EVAL_NONE) {
2629c774eddSJeremy L Thompson           ierr = CeedVectorGetArrayRead(impl->e_vecs_in[i], CEED_MEM_HOST,
2639c774eddSJeremy L Thompson                                         (const CeedScalar **)&e_data[i]);
2649c774eddSJeremy L Thompson           CeedChkBackend(ierr);
2659c774eddSJeremy L Thompson           ierr = CeedVectorSetArray(impl->q_vecs_in[i], CEED_MEM_HOST,
2669c774eddSJeremy L Thompson                                     CEED_USE_POINTER, e_data[i]); CeedChkBackend(ierr);
2679c774eddSJeremy L Thompson           ierr = CeedVectorRestoreArrayRead(impl->e_vecs_in[i],
2689c774eddSJeremy L Thompson                                             (const CeedScalar **)&e_data[i]);
2699c774eddSJeremy L Thompson           CeedChkBackend(ierr);
2709c774eddSJeremy L Thompson         }
2719c774eddSJeremy L Thompson       }
27289c6efa4Sjeremylt     }
27389c6efa4Sjeremylt   }
274e15f9bd0SJeremy L Thompson   return CEED_ERROR_SUCCESS;
2751d102b48SJeremy L Thompson }
27689c6efa4Sjeremylt 
277f10650afSjeremylt //------------------------------------------------------------------------------
278f10650afSjeremylt // Input Basis Action
279f10650afSjeremylt //------------------------------------------------------------------------------
2801d102b48SJeremy L Thompson static inline int CeedOperatorInputBasis_Opt(CeedInt e, CeedInt Q,
281d1d35e2fSjeremylt     CeedQFunctionField *qf_input_fields, CeedOperatorField *op_input_fields,
282d1d35e2fSjeremylt     CeedInt num_input_fields, CeedInt blk_size, CeedVector in_vec, bool skip_active,
283a0162de9SJeremy L Thompson     CeedScalar *e_data[2*CEED_FIELD_MAX], CeedOperator_Opt *impl,
284a0162de9SJeremy L Thompson     CeedRequest *request) {
2851d102b48SJeremy L Thompson   CeedInt ierr;
286d1d35e2fSjeremylt   CeedInt dim, elem_size, size;
287d1d35e2fSjeremylt   CeedElemRestriction elem_restr;
288d1d35e2fSjeremylt   CeedEvalMode eval_mode;
2891d102b48SJeremy L Thompson   CeedBasis basis;
2901d102b48SJeremy L Thompson   CeedVector vec;
29189c6efa4Sjeremylt 
292d1d35e2fSjeremylt   for (CeedInt i=0; i<num_input_fields; i++) {
293d1d35e2fSjeremylt     ierr = CeedOperatorFieldGetVector(op_input_fields[i], &vec);
294d1d35e2fSjeremylt     CeedChkBackend(ierr);
2951d102b48SJeremy L Thompson     // Skip active input
296d1d35e2fSjeremylt     if (skip_active) {
2971d102b48SJeremy L Thompson       if (vec == CEED_VECTOR_ACTIVE)
2981d102b48SJeremy L Thompson         continue;
2991d102b48SJeremy L Thompson     }
3001d102b48SJeremy L Thompson 
301d1d35e2fSjeremylt     CeedInt active_in = 0;
302d1d35e2fSjeremylt     // Get elem_size, eval_mode, size
303d1d35e2fSjeremylt     ierr = CeedOperatorFieldGetElemRestriction(op_input_fields[i], &elem_restr);
304e15f9bd0SJeremy L Thompson     CeedChkBackend(ierr);
305d1d35e2fSjeremylt     ierr = CeedElemRestrictionGetElementSize(elem_restr, &elem_size);
306e15f9bd0SJeremy L Thompson     CeedChkBackend(ierr);
307d1d35e2fSjeremylt     ierr = CeedQFunctionFieldGetEvalMode(qf_input_fields[i], &eval_mode);
308e15f9bd0SJeremy L Thompson     CeedChkBackend(ierr);
309d1d35e2fSjeremylt     ierr = CeedQFunctionFieldGetSize(qf_input_fields[i], &size);
310d1d35e2fSjeremylt     CeedChkBackend(ierr);
31189c6efa4Sjeremylt     // Restrict block active input
31289c6efa4Sjeremylt     if (vec == CEED_VECTOR_ACTIVE) {
313d1d35e2fSjeremylt       ierr = CeedElemRestrictionApplyBlock(impl->blk_restr[i], e/blk_size,
314d1d35e2fSjeremylt                                            CEED_NOTRANSPOSE, in_vec,
315d1d35e2fSjeremylt                                            impl->e_vecs_in[i], request);
316e15f9bd0SJeremy L Thompson       CeedChkBackend(ierr);
317d1d35e2fSjeremylt       active_in = 1;
31889c6efa4Sjeremylt     }
31989c6efa4Sjeremylt     // Basis action
320d1d35e2fSjeremylt     switch(eval_mode) {
32189c6efa4Sjeremylt     case CEED_EVAL_NONE:
322d1d35e2fSjeremylt       if (!active_in) {
323d1d35e2fSjeremylt         ierr = CeedVectorSetArray(impl->q_vecs_in[i], CEED_MEM_HOST,
324a0162de9SJeremy L Thompson                                   CEED_USE_POINTER, &e_data[i][e*Q*size]);
325a0162de9SJeremy L Thompson         CeedChkBackend(ierr);
32689c6efa4Sjeremylt       }
32789c6efa4Sjeremylt       break;
32889c6efa4Sjeremylt     case CEED_EVAL_INTERP:
329d1d35e2fSjeremylt       ierr = CeedOperatorFieldGetBasis(op_input_fields[i], &basis);
330e15f9bd0SJeremy L Thompson       CeedChkBackend(ierr);
331d1d35e2fSjeremylt       if (!active_in) {
332d1d35e2fSjeremylt         ierr = CeedVectorSetArray(impl->e_vecs_in[i], CEED_MEM_HOST,
333a0162de9SJeremy L Thompson                                   CEED_USE_POINTER, &e_data[i][e*elem_size*size]);
334e15f9bd0SJeremy L Thompson         CeedChkBackend(ierr);
33589c6efa4Sjeremylt       }
336d1d35e2fSjeremylt       ierr = CeedBasisApply(basis, blk_size, CEED_NOTRANSPOSE,
337d1d35e2fSjeremylt                             CEED_EVAL_INTERP, impl->e_vecs_in[i],
338d1d35e2fSjeremylt                             impl->q_vecs_in[i]); CeedChkBackend(ierr);
33989c6efa4Sjeremylt       break;
34089c6efa4Sjeremylt     case CEED_EVAL_GRAD:
341d1d35e2fSjeremylt       ierr = CeedOperatorFieldGetBasis(op_input_fields[i], &basis);
342e15f9bd0SJeremy L Thompson       CeedChkBackend(ierr);
343d1d35e2fSjeremylt       if (!active_in) {
344e15f9bd0SJeremy L Thompson         ierr = CeedBasisGetDimension(basis, &dim); CeedChkBackend(ierr);
345d1d35e2fSjeremylt         ierr = CeedVectorSetArray(impl->e_vecs_in[i], CEED_MEM_HOST,
34689c6efa4Sjeremylt                                   CEED_USE_POINTER,
347a0162de9SJeremy L Thompson                                   &e_data[i][e*elem_size*size/dim]);
348e15f9bd0SJeremy L Thompson         CeedChkBackend(ierr);
34989c6efa4Sjeremylt       }
350d1d35e2fSjeremylt       ierr = CeedBasisApply(basis, blk_size, CEED_NOTRANSPOSE,
351d1d35e2fSjeremylt                             CEED_EVAL_GRAD, impl->e_vecs_in[i],
352d1d35e2fSjeremylt                             impl->q_vecs_in[i]); CeedChkBackend(ierr);
35389c6efa4Sjeremylt       break;
35489c6efa4Sjeremylt     case CEED_EVAL_WEIGHT:
35589c6efa4Sjeremylt       break;  // No action
356bbfacfcdSjeremylt     // LCOV_EXCL_START
35789c6efa4Sjeremylt     case CEED_EVAL_DIV:
3581d102b48SJeremy L Thompson     case CEED_EVAL_CURL: {
359d1d35e2fSjeremylt       ierr = CeedOperatorFieldGetBasis(op_input_fields[i], &basis);
360e15f9bd0SJeremy L Thompson       CeedChkBackend(ierr);
3611d102b48SJeremy L Thompson       Ceed ceed;
362e15f9bd0SJeremy L Thompson       ierr = CeedBasisGetCeed(basis, &ceed); CeedChkBackend(ierr);
363e15f9bd0SJeremy L Thompson       return CeedError(ceed, CEED_ERROR_BACKEND,
364e15f9bd0SJeremy L Thompson                        "Ceed evaluation mode not implemented");
365bbfacfcdSjeremylt       // LCOV_EXCL_STOP
3661d102b48SJeremy L Thompson     }
3671d102b48SJeremy L Thompson     }
3681d102b48SJeremy L Thompson   }
369e15f9bd0SJeremy L Thompson   return CEED_ERROR_SUCCESS;
3701d102b48SJeremy L Thompson }
37189c6efa4Sjeremylt 
372f10650afSjeremylt //------------------------------------------------------------------------------
373f10650afSjeremylt // Output Basis Action
374f10650afSjeremylt //------------------------------------------------------------------------------
3751d102b48SJeremy L Thompson static inline int CeedOperatorOutputBasis_Opt(CeedInt e, CeedInt Q,
376d1d35e2fSjeremylt     CeedQFunctionField *qf_output_fields, CeedOperatorField *op_output_fields,
377d1d35e2fSjeremylt     CeedInt blk_size, CeedInt num_input_fields, CeedInt num_output_fields,
378d1d35e2fSjeremylt     CeedOperator op, CeedVector out_vec, CeedOperator_Opt *impl,
3791d102b48SJeremy L Thompson     CeedRequest *request) {
3801d102b48SJeremy L Thompson   CeedInt ierr;
381d1d35e2fSjeremylt   CeedElemRestriction elem_restr;
382d1d35e2fSjeremylt   CeedEvalMode eval_mode;
3831d102b48SJeremy L Thompson   CeedBasis basis;
3841d102b48SJeremy L Thompson   CeedVector vec;
3851d102b48SJeremy L Thompson 
386d1d35e2fSjeremylt   for (CeedInt i=0; i<num_output_fields; i++) {
387d1d35e2fSjeremylt     // Get elem_size, eval_mode, size
388d1d35e2fSjeremylt     ierr = CeedOperatorFieldGetElemRestriction(op_output_fields[i], &elem_restr);
389e15f9bd0SJeremy L Thompson     CeedChkBackend(ierr);
390d1d35e2fSjeremylt     ierr = CeedQFunctionFieldGetEvalMode(qf_output_fields[i], &eval_mode);
391e15f9bd0SJeremy L Thompson     CeedChkBackend(ierr);
39289c6efa4Sjeremylt     // Basis action
393d1d35e2fSjeremylt     switch(eval_mode) {
39489c6efa4Sjeremylt     case CEED_EVAL_NONE:
39589c6efa4Sjeremylt       break; // No action
39689c6efa4Sjeremylt     case CEED_EVAL_INTERP:
397d1d35e2fSjeremylt       ierr = CeedOperatorFieldGetBasis(op_output_fields[i], &basis);
398e15f9bd0SJeremy L Thompson       CeedChkBackend(ierr);
399d1d35e2fSjeremylt       ierr = CeedBasisApply(basis, blk_size, CEED_TRANSPOSE,
400d1d35e2fSjeremylt                             CEED_EVAL_INTERP, impl->q_vecs_out[i],
401d1d35e2fSjeremylt                             impl->e_vecs_out[i]); CeedChkBackend(ierr);
40289c6efa4Sjeremylt       break;
40389c6efa4Sjeremylt     case CEED_EVAL_GRAD:
404d1d35e2fSjeremylt       ierr = CeedOperatorFieldGetBasis(op_output_fields[i], &basis);
405e15f9bd0SJeremy L Thompson       CeedChkBackend(ierr);
406d1d35e2fSjeremylt       ierr = CeedBasisApply(basis, blk_size, CEED_TRANSPOSE,
407d1d35e2fSjeremylt                             CEED_EVAL_GRAD, impl->q_vecs_out[i],
408d1d35e2fSjeremylt                             impl->e_vecs_out[i]); CeedChkBackend(ierr);
40989c6efa4Sjeremylt       break;
410c042f62fSJeremy L Thompson     // LCOV_EXCL_START
411bbfacfcdSjeremylt     case CEED_EVAL_WEIGHT: {
41289c6efa4Sjeremylt       Ceed ceed;
413e15f9bd0SJeremy L Thompson       ierr = CeedOperatorGetCeed(op, &ceed); CeedChkBackend(ierr);
414e15f9bd0SJeremy L Thompson       return CeedError(ceed, CEED_ERROR_BACKEND,
415e15f9bd0SJeremy L Thompson                        "CEED_EVAL_WEIGHT cannot be an output "
4161d102b48SJeremy L Thompson                        "evaluation mode");
41789c6efa4Sjeremylt     }
41889c6efa4Sjeremylt     case CEED_EVAL_DIV:
4191d102b48SJeremy L Thompson     case CEED_EVAL_CURL: {
4201d102b48SJeremy L Thompson       Ceed ceed;
421e15f9bd0SJeremy L Thompson       ierr = CeedOperatorGetCeed(op, &ceed); CeedChkBackend(ierr);
422e15f9bd0SJeremy L Thompson       return CeedError(ceed, CEED_ERROR_BACKEND,
423e15f9bd0SJeremy L Thompson                        "Ceed evaluation mode not implemented");
424bbfacfcdSjeremylt       // LCOV_EXCL_STOP
4251d102b48SJeremy L Thompson     }
42689c6efa4Sjeremylt     }
42789c6efa4Sjeremylt     // Restrict output block
42889c6efa4Sjeremylt     // Get output vector
429d1d35e2fSjeremylt     ierr = CeedOperatorFieldGetVector(op_output_fields[i], &vec);
430e15f9bd0SJeremy L Thompson     CeedChkBackend(ierr);
43189c6efa4Sjeremylt     if (vec == CEED_VECTOR_ACTIVE)
432d1d35e2fSjeremylt       vec = out_vec;
43389c6efa4Sjeremylt     // Restrict
4344fc1f125SJeremy L Thompson     ierr = CeedElemRestrictionApplyBlock(impl->blk_restr[i+impl->num_inputs],
435d1d35e2fSjeremylt                                          e/blk_size, CEED_TRANSPOSE,
436d1d35e2fSjeremylt                                          impl->e_vecs_out[i], vec, request);
437e15f9bd0SJeremy L Thompson     CeedChkBackend(ierr);
43889c6efa4Sjeremylt   }
439e15f9bd0SJeremy L Thompson   return CEED_ERROR_SUCCESS;
44089c6efa4Sjeremylt }
44189c6efa4Sjeremylt 
442f10650afSjeremylt //------------------------------------------------------------------------------
443f10650afSjeremylt // Restore Input Vectors
444f10650afSjeremylt //------------------------------------------------------------------------------
445d1d35e2fSjeremylt static inline int CeedOperatorRestoreInputs_Opt(CeedInt num_input_fields,
446d1d35e2fSjeremylt     CeedQFunctionField *qf_input_fields, CeedOperatorField *op_input_fields,
447a0162de9SJeremy L Thompson     CeedScalar *e_data[2*CEED_FIELD_MAX], CeedOperator_Opt *impl) {
4481d102b48SJeremy L Thompson   CeedInt ierr;
4491d102b48SJeremy L Thompson 
450d1d35e2fSjeremylt   for (CeedInt i=0; i<num_input_fields; i++) {
4519c774eddSJeremy L Thompson     CeedEvalMode eval_mode;
4529c774eddSJeremy L Thompson     CeedVector vec;
453d1d35e2fSjeremylt     ierr = CeedQFunctionFieldGetEvalMode(qf_input_fields[i], &eval_mode);
454e15f9bd0SJeremy L Thompson     CeedChkBackend(ierr);
4559c774eddSJeremy L Thompson     ierr = CeedOperatorFieldGetVector(op_input_fields[i], &vec);
4569c774eddSJeremy L Thompson     CeedChkBackend(ierr);
4579c774eddSJeremy L Thompson     if (eval_mode != CEED_EVAL_WEIGHT && vec != CEED_VECTOR_ACTIVE) {
4584fc1f125SJeremy L Thompson       ierr = CeedVectorRestoreArrayRead(impl->e_vecs_full[i],
459a0162de9SJeremy L Thompson                                         (const CeedScalar **) &e_data[i]);
460e15f9bd0SJeremy L Thompson       CeedChkBackend(ierr);
46189c6efa4Sjeremylt     }
46289c6efa4Sjeremylt   }
463e15f9bd0SJeremy L Thompson   return CEED_ERROR_SUCCESS;
4641d102b48SJeremy L Thompson }
4651d102b48SJeremy L Thompson 
466f10650afSjeremylt //------------------------------------------------------------------------------
467f10650afSjeremylt // Operator Apply
468f10650afSjeremylt //------------------------------------------------------------------------------
469d1d35e2fSjeremylt static int CeedOperatorApplyAdd_Opt(CeedOperator op, CeedVector in_vec,
470d1d35e2fSjeremylt                                     CeedVector out_vec, CeedRequest *request) {
4711d102b48SJeremy L Thompson   int ierr;
4721d102b48SJeremy L Thompson   Ceed ceed;
473e15f9bd0SJeremy L Thompson   ierr = CeedOperatorGetCeed(op, &ceed); CeedChkBackend(ierr);
474d1d35e2fSjeremylt   Ceed_Opt *ceed_impl;
475d1d35e2fSjeremylt   ierr = CeedGetData(ceed, &ceed_impl); CeedChkBackend(ierr);
476d1d35e2fSjeremylt   CeedInt blk_size = ceed_impl->blk_size;
4771d102b48SJeremy L Thompson   CeedOperator_Opt *impl;
478e15f9bd0SJeremy L Thompson   ierr = CeedOperatorGetData(op, &impl); CeedChkBackend(ierr);
479d1d35e2fSjeremylt   CeedInt Q, num_input_fields, num_output_fields, num_elem;
480d1d35e2fSjeremylt   ierr = CeedOperatorGetNumElements(op, &num_elem); CeedChkBackend(ierr);
481e15f9bd0SJeremy L Thompson   ierr = CeedOperatorGetNumQuadraturePoints(op, &Q); CeedChkBackend(ierr);
482d1d35e2fSjeremylt   CeedInt num_blks = (num_elem/blk_size) + !!(num_elem%blk_size);
4831d102b48SJeremy L Thompson   CeedQFunction qf;
484e15f9bd0SJeremy L Thompson   ierr = CeedOperatorGetQFunction(op, &qf); CeedChkBackend(ierr);
485d1d35e2fSjeremylt   CeedOperatorField *op_input_fields, *op_output_fields;
4867e7773b5SJeremy L Thompson   ierr = CeedOperatorGetFields(op, &num_input_fields, &op_input_fields,
4877e7773b5SJeremy L Thompson                                &num_output_fields, &op_output_fields);
488e15f9bd0SJeremy L Thompson   CeedChkBackend(ierr);
489d1d35e2fSjeremylt   CeedQFunctionField *qf_input_fields, *qf_output_fields;
4907e7773b5SJeremy L Thompson   ierr = CeedQFunctionGetFields(qf, NULL, &qf_input_fields, NULL,
4917e7773b5SJeremy L Thompson                                 &qf_output_fields);
492e15f9bd0SJeremy L Thompson   CeedChkBackend(ierr);
493d1d35e2fSjeremylt   CeedEvalMode eval_mode;
494a0162de9SJeremy L Thompson   CeedScalar *e_data[2*CEED_FIELD_MAX] = {0};
4951d102b48SJeremy L Thompson 
4961d102b48SJeremy L Thompson   // Setup
497e15f9bd0SJeremy L Thompson   ierr = CeedOperatorSetup_Opt(op); CeedChkBackend(ierr);
4981d102b48SJeremy L Thompson 
4990b454692Sjeremylt   // Restriction only operator
5000b454692Sjeremylt   if (impl->is_identity_restr_op) {
5010b454692Sjeremylt     for (CeedInt b=0; b<num_blks; b++) {
5020b454692Sjeremylt       ierr = CeedElemRestrictionApplyBlock(impl->blk_restr[0], b, CEED_NOTRANSPOSE,
5030b454692Sjeremylt                                            in_vec, impl->e_vecs_in[0], request); CeedChkBackend(ierr);
5040b454692Sjeremylt       ierr = CeedElemRestrictionApplyBlock(impl->blk_restr[1], b, CEED_TRANSPOSE,
5050b454692Sjeremylt                                            impl->e_vecs_in[0], out_vec, request); CeedChkBackend(ierr);
5060b454692Sjeremylt     }
5070b454692Sjeremylt     return CEED_ERROR_SUCCESS;
5080b454692Sjeremylt   }
5090b454692Sjeremylt 
5101d102b48SJeremy L Thompson   // Input Evecs and Restriction
511d1d35e2fSjeremylt   ierr = CeedOperatorSetupInputs_Opt(num_input_fields, qf_input_fields,
512a0162de9SJeremy L Thompson                                      op_input_fields, in_vec, e_data,
513a0162de9SJeremy L Thompson                                      impl, request); CeedChkBackend(ierr);
5141d102b48SJeremy L Thompson 
5151d102b48SJeremy L Thompson   // Output Lvecs, Evecs, and Qvecs
516d1d35e2fSjeremylt   for (CeedInt i=0; i<num_output_fields; i++) {
5171d102b48SJeremy L Thompson     // Set Qvec if needed
518d1d35e2fSjeremylt     ierr = CeedQFunctionFieldGetEvalMode(qf_output_fields[i], &eval_mode);
519e15f9bd0SJeremy L Thompson     CeedChkBackend(ierr);
520d1d35e2fSjeremylt     if (eval_mode == CEED_EVAL_NONE) {
5211d102b48SJeremy L Thompson       // Set qvec to single block evec
5229c774eddSJeremy L Thompson       ierr = CeedVectorGetArrayWrite(impl->e_vecs_out[i], CEED_MEM_HOST,
523a0162de9SJeremy L Thompson                                      &e_data[i + num_input_fields]);
524e15f9bd0SJeremy L Thompson       CeedChkBackend(ierr);
525d1d35e2fSjeremylt       ierr = CeedVectorSetArray(impl->q_vecs_out[i], CEED_MEM_HOST,
526a0162de9SJeremy L Thompson                                 CEED_USE_POINTER, e_data[i + num_input_fields]);
527a0162de9SJeremy L Thompson       CeedChkBackend(ierr);
528d1d35e2fSjeremylt       ierr = CeedVectorRestoreArray(impl->e_vecs_out[i],
529a0162de9SJeremy L Thompson                                     &e_data[i + num_input_fields]);
530e15f9bd0SJeremy L Thompson       CeedChkBackend(ierr);
5311d102b48SJeremy L Thompson     }
5321d102b48SJeremy L Thompson   }
5331d102b48SJeremy L Thompson 
5341d102b48SJeremy L Thompson   // Loop through elements
535d1d35e2fSjeremylt   for (CeedInt e=0; e<num_blks*blk_size; e+=blk_size) {
5361d102b48SJeremy L Thompson     // Input basis apply
537d1d35e2fSjeremylt     ierr = CeedOperatorInputBasis_Opt(e, Q, qf_input_fields, op_input_fields,
538d1d35e2fSjeremylt                                       num_input_fields, blk_size, in_vec, false,
539a0162de9SJeremy L Thompson                                       e_data, impl, request); CeedChkBackend(ierr);
5401d102b48SJeremy L Thompson 
5411d102b48SJeremy L Thompson     // Q function
5420b454692Sjeremylt     if (!impl->is_identity_qf) {
543d1d35e2fSjeremylt       ierr = CeedQFunctionApply(qf, Q*blk_size, impl->q_vecs_in, impl->q_vecs_out);
544e15f9bd0SJeremy L Thompson       CeedChkBackend(ierr);
54516911fdaSjeremylt     }
5461d102b48SJeremy L Thompson 
5471d102b48SJeremy L Thompson     // Output basis apply and restrict
548d1d35e2fSjeremylt     ierr = CeedOperatorOutputBasis_Opt(e, Q, qf_output_fields, op_output_fields,
549d1d35e2fSjeremylt                                        blk_size, num_input_fields, num_output_fields,
550d1d35e2fSjeremylt                                        op, out_vec, impl, request);
551e15f9bd0SJeremy L Thompson     CeedChkBackend(ierr);
5521d102b48SJeremy L Thompson   }
5531d102b48SJeremy L Thompson 
5541d102b48SJeremy L Thompson   // Restore input arrays
555d1d35e2fSjeremylt   ierr = CeedOperatorRestoreInputs_Opt(num_input_fields, qf_input_fields,
556a0162de9SJeremy L Thompson                                        op_input_fields, e_data, impl);
557e15f9bd0SJeremy L Thompson   CeedChkBackend(ierr);
55889c6efa4Sjeremylt 
559e15f9bd0SJeremy L Thompson   return CEED_ERROR_SUCCESS;
56089c6efa4Sjeremylt }
56189c6efa4Sjeremylt 
562f10650afSjeremylt //------------------------------------------------------------------------------
56370a7ffb3SJeremy L Thompson // Core code for linear QFunction assembly
564f10650afSjeremylt //------------------------------------------------------------------------------
56570a7ffb3SJeremy L Thompson static inline int CeedOperatorLinearAssembleQFunctionCore_Opt(CeedOperator op,
56670a7ffb3SJeremy L Thompson     bool build_objects, CeedVector *assembled, CeedElemRestriction *rstr,
56770a7ffb3SJeremy L Thompson     CeedRequest *request) {
5681d102b48SJeremy L Thompson   int ierr;
5691d102b48SJeremy L Thompson   Ceed ceed;
570e15f9bd0SJeremy L Thompson   ierr = CeedOperatorGetCeed(op, &ceed); CeedChkBackend(ierr);
571d1d35e2fSjeremylt   Ceed_Opt *ceed_impl;
572d1d35e2fSjeremylt   ierr = CeedGetData(ceed, &ceed_impl); CeedChkBackend(ierr);
573d1d35e2fSjeremylt   const CeedInt blk_size = ceed_impl->blk_size;
5741d102b48SJeremy L Thompson   CeedOperator_Opt *impl;
575e15f9bd0SJeremy L Thompson   ierr = CeedOperatorGetData(op, &impl); CeedChkBackend(ierr);
576d1d35e2fSjeremylt   CeedInt Q, num_input_fields, num_output_fields, num_elem, size;
577d1d35e2fSjeremylt   ierr = CeedOperatorGetNumElements(op, &num_elem); CeedChkBackend(ierr);
578e15f9bd0SJeremy L Thompson   ierr = CeedOperatorGetNumQuadraturePoints(op, &Q); CeedChkBackend(ierr);
579d1d35e2fSjeremylt   CeedInt num_blks = (num_elem/blk_size) + !!(num_elem%blk_size);
5801d102b48SJeremy L Thompson   CeedQFunction qf;
581e15f9bd0SJeremy L Thompson   ierr = CeedOperatorGetQFunction(op, &qf); CeedChkBackend(ierr);
582d1d35e2fSjeremylt   CeedOperatorField *op_input_fields, *op_output_fields;
5837e7773b5SJeremy L Thompson   ierr = CeedOperatorGetFields(op, &num_input_fields, &op_input_fields,
5847e7773b5SJeremy L Thompson                                &num_output_fields, &op_output_fields);
585e15f9bd0SJeremy L Thompson   CeedChkBackend(ierr);
586d1d35e2fSjeremylt   CeedQFunctionField *qf_input_fields, *qf_output_fields;
5877e7773b5SJeremy L Thompson   ierr = CeedQFunctionGetFields(qf, NULL, &qf_input_fields, NULL,
5887e7773b5SJeremy L Thompson                                 &qf_output_fields);
589e15f9bd0SJeremy L Thompson   CeedChkBackend(ierr);
5904fc1f125SJeremy L Thompson   CeedVector vec, l_vec = impl->qf_l_vec;
5914fc1f125SJeremy L Thompson   CeedInt num_active_in = impl->num_active_in,
5924fc1f125SJeremy L Thompson           num_active_out = impl->num_active_out;
593bb219a0fSJeremy L Thompson   CeedVector *active_in = impl->qf_active_in;
5941d102b48SJeremy L Thompson   CeedScalar *a, *tmp;
595a0162de9SJeremy L Thompson   CeedScalar *e_data[2*CEED_FIELD_MAX] = {0};
5961d102b48SJeremy L Thompson 
5971d102b48SJeremy L Thompson   // Setup
598e15f9bd0SJeremy L Thompson   ierr = CeedOperatorSetup_Opt(op); CeedChkBackend(ierr);
5991d102b48SJeremy L Thompson 
60016911fdaSjeremylt   // Check for identity
6010b454692Sjeremylt   if (impl->is_identity_qf)
60216911fdaSjeremylt     // LCOV_EXCL_START
603e15f9bd0SJeremy L Thompson     return CeedError(ceed, CEED_ERROR_BACKEND,
604e15f9bd0SJeremy L Thompson                      "Assembling identity qfunctions not supported");
60516911fdaSjeremylt   // LCOV_EXCL_STOP
60616911fdaSjeremylt 
6071d102b48SJeremy L Thompson   // Input Evecs and Restriction
608d1d35e2fSjeremylt   ierr = CeedOperatorSetupInputs_Opt(num_input_fields, qf_input_fields,
609a0162de9SJeremy L Thompson                                      op_input_fields, NULL, e_data,
610a0162de9SJeremy L Thompson                                      impl, request); CeedChkBackend(ierr);
6111d102b48SJeremy L Thompson 
6121d102b48SJeremy L Thompson   // Count number of active input fields
613bb219a0fSJeremy L Thompson   if (!num_active_in) {
614d1d35e2fSjeremylt     for (CeedInt i=0; i<num_input_fields; i++) {
6151d102b48SJeremy L Thompson       // Get input vector
616d1d35e2fSjeremylt       ierr = CeedOperatorFieldGetVector(op_input_fields[i], &vec);
617d1d35e2fSjeremylt       CeedChkBackend(ierr);
6181d102b48SJeremy L Thompson       // Check if active input
6191d102b48SJeremy L Thompson       if (vec == CEED_VECTOR_ACTIVE) {
620d1d35e2fSjeremylt         ierr = CeedQFunctionFieldGetSize(qf_input_fields[i], &size);
621e15f9bd0SJeremy L Thompson         CeedChkBackend(ierr);
622d1d35e2fSjeremylt         ierr = CeedVectorSetValue(impl->q_vecs_in[i], 0.0); CeedChkBackend(ierr);
623d1d35e2fSjeremylt         ierr = CeedVectorGetArray(impl->q_vecs_in[i], CEED_MEM_HOST, &tmp);
624d1d35e2fSjeremylt         CeedChkBackend(ierr);
625d1d35e2fSjeremylt         ierr = CeedRealloc(num_active_in + size, &active_in); CeedChkBackend(ierr);
6261d102b48SJeremy L Thompson         for (CeedInt field=0; field<size; field++) {
627d1d35e2fSjeremylt           ierr = CeedVectorCreate(ceed, Q*blk_size, &active_in[num_active_in+field]);
628e15f9bd0SJeremy L Thompson           CeedChkBackend(ierr);
629d1d35e2fSjeremylt           ierr = CeedVectorSetArray(active_in[num_active_in+field], CEED_MEM_HOST,
630d1d35e2fSjeremylt                                     CEED_USE_POINTER, &tmp[field*Q*blk_size]);
631e15f9bd0SJeremy L Thompson           CeedChkBackend(ierr);
6321d102b48SJeremy L Thompson         }
633d1d35e2fSjeremylt         num_active_in += size;
634d1d35e2fSjeremylt         ierr = CeedVectorRestoreArray(impl->q_vecs_in[i], &tmp); CeedChkBackend(ierr);
6351d102b48SJeremy L Thompson       }
63689c6efa4Sjeremylt     }
6374fc1f125SJeremy L Thompson     impl->num_active_in = num_active_in;
638bb219a0fSJeremy L Thompson     impl->qf_active_in = active_in;
639bb219a0fSJeremy L Thompson   }
64089c6efa4Sjeremylt 
6411d102b48SJeremy L Thompson   // Count number of active output fields
642bb219a0fSJeremy L Thompson   if (!num_active_out) {
643d1d35e2fSjeremylt     for (CeedInt i=0; i<num_output_fields; i++) {
6441d102b48SJeremy L Thompson       // Get output vector
645d1d35e2fSjeremylt       ierr = CeedOperatorFieldGetVector(op_output_fields[i], &vec);
646e15f9bd0SJeremy L Thompson       CeedChkBackend(ierr);
6471d102b48SJeremy L Thompson       // Check if active output
6481d102b48SJeremy L Thompson       if (vec == CEED_VECTOR_ACTIVE) {
649d1d35e2fSjeremylt         ierr = CeedQFunctionFieldGetSize(qf_output_fields[i], &size);
650e15f9bd0SJeremy L Thompson         CeedChkBackend(ierr);
651d1d35e2fSjeremylt         num_active_out += size;
6521d102b48SJeremy L Thompson       }
6531d102b48SJeremy L Thompson     }
6544fc1f125SJeremy L Thompson     impl->num_active_out = num_active_out;
655bb219a0fSJeremy L Thompson   }
6561d102b48SJeremy L Thompson 
6571d102b48SJeremy L Thompson   // Check sizes
658d1d35e2fSjeremylt   if (!num_active_in || !num_active_out)
6591d102b48SJeremy L Thompson     // LCOV_EXCL_START
660e15f9bd0SJeremy L Thompson     return CeedError(ceed, CEED_ERROR_BACKEND,
661e15f9bd0SJeremy L Thompson                      "Cannot assemble QFunction without active inputs "
6621d102b48SJeremy L Thompson                      "and outputs");
6631d102b48SJeremy L Thompson   // LCOV_EXCL_STOP
6641d102b48SJeremy L Thompson 
6654fc1f125SJeremy L Thompson   // Setup l_vec
6664fc1f125SJeremy L Thompson   if (!l_vec) {
667d1d35e2fSjeremylt     ierr = CeedVectorCreate(ceed, num_blks*blk_size*Q*num_active_in*num_active_out,
6684fc1f125SJeremy L Thompson                             &l_vec); CeedChkBackend(ierr);
6699c774eddSJeremy L Thompson     ierr = CeedVectorSetValue(l_vec, 0.0); CeedChkBackend(ierr);
6704fc1f125SJeremy L Thompson     impl->qf_l_vec = l_vec;
671bb219a0fSJeremy L Thompson   }
6724fc1f125SJeremy L Thompson   ierr = CeedVectorGetArray(l_vec, CEED_MEM_HOST, &a); CeedChkBackend(ierr);
6731d102b48SJeremy L Thompson 
67470a7ffb3SJeremy L Thompson   // Build objects if needed
675d1d35e2fSjeremylt   CeedInt strides[3] = {1, Q, num_active_in *num_active_out*Q};
67670a7ffb3SJeremy L Thompson   if (build_objects) {
67770a7ffb3SJeremy L Thompson     // Create output restriction
678d1d35e2fSjeremylt     ierr = CeedElemRestrictionCreateStrided(ceed, num_elem, Q,
679d1d35e2fSjeremylt                                             num_active_in*num_active_out,
680d1d35e2fSjeremylt                                             num_active_in*num_active_out*num_elem*Q,
681e15f9bd0SJeremy L Thompson                                             strides, rstr); CeedChkBackend(ierr);
6821d102b48SJeremy L Thompson     // Create assembled vector
683d1d35e2fSjeremylt     ierr = CeedVectorCreate(ceed, num_elem*Q*num_active_in*num_active_out,
684e15f9bd0SJeremy L Thompson                             assembled); CeedChkBackend(ierr);
68570a7ffb3SJeremy L Thompson   }
6861d102b48SJeremy L Thompson 
6871d102b48SJeremy L Thompson   // Loop through elements
688d1d35e2fSjeremylt   for (CeedInt e=0; e<num_blks*blk_size; e+=blk_size) {
6891d102b48SJeremy L Thompson     // Input basis apply
690d1d35e2fSjeremylt     ierr = CeedOperatorInputBasis_Opt(e, Q, qf_input_fields, op_input_fields,
691d1d35e2fSjeremylt                                       num_input_fields, blk_size, NULL, true,
692a0162de9SJeremy L Thompson                                       e_data, impl, request); CeedChkBackend(ierr);
6931d102b48SJeremy L Thompson 
6941d102b48SJeremy L Thompson     // Assemble QFunction
695d1d35e2fSjeremylt     for (CeedInt in=0; in<num_active_in; in++) {
6961d102b48SJeremy L Thompson       // Set Inputs
697d1d35e2fSjeremylt       ierr = CeedVectorSetValue(active_in[in], 1.0); CeedChkBackend(ierr);
698d1d35e2fSjeremylt       if (num_active_in > 1) {
699d1d35e2fSjeremylt         ierr = CeedVectorSetValue(active_in[(in+num_active_in-1)%num_active_in],
700e15f9bd0SJeremy L Thompson                                   0.0); CeedChkBackend(ierr);
70142ea3801Sjeremylt       }
7021d102b48SJeremy L Thompson       // Set Outputs
703d1d35e2fSjeremylt       for (CeedInt out=0; out<num_output_fields; out++) {
7041d102b48SJeremy L Thompson         // Get output vector
705d1d35e2fSjeremylt         ierr = CeedOperatorFieldGetVector(op_output_fields[out], &vec);
706e15f9bd0SJeremy L Thompson         CeedChkBackend(ierr);
7071d102b48SJeremy L Thompson         // Check if active output
7081d102b48SJeremy L Thompson         if (vec == CEED_VECTOR_ACTIVE) {
709d1d35e2fSjeremylt           CeedVectorSetArray(impl->q_vecs_out[out], CEED_MEM_HOST,
710e15f9bd0SJeremy L Thompson                              CEED_USE_POINTER, a); CeedChkBackend(ierr);
711d1d35e2fSjeremylt           ierr = CeedQFunctionFieldGetSize(qf_output_fields[out], &size);
712e15f9bd0SJeremy L Thompson           CeedChkBackend(ierr);
713d1d35e2fSjeremylt           a += size*Q*blk_size; // Advance the pointer by the size of the output
7141d102b48SJeremy L Thompson         }
7151d102b48SJeremy L Thompson       }
7161d102b48SJeremy L Thompson       // Apply QFunction
717d1d35e2fSjeremylt       ierr = CeedQFunctionApply(qf, Q*blk_size, impl->q_vecs_in, impl->q_vecs_out);
718e15f9bd0SJeremy L Thompson       CeedChkBackend(ierr);
7191d102b48SJeremy L Thompson     }
7201d102b48SJeremy L Thompson   }
7211d102b48SJeremy L Thompson 
7221d102b48SJeremy L Thompson   // Un-set output Qvecs to prevent accidental overwrite of Assembled
723d1d35e2fSjeremylt   for (CeedInt out=0; out<num_output_fields; out++) {
7241d102b48SJeremy L Thompson     // Get output vector
725d1d35e2fSjeremylt     ierr = CeedOperatorFieldGetVector(op_output_fields[out], &vec);
726e15f9bd0SJeremy L Thompson     CeedChkBackend(ierr);
7271d102b48SJeremy L Thompson     // Check if active output
7281d102b48SJeremy L Thompson     if (vec == CEED_VECTOR_ACTIVE) {
729d1d35e2fSjeremylt       CeedVectorSetArray(impl->q_vecs_out[out], CEED_MEM_HOST, CEED_COPY_VALUES,
730e15f9bd0SJeremy L Thompson                          NULL); CeedChkBackend(ierr);
7311d102b48SJeremy L Thompson     }
7321d102b48SJeremy L Thompson   }
7331d102b48SJeremy L Thompson 
7341d102b48SJeremy L Thompson   // Restore input arrays
735d1d35e2fSjeremylt   ierr = CeedOperatorRestoreInputs_Opt(num_input_fields, qf_input_fields,
736a0162de9SJeremy L Thompson                                        op_input_fields, e_data, impl);
737e15f9bd0SJeremy L Thompson   CeedChkBackend(ierr);
7381d102b48SJeremy L Thompson 
7391d102b48SJeremy L Thompson   // Output blocked restriction
7404fc1f125SJeremy L Thompson   ierr = CeedVectorRestoreArray(l_vec, &a); CeedChkBackend(ierr);
741e15f9bd0SJeremy L Thompson   ierr = CeedVectorSetValue(*assembled, 0.0); CeedChkBackend(ierr);
742bb219a0fSJeremy L Thompson   CeedElemRestriction blk_rstr = impl->qf_blk_rstr;
743bb219a0fSJeremy L Thompson   if (!blk_rstr) {
744d1d35e2fSjeremylt     ierr = CeedElemRestrictionCreateBlockedStrided(ceed, num_elem, Q, blk_size,
745d1d35e2fSjeremylt            num_active_in*num_active_out, num_active_in*num_active_out*num_elem*Q,
746d1d35e2fSjeremylt            strides, &blk_rstr); CeedChkBackend(ierr);
747bb219a0fSJeremy L Thompson     impl->qf_blk_rstr = blk_rstr;
748bb219a0fSJeremy L Thompson   }
7494fc1f125SJeremy L Thompson   ierr = CeedElemRestrictionApply(blk_rstr, CEED_TRANSPOSE, l_vec, *assembled,
750e15f9bd0SJeremy L Thompson                                   request); CeedChkBackend(ierr);
7511d102b48SJeremy L Thompson 
752e15f9bd0SJeremy L Thompson   return CEED_ERROR_SUCCESS;
75389c6efa4Sjeremylt }
75489c6efa4Sjeremylt 
755f10650afSjeremylt //------------------------------------------------------------------------------
75670a7ffb3SJeremy L Thompson // Assemble Linear QFunction
75770a7ffb3SJeremy L Thompson //------------------------------------------------------------------------------
75870a7ffb3SJeremy L Thompson static int CeedOperatorLinearAssembleQFunction_Opt(CeedOperator op,
75970a7ffb3SJeremy L Thompson     CeedVector *assembled, CeedElemRestriction *rstr, CeedRequest *request) {
76070a7ffb3SJeremy L Thompson   return CeedOperatorLinearAssembleQFunctionCore_Opt(op, true, assembled, rstr,
76170a7ffb3SJeremy L Thompson          request);
76270a7ffb3SJeremy L Thompson }
76370a7ffb3SJeremy L Thompson 
76470a7ffb3SJeremy L Thompson //------------------------------------------------------------------------------
76570a7ffb3SJeremy L Thompson // Update Assembled Linear QFunction
76670a7ffb3SJeremy L Thompson //------------------------------------------------------------------------------
76770a7ffb3SJeremy L Thompson static int CeedOperatorLinearAssembleQFunctionUpdate_Opt(CeedOperator op,
76870a7ffb3SJeremy L Thompson     CeedVector assembled, CeedElemRestriction rstr, CeedRequest *request) {
76970a7ffb3SJeremy L Thompson   return CeedOperatorLinearAssembleQFunctionCore_Opt(op, false, &assembled,
77070a7ffb3SJeremy L Thompson          &rstr, request);
77170a7ffb3SJeremy L Thompson }
77270a7ffb3SJeremy L Thompson 
77370a7ffb3SJeremy L Thompson //------------------------------------------------------------------------------
774f10650afSjeremylt // Operator Destroy
775f10650afSjeremylt //------------------------------------------------------------------------------
776f10650afSjeremylt static int CeedOperatorDestroy_Opt(CeedOperator op) {
777f10650afSjeremylt   int ierr;
778f10650afSjeremylt   CeedOperator_Opt *impl;
779e15f9bd0SJeremy L Thompson   ierr = CeedOperatorGetData(op, &impl); CeedChkBackend(ierr);
780f10650afSjeremylt 
7814fc1f125SJeremy L Thompson   for (CeedInt i=0; i<impl->num_inputs+impl->num_outputs; i++) {
782d1d35e2fSjeremylt     ierr = CeedElemRestrictionDestroy(&impl->blk_restr[i]); CeedChkBackend(ierr);
7834fc1f125SJeremy L Thompson     ierr = CeedVectorDestroy(&impl->e_vecs_full[i]); CeedChkBackend(ierr);
784f10650afSjeremylt   }
785d1d35e2fSjeremylt   ierr = CeedFree(&impl->blk_restr); CeedChkBackend(ierr);
7864fc1f125SJeremy L Thompson   ierr = CeedFree(&impl->e_vecs_full); CeedChkBackend(ierr);
7874fc1f125SJeremy L Thompson   ierr = CeedFree(&impl->input_states); CeedChkBackend(ierr);
788f10650afSjeremylt 
7894fc1f125SJeremy L Thompson   for (CeedInt i=0; i<impl->num_inputs; i++) {
790d1d35e2fSjeremylt     ierr = CeedVectorDestroy(&impl->e_vecs_in[i]); CeedChkBackend(ierr);
791d1d35e2fSjeremylt     ierr = CeedVectorDestroy(&impl->q_vecs_in[i]); CeedChkBackend(ierr);
792f10650afSjeremylt   }
793d1d35e2fSjeremylt   ierr = CeedFree(&impl->e_vecs_in); CeedChkBackend(ierr);
794d1d35e2fSjeremylt   ierr = CeedFree(&impl->q_vecs_in); CeedChkBackend(ierr);
795f10650afSjeremylt 
7964fc1f125SJeremy L Thompson   for (CeedInt i=0; i<impl->num_outputs; i++) {
797d1d35e2fSjeremylt     ierr = CeedVectorDestroy(&impl->e_vecs_out[i]); CeedChkBackend(ierr);
798d1d35e2fSjeremylt     ierr = CeedVectorDestroy(&impl->q_vecs_out[i]); CeedChkBackend(ierr);
799f10650afSjeremylt   }
800d1d35e2fSjeremylt   ierr = CeedFree(&impl->e_vecs_out); CeedChkBackend(ierr);
801d1d35e2fSjeremylt   ierr = CeedFree(&impl->q_vecs_out); CeedChkBackend(ierr);
802f10650afSjeremylt 
803bb219a0fSJeremy L Thompson   // QFunction assembly data
8044fc1f125SJeremy L Thompson   for (CeedInt i=0; i<impl->num_active_in; i++) {
805bb219a0fSJeremy L Thompson     ierr = CeedVectorDestroy(&impl->qf_active_in[i]); CeedChkBackend(ierr);
806bb219a0fSJeremy L Thompson   }
807bb219a0fSJeremy L Thompson   ierr = CeedFree(&impl->qf_active_in); CeedChkBackend(ierr);
8084fc1f125SJeremy L Thompson   ierr = CeedVectorDestroy(&impl->qf_l_vec); CeedChkBackend(ierr);
809bb219a0fSJeremy L Thompson   ierr = CeedElemRestrictionDestroy(&impl->qf_blk_rstr); CeedChkBackend(ierr);
810bb219a0fSJeremy L Thompson 
811e15f9bd0SJeremy L Thompson   ierr = CeedFree(&impl); CeedChkBackend(ierr);
812e15f9bd0SJeremy L Thompson   return CEED_ERROR_SUCCESS;
813f10650afSjeremylt }
814f10650afSjeremylt 
815f10650afSjeremylt //------------------------------------------------------------------------------
816f10650afSjeremylt // Operator Create
817f10650afSjeremylt //------------------------------------------------------------------------------
81889c6efa4Sjeremylt int CeedOperatorCreate_Opt(CeedOperator op) {
81989c6efa4Sjeremylt   int ierr;
82089c6efa4Sjeremylt   Ceed ceed;
821e15f9bd0SJeremy L Thompson   ierr = CeedOperatorGetCeed(op, &ceed); CeedChkBackend(ierr);
822d1d35e2fSjeremylt   Ceed_Opt *ceed_impl;
823d1d35e2fSjeremylt   ierr = CeedGetData(ceed, &ceed_impl); CeedChkBackend(ierr);
824d1d35e2fSjeremylt   CeedInt blk_size = ceed_impl->blk_size;
82589c6efa4Sjeremylt   CeedOperator_Opt *impl;
82689c6efa4Sjeremylt 
827e15f9bd0SJeremy L Thompson   ierr = CeedCalloc(1, &impl); CeedChkBackend(ierr);
828e15f9bd0SJeremy L Thompson   ierr = CeedOperatorSetData(op, impl); CeedChkBackend(ierr);
82989c6efa4Sjeremylt 
830d1d35e2fSjeremylt   if (blk_size != 1 && blk_size != 8)
83182946b17Sjeremylt     // LCOV_EXCL_START
832e15f9bd0SJeremy L Thompson     return CeedError(ceed, CEED_ERROR_BACKEND,
833d1d35e2fSjeremylt                      "Opt backend cannot use blocksize: %d", blk_size);
83482946b17Sjeremylt   // LCOV_EXCL_STOP
83582946b17Sjeremylt 
83680ac2e43SJeremy L Thompson   ierr = CeedSetBackendFunction(ceed, "Operator", op, "LinearAssembleQFunction",
83780ac2e43SJeremy L Thompson                                 CeedOperatorLinearAssembleQFunction_Opt);
838e15f9bd0SJeremy L Thompson   CeedChkBackend(ierr);
83970a7ffb3SJeremy L Thompson   ierr = CeedSetBackendFunction(ceed, "Operator", op,
84070a7ffb3SJeremy L Thompson                                 "LinearAssembleQFunctionUpdate",
84170a7ffb3SJeremy L Thompson                                 CeedOperatorLinearAssembleQFunctionUpdate_Opt);
84270a7ffb3SJeremy L Thompson   CeedChkBackend(ierr);
843cae8b89aSjeremylt   ierr = CeedSetBackendFunction(ceed, "Operator", op, "ApplyAdd",
844e15f9bd0SJeremy L Thompson                                 CeedOperatorApplyAdd_Opt); CeedChkBackend(ierr);
84589c6efa4Sjeremylt   ierr = CeedSetBackendFunction(ceed, "Operator", op, "Destroy",
846e15f9bd0SJeremy L Thompson                                 CeedOperatorDestroy_Opt); CeedChkBackend(ierr);
847e15f9bd0SJeremy L Thompson   return CEED_ERROR_SUCCESS;
84889c6efa4Sjeremylt }
849f10650afSjeremylt //------------------------------------------------------------------------------
850