xref: /libCEED/backends/opt/ceed-opt-operator.c (revision 3d8e882215d238700cdceb37404f76ca7fa24eaa)
1*3d8e8822SJeremy L Thompson // Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors.
2*3d8e8822SJeremy L Thompson // All Rights Reserved. See the top-level LICENSE and NOTICE files for details.
389c6efa4Sjeremylt //
4*3d8e8822SJeremy L Thompson // SPDX-License-Identifier: BSD-2-Clause
589c6efa4Sjeremylt //
6*3d8e8822SJeremy L Thompson // This file is part of CEED:  http://github.com/ceed
789c6efa4Sjeremylt 
8ec3da8bcSJed Brown #include <ceed/ceed.h>
9ec3da8bcSJed Brown #include <ceed/backend.h>
103d576824SJeremy L Thompson #include <stdbool.h>
113d576824SJeremy L Thompson #include <stdint.h>
1289c6efa4Sjeremylt #include <string.h>
1389c6efa4Sjeremylt #include "ceed-opt.h"
1489c6efa4Sjeremylt 
15f10650afSjeremylt //------------------------------------------------------------------------------
16f10650afSjeremylt // Setup Input/Output Fields
17f10650afSjeremylt //------------------------------------------------------------------------------
1889c6efa4Sjeremylt static int CeedOperatorSetupFields_Opt(CeedQFunction qf, CeedOperator op,
194fc1f125SJeremy L Thompson                                        bool is_input, const CeedInt blk_size,
20d1d35e2fSjeremylt                                        CeedElemRestriction *blk_restr,
214fc1f125SJeremy L Thompson                                        CeedVector *e_vecs_full, CeedVector *e_vecs,
22d1d35e2fSjeremylt                                        CeedVector *q_vecs, CeedInt start_e,
23d1d35e2fSjeremylt                                        CeedInt num_fields, CeedInt Q) {
24ebbcc8a3SJeremy L Thompson   CeedInt ierr, num_comp, size, P;
2589c6efa4Sjeremylt   Ceed ceed;
26e15f9bd0SJeremy L Thompson   ierr = CeedOperatorGetCeed(op, &ceed); CeedChkBackend(ierr);
2789c6efa4Sjeremylt   CeedBasis basis;
2889c6efa4Sjeremylt   CeedElemRestriction r;
29d1d35e2fSjeremylt   CeedOperatorField *op_fields;
30d1d35e2fSjeremylt   CeedQFunctionField *qf_fields;
314fc1f125SJeremy L Thompson   if (is_input) {
327e7773b5SJeremy L Thompson     ierr = CeedOperatorGetFields(op, NULL, &op_fields, NULL, NULL);
33e15f9bd0SJeremy L Thompson     CeedChkBackend(ierr);
347e7773b5SJeremy L Thompson     ierr = CeedQFunctionGetFields(qf, NULL, &qf_fields, NULL, NULL);
35e15f9bd0SJeremy L Thompson     CeedChkBackend(ierr);
364fc1f125SJeremy L Thompson   } else {
374fc1f125SJeremy L Thompson     ierr = CeedOperatorGetFields(op, NULL, NULL, NULL,&op_fields);
384fc1f125SJeremy L Thompson     CeedChkBackend(ierr);
394fc1f125SJeremy L Thompson     ierr = CeedQFunctionGetFields(qf, NULL, NULL, NULL, &qf_fields);
404fc1f125SJeremy L Thompson     CeedChkBackend(ierr);
4189c6efa4Sjeremylt   }
4289c6efa4Sjeremylt 
4389c6efa4Sjeremylt   // Loop over fields
44d1d35e2fSjeremylt   for (CeedInt i=0; i<num_fields; i++) {
45d1d35e2fSjeremylt     CeedEvalMode eval_mode;
46d1d35e2fSjeremylt     ierr = CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode);
47d1d35e2fSjeremylt     CeedChkBackend(ierr);
4889c6efa4Sjeremylt 
49d1d35e2fSjeremylt     if (eval_mode != CEED_EVAL_WEIGHT) {
50d1d35e2fSjeremylt       ierr = CeedOperatorFieldGetElemRestriction(op_fields[i], &r);
51e15f9bd0SJeremy L Thompson       CeedChkBackend(ierr);
5289c6efa4Sjeremylt       Ceed ceed;
53e15f9bd0SJeremy L Thompson       ierr = CeedElemRestrictionGetCeed(r, &ceed); CeedChkBackend(ierr);
54e79b91d9SJeremy L Thompson       CeedSize l_size;
55e79b91d9SJeremy L Thompson       CeedInt num_elem, elem_size, comp_stride;
56d1d35e2fSjeremylt       ierr = CeedElemRestrictionGetNumElements(r, &num_elem); CeedChkBackend(ierr);
57d1d35e2fSjeremylt       ierr = CeedElemRestrictionGetElementSize(r, &elem_size); CeedChkBackend(ierr);
58d1d35e2fSjeremylt       ierr = CeedElemRestrictionGetLVectorSize(r, &l_size); CeedChkBackend(ierr);
59d1d35e2fSjeremylt       ierr = CeedElemRestrictionGetNumComponents(r, &num_comp); CeedChkBackend(ierr);
60bd33150aSjeremylt 
613ac43b2cSJeremy L Thompson       bool strided;
62e15f9bd0SJeremy L Thompson       ierr = CeedElemRestrictionIsStrided(r, &strided); CeedChkBackend(ierr);
633ac43b2cSJeremy L Thompson       if (strided) {
643ac43b2cSJeremy L Thompson         CeedInt strides[3];
65e15f9bd0SJeremy L Thompson         ierr = CeedElemRestrictionGetStrides(r, &strides); CeedChkBackend(ierr);
66d1d35e2fSjeremylt         ierr = CeedElemRestrictionCreateBlockedStrided(ceed, num_elem, elem_size,
67d1d35e2fSjeremylt                blk_size, num_comp, l_size, strides, &blk_restr[i+start_e]);
68e15f9bd0SJeremy L Thompson         CeedChkBackend(ierr);
693ac43b2cSJeremy L Thompson       } else {
70bd33150aSjeremylt         const CeedInt *offsets = NULL;
71bd33150aSjeremylt         ierr = CeedElemRestrictionGetOffsets(r, CEED_MEM_HOST, &offsets);
72e15f9bd0SJeremy L Thompson         CeedChkBackend(ierr);
73d1d35e2fSjeremylt         ierr = CeedElemRestrictionGetCompStride(r, &comp_stride); CeedChkBackend(ierr);
74d1d35e2fSjeremylt         ierr = CeedElemRestrictionCreateBlocked(ceed, num_elem, elem_size,
75d1d35e2fSjeremylt                                                 blk_size, num_comp, comp_stride,
76d1d35e2fSjeremylt                                                 l_size, CEED_MEM_HOST,
77bd33150aSjeremylt                                                 CEED_COPY_VALUES, offsets,
78d1d35e2fSjeremylt                                                 &blk_restr[i+start_e]);
79e15f9bd0SJeremy L Thompson         CeedChkBackend(ierr);
80e15f9bd0SJeremy L Thompson         ierr = CeedElemRestrictionRestoreOffsets(r, &offsets); CeedChkBackend(ierr);
813ac43b2cSJeremy L Thompson       }
82d1d35e2fSjeremylt       ierr = CeedElemRestrictionCreateVector(blk_restr[i+start_e], NULL,
834fc1f125SJeremy L Thompson                                              &e_vecs_full[i+start_e]);
84e15f9bd0SJeremy L Thompson       CeedChkBackend(ierr);
8589c6efa4Sjeremylt     }
8689c6efa4Sjeremylt 
87d1d35e2fSjeremylt     switch(eval_mode) {
8889c6efa4Sjeremylt     case CEED_EVAL_NONE:
89d1d35e2fSjeremylt       ierr = CeedQFunctionFieldGetSize(qf_fields[i], &size); CeedChkBackend(ierr);
90d1d35e2fSjeremylt       ierr = CeedVectorCreate(ceed, Q*size*blk_size, &e_vecs[i]);
91d1d35e2fSjeremylt       CeedChkBackend(ierr);
92d1d35e2fSjeremylt       ierr = CeedVectorCreate(ceed, Q*size*blk_size, &q_vecs[i]);
93d1d35e2fSjeremylt       CeedChkBackend(ierr);
9489c6efa4Sjeremylt       break;
9589c6efa4Sjeremylt     case CEED_EVAL_INTERP:
9689c6efa4Sjeremylt     case CEED_EVAL_GRAD:
97d1d35e2fSjeremylt       ierr = CeedOperatorFieldGetBasis(op_fields[i], &basis); CeedChkBackend(ierr);
98d1d35e2fSjeremylt       ierr = CeedQFunctionFieldGetSize(qf_fields[i], &size); CeedChkBackend(ierr);
99ebbcc8a3SJeremy L Thompson       ierr = CeedBasisGetNumNodes(basis, &P); CeedChkBackend(ierr);
100ebbcc8a3SJeremy L Thompson       ierr = CeedBasisGetNumComponents(basis, &num_comp); CeedChkBackend(ierr);
101ebbcc8a3SJeremy L Thompson       ierr = CeedVectorCreate(ceed, P*num_comp*blk_size, &e_vecs[i]);
102e15f9bd0SJeremy L Thompson       CeedChkBackend(ierr);
103d1d35e2fSjeremylt       ierr = CeedVectorCreate(ceed, Q*size*blk_size, &q_vecs[i]);
104d1d35e2fSjeremylt       CeedChkBackend(ierr);
10589c6efa4Sjeremylt       break;
10689c6efa4Sjeremylt     case CEED_EVAL_WEIGHT: // Only on input fields
107d1d35e2fSjeremylt       ierr = CeedOperatorFieldGetBasis(op_fields[i], &basis); CeedChkBackend(ierr);
108d1d35e2fSjeremylt       ierr = CeedVectorCreate(ceed, Q*blk_size, &q_vecs[i]); CeedChkBackend(ierr);
109d1d35e2fSjeremylt       ierr = CeedBasisApply(basis, blk_size, CEED_NOTRANSPOSE,
110d1d35e2fSjeremylt                             CEED_EVAL_WEIGHT, CEED_VECTOR_NONE, q_vecs[i]);
111e15f9bd0SJeremy L Thompson       CeedChkBackend(ierr);
11289c6efa4Sjeremylt 
11389c6efa4Sjeremylt       break;
11489c6efa4Sjeremylt     case CEED_EVAL_DIV:
1155f67fadeSJeremy L Thompson       break; // Not implemented
11689c6efa4Sjeremylt     case CEED_EVAL_CURL:
1175f67fadeSJeremy L Thompson       break; // Not implemented
11889c6efa4Sjeremylt     }
1199c774eddSJeremy L Thompson     if (is_input && !!e_vecs[i]) {
1209c774eddSJeremy L Thompson       ierr = CeedVectorSetArray(e_vecs[i], CEED_MEM_HOST,
1219c774eddSJeremy L Thompson                                 CEED_COPY_VALUES, NULL); CeedChkBackend(ierr);
1229c774eddSJeremy L Thompson     }
12389c6efa4Sjeremylt   }
124e15f9bd0SJeremy L Thompson   return CEED_ERROR_SUCCESS;
12589c6efa4Sjeremylt }
12689c6efa4Sjeremylt 
127f10650afSjeremylt //------------------------------------------------------------------------------
128f10650afSjeremylt // Setup Operator
129f10650afSjeremylt //------------------------------------------------------------------------------
13089c6efa4Sjeremylt static int CeedOperatorSetup_Opt(CeedOperator op) {
13189c6efa4Sjeremylt   int ierr;
1328c1105f8SJeremy L Thompson   bool is_setup_done;
1338c1105f8SJeremy L Thompson   ierr = CeedOperatorIsSetupDone(op, &is_setup_done); CeedChkBackend(ierr);
1348c1105f8SJeremy L Thompson   if (is_setup_done) return CEED_ERROR_SUCCESS;
13589c6efa4Sjeremylt   Ceed ceed;
136e15f9bd0SJeremy L Thompson   ierr = CeedOperatorGetCeed(op, &ceed); CeedChkBackend(ierr);
137d1d35e2fSjeremylt   Ceed_Opt *ceed_impl;
138d1d35e2fSjeremylt   ierr = CeedGetData(ceed, &ceed_impl); CeedChkBackend(ierr);
139d1d35e2fSjeremylt   const CeedInt blk_size = ceed_impl->blk_size;
14089c6efa4Sjeremylt   CeedOperator_Opt *impl;
141e15f9bd0SJeremy L Thompson   ierr = CeedOperatorGetData(op, &impl); CeedChkBackend(ierr);
14289c6efa4Sjeremylt   CeedQFunction qf;
143e15f9bd0SJeremy L Thompson   ierr = CeedOperatorGetQFunction(op, &qf); CeedChkBackend(ierr);
144d1d35e2fSjeremylt   CeedInt Q, num_input_fields, num_output_fields;
145e15f9bd0SJeremy L Thompson   ierr = CeedOperatorGetNumQuadraturePoints(op, &Q); CeedChkBackend(ierr);
1460b454692Sjeremylt   ierr = CeedQFunctionIsIdentity(qf, &impl->is_identity_qf); CeedChkBackend(ierr);
147d1d35e2fSjeremylt   CeedOperatorField *op_input_fields, *op_output_fields;
1487e7773b5SJeremy L Thompson   ierr = CeedOperatorGetFields(op, &num_input_fields, &op_input_fields,
1497e7773b5SJeremy L Thompson                                &num_output_fields, &op_output_fields);
150e15f9bd0SJeremy L Thompson   CeedChkBackend(ierr);
151d1d35e2fSjeremylt   CeedQFunctionField *qf_input_fields, *qf_output_fields;
1527e7773b5SJeremy L Thompson   ierr = CeedQFunctionGetFields(qf, NULL, &qf_input_fields, NULL,
1537e7773b5SJeremy L Thompson                                 &qf_output_fields);
154e15f9bd0SJeremy L Thompson   CeedChkBackend(ierr);
15589c6efa4Sjeremylt 
15689c6efa4Sjeremylt   // Allocate
157d1d35e2fSjeremylt   ierr = CeedCalloc(num_input_fields + num_output_fields, &impl->blk_restr);
158e15f9bd0SJeremy L Thompson   CeedChkBackend(ierr);
1594fc1f125SJeremy L Thompson   ierr = CeedCalloc(num_input_fields + num_output_fields, &impl->e_vecs_full);
160e15f9bd0SJeremy L Thompson   CeedChkBackend(ierr);
16189c6efa4Sjeremylt 
1624fc1f125SJeremy L Thompson   ierr = CeedCalloc(CEED_FIELD_MAX, &impl->input_states); CeedChkBackend(ierr);
163bf4cb664SJeremy L Thompson   ierr = CeedCalloc(CEED_FIELD_MAX, &impl->e_vecs_in); CeedChkBackend(ierr);
164bf4cb664SJeremy L Thompson   ierr = CeedCalloc(CEED_FIELD_MAX, &impl->e_vecs_out); CeedChkBackend(ierr);
165bf4cb664SJeremy L Thompson   ierr = CeedCalloc(CEED_FIELD_MAX, &impl->q_vecs_in); CeedChkBackend(ierr);
166bf4cb664SJeremy L Thompson   ierr = CeedCalloc(CEED_FIELD_MAX, &impl->q_vecs_out); CeedChkBackend(ierr);
16789c6efa4Sjeremylt 
1684fc1f125SJeremy L Thompson   impl->num_inputs = num_input_fields;
1694fc1f125SJeremy L Thompson   impl->num_outputs = num_output_fields;
17089c6efa4Sjeremylt 
17189c6efa4Sjeremylt   // Set up infield and outfield pointer arrays
17289c6efa4Sjeremylt   // Infields
1734fc1f125SJeremy L Thompson   ierr = CeedOperatorSetupFields_Opt(qf, op, true, blk_size, impl->blk_restr,
1744fc1f125SJeremy L Thompson                                      impl->e_vecs_full, impl->e_vecs_in,
175d1d35e2fSjeremylt                                      impl->q_vecs_in, 0, num_input_fields, Q);
176e15f9bd0SJeremy L Thompson   CeedChkBackend(ierr);
17789c6efa4Sjeremylt   // Outfields
1784fc1f125SJeremy L Thompson   ierr = CeedOperatorSetupFields_Opt(qf, op, false, blk_size, impl->blk_restr,
1794fc1f125SJeremy L Thompson                                      impl->e_vecs_full, impl->e_vecs_out,
180d1d35e2fSjeremylt                                      impl->q_vecs_out, num_input_fields,
181d1d35e2fSjeremylt                                      num_output_fields, Q);
182e15f9bd0SJeremy L Thompson   CeedChkBackend(ierr);
18389c6efa4Sjeremylt 
18416911fdaSjeremylt   // Identity QFunctions
1850b454692Sjeremylt   if (impl->is_identity_qf) {
186d1d35e2fSjeremylt     CeedEvalMode in_mode, out_mode;
187d1d35e2fSjeremylt     CeedQFunctionField *in_fields, *out_fields;
1887e7773b5SJeremy L Thompson     ierr = CeedQFunctionGetFields(qf, NULL, &in_fields, NULL, &out_fields);
189e15f9bd0SJeremy L Thompson     CeedChkBackend(ierr);
1900b454692Sjeremylt     ierr = CeedQFunctionFieldGetEvalMode(in_fields[0], &in_mode);
191d1d35e2fSjeremylt     CeedChkBackend(ierr);
1920b454692Sjeremylt     ierr = CeedQFunctionFieldGetEvalMode(out_fields[0], &out_mode);
193d1d35e2fSjeremylt     CeedChkBackend(ierr);
194d1d35e2fSjeremylt 
1950b454692Sjeremylt     if (in_mode == CEED_EVAL_NONE && out_mode == CEED_EVAL_NONE) {
1960b454692Sjeremylt       impl->is_identity_restr_op = true;
1970b454692Sjeremylt     } else {
1980b454692Sjeremylt       ierr = CeedVectorDestroy(&impl->q_vecs_out[0]); CeedChkBackend(ierr);
1990b454692Sjeremylt       impl->q_vecs_out[0] = impl->q_vecs_in[0];
2000b454692Sjeremylt       ierr = CeedVectorAddReference(impl->q_vecs_in[0]); CeedChkBackend(ierr);
20116911fdaSjeremylt     }
20216911fdaSjeremylt   }
20316911fdaSjeremylt 
204e15f9bd0SJeremy L Thompson   ierr = CeedOperatorSetSetupDone(op); CeedChkBackend(ierr);
20589c6efa4Sjeremylt 
206e15f9bd0SJeremy L Thompson   return CEED_ERROR_SUCCESS;
20789c6efa4Sjeremylt }
20889c6efa4Sjeremylt 
209f10650afSjeremylt //------------------------------------------------------------------------------
210f10650afSjeremylt // Setup Input Fields
211f10650afSjeremylt //------------------------------------------------------------------------------
212d1d35e2fSjeremylt static inline int CeedOperatorSetupInputs_Opt(CeedInt num_input_fields,
213d1d35e2fSjeremylt     CeedQFunctionField *qf_input_fields, CeedOperatorField *op_input_fields,
214a0162de9SJeremy L Thompson     CeedVector in_vec, CeedScalar *e_data[2*CEED_FIELD_MAX], CeedOperator_Opt *impl,
215a0162de9SJeremy L Thompson     CeedRequest *request) {
2161d102b48SJeremy L Thompson   CeedInt ierr;
217d1d35e2fSjeremylt   CeedEvalMode eval_mode;
21889c6efa4Sjeremylt   CeedVector vec;
21989c6efa4Sjeremylt   uint64_t state;
22089c6efa4Sjeremylt 
221d1d35e2fSjeremylt   for (CeedInt i=0; i<num_input_fields; i++) {
222d1d35e2fSjeremylt     ierr = CeedQFunctionFieldGetEvalMode(qf_input_fields[i], &eval_mode);
223e15f9bd0SJeremy L Thompson     CeedChkBackend(ierr);
224d1d35e2fSjeremylt     if (eval_mode == CEED_EVAL_WEIGHT) { // Skip
22589c6efa4Sjeremylt     } else {
22689c6efa4Sjeremylt       // Get input vector
227d1d35e2fSjeremylt       ierr = CeedOperatorFieldGetVector(op_input_fields[i], &vec);
228d1d35e2fSjeremylt       CeedChkBackend(ierr);
22989c6efa4Sjeremylt       if (vec != CEED_VECTOR_ACTIVE) {
23089c6efa4Sjeremylt         // Restrict
231e15f9bd0SJeremy L Thompson         ierr = CeedVectorGetState(vec, &state); CeedChkBackend(ierr);
2324fc1f125SJeremy L Thompson         if (state != impl->input_states[i]) {
233d1d35e2fSjeremylt           ierr = CeedElemRestrictionApply(impl->blk_restr[i], CEED_NOTRANSPOSE,
2344fc1f125SJeremy L Thompson                                           vec, impl->e_vecs_full[i], request);
235e15f9bd0SJeremy L Thompson           CeedChkBackend(ierr);
2364fc1f125SJeremy L Thompson           impl->input_states[i] = state;
23789c6efa4Sjeremylt         }
23889c6efa4Sjeremylt         // Get evec
2394fc1f125SJeremy L Thompson         ierr = CeedVectorGetArrayRead(impl->e_vecs_full[i], CEED_MEM_HOST,
240a0162de9SJeremy L Thompson                                       (const CeedScalar **) &e_data[i]);
241e15f9bd0SJeremy L Thompson         CeedChkBackend(ierr);
2429c774eddSJeremy L Thompson       } else {
2439c774eddSJeremy L Thompson         // Set Qvec for CEED_EVAL_NONE
2449c774eddSJeremy L Thompson         if (eval_mode == CEED_EVAL_NONE) {
2459c774eddSJeremy L Thompson           ierr = CeedVectorGetArrayRead(impl->e_vecs_in[i], CEED_MEM_HOST,
2469c774eddSJeremy L Thompson                                         (const CeedScalar **)&e_data[i]);
2479c774eddSJeremy L Thompson           CeedChkBackend(ierr);
2489c774eddSJeremy L Thompson           ierr = CeedVectorSetArray(impl->q_vecs_in[i], CEED_MEM_HOST,
2499c774eddSJeremy L Thompson                                     CEED_USE_POINTER, e_data[i]); CeedChkBackend(ierr);
2509c774eddSJeremy L Thompson           ierr = CeedVectorRestoreArrayRead(impl->e_vecs_in[i],
2519c774eddSJeremy L Thompson                                             (const CeedScalar **)&e_data[i]);
2529c774eddSJeremy L Thompson           CeedChkBackend(ierr);
2539c774eddSJeremy L Thompson         }
2549c774eddSJeremy L Thompson       }
25589c6efa4Sjeremylt     }
25689c6efa4Sjeremylt   }
257e15f9bd0SJeremy L Thompson   return CEED_ERROR_SUCCESS;
2581d102b48SJeremy L Thompson }
25989c6efa4Sjeremylt 
260f10650afSjeremylt //------------------------------------------------------------------------------
261f10650afSjeremylt // Input Basis Action
262f10650afSjeremylt //------------------------------------------------------------------------------
2631d102b48SJeremy L Thompson static inline int CeedOperatorInputBasis_Opt(CeedInt e, CeedInt Q,
264d1d35e2fSjeremylt     CeedQFunctionField *qf_input_fields, CeedOperatorField *op_input_fields,
265d1d35e2fSjeremylt     CeedInt num_input_fields, CeedInt blk_size, CeedVector in_vec, bool skip_active,
266a0162de9SJeremy L Thompson     CeedScalar *e_data[2*CEED_FIELD_MAX], CeedOperator_Opt *impl,
267a0162de9SJeremy L Thompson     CeedRequest *request) {
2681d102b48SJeremy L Thompson   CeedInt ierr;
269d1d35e2fSjeremylt   CeedInt dim, elem_size, size;
270d1d35e2fSjeremylt   CeedElemRestriction elem_restr;
271d1d35e2fSjeremylt   CeedEvalMode eval_mode;
2721d102b48SJeremy L Thompson   CeedBasis basis;
2731d102b48SJeremy L Thompson   CeedVector vec;
27489c6efa4Sjeremylt 
275d1d35e2fSjeremylt   for (CeedInt i=0; i<num_input_fields; i++) {
276d1d35e2fSjeremylt     ierr = CeedOperatorFieldGetVector(op_input_fields[i], &vec);
277d1d35e2fSjeremylt     CeedChkBackend(ierr);
2781d102b48SJeremy L Thompson     // Skip active input
279d1d35e2fSjeremylt     if (skip_active) {
2801d102b48SJeremy L Thompson       if (vec == CEED_VECTOR_ACTIVE)
2811d102b48SJeremy L Thompson         continue;
2821d102b48SJeremy L Thompson     }
2831d102b48SJeremy L Thompson 
284d1d35e2fSjeremylt     CeedInt active_in = 0;
285d1d35e2fSjeremylt     // Get elem_size, eval_mode, size
286d1d35e2fSjeremylt     ierr = CeedOperatorFieldGetElemRestriction(op_input_fields[i], &elem_restr);
287e15f9bd0SJeremy L Thompson     CeedChkBackend(ierr);
288d1d35e2fSjeremylt     ierr = CeedElemRestrictionGetElementSize(elem_restr, &elem_size);
289e15f9bd0SJeremy L Thompson     CeedChkBackend(ierr);
290d1d35e2fSjeremylt     ierr = CeedQFunctionFieldGetEvalMode(qf_input_fields[i], &eval_mode);
291e15f9bd0SJeremy L Thompson     CeedChkBackend(ierr);
292d1d35e2fSjeremylt     ierr = CeedQFunctionFieldGetSize(qf_input_fields[i], &size);
293d1d35e2fSjeremylt     CeedChkBackend(ierr);
29489c6efa4Sjeremylt     // Restrict block active input
29589c6efa4Sjeremylt     if (vec == CEED_VECTOR_ACTIVE) {
296d1d35e2fSjeremylt       ierr = CeedElemRestrictionApplyBlock(impl->blk_restr[i], e/blk_size,
297d1d35e2fSjeremylt                                            CEED_NOTRANSPOSE, in_vec,
298d1d35e2fSjeremylt                                            impl->e_vecs_in[i], request);
299e15f9bd0SJeremy L Thompson       CeedChkBackend(ierr);
300d1d35e2fSjeremylt       active_in = 1;
30189c6efa4Sjeremylt     }
30289c6efa4Sjeremylt     // Basis action
303d1d35e2fSjeremylt     switch(eval_mode) {
30489c6efa4Sjeremylt     case CEED_EVAL_NONE:
305d1d35e2fSjeremylt       if (!active_in) {
306d1d35e2fSjeremylt         ierr = CeedVectorSetArray(impl->q_vecs_in[i], CEED_MEM_HOST,
307a0162de9SJeremy L Thompson                                   CEED_USE_POINTER, &e_data[i][e*Q*size]);
308a0162de9SJeremy L Thompson         CeedChkBackend(ierr);
30989c6efa4Sjeremylt       }
31089c6efa4Sjeremylt       break;
31189c6efa4Sjeremylt     case CEED_EVAL_INTERP:
312d1d35e2fSjeremylt       ierr = CeedOperatorFieldGetBasis(op_input_fields[i], &basis);
313e15f9bd0SJeremy L Thompson       CeedChkBackend(ierr);
314d1d35e2fSjeremylt       if (!active_in) {
315d1d35e2fSjeremylt         ierr = CeedVectorSetArray(impl->e_vecs_in[i], CEED_MEM_HOST,
316a0162de9SJeremy L Thompson                                   CEED_USE_POINTER, &e_data[i][e*elem_size*size]);
317e15f9bd0SJeremy L Thompson         CeedChkBackend(ierr);
31889c6efa4Sjeremylt       }
319d1d35e2fSjeremylt       ierr = CeedBasisApply(basis, blk_size, CEED_NOTRANSPOSE,
320d1d35e2fSjeremylt                             CEED_EVAL_INTERP, impl->e_vecs_in[i],
321d1d35e2fSjeremylt                             impl->q_vecs_in[i]); CeedChkBackend(ierr);
32289c6efa4Sjeremylt       break;
32389c6efa4Sjeremylt     case CEED_EVAL_GRAD:
324d1d35e2fSjeremylt       ierr = CeedOperatorFieldGetBasis(op_input_fields[i], &basis);
325e15f9bd0SJeremy L Thompson       CeedChkBackend(ierr);
326d1d35e2fSjeremylt       if (!active_in) {
327e15f9bd0SJeremy L Thompson         ierr = CeedBasisGetDimension(basis, &dim); CeedChkBackend(ierr);
328d1d35e2fSjeremylt         ierr = CeedVectorSetArray(impl->e_vecs_in[i], CEED_MEM_HOST,
32989c6efa4Sjeremylt                                   CEED_USE_POINTER,
330a0162de9SJeremy L Thompson                                   &e_data[i][e*elem_size*size/dim]);
331e15f9bd0SJeremy L Thompson         CeedChkBackend(ierr);
33289c6efa4Sjeremylt       }
333d1d35e2fSjeremylt       ierr = CeedBasisApply(basis, blk_size, CEED_NOTRANSPOSE,
334d1d35e2fSjeremylt                             CEED_EVAL_GRAD, impl->e_vecs_in[i],
335d1d35e2fSjeremylt                             impl->q_vecs_in[i]); CeedChkBackend(ierr);
33689c6efa4Sjeremylt       break;
33789c6efa4Sjeremylt     case CEED_EVAL_WEIGHT:
33889c6efa4Sjeremylt       break;  // No action
339bbfacfcdSjeremylt     // LCOV_EXCL_START
34089c6efa4Sjeremylt     case CEED_EVAL_DIV:
3411d102b48SJeremy L Thompson     case CEED_EVAL_CURL: {
342d1d35e2fSjeremylt       ierr = CeedOperatorFieldGetBasis(op_input_fields[i], &basis);
343e15f9bd0SJeremy L Thompson       CeedChkBackend(ierr);
3441d102b48SJeremy L Thompson       Ceed ceed;
345e15f9bd0SJeremy L Thompson       ierr = CeedBasisGetCeed(basis, &ceed); CeedChkBackend(ierr);
346e15f9bd0SJeremy L Thompson       return CeedError(ceed, CEED_ERROR_BACKEND,
347e15f9bd0SJeremy L Thompson                        "Ceed evaluation mode not implemented");
348bbfacfcdSjeremylt       // LCOV_EXCL_STOP
3491d102b48SJeremy L Thompson     }
3501d102b48SJeremy L Thompson     }
3511d102b48SJeremy L Thompson   }
352e15f9bd0SJeremy L Thompson   return CEED_ERROR_SUCCESS;
3531d102b48SJeremy L Thompson }
35489c6efa4Sjeremylt 
355f10650afSjeremylt //------------------------------------------------------------------------------
356f10650afSjeremylt // Output Basis Action
357f10650afSjeremylt //------------------------------------------------------------------------------
3581d102b48SJeremy L Thompson static inline int CeedOperatorOutputBasis_Opt(CeedInt e, CeedInt Q,
359d1d35e2fSjeremylt     CeedQFunctionField *qf_output_fields, CeedOperatorField *op_output_fields,
360d1d35e2fSjeremylt     CeedInt blk_size, CeedInt num_input_fields, CeedInt num_output_fields,
361d1d35e2fSjeremylt     CeedOperator op, CeedVector out_vec, CeedOperator_Opt *impl,
3621d102b48SJeremy L Thompson     CeedRequest *request) {
3631d102b48SJeremy L Thompson   CeedInt ierr;
364d1d35e2fSjeremylt   CeedElemRestriction elem_restr;
365d1d35e2fSjeremylt   CeedEvalMode eval_mode;
3661d102b48SJeremy L Thompson   CeedBasis basis;
3671d102b48SJeremy L Thompson   CeedVector vec;
3681d102b48SJeremy L Thompson 
369d1d35e2fSjeremylt   for (CeedInt i=0; i<num_output_fields; i++) {
370d1d35e2fSjeremylt     // Get elem_size, eval_mode, size
371d1d35e2fSjeremylt     ierr = CeedOperatorFieldGetElemRestriction(op_output_fields[i], &elem_restr);
372e15f9bd0SJeremy L Thompson     CeedChkBackend(ierr);
373d1d35e2fSjeremylt     ierr = CeedQFunctionFieldGetEvalMode(qf_output_fields[i], &eval_mode);
374e15f9bd0SJeremy L Thompson     CeedChkBackend(ierr);
37589c6efa4Sjeremylt     // Basis action
376d1d35e2fSjeremylt     switch(eval_mode) {
37789c6efa4Sjeremylt     case CEED_EVAL_NONE:
37889c6efa4Sjeremylt       break; // No action
37989c6efa4Sjeremylt     case CEED_EVAL_INTERP:
380d1d35e2fSjeremylt       ierr = CeedOperatorFieldGetBasis(op_output_fields[i], &basis);
381e15f9bd0SJeremy L Thompson       CeedChkBackend(ierr);
382d1d35e2fSjeremylt       ierr = CeedBasisApply(basis, blk_size, CEED_TRANSPOSE,
383d1d35e2fSjeremylt                             CEED_EVAL_INTERP, impl->q_vecs_out[i],
384d1d35e2fSjeremylt                             impl->e_vecs_out[i]); CeedChkBackend(ierr);
38589c6efa4Sjeremylt       break;
38689c6efa4Sjeremylt     case CEED_EVAL_GRAD:
387d1d35e2fSjeremylt       ierr = CeedOperatorFieldGetBasis(op_output_fields[i], &basis);
388e15f9bd0SJeremy L Thompson       CeedChkBackend(ierr);
389d1d35e2fSjeremylt       ierr = CeedBasisApply(basis, blk_size, CEED_TRANSPOSE,
390d1d35e2fSjeremylt                             CEED_EVAL_GRAD, impl->q_vecs_out[i],
391d1d35e2fSjeremylt                             impl->e_vecs_out[i]); CeedChkBackend(ierr);
39289c6efa4Sjeremylt       break;
393c042f62fSJeremy L Thompson     // LCOV_EXCL_START
394bbfacfcdSjeremylt     case CEED_EVAL_WEIGHT: {
39589c6efa4Sjeremylt       Ceed ceed;
396e15f9bd0SJeremy L Thompson       ierr = CeedOperatorGetCeed(op, &ceed); CeedChkBackend(ierr);
397e15f9bd0SJeremy L Thompson       return CeedError(ceed, CEED_ERROR_BACKEND,
398e15f9bd0SJeremy L Thompson                        "CEED_EVAL_WEIGHT cannot be an output "
3991d102b48SJeremy L Thompson                        "evaluation mode");
40089c6efa4Sjeremylt     }
40189c6efa4Sjeremylt     case CEED_EVAL_DIV:
4021d102b48SJeremy L Thompson     case CEED_EVAL_CURL: {
4031d102b48SJeremy L Thompson       Ceed ceed;
404e15f9bd0SJeremy L Thompson       ierr = CeedOperatorGetCeed(op, &ceed); CeedChkBackend(ierr);
405e15f9bd0SJeremy L Thompson       return CeedError(ceed, CEED_ERROR_BACKEND,
406e15f9bd0SJeremy L Thompson                        "Ceed evaluation mode not implemented");
407bbfacfcdSjeremylt       // LCOV_EXCL_STOP
4081d102b48SJeremy L Thompson     }
40989c6efa4Sjeremylt     }
41089c6efa4Sjeremylt     // Restrict output block
41189c6efa4Sjeremylt     // Get output vector
412d1d35e2fSjeremylt     ierr = CeedOperatorFieldGetVector(op_output_fields[i], &vec);
413e15f9bd0SJeremy L Thompson     CeedChkBackend(ierr);
41489c6efa4Sjeremylt     if (vec == CEED_VECTOR_ACTIVE)
415d1d35e2fSjeremylt       vec = out_vec;
41689c6efa4Sjeremylt     // Restrict
4174fc1f125SJeremy L Thompson     ierr = CeedElemRestrictionApplyBlock(impl->blk_restr[i+impl->num_inputs],
418d1d35e2fSjeremylt                                          e/blk_size, CEED_TRANSPOSE,
419d1d35e2fSjeremylt                                          impl->e_vecs_out[i], vec, request);
420e15f9bd0SJeremy L Thompson     CeedChkBackend(ierr);
42189c6efa4Sjeremylt   }
422e15f9bd0SJeremy L Thompson   return CEED_ERROR_SUCCESS;
42389c6efa4Sjeremylt }
42489c6efa4Sjeremylt 
425f10650afSjeremylt //------------------------------------------------------------------------------
426f10650afSjeremylt // Restore Input Vectors
427f10650afSjeremylt //------------------------------------------------------------------------------
428d1d35e2fSjeremylt static inline int CeedOperatorRestoreInputs_Opt(CeedInt num_input_fields,
429d1d35e2fSjeremylt     CeedQFunctionField *qf_input_fields, CeedOperatorField *op_input_fields,
430a0162de9SJeremy L Thompson     CeedScalar *e_data[2*CEED_FIELD_MAX], CeedOperator_Opt *impl) {
4311d102b48SJeremy L Thompson   CeedInt ierr;
4321d102b48SJeremy L Thompson 
433d1d35e2fSjeremylt   for (CeedInt i=0; i<num_input_fields; i++) {
4349c774eddSJeremy L Thompson     CeedEvalMode eval_mode;
4359c774eddSJeremy L Thompson     CeedVector vec;
436d1d35e2fSjeremylt     ierr = CeedQFunctionFieldGetEvalMode(qf_input_fields[i], &eval_mode);
437e15f9bd0SJeremy L Thompson     CeedChkBackend(ierr);
4389c774eddSJeremy L Thompson     ierr = CeedOperatorFieldGetVector(op_input_fields[i], &vec);
4399c774eddSJeremy L Thompson     CeedChkBackend(ierr);
4409c774eddSJeremy L Thompson     if (eval_mode != CEED_EVAL_WEIGHT && vec != CEED_VECTOR_ACTIVE) {
4414fc1f125SJeremy L Thompson       ierr = CeedVectorRestoreArrayRead(impl->e_vecs_full[i],
442a0162de9SJeremy L Thompson                                         (const CeedScalar **) &e_data[i]);
443e15f9bd0SJeremy L Thompson       CeedChkBackend(ierr);
44489c6efa4Sjeremylt     }
44589c6efa4Sjeremylt   }
446e15f9bd0SJeremy L Thompson   return CEED_ERROR_SUCCESS;
4471d102b48SJeremy L Thompson }
4481d102b48SJeremy L Thompson 
449f10650afSjeremylt //------------------------------------------------------------------------------
450f10650afSjeremylt // Operator Apply
451f10650afSjeremylt //------------------------------------------------------------------------------
452d1d35e2fSjeremylt static int CeedOperatorApplyAdd_Opt(CeedOperator op, CeedVector in_vec,
453d1d35e2fSjeremylt                                     CeedVector out_vec, CeedRequest *request) {
4541d102b48SJeremy L Thompson   int ierr;
4551d102b48SJeremy L Thompson   Ceed ceed;
456e15f9bd0SJeremy L Thompson   ierr = CeedOperatorGetCeed(op, &ceed); CeedChkBackend(ierr);
457d1d35e2fSjeremylt   Ceed_Opt *ceed_impl;
458d1d35e2fSjeremylt   ierr = CeedGetData(ceed, &ceed_impl); CeedChkBackend(ierr);
459d1d35e2fSjeremylt   CeedInt blk_size = ceed_impl->blk_size;
4601d102b48SJeremy L Thompson   CeedOperator_Opt *impl;
461e15f9bd0SJeremy L Thompson   ierr = CeedOperatorGetData(op, &impl); CeedChkBackend(ierr);
462d1d35e2fSjeremylt   CeedInt Q, num_input_fields, num_output_fields, num_elem;
463d1d35e2fSjeremylt   ierr = CeedOperatorGetNumElements(op, &num_elem); CeedChkBackend(ierr);
464e15f9bd0SJeremy L Thompson   ierr = CeedOperatorGetNumQuadraturePoints(op, &Q); CeedChkBackend(ierr);
465d1d35e2fSjeremylt   CeedInt num_blks = (num_elem/blk_size) + !!(num_elem%blk_size);
4661d102b48SJeremy L Thompson   CeedQFunction qf;
467e15f9bd0SJeremy L Thompson   ierr = CeedOperatorGetQFunction(op, &qf); CeedChkBackend(ierr);
468d1d35e2fSjeremylt   CeedOperatorField *op_input_fields, *op_output_fields;
4697e7773b5SJeremy L Thompson   ierr = CeedOperatorGetFields(op, &num_input_fields, &op_input_fields,
4707e7773b5SJeremy L Thompson                                &num_output_fields, &op_output_fields);
471e15f9bd0SJeremy L Thompson   CeedChkBackend(ierr);
472d1d35e2fSjeremylt   CeedQFunctionField *qf_input_fields, *qf_output_fields;
4737e7773b5SJeremy L Thompson   ierr = CeedQFunctionGetFields(qf, NULL, &qf_input_fields, NULL,
4747e7773b5SJeremy L Thompson                                 &qf_output_fields);
475e15f9bd0SJeremy L Thompson   CeedChkBackend(ierr);
476d1d35e2fSjeremylt   CeedEvalMode eval_mode;
477a0162de9SJeremy L Thompson   CeedScalar *e_data[2*CEED_FIELD_MAX] = {0};
4781d102b48SJeremy L Thompson 
4791d102b48SJeremy L Thompson   // Setup
480e15f9bd0SJeremy L Thompson   ierr = CeedOperatorSetup_Opt(op); CeedChkBackend(ierr);
4811d102b48SJeremy L Thompson 
4820b454692Sjeremylt   // Restriction only operator
4830b454692Sjeremylt   if (impl->is_identity_restr_op) {
4840b454692Sjeremylt     for (CeedInt b=0; b<num_blks; b++) {
4850b454692Sjeremylt       ierr = CeedElemRestrictionApplyBlock(impl->blk_restr[0], b, CEED_NOTRANSPOSE,
4860b454692Sjeremylt                                            in_vec, impl->e_vecs_in[0], request); CeedChkBackend(ierr);
4870b454692Sjeremylt       ierr = CeedElemRestrictionApplyBlock(impl->blk_restr[1], b, CEED_TRANSPOSE,
4880b454692Sjeremylt                                            impl->e_vecs_in[0], out_vec, request); CeedChkBackend(ierr);
4890b454692Sjeremylt     }
4900b454692Sjeremylt     return CEED_ERROR_SUCCESS;
4910b454692Sjeremylt   }
4920b454692Sjeremylt 
4931d102b48SJeremy L Thompson   // Input Evecs and Restriction
494d1d35e2fSjeremylt   ierr = CeedOperatorSetupInputs_Opt(num_input_fields, qf_input_fields,
495a0162de9SJeremy L Thompson                                      op_input_fields, in_vec, e_data,
496a0162de9SJeremy L Thompson                                      impl, request); CeedChkBackend(ierr);
4971d102b48SJeremy L Thompson 
4981d102b48SJeremy L Thompson   // Output Lvecs, Evecs, and Qvecs
499d1d35e2fSjeremylt   for (CeedInt i=0; i<num_output_fields; i++) {
5001d102b48SJeremy L Thompson     // Set Qvec if needed
501d1d35e2fSjeremylt     ierr = CeedQFunctionFieldGetEvalMode(qf_output_fields[i], &eval_mode);
502e15f9bd0SJeremy L Thompson     CeedChkBackend(ierr);
503d1d35e2fSjeremylt     if (eval_mode == CEED_EVAL_NONE) {
5041d102b48SJeremy L Thompson       // Set qvec to single block evec
5059c774eddSJeremy L Thompson       ierr = CeedVectorGetArrayWrite(impl->e_vecs_out[i], CEED_MEM_HOST,
506a0162de9SJeremy L Thompson                                      &e_data[i + num_input_fields]);
507e15f9bd0SJeremy L Thompson       CeedChkBackend(ierr);
508d1d35e2fSjeremylt       ierr = CeedVectorSetArray(impl->q_vecs_out[i], CEED_MEM_HOST,
509a0162de9SJeremy L Thompson                                 CEED_USE_POINTER, e_data[i + num_input_fields]);
510a0162de9SJeremy L Thompson       CeedChkBackend(ierr);
511d1d35e2fSjeremylt       ierr = CeedVectorRestoreArray(impl->e_vecs_out[i],
512a0162de9SJeremy L Thompson                                     &e_data[i + num_input_fields]);
513e15f9bd0SJeremy L Thompson       CeedChkBackend(ierr);
5141d102b48SJeremy L Thompson     }
5151d102b48SJeremy L Thompson   }
5161d102b48SJeremy L Thompson 
5171d102b48SJeremy L Thompson   // Loop through elements
518d1d35e2fSjeremylt   for (CeedInt e=0; e<num_blks*blk_size; e+=blk_size) {
5191d102b48SJeremy L Thompson     // Input basis apply
520d1d35e2fSjeremylt     ierr = CeedOperatorInputBasis_Opt(e, Q, qf_input_fields, op_input_fields,
521d1d35e2fSjeremylt                                       num_input_fields, blk_size, in_vec, false,
522a0162de9SJeremy L Thompson                                       e_data, impl, request); CeedChkBackend(ierr);
5231d102b48SJeremy L Thompson 
5241d102b48SJeremy L Thompson     // Q function
5250b454692Sjeremylt     if (!impl->is_identity_qf) {
526d1d35e2fSjeremylt       ierr = CeedQFunctionApply(qf, Q*blk_size, impl->q_vecs_in, impl->q_vecs_out);
527e15f9bd0SJeremy L Thompson       CeedChkBackend(ierr);
52816911fdaSjeremylt     }
5291d102b48SJeremy L Thompson 
5301d102b48SJeremy L Thompson     // Output basis apply and restrict
531d1d35e2fSjeremylt     ierr = CeedOperatorOutputBasis_Opt(e, Q, qf_output_fields, op_output_fields,
532d1d35e2fSjeremylt                                        blk_size, num_input_fields, num_output_fields,
533d1d35e2fSjeremylt                                        op, out_vec, impl, request);
534e15f9bd0SJeremy L Thompson     CeedChkBackend(ierr);
5351d102b48SJeremy L Thompson   }
5361d102b48SJeremy L Thompson 
5371d102b48SJeremy L Thompson   // Restore input arrays
538d1d35e2fSjeremylt   ierr = CeedOperatorRestoreInputs_Opt(num_input_fields, qf_input_fields,
539a0162de9SJeremy L Thompson                                        op_input_fields, e_data, impl);
540e15f9bd0SJeremy L Thompson   CeedChkBackend(ierr);
54189c6efa4Sjeremylt 
542e15f9bd0SJeremy L Thompson   return CEED_ERROR_SUCCESS;
54389c6efa4Sjeremylt }
54489c6efa4Sjeremylt 
545f10650afSjeremylt //------------------------------------------------------------------------------
54670a7ffb3SJeremy L Thompson // Core code for linear QFunction assembly
547f10650afSjeremylt //------------------------------------------------------------------------------
54870a7ffb3SJeremy L Thompson static inline int CeedOperatorLinearAssembleQFunctionCore_Opt(CeedOperator op,
54970a7ffb3SJeremy L Thompson     bool build_objects, CeedVector *assembled, CeedElemRestriction *rstr,
55070a7ffb3SJeremy L Thompson     CeedRequest *request) {
5511d102b48SJeremy L Thompson   int ierr;
5521d102b48SJeremy L Thompson   Ceed ceed;
553e15f9bd0SJeremy L Thompson   ierr = CeedOperatorGetCeed(op, &ceed); CeedChkBackend(ierr);
554d1d35e2fSjeremylt   Ceed_Opt *ceed_impl;
555d1d35e2fSjeremylt   ierr = CeedGetData(ceed, &ceed_impl); CeedChkBackend(ierr);
556d1d35e2fSjeremylt   const CeedInt blk_size = ceed_impl->blk_size;
5571d102b48SJeremy L Thompson   CeedOperator_Opt *impl;
558e15f9bd0SJeremy L Thompson   ierr = CeedOperatorGetData(op, &impl); CeedChkBackend(ierr);
559d1d35e2fSjeremylt   CeedInt Q, num_input_fields, num_output_fields, num_elem, size;
560d1d35e2fSjeremylt   ierr = CeedOperatorGetNumElements(op, &num_elem); CeedChkBackend(ierr);
561e15f9bd0SJeremy L Thompson   ierr = CeedOperatorGetNumQuadraturePoints(op, &Q); CeedChkBackend(ierr);
562d1d35e2fSjeremylt   CeedInt num_blks = (num_elem/blk_size) + !!(num_elem%blk_size);
5631d102b48SJeremy L Thompson   CeedQFunction qf;
564e15f9bd0SJeremy L Thompson   ierr = CeedOperatorGetQFunction(op, &qf); CeedChkBackend(ierr);
565d1d35e2fSjeremylt   CeedOperatorField *op_input_fields, *op_output_fields;
5667e7773b5SJeremy L Thompson   ierr = CeedOperatorGetFields(op, &num_input_fields, &op_input_fields,
5677e7773b5SJeremy L Thompson                                &num_output_fields, &op_output_fields);
568e15f9bd0SJeremy L Thompson   CeedChkBackend(ierr);
569d1d35e2fSjeremylt   CeedQFunctionField *qf_input_fields, *qf_output_fields;
5707e7773b5SJeremy L Thompson   ierr = CeedQFunctionGetFields(qf, NULL, &qf_input_fields, NULL,
5717e7773b5SJeremy L Thompson                                 &qf_output_fields);
572e15f9bd0SJeremy L Thompson   CeedChkBackend(ierr);
5734fc1f125SJeremy L Thompson   CeedVector vec, l_vec = impl->qf_l_vec;
5744fc1f125SJeremy L Thompson   CeedInt num_active_in = impl->num_active_in,
5754fc1f125SJeremy L Thompson           num_active_out = impl->num_active_out;
576bb219a0fSJeremy L Thompson   CeedVector *active_in = impl->qf_active_in;
5771d102b48SJeremy L Thompson   CeedScalar *a, *tmp;
578a0162de9SJeremy L Thompson   CeedScalar *e_data[2*CEED_FIELD_MAX] = {0};
5791d102b48SJeremy L Thompson 
5801d102b48SJeremy L Thompson   // Setup
581e15f9bd0SJeremy L Thompson   ierr = CeedOperatorSetup_Opt(op); CeedChkBackend(ierr);
5821d102b48SJeremy L Thompson 
58316911fdaSjeremylt   // Check for identity
5840b454692Sjeremylt   if (impl->is_identity_qf)
58516911fdaSjeremylt     // LCOV_EXCL_START
586e15f9bd0SJeremy L Thompson     return CeedError(ceed, CEED_ERROR_BACKEND,
587e15f9bd0SJeremy L Thompson                      "Assembling identity qfunctions not supported");
58816911fdaSjeremylt   // LCOV_EXCL_STOP
58916911fdaSjeremylt 
5901d102b48SJeremy L Thompson   // Input Evecs and Restriction
591d1d35e2fSjeremylt   ierr = CeedOperatorSetupInputs_Opt(num_input_fields, qf_input_fields,
592a0162de9SJeremy L Thompson                                      op_input_fields, NULL, e_data,
593a0162de9SJeremy L Thompson                                      impl, request); CeedChkBackend(ierr);
5941d102b48SJeremy L Thompson 
5951d102b48SJeremy L Thompson   // Count number of active input fields
596bb219a0fSJeremy L Thompson   if (!num_active_in) {
597d1d35e2fSjeremylt     for (CeedInt i=0; i<num_input_fields; i++) {
5981d102b48SJeremy L Thompson       // Get input vector
599d1d35e2fSjeremylt       ierr = CeedOperatorFieldGetVector(op_input_fields[i], &vec);
600d1d35e2fSjeremylt       CeedChkBackend(ierr);
6011d102b48SJeremy L Thompson       // Check if active input
6021d102b48SJeremy L Thompson       if (vec == CEED_VECTOR_ACTIVE) {
603d1d35e2fSjeremylt         ierr = CeedQFunctionFieldGetSize(qf_input_fields[i], &size);
604e15f9bd0SJeremy L Thompson         CeedChkBackend(ierr);
605d1d35e2fSjeremylt         ierr = CeedVectorSetValue(impl->q_vecs_in[i], 0.0); CeedChkBackend(ierr);
606d1d35e2fSjeremylt         ierr = CeedVectorGetArray(impl->q_vecs_in[i], CEED_MEM_HOST, &tmp);
607d1d35e2fSjeremylt         CeedChkBackend(ierr);
608d1d35e2fSjeremylt         ierr = CeedRealloc(num_active_in + size, &active_in); CeedChkBackend(ierr);
6091d102b48SJeremy L Thompson         for (CeedInt field=0; field<size; field++) {
610d1d35e2fSjeremylt           ierr = CeedVectorCreate(ceed, Q*blk_size, &active_in[num_active_in+field]);
611e15f9bd0SJeremy L Thompson           CeedChkBackend(ierr);
612d1d35e2fSjeremylt           ierr = CeedVectorSetArray(active_in[num_active_in+field], CEED_MEM_HOST,
613d1d35e2fSjeremylt                                     CEED_USE_POINTER, &tmp[field*Q*blk_size]);
614e15f9bd0SJeremy L Thompson           CeedChkBackend(ierr);
6151d102b48SJeremy L Thompson         }
616d1d35e2fSjeremylt         num_active_in += size;
617d1d35e2fSjeremylt         ierr = CeedVectorRestoreArray(impl->q_vecs_in[i], &tmp); CeedChkBackend(ierr);
6181d102b48SJeremy L Thompson       }
61989c6efa4Sjeremylt     }
6204fc1f125SJeremy L Thompson     impl->num_active_in = num_active_in;
621bb219a0fSJeremy L Thompson     impl->qf_active_in = active_in;
622bb219a0fSJeremy L Thompson   }
62389c6efa4Sjeremylt 
6241d102b48SJeremy L Thompson   // Count number of active output fields
625bb219a0fSJeremy L Thompson   if (!num_active_out) {
626d1d35e2fSjeremylt     for (CeedInt i=0; i<num_output_fields; i++) {
6271d102b48SJeremy L Thompson       // Get output vector
628d1d35e2fSjeremylt       ierr = CeedOperatorFieldGetVector(op_output_fields[i], &vec);
629e15f9bd0SJeremy L Thompson       CeedChkBackend(ierr);
6301d102b48SJeremy L Thompson       // Check if active output
6311d102b48SJeremy L Thompson       if (vec == CEED_VECTOR_ACTIVE) {
632d1d35e2fSjeremylt         ierr = CeedQFunctionFieldGetSize(qf_output_fields[i], &size);
633e15f9bd0SJeremy L Thompson         CeedChkBackend(ierr);
634d1d35e2fSjeremylt         num_active_out += size;
6351d102b48SJeremy L Thompson       }
6361d102b48SJeremy L Thompson     }
6374fc1f125SJeremy L Thompson     impl->num_active_out = num_active_out;
638bb219a0fSJeremy L Thompson   }
6391d102b48SJeremy L Thompson 
6401d102b48SJeremy L Thompson   // Check sizes
641d1d35e2fSjeremylt   if (!num_active_in || !num_active_out)
6421d102b48SJeremy L Thompson     // LCOV_EXCL_START
643e15f9bd0SJeremy L Thompson     return CeedError(ceed, CEED_ERROR_BACKEND,
644e15f9bd0SJeremy L Thompson                      "Cannot assemble QFunction without active inputs "
6451d102b48SJeremy L Thompson                      "and outputs");
6461d102b48SJeremy L Thompson   // LCOV_EXCL_STOP
6471d102b48SJeremy L Thompson 
6484fc1f125SJeremy L Thompson   // Setup l_vec
6494fc1f125SJeremy L Thompson   if (!l_vec) {
65016e5f7d7SJeremy L Thompson     ierr = CeedVectorCreate(ceed, blk_size*Q*num_active_in*num_active_out,
6514fc1f125SJeremy L Thompson                             &l_vec); CeedChkBackend(ierr);
6529c774eddSJeremy L Thompson     ierr = CeedVectorSetValue(l_vec, 0.0); CeedChkBackend(ierr);
6534fc1f125SJeremy L Thompson     impl->qf_l_vec = l_vec;
654bb219a0fSJeremy L Thompson   }
6551d102b48SJeremy L Thompson 
65670a7ffb3SJeremy L Thompson   // Build objects if needed
657d1d35e2fSjeremylt   CeedInt strides[3] = {1, Q, num_active_in *num_active_out*Q};
65870a7ffb3SJeremy L Thompson   if (build_objects) {
65970a7ffb3SJeremy L Thompson     // Create output restriction
660d1d35e2fSjeremylt     ierr = CeedElemRestrictionCreateStrided(ceed, num_elem, Q,
661d1d35e2fSjeremylt                                             num_active_in*num_active_out,
662d1d35e2fSjeremylt                                             num_active_in*num_active_out*num_elem*Q,
663e15f9bd0SJeremy L Thompson                                             strides, rstr); CeedChkBackend(ierr);
6641d102b48SJeremy L Thompson     // Create assembled vector
665d1d35e2fSjeremylt     ierr = CeedVectorCreate(ceed, num_elem*Q*num_active_in*num_active_out,
666e15f9bd0SJeremy L Thompson                             assembled); CeedChkBackend(ierr);
66770a7ffb3SJeremy L Thompson   }
6681d102b48SJeremy L Thompson 
66916e5f7d7SJeremy L Thompson   // Output blocked restriction
67016e5f7d7SJeremy L Thompson   CeedElemRestriction blk_rstr = impl->qf_blk_rstr;
67116e5f7d7SJeremy L Thompson   if (!blk_rstr) {
67216e5f7d7SJeremy L Thompson     ierr = CeedElemRestrictionCreateBlockedStrided(ceed, num_elem, Q, blk_size,
67316e5f7d7SJeremy L Thompson            num_active_in*num_active_out, num_active_in*num_active_out*num_elem*Q,
67416e5f7d7SJeremy L Thompson            strides, &blk_rstr); CeedChkBackend(ierr);
67516e5f7d7SJeremy L Thompson     impl->qf_blk_rstr = blk_rstr;
67616e5f7d7SJeremy L Thompson   }
67716e5f7d7SJeremy L Thompson 
6781d102b48SJeremy L Thompson   // Loop through elements
67916e5f7d7SJeremy L Thompson   ierr = CeedVectorSetValue(*assembled, 0.0); CeedChkBackend(ierr);
680d1d35e2fSjeremylt   for (CeedInt e=0; e<num_blks*blk_size; e+=blk_size) {
68116e5f7d7SJeremy L Thompson     ierr = CeedVectorGetArray(l_vec, CEED_MEM_HOST, &a); CeedChkBackend(ierr);
68216e5f7d7SJeremy L Thompson 
6831d102b48SJeremy L Thompson     // Input basis apply
684d1d35e2fSjeremylt     ierr = CeedOperatorInputBasis_Opt(e, Q, qf_input_fields, op_input_fields,
685d1d35e2fSjeremylt                                       num_input_fields, blk_size, NULL, true,
686a0162de9SJeremy L Thompson                                       e_data, impl, request); CeedChkBackend(ierr);
6871d102b48SJeremy L Thompson 
6881d102b48SJeremy L Thompson     // Assemble QFunction
689d1d35e2fSjeremylt     for (CeedInt in=0; in<num_active_in; in++) {
6901d102b48SJeremy L Thompson       // Set Inputs
691d1d35e2fSjeremylt       ierr = CeedVectorSetValue(active_in[in], 1.0); CeedChkBackend(ierr);
692d1d35e2fSjeremylt       if (num_active_in > 1) {
693d1d35e2fSjeremylt         ierr = CeedVectorSetValue(active_in[(in+num_active_in-1)%num_active_in],
694e15f9bd0SJeremy L Thompson                                   0.0); CeedChkBackend(ierr);
69542ea3801Sjeremylt       }
6961d102b48SJeremy L Thompson       // Set Outputs
697d1d35e2fSjeremylt       for (CeedInt out=0; out<num_output_fields; out++) {
6981d102b48SJeremy L Thompson         // Get output vector
699d1d35e2fSjeremylt         ierr = CeedOperatorFieldGetVector(op_output_fields[out], &vec);
700e15f9bd0SJeremy L Thompson         CeedChkBackend(ierr);
7011d102b48SJeremy L Thompson         // Check if active output
7021d102b48SJeremy L Thompson         if (vec == CEED_VECTOR_ACTIVE) {
703d1d35e2fSjeremylt           CeedVectorSetArray(impl->q_vecs_out[out], CEED_MEM_HOST,
704e15f9bd0SJeremy L Thompson                              CEED_USE_POINTER, a); CeedChkBackend(ierr);
705d1d35e2fSjeremylt           ierr = CeedQFunctionFieldGetSize(qf_output_fields[out], &size);
706e15f9bd0SJeremy L Thompson           CeedChkBackend(ierr);
707d1d35e2fSjeremylt           a += size*Q*blk_size; // Advance the pointer by the size of the output
7081d102b48SJeremy L Thompson         }
7091d102b48SJeremy L Thompson       }
7101d102b48SJeremy L Thompson       // Apply QFunction
711d1d35e2fSjeremylt       ierr = CeedQFunctionApply(qf, Q*blk_size, impl->q_vecs_in, impl->q_vecs_out);
712e15f9bd0SJeremy L Thompson       CeedChkBackend(ierr);
7131d102b48SJeremy L Thompson     }
71416e5f7d7SJeremy L Thompson 
71516e5f7d7SJeremy L Thompson     // Assemble into assembled vector
71616e5f7d7SJeremy L Thompson     ierr = CeedVectorRestoreArray(l_vec, &a); CeedChkBackend(ierr);
71716e5f7d7SJeremy L Thompson     ierr = CeedElemRestrictionApplyBlock(blk_rstr, e/blk_size, CEED_TRANSPOSE,
71816e5f7d7SJeremy L Thompson                                          l_vec, *assembled, request); CeedChkBackend(ierr);
7191d102b48SJeremy L Thompson   }
7201d102b48SJeremy L Thompson 
7211d102b48SJeremy L Thompson   // Un-set output Qvecs to prevent accidental overwrite of Assembled
722d1d35e2fSjeremylt   for (CeedInt out=0; out<num_output_fields; out++) {
7231d102b48SJeremy L Thompson     // Get output vector
724d1d35e2fSjeremylt     ierr = CeedOperatorFieldGetVector(op_output_fields[out], &vec);
725e15f9bd0SJeremy L Thompson     CeedChkBackend(ierr);
7261d102b48SJeremy L Thompson     // Check if active output
7271d102b48SJeremy L Thompson     if (vec == CEED_VECTOR_ACTIVE) {
728d1d35e2fSjeremylt       CeedVectorSetArray(impl->q_vecs_out[out], CEED_MEM_HOST, CEED_COPY_VALUES,
729e15f9bd0SJeremy L Thompson                          NULL); CeedChkBackend(ierr);
7301d102b48SJeremy L Thompson     }
7311d102b48SJeremy L Thompson   }
7321d102b48SJeremy L Thompson 
7331d102b48SJeremy L Thompson   // Restore input arrays
734d1d35e2fSjeremylt   ierr = CeedOperatorRestoreInputs_Opt(num_input_fields, qf_input_fields,
735a0162de9SJeremy L Thompson                                        op_input_fields, e_data, impl);
736e15f9bd0SJeremy L Thompson   CeedChkBackend(ierr);
7371d102b48SJeremy L Thompson 
738e15f9bd0SJeremy L Thompson   return CEED_ERROR_SUCCESS;
73989c6efa4Sjeremylt }
74089c6efa4Sjeremylt 
741f10650afSjeremylt //------------------------------------------------------------------------------
74270a7ffb3SJeremy L Thompson // Assemble Linear QFunction
74370a7ffb3SJeremy L Thompson //------------------------------------------------------------------------------
74470a7ffb3SJeremy L Thompson static int CeedOperatorLinearAssembleQFunction_Opt(CeedOperator op,
74570a7ffb3SJeremy L Thompson     CeedVector *assembled, CeedElemRestriction *rstr, CeedRequest *request) {
74670a7ffb3SJeremy L Thompson   return CeedOperatorLinearAssembleQFunctionCore_Opt(op, true, assembled, rstr,
74770a7ffb3SJeremy L Thompson          request);
74870a7ffb3SJeremy L Thompson }
74970a7ffb3SJeremy L Thompson 
75070a7ffb3SJeremy L Thompson //------------------------------------------------------------------------------
75170a7ffb3SJeremy L Thompson // Update Assembled Linear QFunction
75270a7ffb3SJeremy L Thompson //------------------------------------------------------------------------------
75370a7ffb3SJeremy L Thompson static int CeedOperatorLinearAssembleQFunctionUpdate_Opt(CeedOperator op,
75470a7ffb3SJeremy L Thompson     CeedVector assembled, CeedElemRestriction rstr, CeedRequest *request) {
75570a7ffb3SJeremy L Thompson   return CeedOperatorLinearAssembleQFunctionCore_Opt(op, false, &assembled,
75670a7ffb3SJeremy L Thompson          &rstr, request);
75770a7ffb3SJeremy L Thompson }
75870a7ffb3SJeremy L Thompson 
75970a7ffb3SJeremy L Thompson //------------------------------------------------------------------------------
760f10650afSjeremylt // Operator Destroy
761f10650afSjeremylt //------------------------------------------------------------------------------
762f10650afSjeremylt static int CeedOperatorDestroy_Opt(CeedOperator op) {
763f10650afSjeremylt   int ierr;
764f10650afSjeremylt   CeedOperator_Opt *impl;
765e15f9bd0SJeremy L Thompson   ierr = CeedOperatorGetData(op, &impl); CeedChkBackend(ierr);
766f10650afSjeremylt 
7674fc1f125SJeremy L Thompson   for (CeedInt i=0; i<impl->num_inputs+impl->num_outputs; i++) {
768d1d35e2fSjeremylt     ierr = CeedElemRestrictionDestroy(&impl->blk_restr[i]); CeedChkBackend(ierr);
7694fc1f125SJeremy L Thompson     ierr = CeedVectorDestroy(&impl->e_vecs_full[i]); CeedChkBackend(ierr);
770f10650afSjeremylt   }
771d1d35e2fSjeremylt   ierr = CeedFree(&impl->blk_restr); CeedChkBackend(ierr);
7724fc1f125SJeremy L Thompson   ierr = CeedFree(&impl->e_vecs_full); CeedChkBackend(ierr);
7734fc1f125SJeremy L Thompson   ierr = CeedFree(&impl->input_states); CeedChkBackend(ierr);
774f10650afSjeremylt 
7754fc1f125SJeremy L Thompson   for (CeedInt i=0; i<impl->num_inputs; i++) {
776d1d35e2fSjeremylt     ierr = CeedVectorDestroy(&impl->e_vecs_in[i]); CeedChkBackend(ierr);
777d1d35e2fSjeremylt     ierr = CeedVectorDestroy(&impl->q_vecs_in[i]); CeedChkBackend(ierr);
778f10650afSjeremylt   }
779d1d35e2fSjeremylt   ierr = CeedFree(&impl->e_vecs_in); CeedChkBackend(ierr);
780d1d35e2fSjeremylt   ierr = CeedFree(&impl->q_vecs_in); CeedChkBackend(ierr);
781f10650afSjeremylt 
7824fc1f125SJeremy L Thompson   for (CeedInt i=0; i<impl->num_outputs; i++) {
783d1d35e2fSjeremylt     ierr = CeedVectorDestroy(&impl->e_vecs_out[i]); CeedChkBackend(ierr);
784d1d35e2fSjeremylt     ierr = CeedVectorDestroy(&impl->q_vecs_out[i]); CeedChkBackend(ierr);
785f10650afSjeremylt   }
786d1d35e2fSjeremylt   ierr = CeedFree(&impl->e_vecs_out); CeedChkBackend(ierr);
787d1d35e2fSjeremylt   ierr = CeedFree(&impl->q_vecs_out); CeedChkBackend(ierr);
788f10650afSjeremylt 
789bb219a0fSJeremy L Thompson   // QFunction assembly data
7904fc1f125SJeremy L Thompson   for (CeedInt i=0; i<impl->num_active_in; i++) {
791bb219a0fSJeremy L Thompson     ierr = CeedVectorDestroy(&impl->qf_active_in[i]); CeedChkBackend(ierr);
792bb219a0fSJeremy L Thompson   }
793bb219a0fSJeremy L Thompson   ierr = CeedFree(&impl->qf_active_in); CeedChkBackend(ierr);
7944fc1f125SJeremy L Thompson   ierr = CeedVectorDestroy(&impl->qf_l_vec); CeedChkBackend(ierr);
795bb219a0fSJeremy L Thompson   ierr = CeedElemRestrictionDestroy(&impl->qf_blk_rstr); CeedChkBackend(ierr);
796bb219a0fSJeremy L Thompson 
797e15f9bd0SJeremy L Thompson   ierr = CeedFree(&impl); CeedChkBackend(ierr);
798e15f9bd0SJeremy L Thompson   return CEED_ERROR_SUCCESS;
799f10650afSjeremylt }
800f10650afSjeremylt 
801f10650afSjeremylt //------------------------------------------------------------------------------
802f10650afSjeremylt // Operator Create
803f10650afSjeremylt //------------------------------------------------------------------------------
80489c6efa4Sjeremylt int CeedOperatorCreate_Opt(CeedOperator op) {
80589c6efa4Sjeremylt   int ierr;
80689c6efa4Sjeremylt   Ceed ceed;
807e15f9bd0SJeremy L Thompson   ierr = CeedOperatorGetCeed(op, &ceed); CeedChkBackend(ierr);
808d1d35e2fSjeremylt   Ceed_Opt *ceed_impl;
809d1d35e2fSjeremylt   ierr = CeedGetData(ceed, &ceed_impl); CeedChkBackend(ierr);
810d1d35e2fSjeremylt   CeedInt blk_size = ceed_impl->blk_size;
81189c6efa4Sjeremylt   CeedOperator_Opt *impl;
81289c6efa4Sjeremylt 
813e15f9bd0SJeremy L Thompson   ierr = CeedCalloc(1, &impl); CeedChkBackend(ierr);
814e15f9bd0SJeremy L Thompson   ierr = CeedOperatorSetData(op, impl); CeedChkBackend(ierr);
81589c6efa4Sjeremylt 
816d1d35e2fSjeremylt   if (blk_size != 1 && blk_size != 8)
81782946b17Sjeremylt     // LCOV_EXCL_START
818e15f9bd0SJeremy L Thompson     return CeedError(ceed, CEED_ERROR_BACKEND,
819d1d35e2fSjeremylt                      "Opt backend cannot use blocksize: %d", blk_size);
82082946b17Sjeremylt   // LCOV_EXCL_STOP
82182946b17Sjeremylt 
82280ac2e43SJeremy L Thompson   ierr = CeedSetBackendFunction(ceed, "Operator", op, "LinearAssembleQFunction",
82380ac2e43SJeremy L Thompson                                 CeedOperatorLinearAssembleQFunction_Opt);
824e15f9bd0SJeremy L Thompson   CeedChkBackend(ierr);
82570a7ffb3SJeremy L Thompson   ierr = CeedSetBackendFunction(ceed, "Operator", op,
82670a7ffb3SJeremy L Thompson                                 "LinearAssembleQFunctionUpdate",
82770a7ffb3SJeremy L Thompson                                 CeedOperatorLinearAssembleQFunctionUpdate_Opt);
82870a7ffb3SJeremy L Thompson   CeedChkBackend(ierr);
829cae8b89aSjeremylt   ierr = CeedSetBackendFunction(ceed, "Operator", op, "ApplyAdd",
830e15f9bd0SJeremy L Thompson                                 CeedOperatorApplyAdd_Opt); CeedChkBackend(ierr);
83189c6efa4Sjeremylt   ierr = CeedSetBackendFunction(ceed, "Operator", op, "Destroy",
832e15f9bd0SJeremy L Thompson                                 CeedOperatorDestroy_Opt); CeedChkBackend(ierr);
833e15f9bd0SJeremy L Thompson   return CEED_ERROR_SUCCESS;
83489c6efa4Sjeremylt }
835f10650afSjeremylt //------------------------------------------------------------------------------
836