xref: /libCEED/backends/blocked/ceed-blocked-operator.c (revision d1d35e2f02dc969aee8debf3fd943dd784aa847a)
14a2e7687Sjeremylt // Copyright (c) 2017-2018, Lawrence Livermore National Security, LLC.
24a2e7687Sjeremylt // Produced at the Lawrence Livermore National Laboratory. LLNL-CODE-734707.
34a2e7687Sjeremylt // All Rights reserved. See files LICENSE and NOTICE for details.
44a2e7687Sjeremylt //
54a2e7687Sjeremylt // This file is part of CEED, a collection of benchmarks, miniapps, software
64a2e7687Sjeremylt // libraries and APIs for efficient high-order finite element and spectral
74a2e7687Sjeremylt // element discretizations for exascale applications. For more information and
84a2e7687Sjeremylt // source code availability see http://github.com/ceed.
94a2e7687Sjeremylt //
104a2e7687Sjeremylt // The CEED research is supported by the Exascale Computing Project 17-SC-20-SC,
114a2e7687Sjeremylt // a collaborative effort of two U.S. Department of Energy organizations (Office
124a2e7687Sjeremylt // of Science and the National Nuclear Security Administration) responsible for
134a2e7687Sjeremylt // the planning and preparation of a capable exascale ecosystem, including
144a2e7687Sjeremylt // software, applications, hardware, advanced system engineering and early
154a2e7687Sjeremylt // testbed platforms, in support of the nation's exascale computing imperative.
164a2e7687Sjeremylt 
17ec3da8bcSJed Brown #include <ceed/ceed.h>
18ec3da8bcSJed Brown #include <ceed/backend.h>
193d576824SJeremy L Thompson #include <stdbool.h>
203d576824SJeremy L Thompson #include <stddef.h>
213d576824SJeremy L Thompson #include <stdint.h>
224a2e7687Sjeremylt #include "ceed-blocked.h"
234a2e7687Sjeremylt 
24f10650afSjeremylt //------------------------------------------------------------------------------
25f10650afSjeremylt // Setup Input/Output Fields
26f10650afSjeremylt //------------------------------------------------------------------------------
2789c6efa4Sjeremylt static int CeedOperatorSetupFields_Blocked(CeedQFunction qf,
28*d1d35e2fSjeremylt     CeedOperator op, bool inOrOut, CeedElemRestriction *blk_restr,
29*d1d35e2fSjeremylt     CeedVector *full_evecs, CeedVector *e_vecs, CeedVector *q_vecs,
30*d1d35e2fSjeremylt     CeedInt start_e, CeedInt numfields, CeedInt Q) {
31*d1d35e2fSjeremylt   CeedInt dim, ierr, num_comp, size, P;
32aedaa0e5Sjeremylt   Ceed ceed;
33e15f9bd0SJeremy L Thompson   ierr = CeedOperatorGetCeed(op, &ceed); CeedChkBackend(ierr);
34d1bcdac9Sjeremylt   CeedBasis basis;
35d1bcdac9Sjeremylt   CeedElemRestriction r;
36*d1d35e2fSjeremylt   CeedOperatorField *op_fields;
37*d1d35e2fSjeremylt   CeedQFunctionField *qf_fields;
38fe2413ffSjeremylt   if (inOrOut) {
39*d1d35e2fSjeremylt     ierr = CeedOperatorGetFields(op, NULL, &op_fields);
40e15f9bd0SJeremy L Thompson     CeedChkBackend(ierr);
41*d1d35e2fSjeremylt     ierr = CeedQFunctionGetFields(qf, NULL, &qf_fields);
42e15f9bd0SJeremy L Thompson     CeedChkBackend(ierr);
43fe2413ffSjeremylt   } else {
44*d1d35e2fSjeremylt     ierr = CeedOperatorGetFields(op, &op_fields, NULL);
45e15f9bd0SJeremy L Thompson     CeedChkBackend(ierr);
46*d1d35e2fSjeremylt     ierr = CeedQFunctionGetFields(qf, &qf_fields, NULL);
47e15f9bd0SJeremy L Thompson     CeedChkBackend(ierr);
48fe2413ffSjeremylt   }
49*d1d35e2fSjeremylt   const CeedInt blk_size = 8;
504a2e7687Sjeremylt 
514a2e7687Sjeremylt   // Loop over fields
524a2e7687Sjeremylt   for (CeedInt i=0; i<numfields; i++) {
53*d1d35e2fSjeremylt     CeedEvalMode eval_mode;
54*d1d35e2fSjeremylt     ierr = CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode);
55*d1d35e2fSjeremylt     CeedChkBackend(ierr);
564a2e7687Sjeremylt 
57*d1d35e2fSjeremylt     if (eval_mode != CEED_EVAL_WEIGHT) {
58*d1d35e2fSjeremylt       ierr = CeedOperatorFieldGetElemRestriction(op_fields[i], &r);
59e15f9bd0SJeremy L Thompson       CeedChkBackend(ierr);
60e15f9bd0SJeremy L Thompson       ierr = CeedElemRestrictionGetCeed(r, &ceed); CeedChkBackend(ierr);
61*d1d35e2fSjeremylt       CeedInt num_elem, elem_size, l_size, comp_stride;
62*d1d35e2fSjeremylt       ierr = CeedElemRestrictionGetNumElements(r, &num_elem); CeedChkBackend(ierr);
63*d1d35e2fSjeremylt       ierr = CeedElemRestrictionGetElementSize(r, &elem_size); CeedChkBackend(ierr);
64*d1d35e2fSjeremylt       ierr = CeedElemRestrictionGetLVectorSize(r, &l_size); CeedChkBackend(ierr);
65*d1d35e2fSjeremylt       ierr = CeedElemRestrictionGetNumComponents(r, &num_comp); CeedChkBackend(ierr);
66bd33150aSjeremylt 
673ac43b2cSJeremy L Thompson       bool strided;
68e15f9bd0SJeremy L Thompson       ierr = CeedElemRestrictionIsStrided(r, &strided); CeedChkBackend(ierr);
693ac43b2cSJeremy L Thompson       if (strided) {
703ac43b2cSJeremy L Thompson         CeedInt strides[3];
71e15f9bd0SJeremy L Thompson         ierr = CeedElemRestrictionGetStrides(r, &strides); CeedChkBackend(ierr);
72*d1d35e2fSjeremylt         ierr = CeedElemRestrictionCreateBlockedStrided(ceed, num_elem, elem_size,
73*d1d35e2fSjeremylt                blk_size, num_comp, l_size, strides, &blk_restr[i+start_e]);
74e15f9bd0SJeremy L Thompson         CeedChkBackend(ierr);
753ac43b2cSJeremy L Thompson       } else {
76bd33150aSjeremylt         const CeedInt *offsets = NULL;
77bd33150aSjeremylt         ierr = CeedElemRestrictionGetOffsets(r, CEED_MEM_HOST, &offsets);
78e15f9bd0SJeremy L Thompson         CeedChkBackend(ierr);
79*d1d35e2fSjeremylt         ierr = CeedElemRestrictionGetCompStride(r, &comp_stride); CeedChkBackend(ierr);
80*d1d35e2fSjeremylt         ierr = CeedElemRestrictionCreateBlocked(ceed, num_elem, elem_size,
81*d1d35e2fSjeremylt                                                 blk_size, num_comp, comp_stride,
82*d1d35e2fSjeremylt                                                 l_size, CEED_MEM_HOST,
83bd33150aSjeremylt                                                 CEED_COPY_VALUES, offsets,
84*d1d35e2fSjeremylt                                                 &blk_restr[i+start_e]);
85e15f9bd0SJeremy L Thompson         CeedChkBackend(ierr);
86e15f9bd0SJeremy L Thompson         ierr = CeedElemRestrictionRestoreOffsets(r, &offsets); CeedChkBackend(ierr);
873ac43b2cSJeremy L Thompson       }
88*d1d35e2fSjeremylt       ierr = CeedElemRestrictionCreateVector(blk_restr[i+start_e], NULL,
89*d1d35e2fSjeremylt                                              &full_evecs[i+start_e]);
90e15f9bd0SJeremy L Thompson       CeedChkBackend(ierr);
914a2e7687Sjeremylt     }
924a2e7687Sjeremylt 
93*d1d35e2fSjeremylt     switch(eval_mode) {
944a2e7687Sjeremylt     case CEED_EVAL_NONE:
95*d1d35e2fSjeremylt       ierr = CeedQFunctionFieldGetSize(qf_fields[i], &size); CeedChkBackend(ierr);
96*d1d35e2fSjeremylt       ierr = CeedVectorCreate(ceed, Q*size*blk_size, &q_vecs[i]);
97*d1d35e2fSjeremylt       CeedChkBackend(ierr);
98aedaa0e5Sjeremylt       break;
99aedaa0e5Sjeremylt     case CEED_EVAL_INTERP:
100*d1d35e2fSjeremylt       ierr = CeedQFunctionFieldGetSize(qf_fields[i], &size); CeedChkBackend(ierr);
1014d1cd9fcSJeremy L Thompson       ierr = CeedElemRestrictionGetElementSize(r, &P);
102e15f9bd0SJeremy L Thompson       CeedChkBackend(ierr);
103*d1d35e2fSjeremylt       ierr = CeedVectorCreate(ceed, P*size*blk_size, &e_vecs[i]);
104*d1d35e2fSjeremylt       CeedChkBackend(ierr);
105*d1d35e2fSjeremylt       ierr = CeedVectorCreate(ceed, Q*size*blk_size, &q_vecs[i]);
106*d1d35e2fSjeremylt       CeedChkBackend(ierr);
1074a2e7687Sjeremylt       break;
1084a2e7687Sjeremylt     case CEED_EVAL_GRAD:
109*d1d35e2fSjeremylt       ierr = CeedOperatorFieldGetBasis(op_fields[i], &basis); CeedChkBackend(ierr);
110*d1d35e2fSjeremylt       ierr = CeedQFunctionFieldGetSize(qf_fields[i], &size); CeedChkBackend(ierr);
111e15f9bd0SJeremy L Thompson       ierr = CeedBasisGetDimension(basis, &dim); CeedChkBackend(ierr);
1124d1cd9fcSJeremy L Thompson       ierr = CeedElemRestrictionGetElementSize(r, &P);
113e15f9bd0SJeremy L Thompson       CeedChkBackend(ierr);
114*d1d35e2fSjeremylt       ierr = CeedVectorCreate(ceed, P*size/dim*blk_size, &e_vecs[i]);
115e15f9bd0SJeremy L Thompson       CeedChkBackend(ierr);
116*d1d35e2fSjeremylt       ierr = CeedVectorCreate(ceed, Q*size*blk_size, &q_vecs[i]);
117*d1d35e2fSjeremylt       CeedChkBackend(ierr);
1184a2e7687Sjeremylt       break;
1194a2e7687Sjeremylt     case CEED_EVAL_WEIGHT: // Only on input fields
120*d1d35e2fSjeremylt       ierr = CeedOperatorFieldGetBasis(op_fields[i], &basis); CeedChkBackend(ierr);
121*d1d35e2fSjeremylt       ierr = CeedVectorCreate(ceed, Q*blk_size, &q_vecs[i]); CeedChkBackend(ierr);
122*d1d35e2fSjeremylt       ierr = CeedBasisApply(basis, blk_size, CEED_NOTRANSPOSE,
123*d1d35e2fSjeremylt                             CEED_EVAL_WEIGHT, CEED_VECTOR_NONE, q_vecs[i]);
124e15f9bd0SJeremy L Thompson       CeedChkBackend(ierr);
125aedaa0e5Sjeremylt 
1264a2e7687Sjeremylt       break;
1274a2e7687Sjeremylt     case CEED_EVAL_DIV:
1284d537eeaSYohann       break; // Not implemented
1294a2e7687Sjeremylt     case CEED_EVAL_CURL:
1304d537eeaSYohann       break; // Not implemented
1314a2e7687Sjeremylt     }
1324a2e7687Sjeremylt   }
133e15f9bd0SJeremy L Thompson   return CEED_ERROR_SUCCESS;
1344a2e7687Sjeremylt }
1354a2e7687Sjeremylt 
136f10650afSjeremylt //------------------------------------------------------------------------------
137f10650afSjeremylt // Setup Operator
138f10650afSjeremylt //------------------------------------------------------------------------------
1394a2e7687Sjeremylt static int CeedOperatorSetup_Blocked(CeedOperator op) {
1404a2e7687Sjeremylt   int ierr;
141*d1d35e2fSjeremylt   bool setup_done;
142*d1d35e2fSjeremylt   ierr = CeedOperatorIsSetupDone(op, &setup_done); CeedChkBackend(ierr);
143*d1d35e2fSjeremylt   if (setup_done) return CEED_ERROR_SUCCESS;
144aedaa0e5Sjeremylt   Ceed ceed;
145e15f9bd0SJeremy L Thompson   ierr = CeedOperatorGetCeed(op, &ceed); CeedChkBackend(ierr);
1464ce2993fSjeremylt   CeedOperator_Blocked *impl;
147e15f9bd0SJeremy L Thompson   ierr = CeedOperatorGetData(op, &impl); CeedChkBackend(ierr);
1484ce2993fSjeremylt   CeedQFunction qf;
149e15f9bd0SJeremy L Thompson   ierr = CeedOperatorGetQFunction(op, &qf); CeedChkBackend(ierr);
150*d1d35e2fSjeremylt   CeedInt Q, num_input_fields, num_output_fields;
151e15f9bd0SJeremy L Thompson   ierr = CeedOperatorGetNumQuadraturePoints(op, &Q); CeedChkBackend(ierr);
152*d1d35e2fSjeremylt   ierr = CeedQFunctionIsIdentity(qf, &impl->identity_qf); CeedChkBackend(ierr);
153*d1d35e2fSjeremylt   ierr= CeedQFunctionGetNumArgs(qf, &num_input_fields, &num_output_fields);
154e15f9bd0SJeremy L Thompson   CeedChkBackend(ierr);
155*d1d35e2fSjeremylt   CeedOperatorField *op_input_fields, *op_output_fields;
156*d1d35e2fSjeremylt   ierr = CeedOperatorGetFields(op, &op_input_fields, &op_output_fields);
157e15f9bd0SJeremy L Thompson   CeedChkBackend(ierr);
158*d1d35e2fSjeremylt   CeedQFunctionField *qf_input_fields, *qf_output_fields;
159*d1d35e2fSjeremylt   ierr = CeedQFunctionGetFields(qf, &qf_input_fields, &qf_output_fields);
160e15f9bd0SJeremy L Thompson   CeedChkBackend(ierr);
1614a2e7687Sjeremylt 
1624a2e7687Sjeremylt   // Allocate
163*d1d35e2fSjeremylt   ierr = CeedCalloc(num_input_fields + num_output_fields, &impl->blk_restr);
164e15f9bd0SJeremy L Thompson   CeedChkBackend(ierr);
165*d1d35e2fSjeremylt   ierr = CeedCalloc(num_input_fields + num_output_fields, &impl->e_vecs);
166e15f9bd0SJeremy L Thompson   CeedChkBackend(ierr);
167*d1d35e2fSjeremylt   ierr = CeedCalloc(num_input_fields + num_output_fields, &impl->e_data);
168e15f9bd0SJeremy L Thompson   CeedChkBackend(ierr);
1694a2e7687Sjeremylt 
170*d1d35e2fSjeremylt   ierr = CeedCalloc(16, &impl->input_state); CeedChkBackend(ierr);
171*d1d35e2fSjeremylt   ierr = CeedCalloc(16, &impl->e_vecs_in); CeedChkBackend(ierr);
172*d1d35e2fSjeremylt   ierr = CeedCalloc(16, &impl->e_vecs_out); CeedChkBackend(ierr);
173*d1d35e2fSjeremylt   ierr = CeedCalloc(16, &impl->q_vecs_in); CeedChkBackend(ierr);
174*d1d35e2fSjeremylt   ierr = CeedCalloc(16, &impl->q_vecs_out); CeedChkBackend(ierr);
1754a2e7687Sjeremylt 
176*d1d35e2fSjeremylt   impl->num_e_vecs_in = num_input_fields;
177*d1d35e2fSjeremylt   impl->num_e_vecs_out = num_output_fields;
178aedaa0e5Sjeremylt 
1794a2e7687Sjeremylt   // Set up infield and outfield pointer arrays
1804a2e7687Sjeremylt   // Infields
181*d1d35e2fSjeremylt   ierr = CeedOperatorSetupFields_Blocked(qf, op, 0, impl->blk_restr,
182*d1d35e2fSjeremylt                                          impl->e_vecs, impl->e_vecs_in,
183*d1d35e2fSjeremylt                                          impl->q_vecs_in, 0,
184*d1d35e2fSjeremylt                                          num_input_fields, Q);
185e15f9bd0SJeremy L Thompson   CeedChkBackend(ierr);
1864a2e7687Sjeremylt   // Outfields
187*d1d35e2fSjeremylt   ierr = CeedOperatorSetupFields_Blocked(qf, op, 1, impl->blk_restr,
188*d1d35e2fSjeremylt                                          impl->e_vecs, impl->e_vecs_out,
189*d1d35e2fSjeremylt                                          impl->q_vecs_out, num_input_fields,
190*d1d35e2fSjeremylt                                          num_output_fields, Q);
191e15f9bd0SJeremy L Thompson   CeedChkBackend(ierr);
192aedaa0e5Sjeremylt 
19316911fdaSjeremylt   // Identity QFunctions
194*d1d35e2fSjeremylt   if (impl->identity_qf) {
195*d1d35e2fSjeremylt     CeedEvalMode in_mode, out_mode;
196*d1d35e2fSjeremylt     CeedQFunctionField *in_fields, *out_fields;
197*d1d35e2fSjeremylt     ierr = CeedQFunctionGetFields(qf, &in_fields, &out_fields);
198e15f9bd0SJeremy L Thompson     CeedChkBackend(ierr);
19916911fdaSjeremylt 
200*d1d35e2fSjeremylt     for (CeedInt i=0; i<num_input_fields; i++) {
201*d1d35e2fSjeremylt       ierr = CeedQFunctionFieldGetEvalMode(in_fields[i], &in_mode);
202*d1d35e2fSjeremylt       CeedChkBackend(ierr);
203*d1d35e2fSjeremylt       ierr = CeedQFunctionFieldGetEvalMode(out_fields[i], &out_mode);
204*d1d35e2fSjeremylt       CeedChkBackend(ierr);
205*d1d35e2fSjeremylt 
206*d1d35e2fSjeremylt       ierr = CeedVectorDestroy(&impl->q_vecs_out[i]); CeedChkBackend(ierr);
207*d1d35e2fSjeremylt       impl->q_vecs_out[i] = impl->q_vecs_in[i];
208*d1d35e2fSjeremylt       ierr = CeedVectorAddReference(impl->q_vecs_in[i]); CeedChkBackend(ierr);
20916911fdaSjeremylt     }
21016911fdaSjeremylt   }
21116911fdaSjeremylt 
212e15f9bd0SJeremy L Thompson   ierr = CeedOperatorSetSetupDone(op); CeedChkBackend(ierr);
2134a2e7687Sjeremylt 
214e15f9bd0SJeremy L Thompson   return CEED_ERROR_SUCCESS;
2154a2e7687Sjeremylt }
2164a2e7687Sjeremylt 
217f10650afSjeremylt //------------------------------------------------------------------------------
218f10650afSjeremylt // Setup Operator Inputs
219f10650afSjeremylt //------------------------------------------------------------------------------
220*d1d35e2fSjeremylt static inline int CeedOperatorSetupInputs_Blocked(CeedInt num_input_fields,
221*d1d35e2fSjeremylt     CeedQFunctionField *qf_input_fields, CeedOperatorField *op_input_fields,
222*d1d35e2fSjeremylt     CeedVector in_vec, bool skip_active, CeedOperator_Blocked *impl,
22389c6efa4Sjeremylt     CeedRequest *request) {
2241d102b48SJeremy L Thompson   CeedInt ierr;
225*d1d35e2fSjeremylt   CeedEvalMode eval_mode;
226d1bcdac9Sjeremylt   CeedVector vec;
22716c359e6Sjeremylt   uint64_t state;
2284a2e7687Sjeremylt 
229*d1d35e2fSjeremylt   for (CeedInt i=0; i<num_input_fields; i++) {
2301d102b48SJeremy L Thompson     // Get input vector
231*d1d35e2fSjeremylt     ierr = CeedOperatorFieldGetVector(op_input_fields[i], &vec);
232*d1d35e2fSjeremylt     CeedChkBackend(ierr);
2331d102b48SJeremy L Thompson     if (vec == CEED_VECTOR_ACTIVE) {
234*d1d35e2fSjeremylt       if (skip_active)
2351d102b48SJeremy L Thompson         continue;
2361d102b48SJeremy L Thompson       else
237*d1d35e2fSjeremylt         vec = in_vec;
2381d102b48SJeremy L Thompson     }
2391d102b48SJeremy L Thompson 
240*d1d35e2fSjeremylt     ierr = CeedQFunctionFieldGetEvalMode(qf_input_fields[i], &eval_mode);
241e15f9bd0SJeremy L Thompson     CeedChkBackend(ierr);
242*d1d35e2fSjeremylt     if (eval_mode == CEED_EVAL_WEIGHT) { // Skip
2434a2e7687Sjeremylt     } else {
2444a2e7687Sjeremylt       // Restrict
245e15f9bd0SJeremy L Thompson       ierr = CeedVectorGetState(vec, &state); CeedChkBackend(ierr);
246*d1d35e2fSjeremylt       if (state != impl->input_state[i] || vec == in_vec) {
247*d1d35e2fSjeremylt         ierr = CeedElemRestrictionApply(impl->blk_restr[i], CEED_NOTRANSPOSE,
248*d1d35e2fSjeremylt                                         vec, impl->e_vecs[i], request);
249e15f9bd0SJeremy L Thompson         CeedChkBackend(ierr);
250*d1d35e2fSjeremylt         impl->input_state[i] = state;
25116c359e6Sjeremylt       }
2524a2e7687Sjeremylt       // Get evec
253*d1d35e2fSjeremylt       ierr = CeedVectorGetArrayRead(impl->e_vecs[i], CEED_MEM_HOST,
254*d1d35e2fSjeremylt                                     (const CeedScalar **) &impl->e_data[i]);
255e15f9bd0SJeremy L Thompson       CeedChkBackend(ierr);
2564a2e7687Sjeremylt     }
2574a2e7687Sjeremylt   }
258e15f9bd0SJeremy L Thompson   return CEED_ERROR_SUCCESS;
2594a2e7687Sjeremylt }
2604a2e7687Sjeremylt 
261f10650afSjeremylt //------------------------------------------------------------------------------
262f10650afSjeremylt // Input Basis Action
263f10650afSjeremylt //------------------------------------------------------------------------------
2641d102b48SJeremy L Thompson static inline int CeedOperatorInputBasis_Blocked(CeedInt e, CeedInt Q,
265*d1d35e2fSjeremylt     CeedQFunctionField *qf_input_fields, CeedOperatorField *op_input_fields,
266*d1d35e2fSjeremylt     CeedInt num_input_fields, CeedInt blk_size, bool skip_active,
2671d102b48SJeremy L Thompson     CeedOperator_Blocked *impl) {
2681d102b48SJeremy L Thompson   CeedInt ierr;
269*d1d35e2fSjeremylt   CeedInt dim, elem_size, size;
270*d1d35e2fSjeremylt   CeedElemRestriction elem_restr;
271*d1d35e2fSjeremylt   CeedEvalMode eval_mode;
2721d102b48SJeremy L Thompson   CeedBasis basis;
2731d102b48SJeremy L Thompson 
274*d1d35e2fSjeremylt   for (CeedInt i=0; i<num_input_fields; i++) {
2751d102b48SJeremy L Thompson     // Skip active input
276*d1d35e2fSjeremylt     if (skip_active) {
2771d102b48SJeremy L Thompson       CeedVector vec;
278*d1d35e2fSjeremylt       ierr = CeedOperatorFieldGetVector(op_input_fields[i], &vec);
279*d1d35e2fSjeremylt       CeedChkBackend(ierr);
2801d102b48SJeremy L Thompson       if (vec == CEED_VECTOR_ACTIVE)
2811d102b48SJeremy L Thompson         continue;
2821d102b48SJeremy L Thompson     }
2831d102b48SJeremy L Thompson 
284*d1d35e2fSjeremylt     // Get elem_size, eval_mode, size
285*d1d35e2fSjeremylt     ierr = CeedOperatorFieldGetElemRestriction(op_input_fields[i], &elem_restr);
286e15f9bd0SJeremy L Thompson     CeedChkBackend(ierr);
287*d1d35e2fSjeremylt     ierr = CeedElemRestrictionGetElementSize(elem_restr, &elem_size);
288e15f9bd0SJeremy L Thompson     CeedChkBackend(ierr);
289*d1d35e2fSjeremylt     ierr = CeedQFunctionFieldGetEvalMode(qf_input_fields[i], &eval_mode);
290e15f9bd0SJeremy L Thompson     CeedChkBackend(ierr);
291*d1d35e2fSjeremylt     ierr = CeedQFunctionFieldGetSize(qf_input_fields[i], &size);
292*d1d35e2fSjeremylt     CeedChkBackend(ierr);
2934a2e7687Sjeremylt     // Basis action
294*d1d35e2fSjeremylt     switch(eval_mode) {
2954a2e7687Sjeremylt     case CEED_EVAL_NONE:
296*d1d35e2fSjeremylt       ierr = CeedVectorSetArray(impl->q_vecs_in[i], CEED_MEM_HOST,
297*d1d35e2fSjeremylt                                 CEED_USE_POINTER, &impl->e_data[i][e*Q*size]); CeedChkBackend(ierr);
2984a2e7687Sjeremylt       break;
2994a2e7687Sjeremylt     case CEED_EVAL_INTERP:
300*d1d35e2fSjeremylt       ierr = CeedOperatorFieldGetBasis(op_input_fields[i], &basis);
301e15f9bd0SJeremy L Thompson       CeedChkBackend(ierr);
302*d1d35e2fSjeremylt       ierr = CeedVectorSetArray(impl->e_vecs_in[i], CEED_MEM_HOST,
303*d1d35e2fSjeremylt                                 CEED_USE_POINTER, &impl->e_data[i][e*elem_size*size]);
304e15f9bd0SJeremy L Thompson       CeedChkBackend(ierr);
305*d1d35e2fSjeremylt       ierr = CeedBasisApply(basis, blk_size, CEED_NOTRANSPOSE,
306*d1d35e2fSjeremylt                             CEED_EVAL_INTERP, impl->e_vecs_in[i],
307*d1d35e2fSjeremylt                             impl->q_vecs_in[i]); CeedChkBackend(ierr);
3084a2e7687Sjeremylt       break;
3094a2e7687Sjeremylt     case CEED_EVAL_GRAD:
310*d1d35e2fSjeremylt       ierr = CeedOperatorFieldGetBasis(op_input_fields[i], &basis);
311e15f9bd0SJeremy L Thompson       CeedChkBackend(ierr);
312e15f9bd0SJeremy L Thompson       ierr = CeedBasisGetDimension(basis, &dim); CeedChkBackend(ierr);
313*d1d35e2fSjeremylt       ierr = CeedVectorSetArray(impl->e_vecs_in[i], CEED_MEM_HOST,
314*d1d35e2fSjeremylt                                 CEED_USE_POINTER, &impl->e_data[i][e*elem_size*size/dim]);
315e15f9bd0SJeremy L Thompson       CeedChkBackend(ierr);
316*d1d35e2fSjeremylt       ierr = CeedBasisApply(basis, blk_size, CEED_NOTRANSPOSE,
317*d1d35e2fSjeremylt                             CEED_EVAL_GRAD, impl->e_vecs_in[i],
318*d1d35e2fSjeremylt                             impl->q_vecs_in[i]); CeedChkBackend(ierr);
3194a2e7687Sjeremylt       break;
3204a2e7687Sjeremylt     case CEED_EVAL_WEIGHT:
3214a2e7687Sjeremylt       break;  // No action
322bbfacfcdSjeremylt     // LCOV_EXCL_START
3234a2e7687Sjeremylt     case CEED_EVAL_DIV:
3241d102b48SJeremy L Thompson     case CEED_EVAL_CURL: {
325*d1d35e2fSjeremylt       ierr = CeedOperatorFieldGetBasis(op_input_fields[i], &basis);
326e15f9bd0SJeremy L Thompson       CeedChkBackend(ierr);
3271d102b48SJeremy L Thompson       Ceed ceed;
328e15f9bd0SJeremy L Thompson       ierr = CeedBasisGetCeed(basis, &ceed); CeedChkBackend(ierr);
329e15f9bd0SJeremy L Thompson       return CeedError(ceed, CEED_ERROR_BACKEND,
330e15f9bd0SJeremy L Thompson                        "Ceed evaluation mode not implemented");
331bbfacfcdSjeremylt       // LCOV_EXCL_STOP
3324a2e7687Sjeremylt     }
3334a2e7687Sjeremylt     }
33489c6efa4Sjeremylt   }
335e15f9bd0SJeremy L Thompson   return CEED_ERROR_SUCCESS;
33689c6efa4Sjeremylt }
3374a2e7687Sjeremylt 
338f10650afSjeremylt //------------------------------------------------------------------------------
339f10650afSjeremylt // Output Basis Action
340f10650afSjeremylt //------------------------------------------------------------------------------
3411d102b48SJeremy L Thompson static inline int CeedOperatorOutputBasis_Blocked(CeedInt e, CeedInt Q,
342*d1d35e2fSjeremylt     CeedQFunctionField *qf_output_fields, CeedOperatorField *op_output_fields,
343*d1d35e2fSjeremylt     CeedInt blk_size, CeedInt num_input_fields, CeedInt num_output_fields,
3441d102b48SJeremy L Thompson     CeedOperator op, CeedOperator_Blocked *impl) {
3451d102b48SJeremy L Thompson   CeedInt ierr;
346*d1d35e2fSjeremylt   CeedInt dim, elem_size, size;
347*d1d35e2fSjeremylt   CeedElemRestriction elem_restr;
348*d1d35e2fSjeremylt   CeedEvalMode eval_mode;
3491d102b48SJeremy L Thompson   CeedBasis basis;
3501d102b48SJeremy L Thompson 
351*d1d35e2fSjeremylt   for (CeedInt i=0; i<num_output_fields; i++) {
352*d1d35e2fSjeremylt     // Get elem_size, eval_mode, size
353*d1d35e2fSjeremylt     ierr = CeedOperatorFieldGetElemRestriction(op_output_fields[i], &elem_restr);
354e15f9bd0SJeremy L Thompson     CeedChkBackend(ierr);
355*d1d35e2fSjeremylt     ierr = CeedElemRestrictionGetElementSize(elem_restr, &elem_size);
356e15f9bd0SJeremy L Thompson     CeedChkBackend(ierr);
357*d1d35e2fSjeremylt     ierr = CeedQFunctionFieldGetEvalMode(qf_output_fields[i], &eval_mode);
358e15f9bd0SJeremy L Thompson     CeedChkBackend(ierr);
359*d1d35e2fSjeremylt     ierr = CeedQFunctionFieldGetSize(qf_output_fields[i], &size);
360e15f9bd0SJeremy L Thompson     CeedChkBackend(ierr);
3614a2e7687Sjeremylt     // Basis action
362*d1d35e2fSjeremylt     switch(eval_mode) {
3634a2e7687Sjeremylt     case CEED_EVAL_NONE:
3644a2e7687Sjeremylt       break; // No action
3654a2e7687Sjeremylt     case CEED_EVAL_INTERP:
366*d1d35e2fSjeremylt       ierr = CeedOperatorFieldGetBasis(op_output_fields[i], &basis);
367e15f9bd0SJeremy L Thompson       CeedChkBackend(ierr);
368*d1d35e2fSjeremylt       ierr = CeedVectorSetArray(impl->e_vecs_out[i], CEED_MEM_HOST,
369*d1d35e2fSjeremylt                                 CEED_USE_POINTER, &impl->e_data[i + num_input_fields][e*elem_size*size]);
370e15f9bd0SJeremy L Thompson       CeedChkBackend(ierr);
371*d1d35e2fSjeremylt       ierr = CeedBasisApply(basis, blk_size, CEED_TRANSPOSE,
372*d1d35e2fSjeremylt                             CEED_EVAL_INTERP, impl->q_vecs_out[i],
373*d1d35e2fSjeremylt                             impl->e_vecs_out[i]); CeedChkBackend(ierr);
3744a2e7687Sjeremylt       break;
3754a2e7687Sjeremylt     case CEED_EVAL_GRAD:
376*d1d35e2fSjeremylt       ierr = CeedOperatorFieldGetBasis(op_output_fields[i], &basis);
377e15f9bd0SJeremy L Thompson       CeedChkBackend(ierr);
378e15f9bd0SJeremy L Thompson       ierr = CeedBasisGetDimension(basis, &dim); CeedChkBackend(ierr);
379*d1d35e2fSjeremylt       ierr = CeedVectorSetArray(impl->e_vecs_out[i], CEED_MEM_HOST,
380*d1d35e2fSjeremylt                                 CEED_USE_POINTER, &impl->e_data[i + num_input_fields][e*elem_size*size/dim]);
381e15f9bd0SJeremy L Thompson       CeedChkBackend(ierr);
382*d1d35e2fSjeremylt       ierr = CeedBasisApply(basis, blk_size, CEED_TRANSPOSE,
383*d1d35e2fSjeremylt                             CEED_EVAL_GRAD, impl->q_vecs_out[i],
384*d1d35e2fSjeremylt                             impl->e_vecs_out[i]); CeedChkBackend(ierr);
3854a2e7687Sjeremylt       break;
386c042f62fSJeremy L Thompson     // LCOV_EXCL_START
387bbfacfcdSjeremylt     case CEED_EVAL_WEIGHT: {
3884ce2993fSjeremylt       Ceed ceed;
389e15f9bd0SJeremy L Thompson       ierr = CeedOperatorGetCeed(op, &ceed); CeedChkBackend(ierr);
390e15f9bd0SJeremy L Thompson       return CeedError(ceed, CEED_ERROR_BACKEND,
391e15f9bd0SJeremy L Thompson                        "CEED_EVAL_WEIGHT cannot be an output "
3921d102b48SJeremy L Thompson                        "evaluation mode");
3934ce2993fSjeremylt     }
3944a2e7687Sjeremylt     case CEED_EVAL_DIV:
3951d102b48SJeremy L Thompson     case CEED_EVAL_CURL: {
3961d102b48SJeremy L Thompson       Ceed ceed;
397e15f9bd0SJeremy L Thompson       ierr = CeedOperatorGetCeed(op, &ceed); CeedChkBackend(ierr);
398e15f9bd0SJeremy L Thompson       return CeedError(ceed, CEED_ERROR_BACKEND,
399e15f9bd0SJeremy L Thompson                        "Ceed evaluation mode not implemented");
400bbfacfcdSjeremylt       // LCOV_EXCL_STOP
4014a2e7687Sjeremylt     }
40289c6efa4Sjeremylt     }
40389c6efa4Sjeremylt   }
404e15f9bd0SJeremy L Thompson   return CEED_ERROR_SUCCESS;
4051d102b48SJeremy L Thompson }
4061d102b48SJeremy L Thompson 
407f10650afSjeremylt //------------------------------------------------------------------------------
408f10650afSjeremylt // Restore Input Vectors
409f10650afSjeremylt //------------------------------------------------------------------------------
410*d1d35e2fSjeremylt static inline int CeedOperatorRestoreInputs_Blocked(CeedInt num_input_fields,
411*d1d35e2fSjeremylt     CeedQFunctionField *qf_input_fields, CeedOperatorField *op_input_fields,
412*d1d35e2fSjeremylt     bool skip_active, CeedOperator_Blocked *impl) {
4131d102b48SJeremy L Thompson   CeedInt ierr;
414*d1d35e2fSjeremylt   CeedEvalMode eval_mode;
4151d102b48SJeremy L Thompson 
416*d1d35e2fSjeremylt   for (CeedInt i=0; i<num_input_fields; i++) {
4171d102b48SJeremy L Thompson     // Skip active inputs
418*d1d35e2fSjeremylt     if (skip_active) {
4191d102b48SJeremy L Thompson       CeedVector vec;
420*d1d35e2fSjeremylt       ierr = CeedOperatorFieldGetVector(op_input_fields[i], &vec);
421*d1d35e2fSjeremylt       CeedChkBackend(ierr);
4221d102b48SJeremy L Thompson       if (vec == CEED_VECTOR_ACTIVE)
4231d102b48SJeremy L Thompson         continue;
4241d102b48SJeremy L Thompson     }
425*d1d35e2fSjeremylt     ierr = CeedQFunctionFieldGetEvalMode(qf_input_fields[i], &eval_mode);
426e15f9bd0SJeremy L Thompson     CeedChkBackend(ierr);
427*d1d35e2fSjeremylt     if (eval_mode == CEED_EVAL_WEIGHT) { // Skip
4281d102b48SJeremy L Thompson     } else {
429*d1d35e2fSjeremylt       ierr = CeedVectorRestoreArrayRead(impl->e_vecs[i],
430*d1d35e2fSjeremylt                                         (const CeedScalar **) &impl->e_data[i]);
431e15f9bd0SJeremy L Thompson       CeedChkBackend(ierr);
4321d102b48SJeremy L Thompson     }
4331d102b48SJeremy L Thompson   }
434e15f9bd0SJeremy L Thompson   return CEED_ERROR_SUCCESS;
4351d102b48SJeremy L Thompson }
4361d102b48SJeremy L Thompson 
437f10650afSjeremylt //------------------------------------------------------------------------------
438f10650afSjeremylt // Operator Apply
439f10650afSjeremylt //------------------------------------------------------------------------------
440*d1d35e2fSjeremylt static int CeedOperatorApplyAdd_Blocked(CeedOperator op, CeedVector in_vec,
441*d1d35e2fSjeremylt                                         CeedVector out_vec,
4421d102b48SJeremy L Thompson                                         CeedRequest *request) {
4431d102b48SJeremy L Thompson   int ierr;
4441d102b48SJeremy L Thompson   CeedOperator_Blocked *impl;
445e15f9bd0SJeremy L Thompson   ierr = CeedOperatorGetData(op, &impl); CeedChkBackend(ierr);
446*d1d35e2fSjeremylt   const CeedInt blk_size = 8;
447*d1d35e2fSjeremylt   CeedInt Q, num_input_fields, num_output_fields, num_elem, size;
448*d1d35e2fSjeremylt   ierr = CeedOperatorGetNumElements(op, &num_elem); CeedChkBackend(ierr);
449e15f9bd0SJeremy L Thompson   ierr = CeedOperatorGetNumQuadraturePoints(op, &Q); CeedChkBackend(ierr);
450*d1d35e2fSjeremylt   CeedInt num_blks = (num_elem/blk_size) + !!(num_elem%blk_size);
4511d102b48SJeremy L Thompson   CeedQFunction qf;
452e15f9bd0SJeremy L Thompson   ierr = CeedOperatorGetQFunction(op, &qf); CeedChkBackend(ierr);
453*d1d35e2fSjeremylt   ierr= CeedQFunctionGetNumArgs(qf, &num_input_fields, &num_output_fields);
454e15f9bd0SJeremy L Thompson   CeedChkBackend(ierr);
455*d1d35e2fSjeremylt   CeedOperatorField *op_input_fields, *op_output_fields;
456*d1d35e2fSjeremylt   ierr = CeedOperatorGetFields(op, &op_input_fields, &op_output_fields);
457e15f9bd0SJeremy L Thompson   CeedChkBackend(ierr);
458*d1d35e2fSjeremylt   CeedQFunctionField *qf_input_fields, *qf_output_fields;
459*d1d35e2fSjeremylt   ierr = CeedQFunctionGetFields(qf, &qf_input_fields, &qf_output_fields);
460e15f9bd0SJeremy L Thompson   CeedChkBackend(ierr);
461*d1d35e2fSjeremylt   CeedEvalMode eval_mode;
4621d102b48SJeremy L Thompson   CeedVector vec;
4631d102b48SJeremy L Thompson 
4641d102b48SJeremy L Thompson   // Setup
465e15f9bd0SJeremy L Thompson   ierr = CeedOperatorSetup_Blocked(op); CeedChkBackend(ierr);
4661d102b48SJeremy L Thompson 
4671d102b48SJeremy L Thompson   // Input Evecs and Restriction
468*d1d35e2fSjeremylt   ierr = CeedOperatorSetupInputs_Blocked(num_input_fields, qf_input_fields,
469*d1d35e2fSjeremylt                                          op_input_fields, in_vec, false, impl,
470e15f9bd0SJeremy L Thompson                                          request); CeedChkBackend(ierr);
4711d102b48SJeremy L Thompson 
4721d102b48SJeremy L Thompson   // Output Evecs
473*d1d35e2fSjeremylt   for (CeedInt i=0; i<num_output_fields; i++) {
474*d1d35e2fSjeremylt     ierr = CeedVectorGetArray(impl->e_vecs[i+impl->num_e_vecs_in], CEED_MEM_HOST,
475*d1d35e2fSjeremylt                               &impl->e_data[i + num_input_fields]); CeedChkBackend(ierr);
4761d102b48SJeremy L Thompson   }
4771d102b48SJeremy L Thompson 
4781d102b48SJeremy L Thompson   // Loop through elements
479*d1d35e2fSjeremylt   for (CeedInt e=0; e<num_blks*blk_size; e+=blk_size) {
4801d102b48SJeremy L Thompson     // Output pointers
481*d1d35e2fSjeremylt     for (CeedInt i=0; i<num_output_fields; i++) {
482*d1d35e2fSjeremylt       ierr = CeedQFunctionFieldGetEvalMode(qf_output_fields[i], &eval_mode);
483e15f9bd0SJeremy L Thompson       CeedChkBackend(ierr);
484*d1d35e2fSjeremylt       if (eval_mode == CEED_EVAL_NONE) {
485*d1d35e2fSjeremylt         ierr = CeedQFunctionFieldGetSize(qf_output_fields[i], &size);
486e15f9bd0SJeremy L Thompson         CeedChkBackend(ierr);
487*d1d35e2fSjeremylt         ierr = CeedVectorSetArray(impl->q_vecs_out[i], CEED_MEM_HOST,
488*d1d35e2fSjeremylt                                   CEED_USE_POINTER, &impl->e_data[i + num_input_fields][e*Q*size]);
489e15f9bd0SJeremy L Thompson         CeedChkBackend(ierr);
4901d102b48SJeremy L Thompson       }
4911d102b48SJeremy L Thompson     }
4921d102b48SJeremy L Thompson 
49316911fdaSjeremylt     // Input basis apply
494*d1d35e2fSjeremylt     ierr = CeedOperatorInputBasis_Blocked(e, Q, qf_input_fields, op_input_fields,
495*d1d35e2fSjeremylt                                           num_input_fields, blk_size, false, impl);
496e15f9bd0SJeremy L Thompson     CeedChkBackend(ierr);
49716911fdaSjeremylt 
4981d102b48SJeremy L Thompson     // Q function
499*d1d35e2fSjeremylt     if (!impl->identity_qf) {
500*d1d35e2fSjeremylt       ierr = CeedQFunctionApply(qf, Q*blk_size, impl->q_vecs_in, impl->q_vecs_out);
501e15f9bd0SJeremy L Thompson       CeedChkBackend(ierr);
50216911fdaSjeremylt     }
5031d102b48SJeremy L Thompson 
5041d102b48SJeremy L Thompson     // Output basis apply
505*d1d35e2fSjeremylt     ierr = CeedOperatorOutputBasis_Blocked(e, Q, qf_output_fields, op_output_fields,
506*d1d35e2fSjeremylt                                            blk_size, num_input_fields,
507*d1d35e2fSjeremylt                                            num_output_fields, op, impl);
508e15f9bd0SJeremy L Thompson     CeedChkBackend(ierr);
5091d102b48SJeremy L Thompson   }
51089c6efa4Sjeremylt 
51189c6efa4Sjeremylt   // Output restriction
512*d1d35e2fSjeremylt   for (CeedInt i=0; i<num_output_fields; i++) {
51389c6efa4Sjeremylt     // Restore evec
514*d1d35e2fSjeremylt     ierr = CeedVectorRestoreArray(impl->e_vecs[i+impl->num_e_vecs_in],
515*d1d35e2fSjeremylt                                   &impl->e_data[i + num_input_fields]); CeedChkBackend(ierr);
516d1bcdac9Sjeremylt     // Get output vector
517*d1d35e2fSjeremylt     ierr = CeedOperatorFieldGetVector(op_output_fields[i], &vec);
518e15f9bd0SJeremy L Thompson     CeedChkBackend(ierr);
51989c6efa4Sjeremylt     // Active
520d1bcdac9Sjeremylt     if (vec == CEED_VECTOR_ACTIVE)
521*d1d35e2fSjeremylt       vec = out_vec;
5224a2e7687Sjeremylt     // Restrict
523*d1d35e2fSjeremylt     ierr = CeedElemRestrictionApply(impl->blk_restr[i+impl->num_e_vecs_in],
524*d1d35e2fSjeremylt                                     CEED_TRANSPOSE, impl->e_vecs[i+impl->num_e_vecs_in],
525e15f9bd0SJeremy L Thompson                                     vec, request); CeedChkBackend(ierr);
52689c6efa4Sjeremylt 
5274a2e7687Sjeremylt   }
5284a2e7687Sjeremylt 
5294a2e7687Sjeremylt   // Restore input arrays
530*d1d35e2fSjeremylt   ierr = CeedOperatorRestoreInputs_Blocked(num_input_fields, qf_input_fields,
531*d1d35e2fSjeremylt          op_input_fields, false, impl); CeedChkBackend(ierr);
5321d102b48SJeremy L Thompson 
533e15f9bd0SJeremy L Thompson   return CEED_ERROR_SUCCESS;
5341d102b48SJeremy L Thompson }
5351d102b48SJeremy L Thompson 
536f10650afSjeremylt //------------------------------------------------------------------------------
5371d102b48SJeremy L Thompson // Assemble Linear QFunction
538f10650afSjeremylt //------------------------------------------------------------------------------
53980ac2e43SJeremy L Thompson static int CeedOperatorLinearAssembleQFunction_Blocked(CeedOperator op,
5401d102b48SJeremy L Thompson     CeedVector *assembled, CeedElemRestriction *rstr, CeedRequest *request) {
5411d102b48SJeremy L Thompson   int ierr;
5421d102b48SJeremy L Thompson   CeedOperator_Blocked *impl;
543e15f9bd0SJeremy L Thompson   ierr = CeedOperatorGetData(op, &impl); CeedChkBackend(ierr);
544*d1d35e2fSjeremylt   const CeedInt blk_size = 8;
545*d1d35e2fSjeremylt   CeedInt Q, num_input_fields, num_output_fields, num_elem, size;
546*d1d35e2fSjeremylt   ierr = CeedOperatorGetNumElements(op, &num_elem); CeedChkBackend(ierr);
547e15f9bd0SJeremy L Thompson   ierr = CeedOperatorGetNumQuadraturePoints(op, &Q); CeedChkBackend(ierr);
548*d1d35e2fSjeremylt   CeedInt num_blks = (num_elem/blk_size) + !!(num_elem%blk_size);
5491d102b48SJeremy L Thompson   CeedQFunction qf;
550e15f9bd0SJeremy L Thompson   ierr = CeedOperatorGetQFunction(op, &qf); CeedChkBackend(ierr);
551*d1d35e2fSjeremylt   ierr= CeedQFunctionGetNumArgs(qf, &num_input_fields, &num_output_fields);
552e15f9bd0SJeremy L Thompson   CeedChkBackend(ierr);
553*d1d35e2fSjeremylt   CeedOperatorField *op_input_fields, *op_output_fields;
554*d1d35e2fSjeremylt   ierr = CeedOperatorGetFields(op, &op_input_fields, &op_output_fields);
555e15f9bd0SJeremy L Thompson   CeedChkBackend(ierr);
556*d1d35e2fSjeremylt   CeedQFunctionField *qf_input_fields, *qf_output_fields;
557*d1d35e2fSjeremylt   ierr = CeedQFunctionGetFields(qf, &qf_input_fields, &qf_output_fields);
558e15f9bd0SJeremy L Thompson   CeedChkBackend(ierr);
5591d102b48SJeremy L Thompson   CeedVector vec, lvec;
560*d1d35e2fSjeremylt   CeedInt num_active_in = 0, num_active_out = 0;
561*d1d35e2fSjeremylt   CeedVector *active_in = NULL;
5621d102b48SJeremy L Thompson   CeedScalar *a, *tmp;
5631d102b48SJeremy L Thompson   Ceed ceed;
564e15f9bd0SJeremy L Thompson   ierr = CeedOperatorGetCeed(op, &ceed); CeedChkBackend(ierr);
5651d102b48SJeremy L Thompson 
5661d102b48SJeremy L Thompson   // Setup
567e15f9bd0SJeremy L Thompson   ierr = CeedOperatorSetup_Blocked(op); CeedChkBackend(ierr);
5681d102b48SJeremy L Thompson 
56916911fdaSjeremylt   // Check for identity
570*d1d35e2fSjeremylt   if (impl->identity_qf)
57116911fdaSjeremylt     // LCOV_EXCL_START
572e15f9bd0SJeremy L Thompson     return CeedError(ceed, CEED_ERROR_BACKEND,
573*d1d35e2fSjeremylt                      "Assembling identity QFunctions not supported");
57416911fdaSjeremylt   // LCOV_EXCL_STOP
57516911fdaSjeremylt 
5761d102b48SJeremy L Thompson   // Input Evecs and Restriction
577*d1d35e2fSjeremylt   ierr = CeedOperatorSetupInputs_Blocked(num_input_fields, qf_input_fields,
578*d1d35e2fSjeremylt                                          op_input_fields, NULL, true, impl,
579e15f9bd0SJeremy L Thompson                                          request); CeedChkBackend(ierr);
5801d102b48SJeremy L Thompson 
5811d102b48SJeremy L Thompson   // Count number of active input fields
582*d1d35e2fSjeremylt   for (CeedInt i=0; i<num_input_fields; i++) {
5831d102b48SJeremy L Thompson     // Get input vector
584*d1d35e2fSjeremylt     ierr = CeedOperatorFieldGetVector(op_input_fields[i], &vec);
585*d1d35e2fSjeremylt     CeedChkBackend(ierr);
5861d102b48SJeremy L Thompson     // Check if active input
5871d102b48SJeremy L Thompson     if (vec == CEED_VECTOR_ACTIVE) {
588*d1d35e2fSjeremylt       ierr = CeedQFunctionFieldGetSize(qf_input_fields[i], &size);
589e15f9bd0SJeremy L Thompson       CeedChkBackend(ierr);
590*d1d35e2fSjeremylt       ierr = CeedVectorSetValue(impl->q_vecs_in[i], 0.0); CeedChkBackend(ierr);
591*d1d35e2fSjeremylt       ierr = CeedVectorGetArray(impl->q_vecs_in[i], CEED_MEM_HOST, &tmp);
592*d1d35e2fSjeremylt       CeedChkBackend(ierr);
593*d1d35e2fSjeremylt       ierr = CeedRealloc(num_active_in + size, &active_in); CeedChkBackend(ierr);
5941d102b48SJeremy L Thompson       for (CeedInt field=0; field<size; field++) {
595*d1d35e2fSjeremylt         ierr = CeedVectorCreate(ceed, Q*blk_size, &active_in[num_active_in+field]);
596e15f9bd0SJeremy L Thompson         CeedChkBackend(ierr);
597*d1d35e2fSjeremylt         ierr = CeedVectorSetArray(active_in[num_active_in+field], CEED_MEM_HOST,
598*d1d35e2fSjeremylt                                   CEED_USE_POINTER, &tmp[field*Q*blk_size]);
599e15f9bd0SJeremy L Thompson         CeedChkBackend(ierr);
6001d102b48SJeremy L Thompson       }
601*d1d35e2fSjeremylt       num_active_in += size;
602*d1d35e2fSjeremylt       ierr = CeedVectorRestoreArray(impl->q_vecs_in[i], &tmp); CeedChkBackend(ierr);
6031d102b48SJeremy L Thompson     }
6041d102b48SJeremy L Thompson   }
6051d102b48SJeremy L Thompson 
6061d102b48SJeremy L Thompson   // Count number of active output fields
607*d1d35e2fSjeremylt   for (CeedInt i=0; i<num_output_fields; i++) {
6081d102b48SJeremy L Thompson     // Get output vector
609*d1d35e2fSjeremylt     ierr = CeedOperatorFieldGetVector(op_output_fields[i], &vec);
610e15f9bd0SJeremy L Thompson     CeedChkBackend(ierr);
6111d102b48SJeremy L Thompson     // Check if active output
6121d102b48SJeremy L Thompson     if (vec == CEED_VECTOR_ACTIVE) {
613*d1d35e2fSjeremylt       ierr = CeedQFunctionFieldGetSize(qf_output_fields[i], &size);
614e15f9bd0SJeremy L Thompson       CeedChkBackend(ierr);
615*d1d35e2fSjeremylt       num_active_out += size;
6161d102b48SJeremy L Thompson     }
6171d102b48SJeremy L Thompson   }
6181d102b48SJeremy L Thompson 
6191d102b48SJeremy L Thompson   // Check sizes
620*d1d35e2fSjeremylt   if (!num_active_in || !num_active_out)
6211d102b48SJeremy L Thompson     // LCOV_EXCL_START
622e15f9bd0SJeremy L Thompson     return CeedError(ceed, CEED_ERROR_BACKEND,
623e15f9bd0SJeremy L Thompson                      "Cannot assemble QFunction without active inputs "
6241d102b48SJeremy L Thompson                      "and outputs");
6251d102b48SJeremy L Thompson   // LCOV_EXCL_STOP
6261d102b48SJeremy L Thompson 
627*d1d35e2fSjeremylt   // Setup Lvec
628*d1d35e2fSjeremylt   ierr = CeedVectorCreate(ceed, num_blks*blk_size*Q*num_active_in*num_active_out,
629e15f9bd0SJeremy L Thompson                           &lvec); CeedChkBackend(ierr);
630e15f9bd0SJeremy L Thompson   ierr = CeedVectorGetArray(lvec, CEED_MEM_HOST, &a); CeedChkBackend(ierr);
6311d102b48SJeremy L Thompson 
6321d102b48SJeremy L Thompson   // Create output restriction
633*d1d35e2fSjeremylt   CeedInt strides[3] = {1, Q, num_active_in *num_active_out*Q};
634*d1d35e2fSjeremylt   ierr = CeedElemRestrictionCreateStrided(ceed, num_elem, Q,
635*d1d35e2fSjeremylt                                           num_active_in*num_active_out,
636*d1d35e2fSjeremylt                                           num_active_in*num_active_out*num_elem*Q,
637e15f9bd0SJeremy L Thompson                                           strides, rstr); CeedChkBackend(ierr);
6381d102b48SJeremy L Thompson   // Create assembled vector
639*d1d35e2fSjeremylt   ierr = CeedVectorCreate(ceed, num_elem*Q*num_active_in*num_active_out,
640e15f9bd0SJeremy L Thompson                           assembled); CeedChkBackend(ierr);
6411d102b48SJeremy L Thompson 
6421d102b48SJeremy L Thompson   // Loop through elements
643*d1d35e2fSjeremylt   for (CeedInt e=0; e<num_blks*blk_size; e+=blk_size) {
6441d102b48SJeremy L Thompson     // Input basis apply
645*d1d35e2fSjeremylt     ierr = CeedOperatorInputBasis_Blocked(e, Q, qf_input_fields, op_input_fields,
646*d1d35e2fSjeremylt                                           num_input_fields, blk_size, true, impl);
647e15f9bd0SJeremy L Thompson     CeedChkBackend(ierr);
6481d102b48SJeremy L Thompson 
6491d102b48SJeremy L Thompson     // Assemble QFunction
650*d1d35e2fSjeremylt     for (CeedInt in=0; in<num_active_in; in++) {
6511d102b48SJeremy L Thompson       // Set Inputs
652*d1d35e2fSjeremylt       ierr = CeedVectorSetValue(active_in[in], 1.0); CeedChkBackend(ierr);
653*d1d35e2fSjeremylt       if (num_active_in > 1) {
654*d1d35e2fSjeremylt         ierr = CeedVectorSetValue(active_in[(in+num_active_in-1)%num_active_in],
655e15f9bd0SJeremy L Thompson                                   0.0); CeedChkBackend(ierr);
65642ea3801Sjeremylt       }
6571d102b48SJeremy L Thompson       // Set Outputs
658*d1d35e2fSjeremylt       for (CeedInt out=0; out<num_output_fields; out++) {
6591d102b48SJeremy L Thompson         // Get output vector
660*d1d35e2fSjeremylt         ierr = CeedOperatorFieldGetVector(op_output_fields[out], &vec);
661e15f9bd0SJeremy L Thompson         CeedChkBackend(ierr);
6621d102b48SJeremy L Thompson         // Check if active output
6631d102b48SJeremy L Thompson         if (vec == CEED_VECTOR_ACTIVE) {
664*d1d35e2fSjeremylt           CeedVectorSetArray(impl->q_vecs_out[out], CEED_MEM_HOST,
665e15f9bd0SJeremy L Thompson                              CEED_USE_POINTER, a); CeedChkBackend(ierr);
666*d1d35e2fSjeremylt           ierr = CeedQFunctionFieldGetSize(qf_output_fields[out], &size);
667e15f9bd0SJeremy L Thompson           CeedChkBackend(ierr);
668*d1d35e2fSjeremylt           a += size*Q*blk_size; // Advance the pointer by the size of the output
6691d102b48SJeremy L Thompson         }
6701d102b48SJeremy L Thompson       }
6711d102b48SJeremy L Thompson       // Apply QFunction
672*d1d35e2fSjeremylt       ierr = CeedQFunctionApply(qf, Q*blk_size, impl->q_vecs_in, impl->q_vecs_out);
673e15f9bd0SJeremy L Thompson       CeedChkBackend(ierr);
6744a2e7687Sjeremylt     }
6754a2e7687Sjeremylt   }
6764a2e7687Sjeremylt 
6771d102b48SJeremy L Thompson   // Un-set output Qvecs to prevent accidental overwrite of Assembled
678*d1d35e2fSjeremylt   for (CeedInt out=0; out<num_output_fields; out++) {
6791d102b48SJeremy L Thompson     // Get output vector
680*d1d35e2fSjeremylt     ierr = CeedOperatorFieldGetVector(op_output_fields[out], &vec);
681e15f9bd0SJeremy L Thompson     CeedChkBackend(ierr);
6821d102b48SJeremy L Thompson     // Check if active output
6831d102b48SJeremy L Thompson     if (vec == CEED_VECTOR_ACTIVE) {
684*d1d35e2fSjeremylt       CeedVectorSetArray(impl->q_vecs_out[out], CEED_MEM_HOST, CEED_COPY_VALUES,
685e15f9bd0SJeremy L Thompson                          NULL); CeedChkBackend(ierr);
6861d102b48SJeremy L Thompson     }
6871d102b48SJeremy L Thompson   }
6881d102b48SJeremy L Thompson 
6891d102b48SJeremy L Thompson   // Restore input arrays
690*d1d35e2fSjeremylt   ierr = CeedOperatorRestoreInputs_Blocked(num_input_fields, qf_input_fields,
691*d1d35e2fSjeremylt          op_input_fields, true, impl); CeedChkBackend(ierr);
6921d102b48SJeremy L Thompson 
6931d102b48SJeremy L Thompson   // Output blocked restriction
694e15f9bd0SJeremy L Thompson   ierr = CeedVectorRestoreArray(lvec, &a); CeedChkBackend(ierr);
695e15f9bd0SJeremy L Thompson   ierr = CeedVectorSetValue(*assembled, 0.0); CeedChkBackend(ierr);
696*d1d35e2fSjeremylt   CeedElemRestriction blk_rstr;
697*d1d35e2fSjeremylt   ierr = CeedElemRestrictionCreateBlockedStrided(ceed, num_elem, Q, blk_size,
698*d1d35e2fSjeremylt          num_active_in*num_active_out, num_active_in*num_active_out*num_elem*Q,
699*d1d35e2fSjeremylt          strides, &blk_rstr); CeedChkBackend(ierr);
700*d1d35e2fSjeremylt   ierr = CeedElemRestrictionApply(blk_rstr, CEED_TRANSPOSE, lvec, *assembled,
701e15f9bd0SJeremy L Thompson                                   request); CeedChkBackend(ierr);
7021d102b48SJeremy L Thompson 
7031d102b48SJeremy L Thompson   // Cleanup
704*d1d35e2fSjeremylt   for (CeedInt i=0; i<num_active_in; i++) {
705*d1d35e2fSjeremylt     ierr = CeedVectorDestroy(&active_in[i]); CeedChkBackend(ierr);
70642ea3801Sjeremylt   }
707*d1d35e2fSjeremylt   ierr = CeedFree(&active_in); CeedChkBackend(ierr);
708e15f9bd0SJeremy L Thompson   ierr = CeedVectorDestroy(&lvec); CeedChkBackend(ierr);
709*d1d35e2fSjeremylt   ierr = CeedElemRestrictionDestroy(&blk_rstr); CeedChkBackend(ierr);
7101d102b48SJeremy L Thompson 
711e15f9bd0SJeremy L Thompson   return CEED_ERROR_SUCCESS;
7124a2e7687Sjeremylt }
7134a2e7687Sjeremylt 
714f10650afSjeremylt //------------------------------------------------------------------------------
715f10650afSjeremylt // Operator Destroy
716f10650afSjeremylt //------------------------------------------------------------------------------
717f10650afSjeremylt static int CeedOperatorDestroy_Blocked(CeedOperator op) {
718f10650afSjeremylt   int ierr;
719f10650afSjeremylt   CeedOperator_Blocked *impl;
720e15f9bd0SJeremy L Thompson   ierr = CeedOperatorGetData(op, &impl); CeedChkBackend(ierr);
721f10650afSjeremylt 
722*d1d35e2fSjeremylt   for (CeedInt i=0; i<impl->num_e_vecs_in+impl->num_e_vecs_out; i++) {
723*d1d35e2fSjeremylt     ierr = CeedElemRestrictionDestroy(&impl->blk_restr[i]); CeedChkBackend(ierr);
724*d1d35e2fSjeremylt     ierr = CeedVectorDestroy(&impl->e_vecs[i]); CeedChkBackend(ierr);
725f10650afSjeremylt   }
726*d1d35e2fSjeremylt   ierr = CeedFree(&impl->blk_restr); CeedChkBackend(ierr);
727*d1d35e2fSjeremylt   ierr = CeedFree(&impl->e_vecs); CeedChkBackend(ierr);
728*d1d35e2fSjeremylt   ierr = CeedFree(&impl->e_data); CeedChkBackend(ierr);
729*d1d35e2fSjeremylt   ierr = CeedFree(&impl->input_state); CeedChkBackend(ierr);
730f10650afSjeremylt 
731*d1d35e2fSjeremylt   for (CeedInt i=0; i<impl->num_e_vecs_in; i++) {
732*d1d35e2fSjeremylt     ierr = CeedVectorDestroy(&impl->e_vecs_in[i]); CeedChkBackend(ierr);
733*d1d35e2fSjeremylt     ierr = CeedVectorDestroy(&impl->q_vecs_in[i]); CeedChkBackend(ierr);
734f10650afSjeremylt   }
735*d1d35e2fSjeremylt   ierr = CeedFree(&impl->e_vecs_in); CeedChkBackend(ierr);
736*d1d35e2fSjeremylt   ierr = CeedFree(&impl->q_vecs_in); CeedChkBackend(ierr);
737f10650afSjeremylt 
738*d1d35e2fSjeremylt   for (CeedInt i=0; i<impl->num_e_vecs_out; i++) {
739*d1d35e2fSjeremylt     ierr = CeedVectorDestroy(&impl->e_vecs_out[i]); CeedChkBackend(ierr);
740*d1d35e2fSjeremylt     ierr = CeedVectorDestroy(&impl->q_vecs_out[i]); CeedChkBackend(ierr);
741f10650afSjeremylt   }
742*d1d35e2fSjeremylt   ierr = CeedFree(&impl->e_vecs_out); CeedChkBackend(ierr);
743*d1d35e2fSjeremylt   ierr = CeedFree(&impl->q_vecs_out); CeedChkBackend(ierr);
744f10650afSjeremylt 
745e15f9bd0SJeremy L Thompson   ierr = CeedFree(&impl); CeedChkBackend(ierr);
746e15f9bd0SJeremy L Thompson   return CEED_ERROR_SUCCESS;
747f10650afSjeremylt }
748f10650afSjeremylt 
749f10650afSjeremylt //------------------------------------------------------------------------------
750f10650afSjeremylt // Operator Create
751f10650afSjeremylt //------------------------------------------------------------------------------
7524a2e7687Sjeremylt int CeedOperatorCreate_Blocked(CeedOperator op) {
7534a2e7687Sjeremylt   int ierr;
754fe2413ffSjeremylt   Ceed ceed;
755e15f9bd0SJeremy L Thompson   ierr = CeedOperatorGetCeed(op, &ceed); CeedChkBackend(ierr);
7564ce2993fSjeremylt   CeedOperator_Blocked *impl;
7574a2e7687Sjeremylt 
758e15f9bd0SJeremy L Thompson   ierr = CeedCalloc(1, &impl); CeedChkBackend(ierr);
759e15f9bd0SJeremy L Thompson   ierr = CeedOperatorSetData(op, impl); CeedChkBackend(ierr);
760fe2413ffSjeremylt 
76180ac2e43SJeremy L Thompson   ierr = CeedSetBackendFunction(ceed, "Operator", op, "LinearAssembleQFunction",
76280ac2e43SJeremy L Thompson                                 CeedOperatorLinearAssembleQFunction_Blocked);
763e15f9bd0SJeremy L Thompson   CeedChkBackend(ierr);
764cae8b89aSjeremylt   ierr = CeedSetBackendFunction(ceed, "Operator", op, "ApplyAdd",
765e15f9bd0SJeremy L Thompson                                 CeedOperatorApplyAdd_Blocked); CeedChkBackend(ierr);
766fe2413ffSjeremylt   ierr = CeedSetBackendFunction(ceed, "Operator", op, "Destroy",
767e15f9bd0SJeremy L Thompson                                 CeedOperatorDestroy_Blocked); CeedChkBackend(ierr);
768e15f9bd0SJeremy L Thompson   return CEED_ERROR_SUCCESS;
7694a2e7687Sjeremylt }
770f10650afSjeremylt //------------------------------------------------------------------------------
771