xref: /libCEED/rust/libceed-sys/c-src/backends/blocked/ceed-blocked-operator.c (revision d264344313546611a0e282df1f09990e3ff407ce)
14a2e7687Sjeremylt // Copyright (c) 2017-2018, Lawrence Livermore National Security, LLC.
24a2e7687Sjeremylt // Produced at the Lawrence Livermore National Laboratory. LLNL-CODE-734707.
34a2e7687Sjeremylt // All Rights reserved. See files LICENSE and NOTICE for details.
44a2e7687Sjeremylt //
54a2e7687Sjeremylt // This file is part of CEED, a collection of benchmarks, miniapps, software
64a2e7687Sjeremylt // libraries and APIs for efficient high-order finite element and spectral
74a2e7687Sjeremylt // element discretizations for exascale applications. For more information and
84a2e7687Sjeremylt // source code availability see http://github.com/ceed.
94a2e7687Sjeremylt //
104a2e7687Sjeremylt // The CEED research is supported by the Exascale Computing Project 17-SC-20-SC,
114a2e7687Sjeremylt // a collaborative effort of two U.S. Department of Energy organizations (Office
124a2e7687Sjeremylt // of Science and the National Nuclear Security Administration) responsible for
134a2e7687Sjeremylt // the planning and preparation of a capable exascale ecosystem, including
144a2e7687Sjeremylt // software, applications, hardware, advanced system engineering and early
154a2e7687Sjeremylt // testbed platforms, in support of the nation's exascale computing imperative.
164a2e7687Sjeremylt 
17ec3da8bcSJed Brown #include <ceed/ceed.h>
18ec3da8bcSJed Brown #include <ceed/backend.h>
193d576824SJeremy L Thompson #include <stdbool.h>
203d576824SJeremy L Thompson #include <stddef.h>
213d576824SJeremy L Thompson #include <stdint.h>
224a2e7687Sjeremylt #include "ceed-blocked.h"
234a2e7687Sjeremylt 
24f10650afSjeremylt //------------------------------------------------------------------------------
25f10650afSjeremylt // Setup Input/Output Fields
26f10650afSjeremylt //------------------------------------------------------------------------------
2789c6efa4Sjeremylt static int CeedOperatorSetupFields_Blocked(CeedQFunction qf,
284fc1f125SJeremy L Thompson     CeedOperator op, bool is_input, CeedElemRestriction *blk_restr,
294fc1f125SJeremy L Thompson     CeedVector *e_vecs_full, CeedVector *e_vecs, CeedVector *q_vecs,
304fc1f125SJeremy L Thompson     CeedInt start_e, CeedInt num_fields, CeedInt Q) {
31ebbcc8a3SJeremy L Thompson   CeedInt ierr, num_comp, size, P;
32*d2643443SJeremy L Thompson   CeedSize e_size, q_size;
33aedaa0e5Sjeremylt   Ceed ceed;
34e15f9bd0SJeremy L Thompson   ierr = CeedOperatorGetCeed(op, &ceed); CeedChkBackend(ierr);
35d1bcdac9Sjeremylt   CeedBasis basis;
36d1bcdac9Sjeremylt   CeedElemRestriction r;
37d1d35e2fSjeremylt   CeedOperatorField *op_fields;
38d1d35e2fSjeremylt   CeedQFunctionField *qf_fields;
394fc1f125SJeremy L Thompson   if (is_input) {
407e7773b5SJeremy L Thompson     ierr = CeedOperatorGetFields(op, NULL, &op_fields, NULL, NULL);
41e15f9bd0SJeremy L Thompson     CeedChkBackend(ierr);
427e7773b5SJeremy L Thompson     ierr = CeedQFunctionGetFields(qf, NULL, &qf_fields, NULL, NULL);
43e15f9bd0SJeremy L Thompson     CeedChkBackend(ierr);
444fc1f125SJeremy L Thompson   } else {
454fc1f125SJeremy L Thompson     ierr = CeedOperatorGetFields(op, NULL, NULL, NULL, &op_fields);
464fc1f125SJeremy L Thompson     CeedChkBackend(ierr);
474fc1f125SJeremy L Thompson     ierr = CeedQFunctionGetFields(qf, NULL, NULL, NULL, &qf_fields);
484fc1f125SJeremy L Thompson     CeedChkBackend(ierr);
49fe2413ffSjeremylt   }
50d1d35e2fSjeremylt   const CeedInt blk_size = 8;
514a2e7687Sjeremylt 
524a2e7687Sjeremylt   // Loop over fields
534fc1f125SJeremy L Thompson   for (CeedInt i=0; i<num_fields; i++) {
54d1d35e2fSjeremylt     CeedEvalMode eval_mode;
55d1d35e2fSjeremylt     ierr = CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode);
56d1d35e2fSjeremylt     CeedChkBackend(ierr);
574a2e7687Sjeremylt 
58d1d35e2fSjeremylt     if (eval_mode != CEED_EVAL_WEIGHT) {
59d1d35e2fSjeremylt       ierr = CeedOperatorFieldGetElemRestriction(op_fields[i], &r);
60e15f9bd0SJeremy L Thompson       CeedChkBackend(ierr);
61e15f9bd0SJeremy L Thompson       ierr = CeedElemRestrictionGetCeed(r, &ceed); CeedChkBackend(ierr);
62e79b91d9SJeremy L Thompson       CeedSize l_size;
63e79b91d9SJeremy L Thompson       CeedInt num_elem, elem_size, comp_stride;
64d1d35e2fSjeremylt       ierr = CeedElemRestrictionGetNumElements(r, &num_elem); CeedChkBackend(ierr);
65d1d35e2fSjeremylt       ierr = CeedElemRestrictionGetElementSize(r, &elem_size); CeedChkBackend(ierr);
66d1d35e2fSjeremylt       ierr = CeedElemRestrictionGetLVectorSize(r, &l_size); CeedChkBackend(ierr);
67d1d35e2fSjeremylt       ierr = CeedElemRestrictionGetNumComponents(r, &num_comp); CeedChkBackend(ierr);
68bd33150aSjeremylt 
693ac43b2cSJeremy L Thompson       bool strided;
70e15f9bd0SJeremy L Thompson       ierr = CeedElemRestrictionIsStrided(r, &strided); CeedChkBackend(ierr);
713ac43b2cSJeremy L Thompson       if (strided) {
723ac43b2cSJeremy L Thompson         CeedInt strides[3];
73e15f9bd0SJeremy L Thompson         ierr = CeedElemRestrictionGetStrides(r, &strides); CeedChkBackend(ierr);
74d1d35e2fSjeremylt         ierr = CeedElemRestrictionCreateBlockedStrided(ceed, num_elem, elem_size,
75d1d35e2fSjeremylt                blk_size, num_comp, l_size, strides, &blk_restr[i+start_e]);
76e15f9bd0SJeremy L Thompson         CeedChkBackend(ierr);
773ac43b2cSJeremy L Thompson       } else {
78bd33150aSjeremylt         const CeedInt *offsets = NULL;
79bd33150aSjeremylt         ierr = CeedElemRestrictionGetOffsets(r, CEED_MEM_HOST, &offsets);
80e15f9bd0SJeremy L Thompson         CeedChkBackend(ierr);
81d1d35e2fSjeremylt         ierr = CeedElemRestrictionGetCompStride(r, &comp_stride); CeedChkBackend(ierr);
82d1d35e2fSjeremylt         ierr = CeedElemRestrictionCreateBlocked(ceed, num_elem, elem_size,
83d1d35e2fSjeremylt                                                 blk_size, num_comp, comp_stride,
84d1d35e2fSjeremylt                                                 l_size, CEED_MEM_HOST,
85bd33150aSjeremylt                                                 CEED_COPY_VALUES, offsets,
86d1d35e2fSjeremylt                                                 &blk_restr[i+start_e]);
87e15f9bd0SJeremy L Thompson         CeedChkBackend(ierr);
88e15f9bd0SJeremy L Thompson         ierr = CeedElemRestrictionRestoreOffsets(r, &offsets); CeedChkBackend(ierr);
893ac43b2cSJeremy L Thompson       }
90d1d35e2fSjeremylt       ierr = CeedElemRestrictionCreateVector(blk_restr[i+start_e], NULL,
914fc1f125SJeremy L Thompson                                              &e_vecs_full[i+start_e]);
92e15f9bd0SJeremy L Thompson       CeedChkBackend(ierr);
934a2e7687Sjeremylt     }
944a2e7687Sjeremylt 
95d1d35e2fSjeremylt     switch(eval_mode) {
964a2e7687Sjeremylt     case CEED_EVAL_NONE:
97d1d35e2fSjeremylt       ierr = CeedQFunctionFieldGetSize(qf_fields[i], &size); CeedChkBackend(ierr);
98*d2643443SJeremy L Thompson       q_size = (CeedSize)Q*size*blk_size;
99*d2643443SJeremy L Thompson       ierr = CeedVectorCreate(ceed, q_size, &q_vecs[i]); CeedChkBackend(ierr);
100aedaa0e5Sjeremylt       break;
101aedaa0e5Sjeremylt     case CEED_EVAL_INTERP:
1024a2e7687Sjeremylt     case CEED_EVAL_GRAD:
103d1d35e2fSjeremylt       ierr = CeedOperatorFieldGetBasis(op_fields[i], &basis); CeedChkBackend(ierr);
104d1d35e2fSjeremylt       ierr = CeedQFunctionFieldGetSize(qf_fields[i], &size); CeedChkBackend(ierr);
105ebbcc8a3SJeremy L Thompson       ierr = CeedBasisGetNumNodes(basis, &P); CeedChkBackend(ierr);
106ebbcc8a3SJeremy L Thompson       ierr = CeedBasisGetNumComponents(basis, &num_comp); CeedChkBackend(ierr);
107*d2643443SJeremy L Thompson       e_size = (CeedSize)P*num_comp*blk_size;
108*d2643443SJeremy L Thompson       ierr = CeedVectorCreate(ceed, e_size, &e_vecs[i]); CeedChkBackend(ierr);
109*d2643443SJeremy L Thompson       q_size = (CeedSize)Q*size*blk_size;
110*d2643443SJeremy L Thompson       ierr = CeedVectorCreate(ceed, q_size, &q_vecs[i]); CeedChkBackend(ierr);
1114a2e7687Sjeremylt       break;
1124a2e7687Sjeremylt     case CEED_EVAL_WEIGHT: // Only on input fields
113d1d35e2fSjeremylt       ierr = CeedOperatorFieldGetBasis(op_fields[i], &basis); CeedChkBackend(ierr);
114*d2643443SJeremy L Thompson       q_size = (CeedSize)Q*blk_size;
115*d2643443SJeremy L Thompson       ierr = CeedVectorCreate(ceed, q_size, &q_vecs[i]); CeedChkBackend(ierr);
116d1d35e2fSjeremylt       ierr = CeedBasisApply(basis, blk_size, CEED_NOTRANSPOSE,
117d1d35e2fSjeremylt                             CEED_EVAL_WEIGHT, CEED_VECTOR_NONE, q_vecs[i]);
118e15f9bd0SJeremy L Thompson       CeedChkBackend(ierr);
119aedaa0e5Sjeremylt 
1204a2e7687Sjeremylt       break;
1214a2e7687Sjeremylt     case CEED_EVAL_DIV:
1224d537eeaSYohann       break; // Not implemented
1234a2e7687Sjeremylt     case CEED_EVAL_CURL:
1244d537eeaSYohann       break; // Not implemented
1254a2e7687Sjeremylt     }
1264a2e7687Sjeremylt   }
127e15f9bd0SJeremy L Thompson   return CEED_ERROR_SUCCESS;
1284a2e7687Sjeremylt }
1294a2e7687Sjeremylt 
130f10650afSjeremylt //------------------------------------------------------------------------------
131f10650afSjeremylt // Setup Operator
132f10650afSjeremylt //------------------------------------------------------------------------------
1334a2e7687Sjeremylt static int CeedOperatorSetup_Blocked(CeedOperator op) {
1344a2e7687Sjeremylt   int ierr;
1358c1105f8SJeremy L Thompson   bool is_setup_done;
1368c1105f8SJeremy L Thompson   ierr = CeedOperatorIsSetupDone(op, &is_setup_done); CeedChkBackend(ierr);
1378c1105f8SJeremy L Thompson   if (is_setup_done) return CEED_ERROR_SUCCESS;
138aedaa0e5Sjeremylt   Ceed ceed;
139e15f9bd0SJeremy L Thompson   ierr = CeedOperatorGetCeed(op, &ceed); CeedChkBackend(ierr);
1404ce2993fSjeremylt   CeedOperator_Blocked *impl;
141e15f9bd0SJeremy L Thompson   ierr = CeedOperatorGetData(op, &impl); CeedChkBackend(ierr);
1424ce2993fSjeremylt   CeedQFunction qf;
143e15f9bd0SJeremy L Thompson   ierr = CeedOperatorGetQFunction(op, &qf); CeedChkBackend(ierr);
144d1d35e2fSjeremylt   CeedInt Q, num_input_fields, num_output_fields;
145e15f9bd0SJeremy L Thompson   ierr = CeedOperatorGetNumQuadraturePoints(op, &Q); CeedChkBackend(ierr);
1460b454692Sjeremylt   ierr = CeedQFunctionIsIdentity(qf, &impl->is_identity_qf); CeedChkBackend(ierr);
147d1d35e2fSjeremylt   CeedOperatorField *op_input_fields, *op_output_fields;
1487e7773b5SJeremy L Thompson   ierr = CeedOperatorGetFields(op, &num_input_fields, &op_input_fields,
1497e7773b5SJeremy L Thompson                                &num_output_fields, &op_output_fields);
150e15f9bd0SJeremy L Thompson   CeedChkBackend(ierr);
151d1d35e2fSjeremylt   CeedQFunctionField *qf_input_fields, *qf_output_fields;
1527e7773b5SJeremy L Thompson   ierr = CeedQFunctionGetFields(qf, NULL, &qf_input_fields, NULL,
1537e7773b5SJeremy L Thompson                                 &qf_output_fields);
154e15f9bd0SJeremy L Thompson   CeedChkBackend(ierr);
1554a2e7687Sjeremylt 
1564a2e7687Sjeremylt   // Allocate
157d1d35e2fSjeremylt   ierr = CeedCalloc(num_input_fields + num_output_fields, &impl->blk_restr);
158e15f9bd0SJeremy L Thompson   CeedChkBackend(ierr);
1594fc1f125SJeremy L Thompson   ierr = CeedCalloc(num_input_fields + num_output_fields, &impl->e_vecs_full);
160e15f9bd0SJeremy L Thompson   CeedChkBackend(ierr);
1614a2e7687Sjeremylt 
1624fc1f125SJeremy L Thompson   ierr = CeedCalloc(CEED_FIELD_MAX, &impl->input_states); CeedChkBackend(ierr);
163bf4cb664SJeremy L Thompson   ierr = CeedCalloc(CEED_FIELD_MAX, &impl->e_vecs_in); CeedChkBackend(ierr);
164bf4cb664SJeremy L Thompson   ierr = CeedCalloc(CEED_FIELD_MAX, &impl->e_vecs_out); CeedChkBackend(ierr);
165bf4cb664SJeremy L Thompson   ierr = CeedCalloc(CEED_FIELD_MAX, &impl->q_vecs_in); CeedChkBackend(ierr);
166bf4cb664SJeremy L Thompson   ierr = CeedCalloc(CEED_FIELD_MAX, &impl->q_vecs_out); CeedChkBackend(ierr);
1674a2e7687Sjeremylt 
1684fc1f125SJeremy L Thompson   impl->num_inputs = num_input_fields;
1694fc1f125SJeremy L Thompson   impl->num_outputs = num_output_fields;
170aedaa0e5Sjeremylt 
1714a2e7687Sjeremylt   // Set up infield and outfield pointer arrays
1724a2e7687Sjeremylt   // Infields
1734fc1f125SJeremy L Thompson   ierr = CeedOperatorSetupFields_Blocked(qf, op, true, impl->blk_restr,
1744fc1f125SJeremy L Thompson                                          impl->e_vecs_full, impl->e_vecs_in,
175d1d35e2fSjeremylt                                          impl->q_vecs_in, 0,
176d1d35e2fSjeremylt                                          num_input_fields, Q);
177e15f9bd0SJeremy L Thompson   CeedChkBackend(ierr);
1784a2e7687Sjeremylt   // Outfields
1794fc1f125SJeremy L Thompson   ierr = CeedOperatorSetupFields_Blocked(qf, op, false, impl->blk_restr,
1804fc1f125SJeremy L Thompson                                          impl->e_vecs_full, impl->e_vecs_out,
181d1d35e2fSjeremylt                                          impl->q_vecs_out, num_input_fields,
182d1d35e2fSjeremylt                                          num_output_fields, Q);
183e15f9bd0SJeremy L Thompson   CeedChkBackend(ierr);
184aedaa0e5Sjeremylt 
18516911fdaSjeremylt   // Identity QFunctions
1860b454692Sjeremylt   if (impl->is_identity_qf) {
187d1d35e2fSjeremylt     CeedEvalMode in_mode, out_mode;
188d1d35e2fSjeremylt     CeedQFunctionField *in_fields, *out_fields;
1897e7773b5SJeremy L Thompson     ierr = CeedQFunctionGetFields(qf, NULL, &in_fields, NULL, &out_fields);
190e15f9bd0SJeremy L Thompson     CeedChkBackend(ierr);
1910b454692Sjeremylt     ierr = CeedQFunctionFieldGetEvalMode(in_fields[0], &in_mode);
192d1d35e2fSjeremylt     CeedChkBackend(ierr);
1930b454692Sjeremylt     ierr = CeedQFunctionFieldGetEvalMode(out_fields[0], &out_mode);
194d1d35e2fSjeremylt     CeedChkBackend(ierr);
195d1d35e2fSjeremylt 
1960b454692Sjeremylt     if (in_mode == CEED_EVAL_NONE && out_mode == CEED_EVAL_NONE) {
1970b454692Sjeremylt       impl->is_identity_restr_op = true;
1980b454692Sjeremylt     } else {
1990b454692Sjeremylt       ierr = CeedVectorDestroy(&impl->q_vecs_out[0]); CeedChkBackend(ierr);
2000b454692Sjeremylt       impl->q_vecs_out[0] = impl->q_vecs_in[0];
2010b454692Sjeremylt       ierr = CeedVectorAddReference(impl->q_vecs_in[0]); CeedChkBackend(ierr);
20216911fdaSjeremylt     }
20316911fdaSjeremylt   }
20416911fdaSjeremylt 
205e15f9bd0SJeremy L Thompson   ierr = CeedOperatorSetSetupDone(op); CeedChkBackend(ierr);
2064a2e7687Sjeremylt 
207e15f9bd0SJeremy L Thompson   return CEED_ERROR_SUCCESS;
2084a2e7687Sjeremylt }
2094a2e7687Sjeremylt 
210f10650afSjeremylt //------------------------------------------------------------------------------
211f10650afSjeremylt // Setup Operator Inputs
212f10650afSjeremylt //------------------------------------------------------------------------------
213d1d35e2fSjeremylt static inline int CeedOperatorSetupInputs_Blocked(CeedInt num_input_fields,
214d1d35e2fSjeremylt     CeedQFunctionField *qf_input_fields, CeedOperatorField *op_input_fields,
2154fc1f125SJeremy L Thompson     CeedVector in_vec, bool skip_active, CeedScalar *e_data_full[2*CEED_FIELD_MAX],
216a0162de9SJeremy L Thompson     CeedOperator_Blocked *impl, CeedRequest *request) {
2171d102b48SJeremy L Thompson   CeedInt ierr;
218d1d35e2fSjeremylt   CeedEvalMode eval_mode;
219d1bcdac9Sjeremylt   CeedVector vec;
22016c359e6Sjeremylt   uint64_t state;
2214a2e7687Sjeremylt 
222d1d35e2fSjeremylt   for (CeedInt i=0; i<num_input_fields; i++) {
2231d102b48SJeremy L Thompson     // Get input vector
224d1d35e2fSjeremylt     ierr = CeedOperatorFieldGetVector(op_input_fields[i], &vec);
225d1d35e2fSjeremylt     CeedChkBackend(ierr);
2261d102b48SJeremy L Thompson     if (vec == CEED_VECTOR_ACTIVE) {
227d1d35e2fSjeremylt       if (skip_active)
2281d102b48SJeremy L Thompson         continue;
2291d102b48SJeremy L Thompson       else
230d1d35e2fSjeremylt         vec = in_vec;
2311d102b48SJeremy L Thompson     }
2321d102b48SJeremy L Thompson 
233d1d35e2fSjeremylt     ierr = CeedQFunctionFieldGetEvalMode(qf_input_fields[i], &eval_mode);
234e15f9bd0SJeremy L Thompson     CeedChkBackend(ierr);
235d1d35e2fSjeremylt     if (eval_mode == CEED_EVAL_WEIGHT) { // Skip
2364a2e7687Sjeremylt     } else {
2374a2e7687Sjeremylt       // Restrict
238e15f9bd0SJeremy L Thompson       ierr = CeedVectorGetState(vec, &state); CeedChkBackend(ierr);
2394fc1f125SJeremy L Thompson       if (state != impl->input_states[i] || vec == in_vec) {
240d1d35e2fSjeremylt         ierr = CeedElemRestrictionApply(impl->blk_restr[i], CEED_NOTRANSPOSE,
2414fc1f125SJeremy L Thompson                                         vec, impl->e_vecs_full[i], request);
242e15f9bd0SJeremy L Thompson         CeedChkBackend(ierr);
2434fc1f125SJeremy L Thompson         impl->input_states[i] = state;
24416c359e6Sjeremylt       }
2454a2e7687Sjeremylt       // Get evec
2464fc1f125SJeremy L Thompson       ierr = CeedVectorGetArrayRead(impl->e_vecs_full[i], CEED_MEM_HOST,
2474fc1f125SJeremy L Thompson                                     (const CeedScalar **) &e_data_full[i]);
248e15f9bd0SJeremy L Thompson       CeedChkBackend(ierr);
2494a2e7687Sjeremylt     }
2504a2e7687Sjeremylt   }
251e15f9bd0SJeremy L Thompson   return CEED_ERROR_SUCCESS;
2524a2e7687Sjeremylt }
2534a2e7687Sjeremylt 
254f10650afSjeremylt //------------------------------------------------------------------------------
255f10650afSjeremylt // Input Basis Action
256f10650afSjeremylt //------------------------------------------------------------------------------
2571d102b48SJeremy L Thompson static inline int CeedOperatorInputBasis_Blocked(CeedInt e, CeedInt Q,
258d1d35e2fSjeremylt     CeedQFunctionField *qf_input_fields, CeedOperatorField *op_input_fields,
259d1d35e2fSjeremylt     CeedInt num_input_fields, CeedInt blk_size, bool skip_active,
2604fc1f125SJeremy L Thompson     CeedScalar *e_data_full[2*CEED_FIELD_MAX], CeedOperator_Blocked *impl) {
2611d102b48SJeremy L Thompson   CeedInt ierr;
262d1d35e2fSjeremylt   CeedInt dim, elem_size, size;
263d1d35e2fSjeremylt   CeedElemRestriction elem_restr;
264d1d35e2fSjeremylt   CeedEvalMode eval_mode;
2651d102b48SJeremy L Thompson   CeedBasis basis;
2661d102b48SJeremy L Thompson 
267d1d35e2fSjeremylt   for (CeedInt i=0; i<num_input_fields; i++) {
2681d102b48SJeremy L Thompson     // Skip active input
269d1d35e2fSjeremylt     if (skip_active) {
2701d102b48SJeremy L Thompson       CeedVector vec;
271d1d35e2fSjeremylt       ierr = CeedOperatorFieldGetVector(op_input_fields[i], &vec);
272d1d35e2fSjeremylt       CeedChkBackend(ierr);
2731d102b48SJeremy L Thompson       if (vec == CEED_VECTOR_ACTIVE)
2741d102b48SJeremy L Thompson         continue;
2751d102b48SJeremy L Thompson     }
2761d102b48SJeremy L Thompson 
277d1d35e2fSjeremylt     // Get elem_size, eval_mode, size
278d1d35e2fSjeremylt     ierr = CeedOperatorFieldGetElemRestriction(op_input_fields[i], &elem_restr);
279e15f9bd0SJeremy L Thompson     CeedChkBackend(ierr);
280d1d35e2fSjeremylt     ierr = CeedElemRestrictionGetElementSize(elem_restr, &elem_size);
281e15f9bd0SJeremy L Thompson     CeedChkBackend(ierr);
282d1d35e2fSjeremylt     ierr = CeedQFunctionFieldGetEvalMode(qf_input_fields[i], &eval_mode);
283e15f9bd0SJeremy L Thompson     CeedChkBackend(ierr);
284d1d35e2fSjeremylt     ierr = CeedQFunctionFieldGetSize(qf_input_fields[i], &size);
285d1d35e2fSjeremylt     CeedChkBackend(ierr);
2864a2e7687Sjeremylt     // Basis action
287d1d35e2fSjeremylt     switch(eval_mode) {
2884a2e7687Sjeremylt     case CEED_EVAL_NONE:
289d1d35e2fSjeremylt       ierr = CeedVectorSetArray(impl->q_vecs_in[i], CEED_MEM_HOST,
2904fc1f125SJeremy L Thompson                                 CEED_USE_POINTER, &e_data_full[i][e*Q*size]);
291a0162de9SJeremy L Thompson       CeedChkBackend(ierr);
2924a2e7687Sjeremylt       break;
2934a2e7687Sjeremylt     case CEED_EVAL_INTERP:
294d1d35e2fSjeremylt       ierr = CeedOperatorFieldGetBasis(op_input_fields[i], &basis);
295e15f9bd0SJeremy L Thompson       CeedChkBackend(ierr);
296d1d35e2fSjeremylt       ierr = CeedVectorSetArray(impl->e_vecs_in[i], CEED_MEM_HOST,
2974fc1f125SJeremy L Thompson                                 CEED_USE_POINTER, &e_data_full[i][e*elem_size*size]);
298e15f9bd0SJeremy L Thompson       CeedChkBackend(ierr);
299d1d35e2fSjeremylt       ierr = CeedBasisApply(basis, blk_size, CEED_NOTRANSPOSE,
300d1d35e2fSjeremylt                             CEED_EVAL_INTERP, impl->e_vecs_in[i],
301d1d35e2fSjeremylt                             impl->q_vecs_in[i]); CeedChkBackend(ierr);
3024a2e7687Sjeremylt       break;
3034a2e7687Sjeremylt     case CEED_EVAL_GRAD:
304d1d35e2fSjeremylt       ierr = CeedOperatorFieldGetBasis(op_input_fields[i], &basis);
305e15f9bd0SJeremy L Thompson       CeedChkBackend(ierr);
306e15f9bd0SJeremy L Thompson       ierr = CeedBasisGetDimension(basis, &dim); CeedChkBackend(ierr);
307d1d35e2fSjeremylt       ierr = CeedVectorSetArray(impl->e_vecs_in[i], CEED_MEM_HOST,
3084fc1f125SJeremy L Thompson                                 CEED_USE_POINTER, &e_data_full[i][e*elem_size*size/dim]);
309e15f9bd0SJeremy L Thompson       CeedChkBackend(ierr);
310d1d35e2fSjeremylt       ierr = CeedBasisApply(basis, blk_size, CEED_NOTRANSPOSE,
311d1d35e2fSjeremylt                             CEED_EVAL_GRAD, impl->e_vecs_in[i],
312d1d35e2fSjeremylt                             impl->q_vecs_in[i]); CeedChkBackend(ierr);
3134a2e7687Sjeremylt       break;
3144a2e7687Sjeremylt     case CEED_EVAL_WEIGHT:
3154a2e7687Sjeremylt       break;  // No action
316bbfacfcdSjeremylt     // LCOV_EXCL_START
3174a2e7687Sjeremylt     case CEED_EVAL_DIV:
3181d102b48SJeremy L Thompson     case CEED_EVAL_CURL: {
319d1d35e2fSjeremylt       ierr = CeedOperatorFieldGetBasis(op_input_fields[i], &basis);
320e15f9bd0SJeremy L Thompson       CeedChkBackend(ierr);
3211d102b48SJeremy L Thompson       Ceed ceed;
322e15f9bd0SJeremy L Thompson       ierr = CeedBasisGetCeed(basis, &ceed); CeedChkBackend(ierr);
323e15f9bd0SJeremy L Thompson       return CeedError(ceed, CEED_ERROR_BACKEND,
324e15f9bd0SJeremy L Thompson                        "Ceed evaluation mode not implemented");
325bbfacfcdSjeremylt       // LCOV_EXCL_STOP
3264a2e7687Sjeremylt     }
3274a2e7687Sjeremylt     }
32889c6efa4Sjeremylt   }
329e15f9bd0SJeremy L Thompson   return CEED_ERROR_SUCCESS;
33089c6efa4Sjeremylt }
3314a2e7687Sjeremylt 
332f10650afSjeremylt //------------------------------------------------------------------------------
333f10650afSjeremylt // Output Basis Action
334f10650afSjeremylt //------------------------------------------------------------------------------
3351d102b48SJeremy L Thompson static inline int CeedOperatorOutputBasis_Blocked(CeedInt e, CeedInt Q,
336d1d35e2fSjeremylt     CeedQFunctionField *qf_output_fields, CeedOperatorField *op_output_fields,
337d1d35e2fSjeremylt     CeedInt blk_size, CeedInt num_input_fields, CeedInt num_output_fields,
3384fc1f125SJeremy L Thompson     CeedOperator op, CeedScalar *e_data_full[2*CEED_FIELD_MAX],
339a0162de9SJeremy L Thompson     CeedOperator_Blocked *impl) {
3401d102b48SJeremy L Thompson   CeedInt ierr;
341d1d35e2fSjeremylt   CeedInt dim, elem_size, size;
342d1d35e2fSjeremylt   CeedElemRestriction elem_restr;
343d1d35e2fSjeremylt   CeedEvalMode eval_mode;
3441d102b48SJeremy L Thompson   CeedBasis basis;
3451d102b48SJeremy L Thompson 
346d1d35e2fSjeremylt   for (CeedInt i=0; i<num_output_fields; i++) {
347d1d35e2fSjeremylt     // Get elem_size, eval_mode, size
348d1d35e2fSjeremylt     ierr = CeedOperatorFieldGetElemRestriction(op_output_fields[i], &elem_restr);
349e15f9bd0SJeremy L Thompson     CeedChkBackend(ierr);
350d1d35e2fSjeremylt     ierr = CeedElemRestrictionGetElementSize(elem_restr, &elem_size);
351e15f9bd0SJeremy L Thompson     CeedChkBackend(ierr);
352d1d35e2fSjeremylt     ierr = CeedQFunctionFieldGetEvalMode(qf_output_fields[i], &eval_mode);
353e15f9bd0SJeremy L Thompson     CeedChkBackend(ierr);
354d1d35e2fSjeremylt     ierr = CeedQFunctionFieldGetSize(qf_output_fields[i], &size);
355e15f9bd0SJeremy L Thompson     CeedChkBackend(ierr);
3564a2e7687Sjeremylt     // Basis action
357d1d35e2fSjeremylt     switch(eval_mode) {
3584a2e7687Sjeremylt     case CEED_EVAL_NONE:
3594a2e7687Sjeremylt       break; // No action
3604a2e7687Sjeremylt     case CEED_EVAL_INTERP:
361d1d35e2fSjeremylt       ierr = CeedOperatorFieldGetBasis(op_output_fields[i], &basis);
362e15f9bd0SJeremy L Thompson       CeedChkBackend(ierr);
363d1d35e2fSjeremylt       ierr = CeedVectorSetArray(impl->e_vecs_out[i], CEED_MEM_HOST,
3644fc1f125SJeremy L Thompson                                 CEED_USE_POINTER, &e_data_full[i + num_input_fields][e*elem_size*size]);
365e15f9bd0SJeremy L Thompson       CeedChkBackend(ierr);
366d1d35e2fSjeremylt       ierr = CeedBasisApply(basis, blk_size, CEED_TRANSPOSE,
367d1d35e2fSjeremylt                             CEED_EVAL_INTERP, impl->q_vecs_out[i],
368d1d35e2fSjeremylt                             impl->e_vecs_out[i]); CeedChkBackend(ierr);
3694a2e7687Sjeremylt       break;
3704a2e7687Sjeremylt     case CEED_EVAL_GRAD:
371d1d35e2fSjeremylt       ierr = CeedOperatorFieldGetBasis(op_output_fields[i], &basis);
372e15f9bd0SJeremy L Thompson       CeedChkBackend(ierr);
373e15f9bd0SJeremy L Thompson       ierr = CeedBasisGetDimension(basis, &dim); CeedChkBackend(ierr);
374d1d35e2fSjeremylt       ierr = CeedVectorSetArray(impl->e_vecs_out[i], CEED_MEM_HOST,
3754fc1f125SJeremy L Thompson                                 CEED_USE_POINTER, &e_data_full[i + num_input_fields][e*elem_size*size/dim]);
376e15f9bd0SJeremy L Thompson       CeedChkBackend(ierr);
377d1d35e2fSjeremylt       ierr = CeedBasisApply(basis, blk_size, CEED_TRANSPOSE,
378d1d35e2fSjeremylt                             CEED_EVAL_GRAD, impl->q_vecs_out[i],
379d1d35e2fSjeremylt                             impl->e_vecs_out[i]); CeedChkBackend(ierr);
3804a2e7687Sjeremylt       break;
381c042f62fSJeremy L Thompson     // LCOV_EXCL_START
382bbfacfcdSjeremylt     case CEED_EVAL_WEIGHT: {
3834ce2993fSjeremylt       Ceed ceed;
384e15f9bd0SJeremy L Thompson       ierr = CeedOperatorGetCeed(op, &ceed); CeedChkBackend(ierr);
385e15f9bd0SJeremy L Thompson       return CeedError(ceed, CEED_ERROR_BACKEND,
386e15f9bd0SJeremy L Thompson                        "CEED_EVAL_WEIGHT cannot be an output "
3871d102b48SJeremy L Thompson                        "evaluation mode");
3884ce2993fSjeremylt     }
3894a2e7687Sjeremylt     case CEED_EVAL_DIV:
3901d102b48SJeremy L Thompson     case CEED_EVAL_CURL: {
3911d102b48SJeremy L Thompson       Ceed ceed;
392e15f9bd0SJeremy L Thompson       ierr = CeedOperatorGetCeed(op, &ceed); CeedChkBackend(ierr);
393e15f9bd0SJeremy L Thompson       return CeedError(ceed, CEED_ERROR_BACKEND,
394e15f9bd0SJeremy L Thompson                        "Ceed evaluation mode not implemented");
395bbfacfcdSjeremylt       // LCOV_EXCL_STOP
3964a2e7687Sjeremylt     }
39789c6efa4Sjeremylt     }
39889c6efa4Sjeremylt   }
399e15f9bd0SJeremy L Thompson   return CEED_ERROR_SUCCESS;
4001d102b48SJeremy L Thompson }
4011d102b48SJeremy L Thompson 
402f10650afSjeremylt //------------------------------------------------------------------------------
403f10650afSjeremylt // Restore Input Vectors
404f10650afSjeremylt //------------------------------------------------------------------------------
405d1d35e2fSjeremylt static inline int CeedOperatorRestoreInputs_Blocked(CeedInt num_input_fields,
406d1d35e2fSjeremylt     CeedQFunctionField *qf_input_fields, CeedOperatorField *op_input_fields,
4074fc1f125SJeremy L Thompson     bool skip_active, CeedScalar *e_data_full[2*CEED_FIELD_MAX],
408a0162de9SJeremy L Thompson     CeedOperator_Blocked *impl) {
4091d102b48SJeremy L Thompson   CeedInt ierr;
410d1d35e2fSjeremylt   CeedEvalMode eval_mode;
4111d102b48SJeremy L Thompson 
412d1d35e2fSjeremylt   for (CeedInt i=0; i<num_input_fields; i++) {
4131d102b48SJeremy L Thompson     // Skip active inputs
414d1d35e2fSjeremylt     if (skip_active) {
4151d102b48SJeremy L Thompson       CeedVector vec;
416d1d35e2fSjeremylt       ierr = CeedOperatorFieldGetVector(op_input_fields[i], &vec);
417d1d35e2fSjeremylt       CeedChkBackend(ierr);
4181d102b48SJeremy L Thompson       if (vec == CEED_VECTOR_ACTIVE)
4191d102b48SJeremy L Thompson         continue;
4201d102b48SJeremy L Thompson     }
421d1d35e2fSjeremylt     ierr = CeedQFunctionFieldGetEvalMode(qf_input_fields[i], &eval_mode);
422e15f9bd0SJeremy L Thompson     CeedChkBackend(ierr);
423d1d35e2fSjeremylt     if (eval_mode == CEED_EVAL_WEIGHT) { // Skip
4241d102b48SJeremy L Thompson     } else {
4254fc1f125SJeremy L Thompson       ierr = CeedVectorRestoreArrayRead(impl->e_vecs_full[i],
4264fc1f125SJeremy L Thompson                                         (const CeedScalar **) &e_data_full[i]);
427e15f9bd0SJeremy L Thompson       CeedChkBackend(ierr);
4281d102b48SJeremy L Thompson     }
4291d102b48SJeremy L Thompson   }
430e15f9bd0SJeremy L Thompson   return CEED_ERROR_SUCCESS;
4311d102b48SJeremy L Thompson }
4321d102b48SJeremy L Thompson 
433f10650afSjeremylt //------------------------------------------------------------------------------
434f10650afSjeremylt // Operator Apply
435f10650afSjeremylt //------------------------------------------------------------------------------
436d1d35e2fSjeremylt static int CeedOperatorApplyAdd_Blocked(CeedOperator op, CeedVector in_vec,
437d1d35e2fSjeremylt                                         CeedVector out_vec,
4381d102b48SJeremy L Thompson                                         CeedRequest *request) {
4391d102b48SJeremy L Thompson   int ierr;
4401d102b48SJeremy L Thompson   CeedOperator_Blocked *impl;
441e15f9bd0SJeremy L Thompson   ierr = CeedOperatorGetData(op, &impl); CeedChkBackend(ierr);
442d1d35e2fSjeremylt   const CeedInt blk_size = 8;
443d1d35e2fSjeremylt   CeedInt Q, num_input_fields, num_output_fields, num_elem, size;
444d1d35e2fSjeremylt   ierr = CeedOperatorGetNumElements(op, &num_elem); CeedChkBackend(ierr);
445e15f9bd0SJeremy L Thompson   ierr = CeedOperatorGetNumQuadraturePoints(op, &Q); CeedChkBackend(ierr);
446d1d35e2fSjeremylt   CeedInt num_blks = (num_elem/blk_size) + !!(num_elem%blk_size);
4471d102b48SJeremy L Thompson   CeedQFunction qf;
448e15f9bd0SJeremy L Thompson   ierr = CeedOperatorGetQFunction(op, &qf); CeedChkBackend(ierr);
449d1d35e2fSjeremylt   CeedOperatorField *op_input_fields, *op_output_fields;
4507e7773b5SJeremy L Thompson   ierr = CeedOperatorGetFields(op, &num_input_fields, &op_input_fields,
4517e7773b5SJeremy L Thompson                                &num_output_fields, &op_output_fields);
452e15f9bd0SJeremy L Thompson   CeedChkBackend(ierr);
453d1d35e2fSjeremylt   CeedQFunctionField *qf_input_fields, *qf_output_fields;
4547e7773b5SJeremy L Thompson   ierr = CeedQFunctionGetFields(qf, NULL, &qf_input_fields, NULL,
4557e7773b5SJeremy L Thompson                                 &qf_output_fields);
456e15f9bd0SJeremy L Thompson   CeedChkBackend(ierr);
457d1d35e2fSjeremylt   CeedEvalMode eval_mode;
4581d102b48SJeremy L Thompson   CeedVector vec;
4594fc1f125SJeremy L Thompson   CeedScalar *e_data_full[2*CEED_FIELD_MAX] = {0};
4601d102b48SJeremy L Thompson 
4611d102b48SJeremy L Thompson   // Setup
462e15f9bd0SJeremy L Thompson   ierr = CeedOperatorSetup_Blocked(op); CeedChkBackend(ierr);
4631d102b48SJeremy L Thompson 
4640b454692Sjeremylt   // Restriction only operator
4650b454692Sjeremylt   if (impl->is_identity_restr_op) {
4660b454692Sjeremylt     ierr = CeedElemRestrictionApply(impl->blk_restr[0], CEED_NOTRANSPOSE, in_vec,
4674fc1f125SJeremy L Thompson                                     impl->e_vecs_full[0], request); CeedChkBackend(ierr);
4680b454692Sjeremylt     ierr = CeedElemRestrictionApply(impl->blk_restr[1], CEED_TRANSPOSE,
4694fc1f125SJeremy L Thompson                                     impl->e_vecs_full[0], out_vec, request); CeedChkBackend(ierr);
4700b454692Sjeremylt     return CEED_ERROR_SUCCESS;
4710b454692Sjeremylt   }
4720b454692Sjeremylt 
4731d102b48SJeremy L Thompson   // Input Evecs and Restriction
474d1d35e2fSjeremylt   ierr = CeedOperatorSetupInputs_Blocked(num_input_fields, qf_input_fields,
4754fc1f125SJeremy L Thompson                                          op_input_fields, in_vec, false, e_data_full,
476a0162de9SJeremy L Thompson                                          impl, request); CeedChkBackend(ierr);
4771d102b48SJeremy L Thompson 
4781d102b48SJeremy L Thompson   // Output Evecs
479d1d35e2fSjeremylt   for (CeedInt i=0; i<num_output_fields; i++) {
4809c774eddSJeremy L Thompson     ierr = CeedVectorGetArrayWrite(impl->e_vecs_full[i+impl->num_inputs],
4819c774eddSJeremy L Thompson                                    CEED_MEM_HOST, &e_data_full[i + num_input_fields]);
4829c774eddSJeremy L Thompson     CeedChkBackend(ierr);
4831d102b48SJeremy L Thompson   }
4841d102b48SJeremy L Thompson 
4851d102b48SJeremy L Thompson   // Loop through elements
486d1d35e2fSjeremylt   for (CeedInt e=0; e<num_blks*blk_size; e+=blk_size) {
4871d102b48SJeremy L Thompson     // Output pointers
488d1d35e2fSjeremylt     for (CeedInt i=0; i<num_output_fields; i++) {
489d1d35e2fSjeremylt       ierr = CeedQFunctionFieldGetEvalMode(qf_output_fields[i], &eval_mode);
490e15f9bd0SJeremy L Thompson       CeedChkBackend(ierr);
491d1d35e2fSjeremylt       if (eval_mode == CEED_EVAL_NONE) {
492d1d35e2fSjeremylt         ierr = CeedQFunctionFieldGetSize(qf_output_fields[i], &size);
493e15f9bd0SJeremy L Thompson         CeedChkBackend(ierr);
494d1d35e2fSjeremylt         ierr = CeedVectorSetArray(impl->q_vecs_out[i], CEED_MEM_HOST,
4954fc1f125SJeremy L Thompson                                   CEED_USE_POINTER, &e_data_full[i + num_input_fields][e*Q*size]);
496e15f9bd0SJeremy L Thompson         CeedChkBackend(ierr);
4971d102b48SJeremy L Thompson       }
4981d102b48SJeremy L Thompson     }
4991d102b48SJeremy L Thompson 
50016911fdaSjeremylt     // Input basis apply
501d1d35e2fSjeremylt     ierr = CeedOperatorInputBasis_Blocked(e, Q, qf_input_fields, op_input_fields,
5024fc1f125SJeremy L Thompson                                           num_input_fields, blk_size, false, e_data_full,
503a0162de9SJeremy L Thompson                                           impl); CeedChkBackend(ierr);
50416911fdaSjeremylt 
5051d102b48SJeremy L Thompson     // Q function
5060b454692Sjeremylt     if (!impl->is_identity_qf) {
507d1d35e2fSjeremylt       ierr = CeedQFunctionApply(qf, Q*blk_size, impl->q_vecs_in, impl->q_vecs_out);
508e15f9bd0SJeremy L Thompson       CeedChkBackend(ierr);
50916911fdaSjeremylt     }
5101d102b48SJeremy L Thompson 
5111d102b48SJeremy L Thompson     // Output basis apply
512d1d35e2fSjeremylt     ierr = CeedOperatorOutputBasis_Blocked(e, Q, qf_output_fields, op_output_fields,
513d1d35e2fSjeremylt                                            blk_size, num_input_fields,
5144fc1f125SJeremy L Thompson                                            num_output_fields, op, e_data_full, impl);
515e15f9bd0SJeremy L Thompson     CeedChkBackend(ierr);
5161d102b48SJeremy L Thompson   }
51789c6efa4Sjeremylt 
51889c6efa4Sjeremylt   // Output restriction
519d1d35e2fSjeremylt   for (CeedInt i=0; i<num_output_fields; i++) {
52089c6efa4Sjeremylt     // Restore evec
5214fc1f125SJeremy L Thompson     ierr = CeedVectorRestoreArray(impl->e_vecs_full[i+impl->num_inputs],
5229c774eddSJeremy L Thompson                                   &e_data_full[i + num_input_fields]);
5239c774eddSJeremy L Thompson     CeedChkBackend(ierr);
524d1bcdac9Sjeremylt     // Get output vector
525d1d35e2fSjeremylt     ierr = CeedOperatorFieldGetVector(op_output_fields[i], &vec);
526e15f9bd0SJeremy L Thompson     CeedChkBackend(ierr);
52789c6efa4Sjeremylt     // Active
528d1bcdac9Sjeremylt     if (vec == CEED_VECTOR_ACTIVE)
529d1d35e2fSjeremylt       vec = out_vec;
5304a2e7687Sjeremylt     // Restrict
5314fc1f125SJeremy L Thompson     ierr = CeedElemRestrictionApply(impl->blk_restr[i+impl->num_inputs],
5324fc1f125SJeremy L Thompson                                     CEED_TRANSPOSE, impl->e_vecs_full[i+impl->num_inputs],
533e15f9bd0SJeremy L Thompson                                     vec, request); CeedChkBackend(ierr);
53489c6efa4Sjeremylt 
5354a2e7687Sjeremylt   }
5364a2e7687Sjeremylt 
5374a2e7687Sjeremylt   // Restore input arrays
538d1d35e2fSjeremylt   ierr = CeedOperatorRestoreInputs_Blocked(num_input_fields, qf_input_fields,
5394fc1f125SJeremy L Thompson          op_input_fields, false, e_data_full, impl); CeedChkBackend(ierr);
5401d102b48SJeremy L Thompson 
541e15f9bd0SJeremy L Thompson   return CEED_ERROR_SUCCESS;
5421d102b48SJeremy L Thompson }
5431d102b48SJeremy L Thompson 
544f10650afSjeremylt //------------------------------------------------------------------------------
54570a7ffb3SJeremy L Thompson // Core code for assembling linear QFunction
546f10650afSjeremylt //------------------------------------------------------------------------------
54770a7ffb3SJeremy L Thompson static inline int CeedOperatorLinearAssembleQFunctionCore_Blocked(
548a0162de9SJeremy L Thompson   CeedOperator op, bool build_objects, CeedVector *assembled,
549a0162de9SJeremy L Thompson   CeedElemRestriction *rstr, CeedRequest *request) {
5501d102b48SJeremy L Thompson   int ierr;
5511d102b48SJeremy L Thompson   CeedOperator_Blocked *impl;
552e15f9bd0SJeremy L Thompson   ierr = CeedOperatorGetData(op, &impl); CeedChkBackend(ierr);
553d1d35e2fSjeremylt   const CeedInt blk_size = 8;
554d1d35e2fSjeremylt   CeedInt Q, num_input_fields, num_output_fields, num_elem, size;
555d1d35e2fSjeremylt   ierr = CeedOperatorGetNumElements(op, &num_elem); CeedChkBackend(ierr);
556e15f9bd0SJeremy L Thompson   ierr = CeedOperatorGetNumQuadraturePoints(op, &Q); CeedChkBackend(ierr);
557d1d35e2fSjeremylt   CeedInt num_blks = (num_elem/blk_size) + !!(num_elem%blk_size);
5581d102b48SJeremy L Thompson   CeedQFunction qf;
559e15f9bd0SJeremy L Thompson   ierr = CeedOperatorGetQFunction(op, &qf); CeedChkBackend(ierr);
560d1d35e2fSjeremylt   CeedOperatorField *op_input_fields, *op_output_fields;
5617e7773b5SJeremy L Thompson   ierr = CeedOperatorGetFields(op, &num_input_fields, &op_input_fields,
5627e7773b5SJeremy L Thompson                                &num_output_fields, &op_output_fields);
563e15f9bd0SJeremy L Thompson   CeedChkBackend(ierr);
564d1d35e2fSjeremylt   CeedQFunctionField *qf_input_fields, *qf_output_fields;
5657e7773b5SJeremy L Thompson   ierr = CeedQFunctionGetFields(qf, NULL, &qf_input_fields, NULL,
5667e7773b5SJeremy L Thompson                                 &qf_output_fields);
567e15f9bd0SJeremy L Thompson   CeedChkBackend(ierr);
5684fc1f125SJeremy L Thompson   CeedVector vec, l_vec = impl->qf_l_vec;
5694fc1f125SJeremy L Thompson   CeedInt num_active_in = impl->num_active_in,
5704fc1f125SJeremy L Thompson           num_active_out = impl->num_active_out;
571*d2643443SJeremy L Thompson   CeedSize q_size;
572bb219a0fSJeremy L Thompson   CeedVector *active_in = impl->qf_active_in;
5731d102b48SJeremy L Thompson   CeedScalar *a, *tmp;
5741d102b48SJeremy L Thompson   Ceed ceed;
575e15f9bd0SJeremy L Thompson   ierr = CeedOperatorGetCeed(op, &ceed); CeedChkBackend(ierr);
5764fc1f125SJeremy L Thompson   CeedScalar *e_data_full[2*CEED_FIELD_MAX] = {0};
5771d102b48SJeremy L Thompson 
5781d102b48SJeremy L Thompson   // Setup
579e15f9bd0SJeremy L Thompson   ierr = CeedOperatorSetup_Blocked(op); CeedChkBackend(ierr);
5801d102b48SJeremy L Thompson 
58116911fdaSjeremylt   // Check for identity
5820b454692Sjeremylt   if (impl->is_identity_qf)
58316911fdaSjeremylt     // LCOV_EXCL_START
584e15f9bd0SJeremy L Thompson     return CeedError(ceed, CEED_ERROR_BACKEND,
585d1d35e2fSjeremylt                      "Assembling identity QFunctions not supported");
58616911fdaSjeremylt   // LCOV_EXCL_STOP
58716911fdaSjeremylt 
5881d102b48SJeremy L Thompson   // Input Evecs and Restriction
589d1d35e2fSjeremylt   ierr = CeedOperatorSetupInputs_Blocked(num_input_fields, qf_input_fields,
5904fc1f125SJeremy L Thompson                                          op_input_fields, NULL, true, e_data_full,
591a0162de9SJeremy L Thompson                                          impl, request); CeedChkBackend(ierr);
5921d102b48SJeremy L Thompson 
5931d102b48SJeremy L Thompson   // Count number of active input fields
594bb219a0fSJeremy L Thompson   if (!num_active_in) {
595d1d35e2fSjeremylt     for (CeedInt i=0; i<num_input_fields; i++) {
5961d102b48SJeremy L Thompson       // Get input vector
597d1d35e2fSjeremylt       ierr = CeedOperatorFieldGetVector(op_input_fields[i], &vec);
598d1d35e2fSjeremylt       CeedChkBackend(ierr);
5991d102b48SJeremy L Thompson       // Check if active input
6001d102b48SJeremy L Thompson       if (vec == CEED_VECTOR_ACTIVE) {
601d1d35e2fSjeremylt         ierr = CeedQFunctionFieldGetSize(qf_input_fields[i], &size);
602e15f9bd0SJeremy L Thompson         CeedChkBackend(ierr);
603d1d35e2fSjeremylt         ierr = CeedVectorSetValue(impl->q_vecs_in[i], 0.0); CeedChkBackend(ierr);
604d1d35e2fSjeremylt         ierr = CeedVectorGetArray(impl->q_vecs_in[i], CEED_MEM_HOST, &tmp);
605d1d35e2fSjeremylt         CeedChkBackend(ierr);
606d1d35e2fSjeremylt         ierr = CeedRealloc(num_active_in + size, &active_in); CeedChkBackend(ierr);
6071d102b48SJeremy L Thompson         for (CeedInt field=0; field<size; field++) {
608*d2643443SJeremy L Thompson           q_size = (CeedSize)Q*blk_size;
609*d2643443SJeremy L Thompson           ierr = CeedVectorCreate(ceed, q_size, &active_in[num_active_in+field]);
610e15f9bd0SJeremy L Thompson           CeedChkBackend(ierr);
611d1d35e2fSjeremylt           ierr = CeedVectorSetArray(active_in[num_active_in+field], CEED_MEM_HOST,
612d1d35e2fSjeremylt                                     CEED_USE_POINTER, &tmp[field*Q*blk_size]);
613e15f9bd0SJeremy L Thompson           CeedChkBackend(ierr);
6141d102b48SJeremy L Thompson         }
615d1d35e2fSjeremylt         num_active_in += size;
616d1d35e2fSjeremylt         ierr = CeedVectorRestoreArray(impl->q_vecs_in[i], &tmp); CeedChkBackend(ierr);
6171d102b48SJeremy L Thompson       }
6181d102b48SJeremy L Thompson     }
6194fc1f125SJeremy L Thompson     impl->num_active_in = num_active_in;
620bb219a0fSJeremy L Thompson     impl->qf_active_in = active_in;
621bb219a0fSJeremy L Thompson   }
6221d102b48SJeremy L Thompson 
6231d102b48SJeremy L Thompson   // Count number of active output fields
624bb219a0fSJeremy L Thompson   if (!num_active_out) {
625d1d35e2fSjeremylt     for (CeedInt i=0; i<num_output_fields; i++) {
6261d102b48SJeremy L Thompson       // Get output vector
627d1d35e2fSjeremylt       ierr = CeedOperatorFieldGetVector(op_output_fields[i], &vec);
628e15f9bd0SJeremy L Thompson       CeedChkBackend(ierr);
6291d102b48SJeremy L Thompson       // Check if active output
6301d102b48SJeremy L Thompson       if (vec == CEED_VECTOR_ACTIVE) {
631d1d35e2fSjeremylt         ierr = CeedQFunctionFieldGetSize(qf_output_fields[i], &size);
632e15f9bd0SJeremy L Thompson         CeedChkBackend(ierr);
633d1d35e2fSjeremylt         num_active_out += size;
6341d102b48SJeremy L Thompson       }
6351d102b48SJeremy L Thompson     }
6364fc1f125SJeremy L Thompson     impl->num_active_out = num_active_out;
637bb219a0fSJeremy L Thompson   }
6381d102b48SJeremy L Thompson 
6391d102b48SJeremy L Thompson   // Check sizes
640d1d35e2fSjeremylt   if (!num_active_in || !num_active_out)
6411d102b48SJeremy L Thompson     // LCOV_EXCL_START
642e15f9bd0SJeremy L Thompson     return CeedError(ceed, CEED_ERROR_BACKEND,
643e15f9bd0SJeremy L Thompson                      "Cannot assemble QFunction without active inputs "
6441d102b48SJeremy L Thompson                      "and outputs");
6451d102b48SJeremy L Thompson   // LCOV_EXCL_STOP
6461d102b48SJeremy L Thompson 
647d1d35e2fSjeremylt   // Setup Lvec
6484fc1f125SJeremy L Thompson   if (!l_vec) {
649*d2643443SJeremy L Thompson     CeedSize l_size = (CeedSize)num_blks*blk_size*Q*num_active_in*num_active_out;
650*d2643443SJeremy L Thompson     ierr = CeedVectorCreate(ceed, l_size, &l_vec); CeedChkBackend(ierr);
6514fc1f125SJeremy L Thompson     impl->qf_l_vec = l_vec;
652bb219a0fSJeremy L Thompson   }
6539c774eddSJeremy L Thompson   ierr = CeedVectorGetArrayWrite(l_vec, CEED_MEM_HOST, &a); CeedChkBackend(ierr);
6541d102b48SJeremy L Thompson 
65570a7ffb3SJeremy L Thompson   // Build objects if needed
656d1d35e2fSjeremylt   CeedInt strides[3] = {1, Q, num_active_in *num_active_out*Q};
65770a7ffb3SJeremy L Thompson   if (build_objects) {
65870a7ffb3SJeremy L Thompson     // Create output restriction
659d1d35e2fSjeremylt     ierr = CeedElemRestrictionCreateStrided(ceed, num_elem, Q,
660d1d35e2fSjeremylt                                             num_active_in*num_active_out,
661d1d35e2fSjeremylt                                             num_active_in*num_active_out*num_elem*Q,
662e15f9bd0SJeremy L Thompson                                             strides, rstr); CeedChkBackend(ierr);
6631d102b48SJeremy L Thompson     // Create assembled vector
664*d2643443SJeremy L Thompson     CeedSize l_size = (CeedSize)num_elem*Q*num_active_in*num_active_out;
665*d2643443SJeremy L Thompson     ierr = CeedVectorCreate(ceed, l_size, assembled); CeedChkBackend(ierr);
66670a7ffb3SJeremy L Thompson   }
6671d102b48SJeremy L Thompson 
6681d102b48SJeremy L Thompson   // Loop through elements
669d1d35e2fSjeremylt   for (CeedInt e=0; e<num_blks*blk_size; e+=blk_size) {
6701d102b48SJeremy L Thompson     // Input basis apply
671d1d35e2fSjeremylt     ierr = CeedOperatorInputBasis_Blocked(e, Q, qf_input_fields, op_input_fields,
6724fc1f125SJeremy L Thompson                                           num_input_fields, blk_size, true, e_data_full,
673a0162de9SJeremy L Thompson                                           impl); CeedChkBackend(ierr);
6741d102b48SJeremy L Thompson 
6751d102b48SJeremy L Thompson     // Assemble QFunction
676d1d35e2fSjeremylt     for (CeedInt in=0; in<num_active_in; in++) {
6771d102b48SJeremy L Thompson       // Set Inputs
678d1d35e2fSjeremylt       ierr = CeedVectorSetValue(active_in[in], 1.0); CeedChkBackend(ierr);
679d1d35e2fSjeremylt       if (num_active_in > 1) {
680d1d35e2fSjeremylt         ierr = CeedVectorSetValue(active_in[(in+num_active_in-1)%num_active_in],
681e15f9bd0SJeremy L Thompson                                   0.0); CeedChkBackend(ierr);
68242ea3801Sjeremylt       }
6831d102b48SJeremy L Thompson       // Set Outputs
684d1d35e2fSjeremylt       for (CeedInt out=0; out<num_output_fields; out++) {
6851d102b48SJeremy L Thompson         // Get output vector
686d1d35e2fSjeremylt         ierr = CeedOperatorFieldGetVector(op_output_fields[out], &vec);
687e15f9bd0SJeremy L Thompson         CeedChkBackend(ierr);
6881d102b48SJeremy L Thompson         // Check if active output
6891d102b48SJeremy L Thompson         if (vec == CEED_VECTOR_ACTIVE) {
690d1d35e2fSjeremylt           CeedVectorSetArray(impl->q_vecs_out[out], CEED_MEM_HOST,
691e15f9bd0SJeremy L Thompson                              CEED_USE_POINTER, a); CeedChkBackend(ierr);
692d1d35e2fSjeremylt           ierr = CeedQFunctionFieldGetSize(qf_output_fields[out], &size);
693e15f9bd0SJeremy L Thompson           CeedChkBackend(ierr);
694d1d35e2fSjeremylt           a += size*Q*blk_size; // Advance the pointer by the size of the output
6951d102b48SJeremy L Thompson         }
6961d102b48SJeremy L Thompson       }
6971d102b48SJeremy L Thompson       // Apply QFunction
698d1d35e2fSjeremylt       ierr = CeedQFunctionApply(qf, Q*blk_size, impl->q_vecs_in, impl->q_vecs_out);
699e15f9bd0SJeremy L Thompson       CeedChkBackend(ierr);
7004a2e7687Sjeremylt     }
7014a2e7687Sjeremylt   }
7024a2e7687Sjeremylt 
7031d102b48SJeremy L Thompson   // Un-set output Qvecs to prevent accidental overwrite of Assembled
704d1d35e2fSjeremylt   for (CeedInt out=0; out<num_output_fields; out++) {
7051d102b48SJeremy L Thompson     // Get output vector
706d1d35e2fSjeremylt     ierr = CeedOperatorFieldGetVector(op_output_fields[out], &vec);
707e15f9bd0SJeremy L Thompson     CeedChkBackend(ierr);
7081d102b48SJeremy L Thompson     // Check if active output
7091d102b48SJeremy L Thompson     if (vec == CEED_VECTOR_ACTIVE) {
710d1d35e2fSjeremylt       CeedVectorSetArray(impl->q_vecs_out[out], CEED_MEM_HOST, CEED_COPY_VALUES,
711e15f9bd0SJeremy L Thompson                          NULL); CeedChkBackend(ierr);
7121d102b48SJeremy L Thompson     }
7131d102b48SJeremy L Thompson   }
7141d102b48SJeremy L Thompson 
7151d102b48SJeremy L Thompson   // Restore input arrays
716d1d35e2fSjeremylt   ierr = CeedOperatorRestoreInputs_Blocked(num_input_fields, qf_input_fields,
7174fc1f125SJeremy L Thompson          op_input_fields, true, e_data_full, impl); CeedChkBackend(ierr);
7181d102b48SJeremy L Thompson 
7191d102b48SJeremy L Thompson   // Output blocked restriction
7204fc1f125SJeremy L Thompson   ierr = CeedVectorRestoreArray(l_vec, &a); CeedChkBackend(ierr);
721e15f9bd0SJeremy L Thompson   ierr = CeedVectorSetValue(*assembled, 0.0); CeedChkBackend(ierr);
722bb219a0fSJeremy L Thompson   CeedElemRestriction blk_rstr = impl->qf_blk_rstr;
723bb219a0fSJeremy L Thompson   if (!impl->qf_blk_rstr) {
724d1d35e2fSjeremylt     ierr = CeedElemRestrictionCreateBlockedStrided(ceed, num_elem, Q, blk_size,
725d1d35e2fSjeremylt            num_active_in*num_active_out, num_active_in*num_active_out*num_elem*Q,
726d1d35e2fSjeremylt            strides, &blk_rstr); CeedChkBackend(ierr);
727bb219a0fSJeremy L Thompson     impl->qf_blk_rstr = blk_rstr;
728bb219a0fSJeremy L Thompson   }
7294fc1f125SJeremy L Thompson   ierr = CeedElemRestrictionApply(blk_rstr, CEED_TRANSPOSE, l_vec, *assembled,
730e15f9bd0SJeremy L Thompson                                   request); CeedChkBackend(ierr);
7311d102b48SJeremy L Thompson 
732e15f9bd0SJeremy L Thompson   return CEED_ERROR_SUCCESS;
7334a2e7687Sjeremylt }
7344a2e7687Sjeremylt 
735f10650afSjeremylt //------------------------------------------------------------------------------
73670a7ffb3SJeremy L Thompson // Assemble Linear QFunction
73770a7ffb3SJeremy L Thompson //------------------------------------------------------------------------------
73870a7ffb3SJeremy L Thompson static int CeedOperatorLinearAssembleQFunction_Blocked(CeedOperator op,
73970a7ffb3SJeremy L Thompson     CeedVector *assembled, CeedElemRestriction *rstr, CeedRequest *request) {
74070a7ffb3SJeremy L Thompson   return CeedOperatorLinearAssembleQFunctionCore_Blocked(op, true, assembled,
74170a7ffb3SJeremy L Thompson          rstr, request);
74270a7ffb3SJeremy L Thompson }
74370a7ffb3SJeremy L Thompson 
74470a7ffb3SJeremy L Thompson //------------------------------------------------------------------------------
74570a7ffb3SJeremy L Thompson // Update Assembled Linear QFunction
74670a7ffb3SJeremy L Thompson //------------------------------------------------------------------------------
74770a7ffb3SJeremy L Thompson static int CeedOperatorLinearAssembleQFunctionUpdate_Blocked(CeedOperator op,
74870a7ffb3SJeremy L Thompson     CeedVector assembled, CeedElemRestriction rstr, CeedRequest *request) {
74970a7ffb3SJeremy L Thompson   return CeedOperatorLinearAssembleQFunctionCore_Blocked(op, false, &assembled,
75070a7ffb3SJeremy L Thompson          &rstr, request);
75170a7ffb3SJeremy L Thompson }
75270a7ffb3SJeremy L Thompson 
75370a7ffb3SJeremy L Thompson //------------------------------------------------------------------------------
754f10650afSjeremylt // Operator Destroy
755f10650afSjeremylt //------------------------------------------------------------------------------
756f10650afSjeremylt static int CeedOperatorDestroy_Blocked(CeedOperator op) {
757f10650afSjeremylt   int ierr;
758f10650afSjeremylt   CeedOperator_Blocked *impl;
759e15f9bd0SJeremy L Thompson   ierr = CeedOperatorGetData(op, &impl); CeedChkBackend(ierr);
760f10650afSjeremylt 
7614fc1f125SJeremy L Thompson   for (CeedInt i=0; i<impl->num_inputs+impl->num_outputs; i++) {
762d1d35e2fSjeremylt     ierr = CeedElemRestrictionDestroy(&impl->blk_restr[i]); CeedChkBackend(ierr);
7634fc1f125SJeremy L Thompson     ierr = CeedVectorDestroy(&impl->e_vecs_full[i]); CeedChkBackend(ierr);
764f10650afSjeremylt   }
765d1d35e2fSjeremylt   ierr = CeedFree(&impl->blk_restr); CeedChkBackend(ierr);
7664fc1f125SJeremy L Thompson   ierr = CeedFree(&impl->e_vecs_full); CeedChkBackend(ierr);
7674fc1f125SJeremy L Thompson   ierr = CeedFree(&impl->input_states); CeedChkBackend(ierr);
768f10650afSjeremylt 
7694fc1f125SJeremy L Thompson   for (CeedInt i=0; i<impl->num_inputs; i++) {
770d1d35e2fSjeremylt     ierr = CeedVectorDestroy(&impl->e_vecs_in[i]); CeedChkBackend(ierr);
771d1d35e2fSjeremylt     ierr = CeedVectorDestroy(&impl->q_vecs_in[i]); CeedChkBackend(ierr);
772f10650afSjeremylt   }
773d1d35e2fSjeremylt   ierr = CeedFree(&impl->e_vecs_in); CeedChkBackend(ierr);
774d1d35e2fSjeremylt   ierr = CeedFree(&impl->q_vecs_in); CeedChkBackend(ierr);
775f10650afSjeremylt 
7764fc1f125SJeremy L Thompson   for (CeedInt i=0; i<impl->num_outputs; i++) {
777d1d35e2fSjeremylt     ierr = CeedVectorDestroy(&impl->e_vecs_out[i]); CeedChkBackend(ierr);
778d1d35e2fSjeremylt     ierr = CeedVectorDestroy(&impl->q_vecs_out[i]); CeedChkBackend(ierr);
779f10650afSjeremylt   }
780d1d35e2fSjeremylt   ierr = CeedFree(&impl->e_vecs_out); CeedChkBackend(ierr);
781d1d35e2fSjeremylt   ierr = CeedFree(&impl->q_vecs_out); CeedChkBackend(ierr);
782f10650afSjeremylt 
783bb219a0fSJeremy L Thompson   // QFunction assembly data
7844fc1f125SJeremy L Thompson   for (CeedInt i=0; i<impl->num_active_in; i++) {
785bb219a0fSJeremy L Thompson     ierr = CeedVectorDestroy(&impl->qf_active_in[i]); CeedChkBackend(ierr);
786bb219a0fSJeremy L Thompson   }
787bb219a0fSJeremy L Thompson   ierr = CeedFree(&impl->qf_active_in); CeedChkBackend(ierr);
7884fc1f125SJeremy L Thompson   ierr = CeedVectorDestroy(&impl->qf_l_vec); CeedChkBackend(ierr);
789bb219a0fSJeremy L Thompson   ierr = CeedElemRestrictionDestroy(&impl->qf_blk_rstr); CeedChkBackend(ierr);
790bb219a0fSJeremy L Thompson 
791e15f9bd0SJeremy L Thompson   ierr = CeedFree(&impl); CeedChkBackend(ierr);
792e15f9bd0SJeremy L Thompson   return CEED_ERROR_SUCCESS;
793f10650afSjeremylt }
794f10650afSjeremylt 
795f10650afSjeremylt //------------------------------------------------------------------------------
796f10650afSjeremylt // Operator Create
797f10650afSjeremylt //------------------------------------------------------------------------------
7984a2e7687Sjeremylt int CeedOperatorCreate_Blocked(CeedOperator op) {
7994a2e7687Sjeremylt   int ierr;
800fe2413ffSjeremylt   Ceed ceed;
801e15f9bd0SJeremy L Thompson   ierr = CeedOperatorGetCeed(op, &ceed); CeedChkBackend(ierr);
8024ce2993fSjeremylt   CeedOperator_Blocked *impl;
8034a2e7687Sjeremylt 
804e15f9bd0SJeremy L Thompson   ierr = CeedCalloc(1, &impl); CeedChkBackend(ierr);
805e15f9bd0SJeremy L Thompson   ierr = CeedOperatorSetData(op, impl); CeedChkBackend(ierr);
806fe2413ffSjeremylt 
80780ac2e43SJeremy L Thompson   ierr = CeedSetBackendFunction(ceed, "Operator", op, "LinearAssembleQFunction",
80880ac2e43SJeremy L Thompson                                 CeedOperatorLinearAssembleQFunction_Blocked);
809e15f9bd0SJeremy L Thompson   CeedChkBackend(ierr);
81070a7ffb3SJeremy L Thompson   ierr = CeedSetBackendFunction(ceed, "Operator", op,
81170a7ffb3SJeremy L Thompson                                 "LinearAssembleQFunctionUpdate",
81270a7ffb3SJeremy L Thompson                                 CeedOperatorLinearAssembleQFunctionUpdate_Blocked);
81370a7ffb3SJeremy L Thompson   CeedChkBackend(ierr);
814cae8b89aSjeremylt   ierr = CeedSetBackendFunction(ceed, "Operator", op, "ApplyAdd",
815e15f9bd0SJeremy L Thompson                                 CeedOperatorApplyAdd_Blocked); CeedChkBackend(ierr);
816fe2413ffSjeremylt   ierr = CeedSetBackendFunction(ceed, "Operator", op, "Destroy",
817e15f9bd0SJeremy L Thompson                                 CeedOperatorDestroy_Blocked); CeedChkBackend(ierr);
818e15f9bd0SJeremy L Thompson   return CEED_ERROR_SUCCESS;
8194a2e7687Sjeremylt }
820f10650afSjeremylt //------------------------------------------------------------------------------
821