xref: /libCEED/rust/libceed-sys/c-src/backends/blocked/ceed-blocked-operator.c (revision 70a7ffb32f2ac8f6de27898459ba6016395fdb67)
14a2e7687Sjeremylt // Copyright (c) 2017-2018, Lawrence Livermore National Security, LLC.
24a2e7687Sjeremylt // Produced at the Lawrence Livermore National Laboratory. LLNL-CODE-734707.
34a2e7687Sjeremylt // All Rights reserved. See files LICENSE and NOTICE for details.
44a2e7687Sjeremylt //
54a2e7687Sjeremylt // This file is part of CEED, a collection of benchmarks, miniapps, software
64a2e7687Sjeremylt // libraries and APIs for efficient high-order finite element and spectral
74a2e7687Sjeremylt // element discretizations for exascale applications. For more information and
84a2e7687Sjeremylt // source code availability see http://github.com/ceed.
94a2e7687Sjeremylt //
104a2e7687Sjeremylt // The CEED research is supported by the Exascale Computing Project 17-SC-20-SC,
114a2e7687Sjeremylt // a collaborative effort of two U.S. Department of Energy organizations (Office
124a2e7687Sjeremylt // of Science and the National Nuclear Security Administration) responsible for
134a2e7687Sjeremylt // the planning and preparation of a capable exascale ecosystem, including
144a2e7687Sjeremylt // software, applications, hardware, advanced system engineering and early
154a2e7687Sjeremylt // testbed platforms, in support of the nation's exascale computing imperative.
164a2e7687Sjeremylt 
17ec3da8bcSJed Brown #include <ceed/ceed.h>
18ec3da8bcSJed Brown #include <ceed/backend.h>
193d576824SJeremy L Thompson #include <stdbool.h>
203d576824SJeremy L Thompson #include <stddef.h>
213d576824SJeremy L Thompson #include <stdint.h>
224a2e7687Sjeremylt #include "ceed-blocked.h"
234a2e7687Sjeremylt 
24f10650afSjeremylt //------------------------------------------------------------------------------
25f10650afSjeremylt // Setup Input/Output Fields
26f10650afSjeremylt //------------------------------------------------------------------------------
2789c6efa4Sjeremylt static int CeedOperatorSetupFields_Blocked(CeedQFunction qf,
28d1d35e2fSjeremylt     CeedOperator op, bool inOrOut, CeedElemRestriction *blk_restr,
29d1d35e2fSjeremylt     CeedVector *full_evecs, CeedVector *e_vecs, CeedVector *q_vecs,
30d1d35e2fSjeremylt     CeedInt start_e, CeedInt numfields, CeedInt Q) {
31d1d35e2fSjeremylt   CeedInt dim, ierr, num_comp, size, P;
32aedaa0e5Sjeremylt   Ceed ceed;
33e15f9bd0SJeremy L Thompson   ierr = CeedOperatorGetCeed(op, &ceed); CeedChkBackend(ierr);
34d1bcdac9Sjeremylt   CeedBasis basis;
35d1bcdac9Sjeremylt   CeedElemRestriction r;
36d1d35e2fSjeremylt   CeedOperatorField *op_fields;
37d1d35e2fSjeremylt   CeedQFunctionField *qf_fields;
38fe2413ffSjeremylt   if (inOrOut) {
397e7773b5SJeremy L Thompson     ierr = CeedOperatorGetFields(op, NULL, NULL, NULL, &op_fields);
40e15f9bd0SJeremy L Thompson     CeedChkBackend(ierr);
417e7773b5SJeremy L Thompson     ierr = CeedQFunctionGetFields(qf, NULL, NULL, NULL, &qf_fields);
42e15f9bd0SJeremy L Thompson     CeedChkBackend(ierr);
43fe2413ffSjeremylt   } else {
447e7773b5SJeremy L Thompson     ierr = CeedOperatorGetFields(op, NULL, &op_fields, NULL, NULL);
45e15f9bd0SJeremy L Thompson     CeedChkBackend(ierr);
467e7773b5SJeremy L Thompson     ierr = CeedQFunctionGetFields(qf, NULL, &qf_fields, NULL, NULL);
47e15f9bd0SJeremy L Thompson     CeedChkBackend(ierr);
48fe2413ffSjeremylt   }
49d1d35e2fSjeremylt   const CeedInt blk_size = 8;
504a2e7687Sjeremylt 
514a2e7687Sjeremylt   // Loop over fields
524a2e7687Sjeremylt   for (CeedInt i=0; i<numfields; i++) {
53d1d35e2fSjeremylt     CeedEvalMode eval_mode;
54d1d35e2fSjeremylt     ierr = CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode);
55d1d35e2fSjeremylt     CeedChkBackend(ierr);
564a2e7687Sjeremylt 
57d1d35e2fSjeremylt     if (eval_mode != CEED_EVAL_WEIGHT) {
58d1d35e2fSjeremylt       ierr = CeedOperatorFieldGetElemRestriction(op_fields[i], &r);
59e15f9bd0SJeremy L Thompson       CeedChkBackend(ierr);
60e15f9bd0SJeremy L Thompson       ierr = CeedElemRestrictionGetCeed(r, &ceed); CeedChkBackend(ierr);
61d1d35e2fSjeremylt       CeedInt num_elem, elem_size, l_size, comp_stride;
62d1d35e2fSjeremylt       ierr = CeedElemRestrictionGetNumElements(r, &num_elem); CeedChkBackend(ierr);
63d1d35e2fSjeremylt       ierr = CeedElemRestrictionGetElementSize(r, &elem_size); CeedChkBackend(ierr);
64d1d35e2fSjeremylt       ierr = CeedElemRestrictionGetLVectorSize(r, &l_size); CeedChkBackend(ierr);
65d1d35e2fSjeremylt       ierr = CeedElemRestrictionGetNumComponents(r, &num_comp); CeedChkBackend(ierr);
66bd33150aSjeremylt 
673ac43b2cSJeremy L Thompson       bool strided;
68e15f9bd0SJeremy L Thompson       ierr = CeedElemRestrictionIsStrided(r, &strided); CeedChkBackend(ierr);
693ac43b2cSJeremy L Thompson       if (strided) {
703ac43b2cSJeremy L Thompson         CeedInt strides[3];
71e15f9bd0SJeremy L Thompson         ierr = CeedElemRestrictionGetStrides(r, &strides); CeedChkBackend(ierr);
72d1d35e2fSjeremylt         ierr = CeedElemRestrictionCreateBlockedStrided(ceed, num_elem, elem_size,
73d1d35e2fSjeremylt                blk_size, num_comp, l_size, strides, &blk_restr[i+start_e]);
74e15f9bd0SJeremy L Thompson         CeedChkBackend(ierr);
753ac43b2cSJeremy L Thompson       } else {
76bd33150aSjeremylt         const CeedInt *offsets = NULL;
77bd33150aSjeremylt         ierr = CeedElemRestrictionGetOffsets(r, CEED_MEM_HOST, &offsets);
78e15f9bd0SJeremy L Thompson         CeedChkBackend(ierr);
79d1d35e2fSjeremylt         ierr = CeedElemRestrictionGetCompStride(r, &comp_stride); CeedChkBackend(ierr);
80d1d35e2fSjeremylt         ierr = CeedElemRestrictionCreateBlocked(ceed, num_elem, elem_size,
81d1d35e2fSjeremylt                                                 blk_size, num_comp, comp_stride,
82d1d35e2fSjeremylt                                                 l_size, CEED_MEM_HOST,
83bd33150aSjeremylt                                                 CEED_COPY_VALUES, offsets,
84d1d35e2fSjeremylt                                                 &blk_restr[i+start_e]);
85e15f9bd0SJeremy L Thompson         CeedChkBackend(ierr);
86e15f9bd0SJeremy L Thompson         ierr = CeedElemRestrictionRestoreOffsets(r, &offsets); CeedChkBackend(ierr);
873ac43b2cSJeremy L Thompson       }
88d1d35e2fSjeremylt       ierr = CeedElemRestrictionCreateVector(blk_restr[i+start_e], NULL,
89d1d35e2fSjeremylt                                              &full_evecs[i+start_e]);
90e15f9bd0SJeremy L Thompson       CeedChkBackend(ierr);
914a2e7687Sjeremylt     }
924a2e7687Sjeremylt 
93d1d35e2fSjeremylt     switch(eval_mode) {
944a2e7687Sjeremylt     case CEED_EVAL_NONE:
95d1d35e2fSjeremylt       ierr = CeedQFunctionFieldGetSize(qf_fields[i], &size); CeedChkBackend(ierr);
96d1d35e2fSjeremylt       ierr = CeedVectorCreate(ceed, Q*size*blk_size, &q_vecs[i]);
97d1d35e2fSjeremylt       CeedChkBackend(ierr);
98aedaa0e5Sjeremylt       break;
99aedaa0e5Sjeremylt     case CEED_EVAL_INTERP:
100d1d35e2fSjeremylt       ierr = CeedQFunctionFieldGetSize(qf_fields[i], &size); CeedChkBackend(ierr);
1014d1cd9fcSJeremy L Thompson       ierr = CeedElemRestrictionGetElementSize(r, &P);
102e15f9bd0SJeremy L Thompson       CeedChkBackend(ierr);
103d1d35e2fSjeremylt       ierr = CeedVectorCreate(ceed, P*size*blk_size, &e_vecs[i]);
104d1d35e2fSjeremylt       CeedChkBackend(ierr);
105d1d35e2fSjeremylt       ierr = CeedVectorCreate(ceed, Q*size*blk_size, &q_vecs[i]);
106d1d35e2fSjeremylt       CeedChkBackend(ierr);
1074a2e7687Sjeremylt       break;
1084a2e7687Sjeremylt     case CEED_EVAL_GRAD:
109d1d35e2fSjeremylt       ierr = CeedOperatorFieldGetBasis(op_fields[i], &basis); CeedChkBackend(ierr);
110d1d35e2fSjeremylt       ierr = CeedQFunctionFieldGetSize(qf_fields[i], &size); CeedChkBackend(ierr);
111e15f9bd0SJeremy L Thompson       ierr = CeedBasisGetDimension(basis, &dim); CeedChkBackend(ierr);
1124d1cd9fcSJeremy L Thompson       ierr = CeedElemRestrictionGetElementSize(r, &P);
113e15f9bd0SJeremy L Thompson       CeedChkBackend(ierr);
114d1d35e2fSjeremylt       ierr = CeedVectorCreate(ceed, P*size/dim*blk_size, &e_vecs[i]);
115e15f9bd0SJeremy L Thompson       CeedChkBackend(ierr);
116d1d35e2fSjeremylt       ierr = CeedVectorCreate(ceed, Q*size*blk_size, &q_vecs[i]);
117d1d35e2fSjeremylt       CeedChkBackend(ierr);
1184a2e7687Sjeremylt       break;
1194a2e7687Sjeremylt     case CEED_EVAL_WEIGHT: // Only on input fields
120d1d35e2fSjeremylt       ierr = CeedOperatorFieldGetBasis(op_fields[i], &basis); CeedChkBackend(ierr);
121d1d35e2fSjeremylt       ierr = CeedVectorCreate(ceed, Q*blk_size, &q_vecs[i]); CeedChkBackend(ierr);
122d1d35e2fSjeremylt       ierr = CeedBasisApply(basis, blk_size, CEED_NOTRANSPOSE,
123d1d35e2fSjeremylt                             CEED_EVAL_WEIGHT, CEED_VECTOR_NONE, q_vecs[i]);
124e15f9bd0SJeremy L Thompson       CeedChkBackend(ierr);
125aedaa0e5Sjeremylt 
1264a2e7687Sjeremylt       break;
1274a2e7687Sjeremylt     case CEED_EVAL_DIV:
1284d537eeaSYohann       break; // Not implemented
1294a2e7687Sjeremylt     case CEED_EVAL_CURL:
1304d537eeaSYohann       break; // Not implemented
1314a2e7687Sjeremylt     }
1324a2e7687Sjeremylt   }
133e15f9bd0SJeremy L Thompson   return CEED_ERROR_SUCCESS;
1344a2e7687Sjeremylt }
1354a2e7687Sjeremylt 
136f10650afSjeremylt //------------------------------------------------------------------------------
137f10650afSjeremylt // Setup Operator
138f10650afSjeremylt //------------------------------------------------------------------------------
1394a2e7687Sjeremylt static int CeedOperatorSetup_Blocked(CeedOperator op) {
1404a2e7687Sjeremylt   int ierr;
141d1d35e2fSjeremylt   bool setup_done;
142d1d35e2fSjeremylt   ierr = CeedOperatorIsSetupDone(op, &setup_done); CeedChkBackend(ierr);
143d1d35e2fSjeremylt   if (setup_done) return CEED_ERROR_SUCCESS;
144aedaa0e5Sjeremylt   Ceed ceed;
145e15f9bd0SJeremy L Thompson   ierr = CeedOperatorGetCeed(op, &ceed); CeedChkBackend(ierr);
1464ce2993fSjeremylt   CeedOperator_Blocked *impl;
147e15f9bd0SJeremy L Thompson   ierr = CeedOperatorGetData(op, &impl); CeedChkBackend(ierr);
1484ce2993fSjeremylt   CeedQFunction qf;
149e15f9bd0SJeremy L Thompson   ierr = CeedOperatorGetQFunction(op, &qf); CeedChkBackend(ierr);
150d1d35e2fSjeremylt   CeedInt Q, num_input_fields, num_output_fields;
151e15f9bd0SJeremy L Thompson   ierr = CeedOperatorGetNumQuadraturePoints(op, &Q); CeedChkBackend(ierr);
1520b454692Sjeremylt   ierr = CeedQFunctionIsIdentity(qf, &impl->is_identity_qf); CeedChkBackend(ierr);
153d1d35e2fSjeremylt   CeedOperatorField *op_input_fields, *op_output_fields;
1547e7773b5SJeremy L Thompson   ierr = CeedOperatorGetFields(op, &num_input_fields, &op_input_fields,
1557e7773b5SJeremy L Thompson                                &num_output_fields, &op_output_fields);
156e15f9bd0SJeremy L Thompson   CeedChkBackend(ierr);
157d1d35e2fSjeremylt   CeedQFunctionField *qf_input_fields, *qf_output_fields;
1587e7773b5SJeremy L Thompson   ierr = CeedQFunctionGetFields(qf, NULL, &qf_input_fields, NULL,
1597e7773b5SJeremy L Thompson                                 &qf_output_fields);
160e15f9bd0SJeremy L Thompson   CeedChkBackend(ierr);
1614a2e7687Sjeremylt 
1624a2e7687Sjeremylt   // Allocate
163d1d35e2fSjeremylt   ierr = CeedCalloc(num_input_fields + num_output_fields, &impl->blk_restr);
164e15f9bd0SJeremy L Thompson   CeedChkBackend(ierr);
165d1d35e2fSjeremylt   ierr = CeedCalloc(num_input_fields + num_output_fields, &impl->e_vecs);
166e15f9bd0SJeremy L Thompson   CeedChkBackend(ierr);
167d1d35e2fSjeremylt   ierr = CeedCalloc(num_input_fields + num_output_fields, &impl->e_data);
168e15f9bd0SJeremy L Thompson   CeedChkBackend(ierr);
1694a2e7687Sjeremylt 
170d1d35e2fSjeremylt   ierr = CeedCalloc(16, &impl->input_state); CeedChkBackend(ierr);
171d1d35e2fSjeremylt   ierr = CeedCalloc(16, &impl->e_vecs_in); CeedChkBackend(ierr);
172d1d35e2fSjeremylt   ierr = CeedCalloc(16, &impl->e_vecs_out); CeedChkBackend(ierr);
173d1d35e2fSjeremylt   ierr = CeedCalloc(16, &impl->q_vecs_in); CeedChkBackend(ierr);
174d1d35e2fSjeremylt   ierr = CeedCalloc(16, &impl->q_vecs_out); CeedChkBackend(ierr);
1754a2e7687Sjeremylt 
176d1d35e2fSjeremylt   impl->num_e_vecs_in = num_input_fields;
177d1d35e2fSjeremylt   impl->num_e_vecs_out = num_output_fields;
178aedaa0e5Sjeremylt 
1794a2e7687Sjeremylt   // Set up infield and outfield pointer arrays
1804a2e7687Sjeremylt   // Infields
181d1d35e2fSjeremylt   ierr = CeedOperatorSetupFields_Blocked(qf, op, 0, impl->blk_restr,
182d1d35e2fSjeremylt                                          impl->e_vecs, impl->e_vecs_in,
183d1d35e2fSjeremylt                                          impl->q_vecs_in, 0,
184d1d35e2fSjeremylt                                          num_input_fields, Q);
185e15f9bd0SJeremy L Thompson   CeedChkBackend(ierr);
1864a2e7687Sjeremylt   // Outfields
187d1d35e2fSjeremylt   ierr = CeedOperatorSetupFields_Blocked(qf, op, 1, impl->blk_restr,
188d1d35e2fSjeremylt                                          impl->e_vecs, impl->e_vecs_out,
189d1d35e2fSjeremylt                                          impl->q_vecs_out, num_input_fields,
190d1d35e2fSjeremylt                                          num_output_fields, Q);
191e15f9bd0SJeremy L Thompson   CeedChkBackend(ierr);
192aedaa0e5Sjeremylt 
19316911fdaSjeremylt   // Identity QFunctions
1940b454692Sjeremylt   if (impl->is_identity_qf) {
195d1d35e2fSjeremylt     CeedEvalMode in_mode, out_mode;
196d1d35e2fSjeremylt     CeedQFunctionField *in_fields, *out_fields;
1977e7773b5SJeremy L Thompson     ierr = CeedQFunctionGetFields(qf, NULL, &in_fields, NULL, &out_fields);
198e15f9bd0SJeremy L Thompson     CeedChkBackend(ierr);
1990b454692Sjeremylt     ierr = CeedQFunctionFieldGetEvalMode(in_fields[0], &in_mode);
200d1d35e2fSjeremylt     CeedChkBackend(ierr);
2010b454692Sjeremylt     ierr = CeedQFunctionFieldGetEvalMode(out_fields[0], &out_mode);
202d1d35e2fSjeremylt     CeedChkBackend(ierr);
203d1d35e2fSjeremylt 
2040b454692Sjeremylt     if (in_mode == CEED_EVAL_NONE && out_mode == CEED_EVAL_NONE) {
2050b454692Sjeremylt       impl->is_identity_restr_op = true;
2060b454692Sjeremylt     } else {
2070b454692Sjeremylt       ierr = CeedVectorDestroy(&impl->q_vecs_out[0]); CeedChkBackend(ierr);
2080b454692Sjeremylt       impl->q_vecs_out[0] = impl->q_vecs_in[0];
2090b454692Sjeremylt       ierr = CeedVectorAddReference(impl->q_vecs_in[0]); CeedChkBackend(ierr);
21016911fdaSjeremylt     }
21116911fdaSjeremylt   }
21216911fdaSjeremylt 
213e15f9bd0SJeremy L Thompson   ierr = CeedOperatorSetSetupDone(op); CeedChkBackend(ierr);
2144a2e7687Sjeremylt 
215e15f9bd0SJeremy L Thompson   return CEED_ERROR_SUCCESS;
2164a2e7687Sjeremylt }
2174a2e7687Sjeremylt 
218f10650afSjeremylt //------------------------------------------------------------------------------
219f10650afSjeremylt // Setup Operator Inputs
220f10650afSjeremylt //------------------------------------------------------------------------------
221d1d35e2fSjeremylt static inline int CeedOperatorSetupInputs_Blocked(CeedInt num_input_fields,
222d1d35e2fSjeremylt     CeedQFunctionField *qf_input_fields, CeedOperatorField *op_input_fields,
223d1d35e2fSjeremylt     CeedVector in_vec, bool skip_active, CeedOperator_Blocked *impl,
22489c6efa4Sjeremylt     CeedRequest *request) {
2251d102b48SJeremy L Thompson   CeedInt ierr;
226d1d35e2fSjeremylt   CeedEvalMode eval_mode;
227d1bcdac9Sjeremylt   CeedVector vec;
22816c359e6Sjeremylt   uint64_t state;
2294a2e7687Sjeremylt 
230d1d35e2fSjeremylt   for (CeedInt i=0; i<num_input_fields; i++) {
2311d102b48SJeremy L Thompson     // Get input vector
232d1d35e2fSjeremylt     ierr = CeedOperatorFieldGetVector(op_input_fields[i], &vec);
233d1d35e2fSjeremylt     CeedChkBackend(ierr);
2341d102b48SJeremy L Thompson     if (vec == CEED_VECTOR_ACTIVE) {
235d1d35e2fSjeremylt       if (skip_active)
2361d102b48SJeremy L Thompson         continue;
2371d102b48SJeremy L Thompson       else
238d1d35e2fSjeremylt         vec = in_vec;
2391d102b48SJeremy L Thompson     }
2401d102b48SJeremy L Thompson 
241d1d35e2fSjeremylt     ierr = CeedQFunctionFieldGetEvalMode(qf_input_fields[i], &eval_mode);
242e15f9bd0SJeremy L Thompson     CeedChkBackend(ierr);
243d1d35e2fSjeremylt     if (eval_mode == CEED_EVAL_WEIGHT) { // Skip
2444a2e7687Sjeremylt     } else {
2454a2e7687Sjeremylt       // Restrict
246e15f9bd0SJeremy L Thompson       ierr = CeedVectorGetState(vec, &state); CeedChkBackend(ierr);
247d1d35e2fSjeremylt       if (state != impl->input_state[i] || vec == in_vec) {
248d1d35e2fSjeremylt         ierr = CeedElemRestrictionApply(impl->blk_restr[i], CEED_NOTRANSPOSE,
249d1d35e2fSjeremylt                                         vec, impl->e_vecs[i], request);
250e15f9bd0SJeremy L Thompson         CeedChkBackend(ierr);
251d1d35e2fSjeremylt         impl->input_state[i] = state;
25216c359e6Sjeremylt       }
2534a2e7687Sjeremylt       // Get evec
254d1d35e2fSjeremylt       ierr = CeedVectorGetArrayRead(impl->e_vecs[i], CEED_MEM_HOST,
255d1d35e2fSjeremylt                                     (const CeedScalar **) &impl->e_data[i]);
256e15f9bd0SJeremy L Thompson       CeedChkBackend(ierr);
2574a2e7687Sjeremylt     }
2584a2e7687Sjeremylt   }
259e15f9bd0SJeremy L Thompson   return CEED_ERROR_SUCCESS;
2604a2e7687Sjeremylt }
2614a2e7687Sjeremylt 
262f10650afSjeremylt //------------------------------------------------------------------------------
263f10650afSjeremylt // Input Basis Action
264f10650afSjeremylt //------------------------------------------------------------------------------
2651d102b48SJeremy L Thompson static inline int CeedOperatorInputBasis_Blocked(CeedInt e, CeedInt Q,
266d1d35e2fSjeremylt     CeedQFunctionField *qf_input_fields, CeedOperatorField *op_input_fields,
267d1d35e2fSjeremylt     CeedInt num_input_fields, CeedInt blk_size, bool skip_active,
2681d102b48SJeremy L Thompson     CeedOperator_Blocked *impl) {
2691d102b48SJeremy L Thompson   CeedInt ierr;
270d1d35e2fSjeremylt   CeedInt dim, elem_size, size;
271d1d35e2fSjeremylt   CeedElemRestriction elem_restr;
272d1d35e2fSjeremylt   CeedEvalMode eval_mode;
2731d102b48SJeremy L Thompson   CeedBasis basis;
2741d102b48SJeremy L Thompson 
275d1d35e2fSjeremylt   for (CeedInt i=0; i<num_input_fields; i++) {
2761d102b48SJeremy L Thompson     // Skip active input
277d1d35e2fSjeremylt     if (skip_active) {
2781d102b48SJeremy L Thompson       CeedVector vec;
279d1d35e2fSjeremylt       ierr = CeedOperatorFieldGetVector(op_input_fields[i], &vec);
280d1d35e2fSjeremylt       CeedChkBackend(ierr);
2811d102b48SJeremy L Thompson       if (vec == CEED_VECTOR_ACTIVE)
2821d102b48SJeremy L Thompson         continue;
2831d102b48SJeremy L Thompson     }
2841d102b48SJeremy L Thompson 
285d1d35e2fSjeremylt     // Get elem_size, eval_mode, size
286d1d35e2fSjeremylt     ierr = CeedOperatorFieldGetElemRestriction(op_input_fields[i], &elem_restr);
287e15f9bd0SJeremy L Thompson     CeedChkBackend(ierr);
288d1d35e2fSjeremylt     ierr = CeedElemRestrictionGetElementSize(elem_restr, &elem_size);
289e15f9bd0SJeremy L Thompson     CeedChkBackend(ierr);
290d1d35e2fSjeremylt     ierr = CeedQFunctionFieldGetEvalMode(qf_input_fields[i], &eval_mode);
291e15f9bd0SJeremy L Thompson     CeedChkBackend(ierr);
292d1d35e2fSjeremylt     ierr = CeedQFunctionFieldGetSize(qf_input_fields[i], &size);
293d1d35e2fSjeremylt     CeedChkBackend(ierr);
2944a2e7687Sjeremylt     // Basis action
295d1d35e2fSjeremylt     switch(eval_mode) {
2964a2e7687Sjeremylt     case CEED_EVAL_NONE:
297d1d35e2fSjeremylt       ierr = CeedVectorSetArray(impl->q_vecs_in[i], CEED_MEM_HOST,
298d1d35e2fSjeremylt                                 CEED_USE_POINTER, &impl->e_data[i][e*Q*size]); CeedChkBackend(ierr);
2994a2e7687Sjeremylt       break;
3004a2e7687Sjeremylt     case CEED_EVAL_INTERP:
301d1d35e2fSjeremylt       ierr = CeedOperatorFieldGetBasis(op_input_fields[i], &basis);
302e15f9bd0SJeremy L Thompson       CeedChkBackend(ierr);
303d1d35e2fSjeremylt       ierr = CeedVectorSetArray(impl->e_vecs_in[i], CEED_MEM_HOST,
304d1d35e2fSjeremylt                                 CEED_USE_POINTER, &impl->e_data[i][e*elem_size*size]);
305e15f9bd0SJeremy L Thompson       CeedChkBackend(ierr);
306d1d35e2fSjeremylt       ierr = CeedBasisApply(basis, blk_size, CEED_NOTRANSPOSE,
307d1d35e2fSjeremylt                             CEED_EVAL_INTERP, impl->e_vecs_in[i],
308d1d35e2fSjeremylt                             impl->q_vecs_in[i]); CeedChkBackend(ierr);
3094a2e7687Sjeremylt       break;
3104a2e7687Sjeremylt     case CEED_EVAL_GRAD:
311d1d35e2fSjeremylt       ierr = CeedOperatorFieldGetBasis(op_input_fields[i], &basis);
312e15f9bd0SJeremy L Thompson       CeedChkBackend(ierr);
313e15f9bd0SJeremy L Thompson       ierr = CeedBasisGetDimension(basis, &dim); CeedChkBackend(ierr);
314d1d35e2fSjeremylt       ierr = CeedVectorSetArray(impl->e_vecs_in[i], CEED_MEM_HOST,
315d1d35e2fSjeremylt                                 CEED_USE_POINTER, &impl->e_data[i][e*elem_size*size/dim]);
316e15f9bd0SJeremy L Thompson       CeedChkBackend(ierr);
317d1d35e2fSjeremylt       ierr = CeedBasisApply(basis, blk_size, CEED_NOTRANSPOSE,
318d1d35e2fSjeremylt                             CEED_EVAL_GRAD, impl->e_vecs_in[i],
319d1d35e2fSjeremylt                             impl->q_vecs_in[i]); CeedChkBackend(ierr);
3204a2e7687Sjeremylt       break;
3214a2e7687Sjeremylt     case CEED_EVAL_WEIGHT:
3224a2e7687Sjeremylt       break;  // No action
323bbfacfcdSjeremylt     // LCOV_EXCL_START
3244a2e7687Sjeremylt     case CEED_EVAL_DIV:
3251d102b48SJeremy L Thompson     case CEED_EVAL_CURL: {
326d1d35e2fSjeremylt       ierr = CeedOperatorFieldGetBasis(op_input_fields[i], &basis);
327e15f9bd0SJeremy L Thompson       CeedChkBackend(ierr);
3281d102b48SJeremy L Thompson       Ceed ceed;
329e15f9bd0SJeremy L Thompson       ierr = CeedBasisGetCeed(basis, &ceed); CeedChkBackend(ierr);
330e15f9bd0SJeremy L Thompson       return CeedError(ceed, CEED_ERROR_BACKEND,
331e15f9bd0SJeremy L Thompson                        "Ceed evaluation mode not implemented");
332bbfacfcdSjeremylt       // LCOV_EXCL_STOP
3334a2e7687Sjeremylt     }
3344a2e7687Sjeremylt     }
33589c6efa4Sjeremylt   }
336e15f9bd0SJeremy L Thompson   return CEED_ERROR_SUCCESS;
33789c6efa4Sjeremylt }
3384a2e7687Sjeremylt 
339f10650afSjeremylt //------------------------------------------------------------------------------
340f10650afSjeremylt // Output Basis Action
341f10650afSjeremylt //------------------------------------------------------------------------------
3421d102b48SJeremy L Thompson static inline int CeedOperatorOutputBasis_Blocked(CeedInt e, CeedInt Q,
343d1d35e2fSjeremylt     CeedQFunctionField *qf_output_fields, CeedOperatorField *op_output_fields,
344d1d35e2fSjeremylt     CeedInt blk_size, CeedInt num_input_fields, CeedInt num_output_fields,
3451d102b48SJeremy L Thompson     CeedOperator op, CeedOperator_Blocked *impl) {
3461d102b48SJeremy L Thompson   CeedInt ierr;
347d1d35e2fSjeremylt   CeedInt dim, elem_size, size;
348d1d35e2fSjeremylt   CeedElemRestriction elem_restr;
349d1d35e2fSjeremylt   CeedEvalMode eval_mode;
3501d102b48SJeremy L Thompson   CeedBasis basis;
3511d102b48SJeremy L Thompson 
352d1d35e2fSjeremylt   for (CeedInt i=0; i<num_output_fields; i++) {
353d1d35e2fSjeremylt     // Get elem_size, eval_mode, size
354d1d35e2fSjeremylt     ierr = CeedOperatorFieldGetElemRestriction(op_output_fields[i], &elem_restr);
355e15f9bd0SJeremy L Thompson     CeedChkBackend(ierr);
356d1d35e2fSjeremylt     ierr = CeedElemRestrictionGetElementSize(elem_restr, &elem_size);
357e15f9bd0SJeremy L Thompson     CeedChkBackend(ierr);
358d1d35e2fSjeremylt     ierr = CeedQFunctionFieldGetEvalMode(qf_output_fields[i], &eval_mode);
359e15f9bd0SJeremy L Thompson     CeedChkBackend(ierr);
360d1d35e2fSjeremylt     ierr = CeedQFunctionFieldGetSize(qf_output_fields[i], &size);
361e15f9bd0SJeremy L Thompson     CeedChkBackend(ierr);
3624a2e7687Sjeremylt     // Basis action
363d1d35e2fSjeremylt     switch(eval_mode) {
3644a2e7687Sjeremylt     case CEED_EVAL_NONE:
3654a2e7687Sjeremylt       break; // No action
3664a2e7687Sjeremylt     case CEED_EVAL_INTERP:
367d1d35e2fSjeremylt       ierr = CeedOperatorFieldGetBasis(op_output_fields[i], &basis);
368e15f9bd0SJeremy L Thompson       CeedChkBackend(ierr);
369d1d35e2fSjeremylt       ierr = CeedVectorSetArray(impl->e_vecs_out[i], CEED_MEM_HOST,
370d1d35e2fSjeremylt                                 CEED_USE_POINTER, &impl->e_data[i + num_input_fields][e*elem_size*size]);
371e15f9bd0SJeremy L Thompson       CeedChkBackend(ierr);
372d1d35e2fSjeremylt       ierr = CeedBasisApply(basis, blk_size, CEED_TRANSPOSE,
373d1d35e2fSjeremylt                             CEED_EVAL_INTERP, impl->q_vecs_out[i],
374d1d35e2fSjeremylt                             impl->e_vecs_out[i]); CeedChkBackend(ierr);
3754a2e7687Sjeremylt       break;
3764a2e7687Sjeremylt     case CEED_EVAL_GRAD:
377d1d35e2fSjeremylt       ierr = CeedOperatorFieldGetBasis(op_output_fields[i], &basis);
378e15f9bd0SJeremy L Thompson       CeedChkBackend(ierr);
379e15f9bd0SJeremy L Thompson       ierr = CeedBasisGetDimension(basis, &dim); CeedChkBackend(ierr);
380d1d35e2fSjeremylt       ierr = CeedVectorSetArray(impl->e_vecs_out[i], CEED_MEM_HOST,
381d1d35e2fSjeremylt                                 CEED_USE_POINTER, &impl->e_data[i + num_input_fields][e*elem_size*size/dim]);
382e15f9bd0SJeremy L Thompson       CeedChkBackend(ierr);
383d1d35e2fSjeremylt       ierr = CeedBasisApply(basis, blk_size, CEED_TRANSPOSE,
384d1d35e2fSjeremylt                             CEED_EVAL_GRAD, impl->q_vecs_out[i],
385d1d35e2fSjeremylt                             impl->e_vecs_out[i]); CeedChkBackend(ierr);
3864a2e7687Sjeremylt       break;
387c042f62fSJeremy L Thompson     // LCOV_EXCL_START
388bbfacfcdSjeremylt     case CEED_EVAL_WEIGHT: {
3894ce2993fSjeremylt       Ceed ceed;
390e15f9bd0SJeremy L Thompson       ierr = CeedOperatorGetCeed(op, &ceed); CeedChkBackend(ierr);
391e15f9bd0SJeremy L Thompson       return CeedError(ceed, CEED_ERROR_BACKEND,
392e15f9bd0SJeremy L Thompson                        "CEED_EVAL_WEIGHT cannot be an output "
3931d102b48SJeremy L Thompson                        "evaluation mode");
3944ce2993fSjeremylt     }
3954a2e7687Sjeremylt     case CEED_EVAL_DIV:
3961d102b48SJeremy L Thompson     case CEED_EVAL_CURL: {
3971d102b48SJeremy L Thompson       Ceed ceed;
398e15f9bd0SJeremy L Thompson       ierr = CeedOperatorGetCeed(op, &ceed); CeedChkBackend(ierr);
399e15f9bd0SJeremy L Thompson       return CeedError(ceed, CEED_ERROR_BACKEND,
400e15f9bd0SJeremy L Thompson                        "Ceed evaluation mode not implemented");
401bbfacfcdSjeremylt       // LCOV_EXCL_STOP
4024a2e7687Sjeremylt     }
40389c6efa4Sjeremylt     }
40489c6efa4Sjeremylt   }
405e15f9bd0SJeremy L Thompson   return CEED_ERROR_SUCCESS;
4061d102b48SJeremy L Thompson }
4071d102b48SJeremy L Thompson 
408f10650afSjeremylt //------------------------------------------------------------------------------
409f10650afSjeremylt // Restore Input Vectors
410f10650afSjeremylt //------------------------------------------------------------------------------
411d1d35e2fSjeremylt static inline int CeedOperatorRestoreInputs_Blocked(CeedInt num_input_fields,
412d1d35e2fSjeremylt     CeedQFunctionField *qf_input_fields, CeedOperatorField *op_input_fields,
413d1d35e2fSjeremylt     bool skip_active, CeedOperator_Blocked *impl) {
4141d102b48SJeremy L Thompson   CeedInt ierr;
415d1d35e2fSjeremylt   CeedEvalMode eval_mode;
4161d102b48SJeremy L Thompson 
417d1d35e2fSjeremylt   for (CeedInt i=0; i<num_input_fields; i++) {
4181d102b48SJeremy L Thompson     // Skip active inputs
419d1d35e2fSjeremylt     if (skip_active) {
4201d102b48SJeremy L Thompson       CeedVector vec;
421d1d35e2fSjeremylt       ierr = CeedOperatorFieldGetVector(op_input_fields[i], &vec);
422d1d35e2fSjeremylt       CeedChkBackend(ierr);
4231d102b48SJeremy L Thompson       if (vec == CEED_VECTOR_ACTIVE)
4241d102b48SJeremy L Thompson         continue;
4251d102b48SJeremy L Thompson     }
426d1d35e2fSjeremylt     ierr = CeedQFunctionFieldGetEvalMode(qf_input_fields[i], &eval_mode);
427e15f9bd0SJeremy L Thompson     CeedChkBackend(ierr);
428d1d35e2fSjeremylt     if (eval_mode == CEED_EVAL_WEIGHT) { // Skip
4291d102b48SJeremy L Thompson     } else {
430d1d35e2fSjeremylt       ierr = CeedVectorRestoreArrayRead(impl->e_vecs[i],
431d1d35e2fSjeremylt                                         (const CeedScalar **) &impl->e_data[i]);
432e15f9bd0SJeremy L Thompson       CeedChkBackend(ierr);
4331d102b48SJeremy L Thompson     }
4341d102b48SJeremy L Thompson   }
435e15f9bd0SJeremy L Thompson   return CEED_ERROR_SUCCESS;
4361d102b48SJeremy L Thompson }
4371d102b48SJeremy L Thompson 
438f10650afSjeremylt //------------------------------------------------------------------------------
439f10650afSjeremylt // Operator Apply
440f10650afSjeremylt //------------------------------------------------------------------------------
441d1d35e2fSjeremylt static int CeedOperatorApplyAdd_Blocked(CeedOperator op, CeedVector in_vec,
442d1d35e2fSjeremylt                                         CeedVector out_vec,
4431d102b48SJeremy L Thompson                                         CeedRequest *request) {
4441d102b48SJeremy L Thompson   int ierr;
4451d102b48SJeremy L Thompson   CeedOperator_Blocked *impl;
446e15f9bd0SJeremy L Thompson   ierr = CeedOperatorGetData(op, &impl); CeedChkBackend(ierr);
447d1d35e2fSjeremylt   const CeedInt blk_size = 8;
448d1d35e2fSjeremylt   CeedInt Q, num_input_fields, num_output_fields, num_elem, size;
449d1d35e2fSjeremylt   ierr = CeedOperatorGetNumElements(op, &num_elem); CeedChkBackend(ierr);
450e15f9bd0SJeremy L Thompson   ierr = CeedOperatorGetNumQuadraturePoints(op, &Q); CeedChkBackend(ierr);
451d1d35e2fSjeremylt   CeedInt num_blks = (num_elem/blk_size) + !!(num_elem%blk_size);
4521d102b48SJeremy L Thompson   CeedQFunction qf;
453e15f9bd0SJeremy L Thompson   ierr = CeedOperatorGetQFunction(op, &qf); CeedChkBackend(ierr);
454d1d35e2fSjeremylt   CeedOperatorField *op_input_fields, *op_output_fields;
4557e7773b5SJeremy L Thompson   ierr = CeedOperatorGetFields(op, &num_input_fields, &op_input_fields,
4567e7773b5SJeremy L Thompson                                &num_output_fields, &op_output_fields);
457e15f9bd0SJeremy L Thompson   CeedChkBackend(ierr);
458d1d35e2fSjeremylt   CeedQFunctionField *qf_input_fields, *qf_output_fields;
4597e7773b5SJeremy L Thompson   ierr = CeedQFunctionGetFields(qf, NULL, &qf_input_fields, NULL,
4607e7773b5SJeremy L Thompson                                 &qf_output_fields);
461e15f9bd0SJeremy L Thompson   CeedChkBackend(ierr);
462d1d35e2fSjeremylt   CeedEvalMode eval_mode;
4631d102b48SJeremy L Thompson   CeedVector vec;
4641d102b48SJeremy L Thompson 
4651d102b48SJeremy L Thompson   // Setup
466e15f9bd0SJeremy L Thompson   ierr = CeedOperatorSetup_Blocked(op); CeedChkBackend(ierr);
4671d102b48SJeremy L Thompson 
4680b454692Sjeremylt   // Restriction only operator
4690b454692Sjeremylt   if (impl->is_identity_restr_op) {
4700b454692Sjeremylt     ierr = CeedElemRestrictionApply(impl->blk_restr[0], CEED_NOTRANSPOSE, in_vec,
4710b454692Sjeremylt                                     impl->e_vecs[0], request); CeedChkBackend(ierr);
4720b454692Sjeremylt     ierr = CeedElemRestrictionApply(impl->blk_restr[1], CEED_TRANSPOSE,
4730b454692Sjeremylt                                     impl->e_vecs[0], out_vec, request); CeedChkBackend(ierr);
4740b454692Sjeremylt     return CEED_ERROR_SUCCESS;
4750b454692Sjeremylt   }
4760b454692Sjeremylt 
4771d102b48SJeremy L Thompson   // Input Evecs and Restriction
478d1d35e2fSjeremylt   ierr = CeedOperatorSetupInputs_Blocked(num_input_fields, qf_input_fields,
479d1d35e2fSjeremylt                                          op_input_fields, in_vec, false, impl,
480e15f9bd0SJeremy L Thompson                                          request); CeedChkBackend(ierr);
4811d102b48SJeremy L Thompson 
4821d102b48SJeremy L Thompson   // Output Evecs
483d1d35e2fSjeremylt   for (CeedInt i=0; i<num_output_fields; i++) {
484d1d35e2fSjeremylt     ierr = CeedVectorGetArray(impl->e_vecs[i+impl->num_e_vecs_in], CEED_MEM_HOST,
485d1d35e2fSjeremylt                               &impl->e_data[i + num_input_fields]); CeedChkBackend(ierr);
4861d102b48SJeremy L Thompson   }
4871d102b48SJeremy L Thompson 
4881d102b48SJeremy L Thompson   // Loop through elements
489d1d35e2fSjeremylt   for (CeedInt e=0; e<num_blks*blk_size; e+=blk_size) {
4901d102b48SJeremy L Thompson     // Output pointers
491d1d35e2fSjeremylt     for (CeedInt i=0; i<num_output_fields; i++) {
492d1d35e2fSjeremylt       ierr = CeedQFunctionFieldGetEvalMode(qf_output_fields[i], &eval_mode);
493e15f9bd0SJeremy L Thompson       CeedChkBackend(ierr);
494d1d35e2fSjeremylt       if (eval_mode == CEED_EVAL_NONE) {
495d1d35e2fSjeremylt         ierr = CeedQFunctionFieldGetSize(qf_output_fields[i], &size);
496e15f9bd0SJeremy L Thompson         CeedChkBackend(ierr);
497d1d35e2fSjeremylt         ierr = CeedVectorSetArray(impl->q_vecs_out[i], CEED_MEM_HOST,
498d1d35e2fSjeremylt                                   CEED_USE_POINTER, &impl->e_data[i + num_input_fields][e*Q*size]);
499e15f9bd0SJeremy L Thompson         CeedChkBackend(ierr);
5001d102b48SJeremy L Thompson       }
5011d102b48SJeremy L Thompson     }
5021d102b48SJeremy L Thompson 
50316911fdaSjeremylt     // Input basis apply
504d1d35e2fSjeremylt     ierr = CeedOperatorInputBasis_Blocked(e, Q, qf_input_fields, op_input_fields,
505d1d35e2fSjeremylt                                           num_input_fields, blk_size, false, impl);
506e15f9bd0SJeremy L Thompson     CeedChkBackend(ierr);
50716911fdaSjeremylt 
5081d102b48SJeremy L Thompson     // Q function
5090b454692Sjeremylt     if (!impl->is_identity_qf) {
510d1d35e2fSjeremylt       ierr = CeedQFunctionApply(qf, Q*blk_size, impl->q_vecs_in, impl->q_vecs_out);
511e15f9bd0SJeremy L Thompson       CeedChkBackend(ierr);
51216911fdaSjeremylt     }
5131d102b48SJeremy L Thompson 
5141d102b48SJeremy L Thompson     // Output basis apply
515d1d35e2fSjeremylt     ierr = CeedOperatorOutputBasis_Blocked(e, Q, qf_output_fields, op_output_fields,
516d1d35e2fSjeremylt                                            blk_size, num_input_fields,
517d1d35e2fSjeremylt                                            num_output_fields, op, impl);
518e15f9bd0SJeremy L Thompson     CeedChkBackend(ierr);
5191d102b48SJeremy L Thompson   }
52089c6efa4Sjeremylt 
52189c6efa4Sjeremylt   // Output restriction
522d1d35e2fSjeremylt   for (CeedInt i=0; i<num_output_fields; i++) {
52389c6efa4Sjeremylt     // Restore evec
524d1d35e2fSjeremylt     ierr = CeedVectorRestoreArray(impl->e_vecs[i+impl->num_e_vecs_in],
525d1d35e2fSjeremylt                                   &impl->e_data[i + num_input_fields]); CeedChkBackend(ierr);
526d1bcdac9Sjeremylt     // Get output vector
527d1d35e2fSjeremylt     ierr = CeedOperatorFieldGetVector(op_output_fields[i], &vec);
528e15f9bd0SJeremy L Thompson     CeedChkBackend(ierr);
52989c6efa4Sjeremylt     // Active
530d1bcdac9Sjeremylt     if (vec == CEED_VECTOR_ACTIVE)
531d1d35e2fSjeremylt       vec = out_vec;
5324a2e7687Sjeremylt     // Restrict
533d1d35e2fSjeremylt     ierr = CeedElemRestrictionApply(impl->blk_restr[i+impl->num_e_vecs_in],
534d1d35e2fSjeremylt                                     CEED_TRANSPOSE, impl->e_vecs[i+impl->num_e_vecs_in],
535e15f9bd0SJeremy L Thompson                                     vec, request); CeedChkBackend(ierr);
53689c6efa4Sjeremylt 
5374a2e7687Sjeremylt   }
5384a2e7687Sjeremylt 
5394a2e7687Sjeremylt   // Restore input arrays
540d1d35e2fSjeremylt   ierr = CeedOperatorRestoreInputs_Blocked(num_input_fields, qf_input_fields,
541d1d35e2fSjeremylt          op_input_fields, false, impl); CeedChkBackend(ierr);
5421d102b48SJeremy L Thompson 
543e15f9bd0SJeremy L Thompson   return CEED_ERROR_SUCCESS;
5441d102b48SJeremy L Thompson }
5451d102b48SJeremy L Thompson 
546f10650afSjeremylt //------------------------------------------------------------------------------
547*70a7ffb3SJeremy L Thompson // Core code for assembling linear QFunction
548f10650afSjeremylt //------------------------------------------------------------------------------
549*70a7ffb3SJeremy L Thompson static inline int CeedOperatorLinearAssembleQFunctionCore_Blocked(
550*70a7ffb3SJeremy L Thompson   CeedOperator op,
551*70a7ffb3SJeremy L Thompson   bool build_objects, CeedVector *assembled, CeedElemRestriction *rstr,
552*70a7ffb3SJeremy L Thompson   CeedRequest *request) {
5531d102b48SJeremy L Thompson   int ierr;
5541d102b48SJeremy L Thompson   CeedOperator_Blocked *impl;
555e15f9bd0SJeremy L Thompson   ierr = CeedOperatorGetData(op, &impl); CeedChkBackend(ierr);
556d1d35e2fSjeremylt   const CeedInt blk_size = 8;
557d1d35e2fSjeremylt   CeedInt Q, num_input_fields, num_output_fields, num_elem, size;
558d1d35e2fSjeremylt   ierr = CeedOperatorGetNumElements(op, &num_elem); CeedChkBackend(ierr);
559e15f9bd0SJeremy L Thompson   ierr = CeedOperatorGetNumQuadraturePoints(op, &Q); CeedChkBackend(ierr);
560d1d35e2fSjeremylt   CeedInt num_blks = (num_elem/blk_size) + !!(num_elem%blk_size);
5611d102b48SJeremy L Thompson   CeedQFunction qf;
562e15f9bd0SJeremy L Thompson   ierr = CeedOperatorGetQFunction(op, &qf); CeedChkBackend(ierr);
563d1d35e2fSjeremylt   CeedOperatorField *op_input_fields, *op_output_fields;
5647e7773b5SJeremy L Thompson   ierr = CeedOperatorGetFields(op, &num_input_fields, &op_input_fields,
5657e7773b5SJeremy L Thompson                                &num_output_fields, &op_output_fields);
566e15f9bd0SJeremy L Thompson   CeedChkBackend(ierr);
567d1d35e2fSjeremylt   CeedQFunctionField *qf_input_fields, *qf_output_fields;
5687e7773b5SJeremy L Thompson   ierr = CeedQFunctionGetFields(qf, NULL, &qf_input_fields, NULL,
5697e7773b5SJeremy L Thompson                                 &qf_output_fields);
570e15f9bd0SJeremy L Thompson   CeedChkBackend(ierr);
5711d102b48SJeremy L Thompson   CeedVector vec, lvec;
572d1d35e2fSjeremylt   CeedInt num_active_in = 0, num_active_out = 0;
573d1d35e2fSjeremylt   CeedVector *active_in = NULL;
5741d102b48SJeremy L Thompson   CeedScalar *a, *tmp;
5751d102b48SJeremy L Thompson   Ceed ceed;
576e15f9bd0SJeremy L Thompson   ierr = CeedOperatorGetCeed(op, &ceed); CeedChkBackend(ierr);
5771d102b48SJeremy L Thompson 
5781d102b48SJeremy L Thompson   // Setup
579e15f9bd0SJeremy L Thompson   ierr = CeedOperatorSetup_Blocked(op); CeedChkBackend(ierr);
5801d102b48SJeremy L Thompson 
58116911fdaSjeremylt   // Check for identity
5820b454692Sjeremylt   if (impl->is_identity_qf)
58316911fdaSjeremylt     // LCOV_EXCL_START
584e15f9bd0SJeremy L Thompson     return CeedError(ceed, CEED_ERROR_BACKEND,
585d1d35e2fSjeremylt                      "Assembling identity QFunctions not supported");
58616911fdaSjeremylt   // LCOV_EXCL_STOP
58716911fdaSjeremylt 
5881d102b48SJeremy L Thompson   // Input Evecs and Restriction
589d1d35e2fSjeremylt   ierr = CeedOperatorSetupInputs_Blocked(num_input_fields, qf_input_fields,
590d1d35e2fSjeremylt                                          op_input_fields, NULL, true, impl,
591e15f9bd0SJeremy L Thompson                                          request); CeedChkBackend(ierr);
5921d102b48SJeremy L Thompson 
5931d102b48SJeremy L Thompson   // Count number of active input fields
594d1d35e2fSjeremylt   for (CeedInt i=0; i<num_input_fields; i++) {
5951d102b48SJeremy L Thompson     // Get input vector
596d1d35e2fSjeremylt     ierr = CeedOperatorFieldGetVector(op_input_fields[i], &vec);
597d1d35e2fSjeremylt     CeedChkBackend(ierr);
5981d102b48SJeremy L Thompson     // Check if active input
5991d102b48SJeremy L Thompson     if (vec == CEED_VECTOR_ACTIVE) {
600d1d35e2fSjeremylt       ierr = CeedQFunctionFieldGetSize(qf_input_fields[i], &size);
601e15f9bd0SJeremy L Thompson       CeedChkBackend(ierr);
602d1d35e2fSjeremylt       ierr = CeedVectorSetValue(impl->q_vecs_in[i], 0.0); CeedChkBackend(ierr);
603d1d35e2fSjeremylt       ierr = CeedVectorGetArray(impl->q_vecs_in[i], CEED_MEM_HOST, &tmp);
604d1d35e2fSjeremylt       CeedChkBackend(ierr);
605d1d35e2fSjeremylt       ierr = CeedRealloc(num_active_in + size, &active_in); CeedChkBackend(ierr);
6061d102b48SJeremy L Thompson       for (CeedInt field=0; field<size; field++) {
607d1d35e2fSjeremylt         ierr = CeedVectorCreate(ceed, Q*blk_size, &active_in[num_active_in+field]);
608e15f9bd0SJeremy L Thompson         CeedChkBackend(ierr);
609d1d35e2fSjeremylt         ierr = CeedVectorSetArray(active_in[num_active_in+field], CEED_MEM_HOST,
610d1d35e2fSjeremylt                                   CEED_USE_POINTER, &tmp[field*Q*blk_size]);
611e15f9bd0SJeremy L Thompson         CeedChkBackend(ierr);
6121d102b48SJeremy L Thompson       }
613d1d35e2fSjeremylt       num_active_in += size;
614d1d35e2fSjeremylt       ierr = CeedVectorRestoreArray(impl->q_vecs_in[i], &tmp); CeedChkBackend(ierr);
6151d102b48SJeremy L Thompson     }
6161d102b48SJeremy L Thompson   }
6171d102b48SJeremy L Thompson 
6181d102b48SJeremy L Thompson   // Count number of active output fields
619d1d35e2fSjeremylt   for (CeedInt i=0; i<num_output_fields; i++) {
6201d102b48SJeremy L Thompson     // Get output vector
621d1d35e2fSjeremylt     ierr = CeedOperatorFieldGetVector(op_output_fields[i], &vec);
622e15f9bd0SJeremy L Thompson     CeedChkBackend(ierr);
6231d102b48SJeremy L Thompson     // Check if active output
6241d102b48SJeremy L Thompson     if (vec == CEED_VECTOR_ACTIVE) {
625d1d35e2fSjeremylt       ierr = CeedQFunctionFieldGetSize(qf_output_fields[i], &size);
626e15f9bd0SJeremy L Thompson       CeedChkBackend(ierr);
627d1d35e2fSjeremylt       num_active_out += size;
6281d102b48SJeremy L Thompson     }
6291d102b48SJeremy L Thompson   }
6301d102b48SJeremy L Thompson 
6311d102b48SJeremy L Thompson   // Check sizes
632d1d35e2fSjeremylt   if (!num_active_in || !num_active_out)
6331d102b48SJeremy L Thompson     // LCOV_EXCL_START
634e15f9bd0SJeremy L Thompson     return CeedError(ceed, CEED_ERROR_BACKEND,
635e15f9bd0SJeremy L Thompson                      "Cannot assemble QFunction without active inputs "
6361d102b48SJeremy L Thompson                      "and outputs");
6371d102b48SJeremy L Thompson   // LCOV_EXCL_STOP
6381d102b48SJeremy L Thompson 
639d1d35e2fSjeremylt   // Setup Lvec
640d1d35e2fSjeremylt   ierr = CeedVectorCreate(ceed, num_blks*blk_size*Q*num_active_in*num_active_out,
641e15f9bd0SJeremy L Thompson                           &lvec); CeedChkBackend(ierr);
642e15f9bd0SJeremy L Thompson   ierr = CeedVectorGetArray(lvec, CEED_MEM_HOST, &a); CeedChkBackend(ierr);
6431d102b48SJeremy L Thompson 
644*70a7ffb3SJeremy L Thompson   // Build objects if needed
645d1d35e2fSjeremylt   CeedInt strides[3] = {1, Q, num_active_in *num_active_out*Q};
646*70a7ffb3SJeremy L Thompson   if (build_objects) {
647*70a7ffb3SJeremy L Thompson     // Create output restriction
648d1d35e2fSjeremylt     ierr = CeedElemRestrictionCreateStrided(ceed, num_elem, Q,
649d1d35e2fSjeremylt                                             num_active_in*num_active_out,
650d1d35e2fSjeremylt                                             num_active_in*num_active_out*num_elem*Q,
651e15f9bd0SJeremy L Thompson                                             strides, rstr); CeedChkBackend(ierr);
6521d102b48SJeremy L Thompson     // Create assembled vector
653d1d35e2fSjeremylt     ierr = CeedVectorCreate(ceed, num_elem*Q*num_active_in*num_active_out,
654e15f9bd0SJeremy L Thompson                             assembled); CeedChkBackend(ierr);
655*70a7ffb3SJeremy L Thompson   }
6561d102b48SJeremy L Thompson 
6571d102b48SJeremy L Thompson   // Loop through elements
658d1d35e2fSjeremylt   for (CeedInt e=0; e<num_blks*blk_size; e+=blk_size) {
6591d102b48SJeremy L Thompson     // Input basis apply
660d1d35e2fSjeremylt     ierr = CeedOperatorInputBasis_Blocked(e, Q, qf_input_fields, op_input_fields,
661d1d35e2fSjeremylt                                           num_input_fields, blk_size, true, impl);
662e15f9bd0SJeremy L Thompson     CeedChkBackend(ierr);
6631d102b48SJeremy L Thompson 
6641d102b48SJeremy L Thompson     // Assemble QFunction
665d1d35e2fSjeremylt     for (CeedInt in=0; in<num_active_in; in++) {
6661d102b48SJeremy L Thompson       // Set Inputs
667d1d35e2fSjeremylt       ierr = CeedVectorSetValue(active_in[in], 1.0); CeedChkBackend(ierr);
668d1d35e2fSjeremylt       if (num_active_in > 1) {
669d1d35e2fSjeremylt         ierr = CeedVectorSetValue(active_in[(in+num_active_in-1)%num_active_in],
670e15f9bd0SJeremy L Thompson                                   0.0); CeedChkBackend(ierr);
67142ea3801Sjeremylt       }
6721d102b48SJeremy L Thompson       // Set Outputs
673d1d35e2fSjeremylt       for (CeedInt out=0; out<num_output_fields; out++) {
6741d102b48SJeremy L Thompson         // Get output vector
675d1d35e2fSjeremylt         ierr = CeedOperatorFieldGetVector(op_output_fields[out], &vec);
676e15f9bd0SJeremy L Thompson         CeedChkBackend(ierr);
6771d102b48SJeremy L Thompson         // Check if active output
6781d102b48SJeremy L Thompson         if (vec == CEED_VECTOR_ACTIVE) {
679d1d35e2fSjeremylt           CeedVectorSetArray(impl->q_vecs_out[out], CEED_MEM_HOST,
680e15f9bd0SJeremy L Thompson                              CEED_USE_POINTER, a); CeedChkBackend(ierr);
681d1d35e2fSjeremylt           ierr = CeedQFunctionFieldGetSize(qf_output_fields[out], &size);
682e15f9bd0SJeremy L Thompson           CeedChkBackend(ierr);
683d1d35e2fSjeremylt           a += size*Q*blk_size; // Advance the pointer by the size of the output
6841d102b48SJeremy L Thompson         }
6851d102b48SJeremy L Thompson       }
6861d102b48SJeremy L Thompson       // Apply QFunction
687d1d35e2fSjeremylt       ierr = CeedQFunctionApply(qf, Q*blk_size, impl->q_vecs_in, impl->q_vecs_out);
688e15f9bd0SJeremy L Thompson       CeedChkBackend(ierr);
6894a2e7687Sjeremylt     }
6904a2e7687Sjeremylt   }
6914a2e7687Sjeremylt 
6921d102b48SJeremy L Thompson   // Un-set output Qvecs to prevent accidental overwrite of Assembled
693d1d35e2fSjeremylt   for (CeedInt out=0; out<num_output_fields; out++) {
6941d102b48SJeremy L Thompson     // Get output vector
695d1d35e2fSjeremylt     ierr = CeedOperatorFieldGetVector(op_output_fields[out], &vec);
696e15f9bd0SJeremy L Thompson     CeedChkBackend(ierr);
6971d102b48SJeremy L Thompson     // Check if active output
6981d102b48SJeremy L Thompson     if (vec == CEED_VECTOR_ACTIVE) {
699d1d35e2fSjeremylt       CeedVectorSetArray(impl->q_vecs_out[out], CEED_MEM_HOST, CEED_COPY_VALUES,
700e15f9bd0SJeremy L Thompson                          NULL); CeedChkBackend(ierr);
7011d102b48SJeremy L Thompson     }
7021d102b48SJeremy L Thompson   }
7031d102b48SJeremy L Thompson 
7041d102b48SJeremy L Thompson   // Restore input arrays
705d1d35e2fSjeremylt   ierr = CeedOperatorRestoreInputs_Blocked(num_input_fields, qf_input_fields,
706d1d35e2fSjeremylt          op_input_fields, true, impl); CeedChkBackend(ierr);
7071d102b48SJeremy L Thompson 
7081d102b48SJeremy L Thompson   // Output blocked restriction
709e15f9bd0SJeremy L Thompson   ierr = CeedVectorRestoreArray(lvec, &a); CeedChkBackend(ierr);
710e15f9bd0SJeremy L Thompson   ierr = CeedVectorSetValue(*assembled, 0.0); CeedChkBackend(ierr);
711d1d35e2fSjeremylt   CeedElemRestriction blk_rstr;
712d1d35e2fSjeremylt   ierr = CeedElemRestrictionCreateBlockedStrided(ceed, num_elem, Q, blk_size,
713d1d35e2fSjeremylt          num_active_in*num_active_out, num_active_in*num_active_out*num_elem*Q,
714d1d35e2fSjeremylt          strides, &blk_rstr); CeedChkBackend(ierr);
715d1d35e2fSjeremylt   ierr = CeedElemRestrictionApply(blk_rstr, CEED_TRANSPOSE, lvec, *assembled,
716e15f9bd0SJeremy L Thompson                                   request); CeedChkBackend(ierr);
7171d102b48SJeremy L Thompson 
7181d102b48SJeremy L Thompson   // Cleanup
719d1d35e2fSjeremylt   for (CeedInt i=0; i<num_active_in; i++) {
720d1d35e2fSjeremylt     ierr = CeedVectorDestroy(&active_in[i]); CeedChkBackend(ierr);
72142ea3801Sjeremylt   }
722d1d35e2fSjeremylt   ierr = CeedFree(&active_in); CeedChkBackend(ierr);
723e15f9bd0SJeremy L Thompson   ierr = CeedVectorDestroy(&lvec); CeedChkBackend(ierr);
724d1d35e2fSjeremylt   ierr = CeedElemRestrictionDestroy(&blk_rstr); CeedChkBackend(ierr);
7251d102b48SJeremy L Thompson 
726e15f9bd0SJeremy L Thompson   return CEED_ERROR_SUCCESS;
7274a2e7687Sjeremylt }
7284a2e7687Sjeremylt 
729f10650afSjeremylt //------------------------------------------------------------------------------
730*70a7ffb3SJeremy L Thompson // Assemble Linear QFunction
731*70a7ffb3SJeremy L Thompson //------------------------------------------------------------------------------
732*70a7ffb3SJeremy L Thompson static int CeedOperatorLinearAssembleQFunction_Blocked(CeedOperator op,
733*70a7ffb3SJeremy L Thompson     CeedVector *assembled, CeedElemRestriction *rstr, CeedRequest *request) {
734*70a7ffb3SJeremy L Thompson   return CeedOperatorLinearAssembleQFunctionCore_Blocked(op, true, assembled,
735*70a7ffb3SJeremy L Thompson          rstr, request);
736*70a7ffb3SJeremy L Thompson }
737*70a7ffb3SJeremy L Thompson 
738*70a7ffb3SJeremy L Thompson //------------------------------------------------------------------------------
739*70a7ffb3SJeremy L Thompson // Update Assembled Linear QFunction
740*70a7ffb3SJeremy L Thompson //------------------------------------------------------------------------------
741*70a7ffb3SJeremy L Thompson static int CeedOperatorLinearAssembleQFunctionUpdate_Blocked(CeedOperator op,
742*70a7ffb3SJeremy L Thompson     CeedVector assembled, CeedElemRestriction rstr, CeedRequest *request) {
743*70a7ffb3SJeremy L Thompson   return CeedOperatorLinearAssembleQFunctionCore_Blocked(op, false, &assembled,
744*70a7ffb3SJeremy L Thompson          &rstr, request);
745*70a7ffb3SJeremy L Thompson }
746*70a7ffb3SJeremy L Thompson 
747*70a7ffb3SJeremy L Thompson //------------------------------------------------------------------------------
748f10650afSjeremylt // Operator Destroy
749f10650afSjeremylt //------------------------------------------------------------------------------
750f10650afSjeremylt static int CeedOperatorDestroy_Blocked(CeedOperator op) {
751f10650afSjeremylt   int ierr;
752f10650afSjeremylt   CeedOperator_Blocked *impl;
753e15f9bd0SJeremy L Thompson   ierr = CeedOperatorGetData(op, &impl); CeedChkBackend(ierr);
754f10650afSjeremylt 
755d1d35e2fSjeremylt   for (CeedInt i=0; i<impl->num_e_vecs_in+impl->num_e_vecs_out; i++) {
756d1d35e2fSjeremylt     ierr = CeedElemRestrictionDestroy(&impl->blk_restr[i]); CeedChkBackend(ierr);
757d1d35e2fSjeremylt     ierr = CeedVectorDestroy(&impl->e_vecs[i]); CeedChkBackend(ierr);
758f10650afSjeremylt   }
759d1d35e2fSjeremylt   ierr = CeedFree(&impl->blk_restr); CeedChkBackend(ierr);
760d1d35e2fSjeremylt   ierr = CeedFree(&impl->e_vecs); CeedChkBackend(ierr);
761d1d35e2fSjeremylt   ierr = CeedFree(&impl->e_data); CeedChkBackend(ierr);
762d1d35e2fSjeremylt   ierr = CeedFree(&impl->input_state); CeedChkBackend(ierr);
763f10650afSjeremylt 
764d1d35e2fSjeremylt   for (CeedInt i=0; i<impl->num_e_vecs_in; i++) {
765d1d35e2fSjeremylt     ierr = CeedVectorDestroy(&impl->e_vecs_in[i]); CeedChkBackend(ierr);
766d1d35e2fSjeremylt     ierr = CeedVectorDestroy(&impl->q_vecs_in[i]); CeedChkBackend(ierr);
767f10650afSjeremylt   }
768d1d35e2fSjeremylt   ierr = CeedFree(&impl->e_vecs_in); CeedChkBackend(ierr);
769d1d35e2fSjeremylt   ierr = CeedFree(&impl->q_vecs_in); CeedChkBackend(ierr);
770f10650afSjeremylt 
771d1d35e2fSjeremylt   for (CeedInt i=0; i<impl->num_e_vecs_out; i++) {
772d1d35e2fSjeremylt     ierr = CeedVectorDestroy(&impl->e_vecs_out[i]); CeedChkBackend(ierr);
773d1d35e2fSjeremylt     ierr = CeedVectorDestroy(&impl->q_vecs_out[i]); CeedChkBackend(ierr);
774f10650afSjeremylt   }
775d1d35e2fSjeremylt   ierr = CeedFree(&impl->e_vecs_out); CeedChkBackend(ierr);
776d1d35e2fSjeremylt   ierr = CeedFree(&impl->q_vecs_out); CeedChkBackend(ierr);
777f10650afSjeremylt 
778e15f9bd0SJeremy L Thompson   ierr = CeedFree(&impl); CeedChkBackend(ierr);
779e15f9bd0SJeremy L Thompson   return CEED_ERROR_SUCCESS;
780f10650afSjeremylt }
781f10650afSjeremylt 
782f10650afSjeremylt //------------------------------------------------------------------------------
783f10650afSjeremylt // Operator Create
784f10650afSjeremylt //------------------------------------------------------------------------------
7854a2e7687Sjeremylt int CeedOperatorCreate_Blocked(CeedOperator op) {
7864a2e7687Sjeremylt   int ierr;
787fe2413ffSjeremylt   Ceed ceed;
788e15f9bd0SJeremy L Thompson   ierr = CeedOperatorGetCeed(op, &ceed); CeedChkBackend(ierr);
7894ce2993fSjeremylt   CeedOperator_Blocked *impl;
7904a2e7687Sjeremylt 
791e15f9bd0SJeremy L Thompson   ierr = CeedCalloc(1, &impl); CeedChkBackend(ierr);
792e15f9bd0SJeremy L Thompson   ierr = CeedOperatorSetData(op, impl); CeedChkBackend(ierr);
793fe2413ffSjeremylt 
79480ac2e43SJeremy L Thompson   ierr = CeedSetBackendFunction(ceed, "Operator", op, "LinearAssembleQFunction",
79580ac2e43SJeremy L Thompson                                 CeedOperatorLinearAssembleQFunction_Blocked);
796e15f9bd0SJeremy L Thompson   CeedChkBackend(ierr);
797*70a7ffb3SJeremy L Thompson   ierr = CeedSetBackendFunction(ceed, "Operator", op,
798*70a7ffb3SJeremy L Thompson                                 "LinearAssembleQFunctionUpdate",
799*70a7ffb3SJeremy L Thompson                                 CeedOperatorLinearAssembleQFunctionUpdate_Blocked);
800*70a7ffb3SJeremy L Thompson   CeedChkBackend(ierr);
801cae8b89aSjeremylt   ierr = CeedSetBackendFunction(ceed, "Operator", op, "ApplyAdd",
802e15f9bd0SJeremy L Thompson                                 CeedOperatorApplyAdd_Blocked); CeedChkBackend(ierr);
803fe2413ffSjeremylt   ierr = CeedSetBackendFunction(ceed, "Operator", op, "Destroy",
804e15f9bd0SJeremy L Thompson                                 CeedOperatorDestroy_Blocked); CeedChkBackend(ierr);
805e15f9bd0SJeremy L Thompson   return CEED_ERROR_SUCCESS;
8064a2e7687Sjeremylt }
807f10650afSjeremylt //------------------------------------------------------------------------------
808