xref: /libCEED/rust/libceed-sys/c-src/backends/blocked/ceed-blocked-operator.c (revision 3d8e882215d238700cdceb37404f76ca7fa24eaa)
1*3d8e8822SJeremy L Thompson // Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors.
2*3d8e8822SJeremy L Thompson // All Rights Reserved. See the top-level LICENSE and NOTICE files for details.
34a2e7687Sjeremylt //
4*3d8e8822SJeremy L Thompson // SPDX-License-Identifier: BSD-2-Clause
54a2e7687Sjeremylt //
6*3d8e8822SJeremy L Thompson // This file is part of CEED:  http://github.com/ceed
74a2e7687Sjeremylt 
8ec3da8bcSJed Brown #include <ceed/ceed.h>
9ec3da8bcSJed Brown #include <ceed/backend.h>
103d576824SJeremy L Thompson #include <stdbool.h>
113d576824SJeremy L Thompson #include <stddef.h>
123d576824SJeremy L Thompson #include <stdint.h>
134a2e7687Sjeremylt #include "ceed-blocked.h"
144a2e7687Sjeremylt 
15f10650afSjeremylt //------------------------------------------------------------------------------
16f10650afSjeremylt // Setup Input/Output Fields
17f10650afSjeremylt //------------------------------------------------------------------------------
1889c6efa4Sjeremylt static int CeedOperatorSetupFields_Blocked(CeedQFunction qf,
194fc1f125SJeremy L Thompson     CeedOperator op, bool is_input, CeedElemRestriction *blk_restr,
204fc1f125SJeremy L Thompson     CeedVector *e_vecs_full, CeedVector *e_vecs, CeedVector *q_vecs,
214fc1f125SJeremy L Thompson     CeedInt start_e, CeedInt num_fields, CeedInt Q) {
22ebbcc8a3SJeremy L Thompson   CeedInt ierr, num_comp, size, P;
23aedaa0e5Sjeremylt   Ceed ceed;
24e15f9bd0SJeremy L Thompson   ierr = CeedOperatorGetCeed(op, &ceed); CeedChkBackend(ierr);
25d1bcdac9Sjeremylt   CeedBasis basis;
26d1bcdac9Sjeremylt   CeedElemRestriction r;
27d1d35e2fSjeremylt   CeedOperatorField *op_fields;
28d1d35e2fSjeremylt   CeedQFunctionField *qf_fields;
294fc1f125SJeremy L Thompson   if (is_input) {
307e7773b5SJeremy L Thompson     ierr = CeedOperatorGetFields(op, NULL, &op_fields, NULL, NULL);
31e15f9bd0SJeremy L Thompson     CeedChkBackend(ierr);
327e7773b5SJeremy L Thompson     ierr = CeedQFunctionGetFields(qf, NULL, &qf_fields, NULL, NULL);
33e15f9bd0SJeremy L Thompson     CeedChkBackend(ierr);
344fc1f125SJeremy L Thompson   } else {
354fc1f125SJeremy L Thompson     ierr = CeedOperatorGetFields(op, NULL, NULL, NULL, &op_fields);
364fc1f125SJeremy L Thompson     CeedChkBackend(ierr);
374fc1f125SJeremy L Thompson     ierr = CeedQFunctionGetFields(qf, NULL, NULL, NULL, &qf_fields);
384fc1f125SJeremy L Thompson     CeedChkBackend(ierr);
39fe2413ffSjeremylt   }
40d1d35e2fSjeremylt   const CeedInt blk_size = 8;
414a2e7687Sjeremylt 
424a2e7687Sjeremylt   // Loop over fields
434fc1f125SJeremy L Thompson   for (CeedInt i=0; i<num_fields; i++) {
44d1d35e2fSjeremylt     CeedEvalMode eval_mode;
45d1d35e2fSjeremylt     ierr = CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode);
46d1d35e2fSjeremylt     CeedChkBackend(ierr);
474a2e7687Sjeremylt 
48d1d35e2fSjeremylt     if (eval_mode != CEED_EVAL_WEIGHT) {
49d1d35e2fSjeremylt       ierr = CeedOperatorFieldGetElemRestriction(op_fields[i], &r);
50e15f9bd0SJeremy L Thompson       CeedChkBackend(ierr);
51e15f9bd0SJeremy L Thompson       ierr = CeedElemRestrictionGetCeed(r, &ceed); CeedChkBackend(ierr);
52e79b91d9SJeremy L Thompson       CeedSize l_size;
53e79b91d9SJeremy L Thompson       CeedInt num_elem, elem_size, comp_stride;
54d1d35e2fSjeremylt       ierr = CeedElemRestrictionGetNumElements(r, &num_elem); CeedChkBackend(ierr);
55d1d35e2fSjeremylt       ierr = CeedElemRestrictionGetElementSize(r, &elem_size); CeedChkBackend(ierr);
56d1d35e2fSjeremylt       ierr = CeedElemRestrictionGetLVectorSize(r, &l_size); CeedChkBackend(ierr);
57d1d35e2fSjeremylt       ierr = CeedElemRestrictionGetNumComponents(r, &num_comp); CeedChkBackend(ierr);
58bd33150aSjeremylt 
593ac43b2cSJeremy L Thompson       bool strided;
60e15f9bd0SJeremy L Thompson       ierr = CeedElemRestrictionIsStrided(r, &strided); CeedChkBackend(ierr);
613ac43b2cSJeremy L Thompson       if (strided) {
623ac43b2cSJeremy L Thompson         CeedInt strides[3];
63e15f9bd0SJeremy L Thompson         ierr = CeedElemRestrictionGetStrides(r, &strides); CeedChkBackend(ierr);
64d1d35e2fSjeremylt         ierr = CeedElemRestrictionCreateBlockedStrided(ceed, num_elem, elem_size,
65d1d35e2fSjeremylt                blk_size, num_comp, l_size, strides, &blk_restr[i+start_e]);
66e15f9bd0SJeremy L Thompson         CeedChkBackend(ierr);
673ac43b2cSJeremy L Thompson       } else {
68bd33150aSjeremylt         const CeedInt *offsets = NULL;
69bd33150aSjeremylt         ierr = CeedElemRestrictionGetOffsets(r, CEED_MEM_HOST, &offsets);
70e15f9bd0SJeremy L Thompson         CeedChkBackend(ierr);
71d1d35e2fSjeremylt         ierr = CeedElemRestrictionGetCompStride(r, &comp_stride); CeedChkBackend(ierr);
72d1d35e2fSjeremylt         ierr = CeedElemRestrictionCreateBlocked(ceed, num_elem, elem_size,
73d1d35e2fSjeremylt                                                 blk_size, num_comp, comp_stride,
74d1d35e2fSjeremylt                                                 l_size, CEED_MEM_HOST,
75bd33150aSjeremylt                                                 CEED_COPY_VALUES, offsets,
76d1d35e2fSjeremylt                                                 &blk_restr[i+start_e]);
77e15f9bd0SJeremy L Thompson         CeedChkBackend(ierr);
78e15f9bd0SJeremy L Thompson         ierr = CeedElemRestrictionRestoreOffsets(r, &offsets); CeedChkBackend(ierr);
793ac43b2cSJeremy L Thompson       }
80d1d35e2fSjeremylt       ierr = CeedElemRestrictionCreateVector(blk_restr[i+start_e], NULL,
814fc1f125SJeremy L Thompson                                              &e_vecs_full[i+start_e]);
82e15f9bd0SJeremy L Thompson       CeedChkBackend(ierr);
834a2e7687Sjeremylt     }
844a2e7687Sjeremylt 
85d1d35e2fSjeremylt     switch(eval_mode) {
864a2e7687Sjeremylt     case CEED_EVAL_NONE:
87d1d35e2fSjeremylt       ierr = CeedQFunctionFieldGetSize(qf_fields[i], &size); CeedChkBackend(ierr);
88d1d35e2fSjeremylt       ierr = CeedVectorCreate(ceed, Q*size*blk_size, &q_vecs[i]);
89d1d35e2fSjeremylt       CeedChkBackend(ierr);
90aedaa0e5Sjeremylt       break;
91aedaa0e5Sjeremylt     case CEED_EVAL_INTERP:
924a2e7687Sjeremylt     case CEED_EVAL_GRAD:
93d1d35e2fSjeremylt       ierr = CeedOperatorFieldGetBasis(op_fields[i], &basis); CeedChkBackend(ierr);
94d1d35e2fSjeremylt       ierr = CeedQFunctionFieldGetSize(qf_fields[i], &size); CeedChkBackend(ierr);
95ebbcc8a3SJeremy L Thompson       ierr = CeedBasisGetNumNodes(basis, &P); CeedChkBackend(ierr);
96ebbcc8a3SJeremy L Thompson       ierr = CeedBasisGetNumComponents(basis, &num_comp); CeedChkBackend(ierr);
97ebbcc8a3SJeremy L Thompson       ierr = CeedVectorCreate(ceed, P*num_comp*blk_size, &e_vecs[i]);
98e15f9bd0SJeremy L Thompson       CeedChkBackend(ierr);
99d1d35e2fSjeremylt       ierr = CeedVectorCreate(ceed, Q*size*blk_size, &q_vecs[i]);
100d1d35e2fSjeremylt       CeedChkBackend(ierr);
1014a2e7687Sjeremylt       break;
1024a2e7687Sjeremylt     case CEED_EVAL_WEIGHT: // Only on input fields
103d1d35e2fSjeremylt       ierr = CeedOperatorFieldGetBasis(op_fields[i], &basis); CeedChkBackend(ierr);
104d1d35e2fSjeremylt       ierr = CeedVectorCreate(ceed, Q*blk_size, &q_vecs[i]); CeedChkBackend(ierr);
105d1d35e2fSjeremylt       ierr = CeedBasisApply(basis, blk_size, CEED_NOTRANSPOSE,
106d1d35e2fSjeremylt                             CEED_EVAL_WEIGHT, CEED_VECTOR_NONE, q_vecs[i]);
107e15f9bd0SJeremy L Thompson       CeedChkBackend(ierr);
108aedaa0e5Sjeremylt 
1094a2e7687Sjeremylt       break;
1104a2e7687Sjeremylt     case CEED_EVAL_DIV:
1114d537eeaSYohann       break; // Not implemented
1124a2e7687Sjeremylt     case CEED_EVAL_CURL:
1134d537eeaSYohann       break; // Not implemented
1144a2e7687Sjeremylt     }
1154a2e7687Sjeremylt   }
116e15f9bd0SJeremy L Thompson   return CEED_ERROR_SUCCESS;
1174a2e7687Sjeremylt }
1184a2e7687Sjeremylt 
119f10650afSjeremylt //------------------------------------------------------------------------------
120f10650afSjeremylt // Setup Operator
121f10650afSjeremylt //------------------------------------------------------------------------------
1224a2e7687Sjeremylt static int CeedOperatorSetup_Blocked(CeedOperator op) {
1234a2e7687Sjeremylt   int ierr;
1248c1105f8SJeremy L Thompson   bool is_setup_done;
1258c1105f8SJeremy L Thompson   ierr = CeedOperatorIsSetupDone(op, &is_setup_done); CeedChkBackend(ierr);
1268c1105f8SJeremy L Thompson   if (is_setup_done) return CEED_ERROR_SUCCESS;
127aedaa0e5Sjeremylt   Ceed ceed;
128e15f9bd0SJeremy L Thompson   ierr = CeedOperatorGetCeed(op, &ceed); CeedChkBackend(ierr);
1294ce2993fSjeremylt   CeedOperator_Blocked *impl;
130e15f9bd0SJeremy L Thompson   ierr = CeedOperatorGetData(op, &impl); CeedChkBackend(ierr);
1314ce2993fSjeremylt   CeedQFunction qf;
132e15f9bd0SJeremy L Thompson   ierr = CeedOperatorGetQFunction(op, &qf); CeedChkBackend(ierr);
133d1d35e2fSjeremylt   CeedInt Q, num_input_fields, num_output_fields;
134e15f9bd0SJeremy L Thompson   ierr = CeedOperatorGetNumQuadraturePoints(op, &Q); CeedChkBackend(ierr);
1350b454692Sjeremylt   ierr = CeedQFunctionIsIdentity(qf, &impl->is_identity_qf); CeedChkBackend(ierr);
136d1d35e2fSjeremylt   CeedOperatorField *op_input_fields, *op_output_fields;
1377e7773b5SJeremy L Thompson   ierr = CeedOperatorGetFields(op, &num_input_fields, &op_input_fields,
1387e7773b5SJeremy L Thompson                                &num_output_fields, &op_output_fields);
139e15f9bd0SJeremy L Thompson   CeedChkBackend(ierr);
140d1d35e2fSjeremylt   CeedQFunctionField *qf_input_fields, *qf_output_fields;
1417e7773b5SJeremy L Thompson   ierr = CeedQFunctionGetFields(qf, NULL, &qf_input_fields, NULL,
1427e7773b5SJeremy L Thompson                                 &qf_output_fields);
143e15f9bd0SJeremy L Thompson   CeedChkBackend(ierr);
1444a2e7687Sjeremylt 
1454a2e7687Sjeremylt   // Allocate
146d1d35e2fSjeremylt   ierr = CeedCalloc(num_input_fields + num_output_fields, &impl->blk_restr);
147e15f9bd0SJeremy L Thompson   CeedChkBackend(ierr);
1484fc1f125SJeremy L Thompson   ierr = CeedCalloc(num_input_fields + num_output_fields, &impl->e_vecs_full);
149e15f9bd0SJeremy L Thompson   CeedChkBackend(ierr);
1504a2e7687Sjeremylt 
1514fc1f125SJeremy L Thompson   ierr = CeedCalloc(CEED_FIELD_MAX, &impl->input_states); CeedChkBackend(ierr);
152bf4cb664SJeremy L Thompson   ierr = CeedCalloc(CEED_FIELD_MAX, &impl->e_vecs_in); CeedChkBackend(ierr);
153bf4cb664SJeremy L Thompson   ierr = CeedCalloc(CEED_FIELD_MAX, &impl->e_vecs_out); CeedChkBackend(ierr);
154bf4cb664SJeremy L Thompson   ierr = CeedCalloc(CEED_FIELD_MAX, &impl->q_vecs_in); CeedChkBackend(ierr);
155bf4cb664SJeremy L Thompson   ierr = CeedCalloc(CEED_FIELD_MAX, &impl->q_vecs_out); CeedChkBackend(ierr);
1564a2e7687Sjeremylt 
1574fc1f125SJeremy L Thompson   impl->num_inputs = num_input_fields;
1584fc1f125SJeremy L Thompson   impl->num_outputs = num_output_fields;
159aedaa0e5Sjeremylt 
1604a2e7687Sjeremylt   // Set up infield and outfield pointer arrays
1614a2e7687Sjeremylt   // Infields
1624fc1f125SJeremy L Thompson   ierr = CeedOperatorSetupFields_Blocked(qf, op, true, impl->blk_restr,
1634fc1f125SJeremy L Thompson                                          impl->e_vecs_full, impl->e_vecs_in,
164d1d35e2fSjeremylt                                          impl->q_vecs_in, 0,
165d1d35e2fSjeremylt                                          num_input_fields, Q);
166e15f9bd0SJeremy L Thompson   CeedChkBackend(ierr);
1674a2e7687Sjeremylt   // Outfields
1684fc1f125SJeremy L Thompson   ierr = CeedOperatorSetupFields_Blocked(qf, op, false, impl->blk_restr,
1694fc1f125SJeremy L Thompson                                          impl->e_vecs_full, impl->e_vecs_out,
170d1d35e2fSjeremylt                                          impl->q_vecs_out, num_input_fields,
171d1d35e2fSjeremylt                                          num_output_fields, Q);
172e15f9bd0SJeremy L Thompson   CeedChkBackend(ierr);
173aedaa0e5Sjeremylt 
17416911fdaSjeremylt   // Identity QFunctions
1750b454692Sjeremylt   if (impl->is_identity_qf) {
176d1d35e2fSjeremylt     CeedEvalMode in_mode, out_mode;
177d1d35e2fSjeremylt     CeedQFunctionField *in_fields, *out_fields;
1787e7773b5SJeremy L Thompson     ierr = CeedQFunctionGetFields(qf, NULL, &in_fields, NULL, &out_fields);
179e15f9bd0SJeremy L Thompson     CeedChkBackend(ierr);
1800b454692Sjeremylt     ierr = CeedQFunctionFieldGetEvalMode(in_fields[0], &in_mode);
181d1d35e2fSjeremylt     CeedChkBackend(ierr);
1820b454692Sjeremylt     ierr = CeedQFunctionFieldGetEvalMode(out_fields[0], &out_mode);
183d1d35e2fSjeremylt     CeedChkBackend(ierr);
184d1d35e2fSjeremylt 
1850b454692Sjeremylt     if (in_mode == CEED_EVAL_NONE && out_mode == CEED_EVAL_NONE) {
1860b454692Sjeremylt       impl->is_identity_restr_op = true;
1870b454692Sjeremylt     } else {
1880b454692Sjeremylt       ierr = CeedVectorDestroy(&impl->q_vecs_out[0]); CeedChkBackend(ierr);
1890b454692Sjeremylt       impl->q_vecs_out[0] = impl->q_vecs_in[0];
1900b454692Sjeremylt       ierr = CeedVectorAddReference(impl->q_vecs_in[0]); CeedChkBackend(ierr);
19116911fdaSjeremylt     }
19216911fdaSjeremylt   }
19316911fdaSjeremylt 
194e15f9bd0SJeremy L Thompson   ierr = CeedOperatorSetSetupDone(op); CeedChkBackend(ierr);
1954a2e7687Sjeremylt 
196e15f9bd0SJeremy L Thompson   return CEED_ERROR_SUCCESS;
1974a2e7687Sjeremylt }
1984a2e7687Sjeremylt 
199f10650afSjeremylt //------------------------------------------------------------------------------
200f10650afSjeremylt // Setup Operator Inputs
201f10650afSjeremylt //------------------------------------------------------------------------------
202d1d35e2fSjeremylt static inline int CeedOperatorSetupInputs_Blocked(CeedInt num_input_fields,
203d1d35e2fSjeremylt     CeedQFunctionField *qf_input_fields, CeedOperatorField *op_input_fields,
2044fc1f125SJeremy L Thompson     CeedVector in_vec, bool skip_active, CeedScalar *e_data_full[2*CEED_FIELD_MAX],
205a0162de9SJeremy L Thompson     CeedOperator_Blocked *impl, CeedRequest *request) {
2061d102b48SJeremy L Thompson   CeedInt ierr;
207d1d35e2fSjeremylt   CeedEvalMode eval_mode;
208d1bcdac9Sjeremylt   CeedVector vec;
20916c359e6Sjeremylt   uint64_t state;
2104a2e7687Sjeremylt 
211d1d35e2fSjeremylt   for (CeedInt i=0; i<num_input_fields; i++) {
2121d102b48SJeremy L Thompson     // Get input vector
213d1d35e2fSjeremylt     ierr = CeedOperatorFieldGetVector(op_input_fields[i], &vec);
214d1d35e2fSjeremylt     CeedChkBackend(ierr);
2151d102b48SJeremy L Thompson     if (vec == CEED_VECTOR_ACTIVE) {
216d1d35e2fSjeremylt       if (skip_active)
2171d102b48SJeremy L Thompson         continue;
2181d102b48SJeremy L Thompson       else
219d1d35e2fSjeremylt         vec = in_vec;
2201d102b48SJeremy L Thompson     }
2211d102b48SJeremy L Thompson 
222d1d35e2fSjeremylt     ierr = CeedQFunctionFieldGetEvalMode(qf_input_fields[i], &eval_mode);
223e15f9bd0SJeremy L Thompson     CeedChkBackend(ierr);
224d1d35e2fSjeremylt     if (eval_mode == CEED_EVAL_WEIGHT) { // Skip
2254a2e7687Sjeremylt     } else {
2264a2e7687Sjeremylt       // Restrict
227e15f9bd0SJeremy L Thompson       ierr = CeedVectorGetState(vec, &state); CeedChkBackend(ierr);
2284fc1f125SJeremy L Thompson       if (state != impl->input_states[i] || vec == in_vec) {
229d1d35e2fSjeremylt         ierr = CeedElemRestrictionApply(impl->blk_restr[i], CEED_NOTRANSPOSE,
2304fc1f125SJeremy L Thompson                                         vec, impl->e_vecs_full[i], request);
231e15f9bd0SJeremy L Thompson         CeedChkBackend(ierr);
2324fc1f125SJeremy L Thompson         impl->input_states[i] = state;
23316c359e6Sjeremylt       }
2344a2e7687Sjeremylt       // Get evec
2354fc1f125SJeremy L Thompson       ierr = CeedVectorGetArrayRead(impl->e_vecs_full[i], CEED_MEM_HOST,
2364fc1f125SJeremy L Thompson                                     (const CeedScalar **) &e_data_full[i]);
237e15f9bd0SJeremy L Thompson       CeedChkBackend(ierr);
2384a2e7687Sjeremylt     }
2394a2e7687Sjeremylt   }
240e15f9bd0SJeremy L Thompson   return CEED_ERROR_SUCCESS;
2414a2e7687Sjeremylt }
2424a2e7687Sjeremylt 
243f10650afSjeremylt //------------------------------------------------------------------------------
244f10650afSjeremylt // Input Basis Action
245f10650afSjeremylt //------------------------------------------------------------------------------
2461d102b48SJeremy L Thompson static inline int CeedOperatorInputBasis_Blocked(CeedInt e, CeedInt Q,
247d1d35e2fSjeremylt     CeedQFunctionField *qf_input_fields, CeedOperatorField *op_input_fields,
248d1d35e2fSjeremylt     CeedInt num_input_fields, CeedInt blk_size, bool skip_active,
2494fc1f125SJeremy L Thompson     CeedScalar *e_data_full[2*CEED_FIELD_MAX], CeedOperator_Blocked *impl) {
2501d102b48SJeremy L Thompson   CeedInt ierr;
251d1d35e2fSjeremylt   CeedInt dim, elem_size, size;
252d1d35e2fSjeremylt   CeedElemRestriction elem_restr;
253d1d35e2fSjeremylt   CeedEvalMode eval_mode;
2541d102b48SJeremy L Thompson   CeedBasis basis;
2551d102b48SJeremy L Thompson 
256d1d35e2fSjeremylt   for (CeedInt i=0; i<num_input_fields; i++) {
2571d102b48SJeremy L Thompson     // Skip active input
258d1d35e2fSjeremylt     if (skip_active) {
2591d102b48SJeremy L Thompson       CeedVector vec;
260d1d35e2fSjeremylt       ierr = CeedOperatorFieldGetVector(op_input_fields[i], &vec);
261d1d35e2fSjeremylt       CeedChkBackend(ierr);
2621d102b48SJeremy L Thompson       if (vec == CEED_VECTOR_ACTIVE)
2631d102b48SJeremy L Thompson         continue;
2641d102b48SJeremy L Thompson     }
2651d102b48SJeremy L Thompson 
266d1d35e2fSjeremylt     // Get elem_size, eval_mode, size
267d1d35e2fSjeremylt     ierr = CeedOperatorFieldGetElemRestriction(op_input_fields[i], &elem_restr);
268e15f9bd0SJeremy L Thompson     CeedChkBackend(ierr);
269d1d35e2fSjeremylt     ierr = CeedElemRestrictionGetElementSize(elem_restr, &elem_size);
270e15f9bd0SJeremy L Thompson     CeedChkBackend(ierr);
271d1d35e2fSjeremylt     ierr = CeedQFunctionFieldGetEvalMode(qf_input_fields[i], &eval_mode);
272e15f9bd0SJeremy L Thompson     CeedChkBackend(ierr);
273d1d35e2fSjeremylt     ierr = CeedQFunctionFieldGetSize(qf_input_fields[i], &size);
274d1d35e2fSjeremylt     CeedChkBackend(ierr);
2754a2e7687Sjeremylt     // Basis action
276d1d35e2fSjeremylt     switch(eval_mode) {
2774a2e7687Sjeremylt     case CEED_EVAL_NONE:
278d1d35e2fSjeremylt       ierr = CeedVectorSetArray(impl->q_vecs_in[i], CEED_MEM_HOST,
2794fc1f125SJeremy L Thompson                                 CEED_USE_POINTER, &e_data_full[i][e*Q*size]);
280a0162de9SJeremy L Thompson       CeedChkBackend(ierr);
2814a2e7687Sjeremylt       break;
2824a2e7687Sjeremylt     case CEED_EVAL_INTERP:
283d1d35e2fSjeremylt       ierr = CeedOperatorFieldGetBasis(op_input_fields[i], &basis);
284e15f9bd0SJeremy L Thompson       CeedChkBackend(ierr);
285d1d35e2fSjeremylt       ierr = CeedVectorSetArray(impl->e_vecs_in[i], CEED_MEM_HOST,
2864fc1f125SJeremy L Thompson                                 CEED_USE_POINTER, &e_data_full[i][e*elem_size*size]);
287e15f9bd0SJeremy L Thompson       CeedChkBackend(ierr);
288d1d35e2fSjeremylt       ierr = CeedBasisApply(basis, blk_size, CEED_NOTRANSPOSE,
289d1d35e2fSjeremylt                             CEED_EVAL_INTERP, impl->e_vecs_in[i],
290d1d35e2fSjeremylt                             impl->q_vecs_in[i]); CeedChkBackend(ierr);
2914a2e7687Sjeremylt       break;
2924a2e7687Sjeremylt     case CEED_EVAL_GRAD:
293d1d35e2fSjeremylt       ierr = CeedOperatorFieldGetBasis(op_input_fields[i], &basis);
294e15f9bd0SJeremy L Thompson       CeedChkBackend(ierr);
295e15f9bd0SJeremy L Thompson       ierr = CeedBasisGetDimension(basis, &dim); CeedChkBackend(ierr);
296d1d35e2fSjeremylt       ierr = CeedVectorSetArray(impl->e_vecs_in[i], CEED_MEM_HOST,
2974fc1f125SJeremy L Thompson                                 CEED_USE_POINTER, &e_data_full[i][e*elem_size*size/dim]);
298e15f9bd0SJeremy L Thompson       CeedChkBackend(ierr);
299d1d35e2fSjeremylt       ierr = CeedBasisApply(basis, blk_size, CEED_NOTRANSPOSE,
300d1d35e2fSjeremylt                             CEED_EVAL_GRAD, impl->e_vecs_in[i],
301d1d35e2fSjeremylt                             impl->q_vecs_in[i]); CeedChkBackend(ierr);
3024a2e7687Sjeremylt       break;
3034a2e7687Sjeremylt     case CEED_EVAL_WEIGHT:
3044a2e7687Sjeremylt       break;  // No action
305bbfacfcdSjeremylt     // LCOV_EXCL_START
3064a2e7687Sjeremylt     case CEED_EVAL_DIV:
3071d102b48SJeremy L Thompson     case CEED_EVAL_CURL: {
308d1d35e2fSjeremylt       ierr = CeedOperatorFieldGetBasis(op_input_fields[i], &basis);
309e15f9bd0SJeremy L Thompson       CeedChkBackend(ierr);
3101d102b48SJeremy L Thompson       Ceed ceed;
311e15f9bd0SJeremy L Thompson       ierr = CeedBasisGetCeed(basis, &ceed); CeedChkBackend(ierr);
312e15f9bd0SJeremy L Thompson       return CeedError(ceed, CEED_ERROR_BACKEND,
313e15f9bd0SJeremy L Thompson                        "Ceed evaluation mode not implemented");
314bbfacfcdSjeremylt       // LCOV_EXCL_STOP
3154a2e7687Sjeremylt     }
3164a2e7687Sjeremylt     }
31789c6efa4Sjeremylt   }
318e15f9bd0SJeremy L Thompson   return CEED_ERROR_SUCCESS;
31989c6efa4Sjeremylt }
3204a2e7687Sjeremylt 
321f10650afSjeremylt //------------------------------------------------------------------------------
322f10650afSjeremylt // Output Basis Action
323f10650afSjeremylt //------------------------------------------------------------------------------
3241d102b48SJeremy L Thompson static inline int CeedOperatorOutputBasis_Blocked(CeedInt e, CeedInt Q,
325d1d35e2fSjeremylt     CeedQFunctionField *qf_output_fields, CeedOperatorField *op_output_fields,
326d1d35e2fSjeremylt     CeedInt blk_size, CeedInt num_input_fields, CeedInt num_output_fields,
3274fc1f125SJeremy L Thompson     CeedOperator op, CeedScalar *e_data_full[2*CEED_FIELD_MAX],
328a0162de9SJeremy L Thompson     CeedOperator_Blocked *impl) {
3291d102b48SJeremy L Thompson   CeedInt ierr;
330d1d35e2fSjeremylt   CeedInt dim, elem_size, size;
331d1d35e2fSjeremylt   CeedElemRestriction elem_restr;
332d1d35e2fSjeremylt   CeedEvalMode eval_mode;
3331d102b48SJeremy L Thompson   CeedBasis basis;
3341d102b48SJeremy L Thompson 
335d1d35e2fSjeremylt   for (CeedInt i=0; i<num_output_fields; i++) {
336d1d35e2fSjeremylt     // Get elem_size, eval_mode, size
337d1d35e2fSjeremylt     ierr = CeedOperatorFieldGetElemRestriction(op_output_fields[i], &elem_restr);
338e15f9bd0SJeremy L Thompson     CeedChkBackend(ierr);
339d1d35e2fSjeremylt     ierr = CeedElemRestrictionGetElementSize(elem_restr, &elem_size);
340e15f9bd0SJeremy L Thompson     CeedChkBackend(ierr);
341d1d35e2fSjeremylt     ierr = CeedQFunctionFieldGetEvalMode(qf_output_fields[i], &eval_mode);
342e15f9bd0SJeremy L Thompson     CeedChkBackend(ierr);
343d1d35e2fSjeremylt     ierr = CeedQFunctionFieldGetSize(qf_output_fields[i], &size);
344e15f9bd0SJeremy L Thompson     CeedChkBackend(ierr);
3454a2e7687Sjeremylt     // Basis action
346d1d35e2fSjeremylt     switch(eval_mode) {
3474a2e7687Sjeremylt     case CEED_EVAL_NONE:
3484a2e7687Sjeremylt       break; // No action
3494a2e7687Sjeremylt     case CEED_EVAL_INTERP:
350d1d35e2fSjeremylt       ierr = CeedOperatorFieldGetBasis(op_output_fields[i], &basis);
351e15f9bd0SJeremy L Thompson       CeedChkBackend(ierr);
352d1d35e2fSjeremylt       ierr = CeedVectorSetArray(impl->e_vecs_out[i], CEED_MEM_HOST,
3534fc1f125SJeremy L Thompson                                 CEED_USE_POINTER, &e_data_full[i + num_input_fields][e*elem_size*size]);
354e15f9bd0SJeremy L Thompson       CeedChkBackend(ierr);
355d1d35e2fSjeremylt       ierr = CeedBasisApply(basis, blk_size, CEED_TRANSPOSE,
356d1d35e2fSjeremylt                             CEED_EVAL_INTERP, impl->q_vecs_out[i],
357d1d35e2fSjeremylt                             impl->e_vecs_out[i]); CeedChkBackend(ierr);
3584a2e7687Sjeremylt       break;
3594a2e7687Sjeremylt     case CEED_EVAL_GRAD:
360d1d35e2fSjeremylt       ierr = CeedOperatorFieldGetBasis(op_output_fields[i], &basis);
361e15f9bd0SJeremy L Thompson       CeedChkBackend(ierr);
362e15f9bd0SJeremy L Thompson       ierr = CeedBasisGetDimension(basis, &dim); CeedChkBackend(ierr);
363d1d35e2fSjeremylt       ierr = CeedVectorSetArray(impl->e_vecs_out[i], CEED_MEM_HOST,
3644fc1f125SJeremy L Thompson                                 CEED_USE_POINTER, &e_data_full[i + num_input_fields][e*elem_size*size/dim]);
365e15f9bd0SJeremy L Thompson       CeedChkBackend(ierr);
366d1d35e2fSjeremylt       ierr = CeedBasisApply(basis, blk_size, CEED_TRANSPOSE,
367d1d35e2fSjeremylt                             CEED_EVAL_GRAD, impl->q_vecs_out[i],
368d1d35e2fSjeremylt                             impl->e_vecs_out[i]); CeedChkBackend(ierr);
3694a2e7687Sjeremylt       break;
370c042f62fSJeremy L Thompson     // LCOV_EXCL_START
371bbfacfcdSjeremylt     case CEED_EVAL_WEIGHT: {
3724ce2993fSjeremylt       Ceed ceed;
373e15f9bd0SJeremy L Thompson       ierr = CeedOperatorGetCeed(op, &ceed); CeedChkBackend(ierr);
374e15f9bd0SJeremy L Thompson       return CeedError(ceed, CEED_ERROR_BACKEND,
375e15f9bd0SJeremy L Thompson                        "CEED_EVAL_WEIGHT cannot be an output "
3761d102b48SJeremy L Thompson                        "evaluation mode");
3774ce2993fSjeremylt     }
3784a2e7687Sjeremylt     case CEED_EVAL_DIV:
3791d102b48SJeremy L Thompson     case CEED_EVAL_CURL: {
3801d102b48SJeremy L Thompson       Ceed ceed;
381e15f9bd0SJeremy L Thompson       ierr = CeedOperatorGetCeed(op, &ceed); CeedChkBackend(ierr);
382e15f9bd0SJeremy L Thompson       return CeedError(ceed, CEED_ERROR_BACKEND,
383e15f9bd0SJeremy L Thompson                        "Ceed evaluation mode not implemented");
384bbfacfcdSjeremylt       // LCOV_EXCL_STOP
3854a2e7687Sjeremylt     }
38689c6efa4Sjeremylt     }
38789c6efa4Sjeremylt   }
388e15f9bd0SJeremy L Thompson   return CEED_ERROR_SUCCESS;
3891d102b48SJeremy L Thompson }
3901d102b48SJeremy L Thompson 
391f10650afSjeremylt //------------------------------------------------------------------------------
392f10650afSjeremylt // Restore Input Vectors
393f10650afSjeremylt //------------------------------------------------------------------------------
394d1d35e2fSjeremylt static inline int CeedOperatorRestoreInputs_Blocked(CeedInt num_input_fields,
395d1d35e2fSjeremylt     CeedQFunctionField *qf_input_fields, CeedOperatorField *op_input_fields,
3964fc1f125SJeremy L Thompson     bool skip_active, CeedScalar *e_data_full[2*CEED_FIELD_MAX],
397a0162de9SJeremy L Thompson     CeedOperator_Blocked *impl) {
3981d102b48SJeremy L Thompson   CeedInt ierr;
399d1d35e2fSjeremylt   CeedEvalMode eval_mode;
4001d102b48SJeremy L Thompson 
401d1d35e2fSjeremylt   for (CeedInt i=0; i<num_input_fields; i++) {
4021d102b48SJeremy L Thompson     // Skip active inputs
403d1d35e2fSjeremylt     if (skip_active) {
4041d102b48SJeremy L Thompson       CeedVector vec;
405d1d35e2fSjeremylt       ierr = CeedOperatorFieldGetVector(op_input_fields[i], &vec);
406d1d35e2fSjeremylt       CeedChkBackend(ierr);
4071d102b48SJeremy L Thompson       if (vec == CEED_VECTOR_ACTIVE)
4081d102b48SJeremy L Thompson         continue;
4091d102b48SJeremy L Thompson     }
410d1d35e2fSjeremylt     ierr = CeedQFunctionFieldGetEvalMode(qf_input_fields[i], &eval_mode);
411e15f9bd0SJeremy L Thompson     CeedChkBackend(ierr);
412d1d35e2fSjeremylt     if (eval_mode == CEED_EVAL_WEIGHT) { // Skip
4131d102b48SJeremy L Thompson     } else {
4144fc1f125SJeremy L Thompson       ierr = CeedVectorRestoreArrayRead(impl->e_vecs_full[i],
4154fc1f125SJeremy L Thompson                                         (const CeedScalar **) &e_data_full[i]);
416e15f9bd0SJeremy L Thompson       CeedChkBackend(ierr);
4171d102b48SJeremy L Thompson     }
4181d102b48SJeremy L Thompson   }
419e15f9bd0SJeremy L Thompson   return CEED_ERROR_SUCCESS;
4201d102b48SJeremy L Thompson }
4211d102b48SJeremy L Thompson 
422f10650afSjeremylt //------------------------------------------------------------------------------
423f10650afSjeremylt // Operator Apply
424f10650afSjeremylt //------------------------------------------------------------------------------
425d1d35e2fSjeremylt static int CeedOperatorApplyAdd_Blocked(CeedOperator op, CeedVector in_vec,
426d1d35e2fSjeremylt                                         CeedVector out_vec,
4271d102b48SJeremy L Thompson                                         CeedRequest *request) {
4281d102b48SJeremy L Thompson   int ierr;
4291d102b48SJeremy L Thompson   CeedOperator_Blocked *impl;
430e15f9bd0SJeremy L Thompson   ierr = CeedOperatorGetData(op, &impl); CeedChkBackend(ierr);
431d1d35e2fSjeremylt   const CeedInt blk_size = 8;
432d1d35e2fSjeremylt   CeedInt Q, num_input_fields, num_output_fields, num_elem, size;
433d1d35e2fSjeremylt   ierr = CeedOperatorGetNumElements(op, &num_elem); CeedChkBackend(ierr);
434e15f9bd0SJeremy L Thompson   ierr = CeedOperatorGetNumQuadraturePoints(op, &Q); CeedChkBackend(ierr);
435d1d35e2fSjeremylt   CeedInt num_blks = (num_elem/blk_size) + !!(num_elem%blk_size);
4361d102b48SJeremy L Thompson   CeedQFunction qf;
437e15f9bd0SJeremy L Thompson   ierr = CeedOperatorGetQFunction(op, &qf); CeedChkBackend(ierr);
438d1d35e2fSjeremylt   CeedOperatorField *op_input_fields, *op_output_fields;
4397e7773b5SJeremy L Thompson   ierr = CeedOperatorGetFields(op, &num_input_fields, &op_input_fields,
4407e7773b5SJeremy L Thompson                                &num_output_fields, &op_output_fields);
441e15f9bd0SJeremy L Thompson   CeedChkBackend(ierr);
442d1d35e2fSjeremylt   CeedQFunctionField *qf_input_fields, *qf_output_fields;
4437e7773b5SJeremy L Thompson   ierr = CeedQFunctionGetFields(qf, NULL, &qf_input_fields, NULL,
4447e7773b5SJeremy L Thompson                                 &qf_output_fields);
445e15f9bd0SJeremy L Thompson   CeedChkBackend(ierr);
446d1d35e2fSjeremylt   CeedEvalMode eval_mode;
4471d102b48SJeremy L Thompson   CeedVector vec;
4484fc1f125SJeremy L Thompson   CeedScalar *e_data_full[2*CEED_FIELD_MAX] = {0};
4491d102b48SJeremy L Thompson 
4501d102b48SJeremy L Thompson   // Setup
451e15f9bd0SJeremy L Thompson   ierr = CeedOperatorSetup_Blocked(op); CeedChkBackend(ierr);
4521d102b48SJeremy L Thompson 
4530b454692Sjeremylt   // Restriction only operator
4540b454692Sjeremylt   if (impl->is_identity_restr_op) {
4550b454692Sjeremylt     ierr = CeedElemRestrictionApply(impl->blk_restr[0], CEED_NOTRANSPOSE, in_vec,
4564fc1f125SJeremy L Thompson                                     impl->e_vecs_full[0], request); CeedChkBackend(ierr);
4570b454692Sjeremylt     ierr = CeedElemRestrictionApply(impl->blk_restr[1], CEED_TRANSPOSE,
4584fc1f125SJeremy L Thompson                                     impl->e_vecs_full[0], out_vec, request); CeedChkBackend(ierr);
4590b454692Sjeremylt     return CEED_ERROR_SUCCESS;
4600b454692Sjeremylt   }
4610b454692Sjeremylt 
4621d102b48SJeremy L Thompson   // Input Evecs and Restriction
463d1d35e2fSjeremylt   ierr = CeedOperatorSetupInputs_Blocked(num_input_fields, qf_input_fields,
4644fc1f125SJeremy L Thompson                                          op_input_fields, in_vec, false, e_data_full,
465a0162de9SJeremy L Thompson                                          impl, request); CeedChkBackend(ierr);
4661d102b48SJeremy L Thompson 
4671d102b48SJeremy L Thompson   // Output Evecs
468d1d35e2fSjeremylt   for (CeedInt i=0; i<num_output_fields; i++) {
4699c774eddSJeremy L Thompson     ierr = CeedVectorGetArrayWrite(impl->e_vecs_full[i+impl->num_inputs],
4709c774eddSJeremy L Thompson                                    CEED_MEM_HOST, &e_data_full[i + num_input_fields]);
4719c774eddSJeremy L Thompson     CeedChkBackend(ierr);
4721d102b48SJeremy L Thompson   }
4731d102b48SJeremy L Thompson 
4741d102b48SJeremy L Thompson   // Loop through elements
475d1d35e2fSjeremylt   for (CeedInt e=0; e<num_blks*blk_size; e+=blk_size) {
4761d102b48SJeremy L Thompson     // Output pointers
477d1d35e2fSjeremylt     for (CeedInt i=0; i<num_output_fields; i++) {
478d1d35e2fSjeremylt       ierr = CeedQFunctionFieldGetEvalMode(qf_output_fields[i], &eval_mode);
479e15f9bd0SJeremy L Thompson       CeedChkBackend(ierr);
480d1d35e2fSjeremylt       if (eval_mode == CEED_EVAL_NONE) {
481d1d35e2fSjeremylt         ierr = CeedQFunctionFieldGetSize(qf_output_fields[i], &size);
482e15f9bd0SJeremy L Thompson         CeedChkBackend(ierr);
483d1d35e2fSjeremylt         ierr = CeedVectorSetArray(impl->q_vecs_out[i], CEED_MEM_HOST,
4844fc1f125SJeremy L Thompson                                   CEED_USE_POINTER, &e_data_full[i + num_input_fields][e*Q*size]);
485e15f9bd0SJeremy L Thompson         CeedChkBackend(ierr);
4861d102b48SJeremy L Thompson       }
4871d102b48SJeremy L Thompson     }
4881d102b48SJeremy L Thompson 
48916911fdaSjeremylt     // Input basis apply
490d1d35e2fSjeremylt     ierr = CeedOperatorInputBasis_Blocked(e, Q, qf_input_fields, op_input_fields,
4914fc1f125SJeremy L Thompson                                           num_input_fields, blk_size, false, e_data_full,
492a0162de9SJeremy L Thompson                                           impl); CeedChkBackend(ierr);
49316911fdaSjeremylt 
4941d102b48SJeremy L Thompson     // Q function
4950b454692Sjeremylt     if (!impl->is_identity_qf) {
496d1d35e2fSjeremylt       ierr = CeedQFunctionApply(qf, Q*blk_size, impl->q_vecs_in, impl->q_vecs_out);
497e15f9bd0SJeremy L Thompson       CeedChkBackend(ierr);
49816911fdaSjeremylt     }
4991d102b48SJeremy L Thompson 
5001d102b48SJeremy L Thompson     // Output basis apply
501d1d35e2fSjeremylt     ierr = CeedOperatorOutputBasis_Blocked(e, Q, qf_output_fields, op_output_fields,
502d1d35e2fSjeremylt                                            blk_size, num_input_fields,
5034fc1f125SJeremy L Thompson                                            num_output_fields, op, e_data_full, impl);
504e15f9bd0SJeremy L Thompson     CeedChkBackend(ierr);
5051d102b48SJeremy L Thompson   }
50689c6efa4Sjeremylt 
50789c6efa4Sjeremylt   // Output restriction
508d1d35e2fSjeremylt   for (CeedInt i=0; i<num_output_fields; i++) {
50989c6efa4Sjeremylt     // Restore evec
5104fc1f125SJeremy L Thompson     ierr = CeedVectorRestoreArray(impl->e_vecs_full[i+impl->num_inputs],
5119c774eddSJeremy L Thompson                                   &e_data_full[i + num_input_fields]);
5129c774eddSJeremy L Thompson     CeedChkBackend(ierr);
513d1bcdac9Sjeremylt     // Get output vector
514d1d35e2fSjeremylt     ierr = CeedOperatorFieldGetVector(op_output_fields[i], &vec);
515e15f9bd0SJeremy L Thompson     CeedChkBackend(ierr);
51689c6efa4Sjeremylt     // Active
517d1bcdac9Sjeremylt     if (vec == CEED_VECTOR_ACTIVE)
518d1d35e2fSjeremylt       vec = out_vec;
5194a2e7687Sjeremylt     // Restrict
5204fc1f125SJeremy L Thompson     ierr = CeedElemRestrictionApply(impl->blk_restr[i+impl->num_inputs],
5214fc1f125SJeremy L Thompson                                     CEED_TRANSPOSE, impl->e_vecs_full[i+impl->num_inputs],
522e15f9bd0SJeremy L Thompson                                     vec, request); CeedChkBackend(ierr);
52389c6efa4Sjeremylt 
5244a2e7687Sjeremylt   }
5254a2e7687Sjeremylt 
5264a2e7687Sjeremylt   // Restore input arrays
527d1d35e2fSjeremylt   ierr = CeedOperatorRestoreInputs_Blocked(num_input_fields, qf_input_fields,
5284fc1f125SJeremy L Thompson          op_input_fields, false, e_data_full, impl); CeedChkBackend(ierr);
5291d102b48SJeremy L Thompson 
530e15f9bd0SJeremy L Thompson   return CEED_ERROR_SUCCESS;
5311d102b48SJeremy L Thompson }
5321d102b48SJeremy L Thompson 
533f10650afSjeremylt //------------------------------------------------------------------------------
53470a7ffb3SJeremy L Thompson // Core code for assembling linear QFunction
535f10650afSjeremylt //------------------------------------------------------------------------------
53670a7ffb3SJeremy L Thompson static inline int CeedOperatorLinearAssembleQFunctionCore_Blocked(
537a0162de9SJeremy L Thompson   CeedOperator op, bool build_objects, CeedVector *assembled,
538a0162de9SJeremy L Thompson   CeedElemRestriction *rstr, CeedRequest *request) {
5391d102b48SJeremy L Thompson   int ierr;
5401d102b48SJeremy L Thompson   CeedOperator_Blocked *impl;
541e15f9bd0SJeremy L Thompson   ierr = CeedOperatorGetData(op, &impl); CeedChkBackend(ierr);
542d1d35e2fSjeremylt   const CeedInt blk_size = 8;
543d1d35e2fSjeremylt   CeedInt Q, num_input_fields, num_output_fields, num_elem, size;
544d1d35e2fSjeremylt   ierr = CeedOperatorGetNumElements(op, &num_elem); CeedChkBackend(ierr);
545e15f9bd0SJeremy L Thompson   ierr = CeedOperatorGetNumQuadraturePoints(op, &Q); CeedChkBackend(ierr);
546d1d35e2fSjeremylt   CeedInt num_blks = (num_elem/blk_size) + !!(num_elem%blk_size);
5471d102b48SJeremy L Thompson   CeedQFunction qf;
548e15f9bd0SJeremy L Thompson   ierr = CeedOperatorGetQFunction(op, &qf); CeedChkBackend(ierr);
549d1d35e2fSjeremylt   CeedOperatorField *op_input_fields, *op_output_fields;
5507e7773b5SJeremy L Thompson   ierr = CeedOperatorGetFields(op, &num_input_fields, &op_input_fields,
5517e7773b5SJeremy L Thompson                                &num_output_fields, &op_output_fields);
552e15f9bd0SJeremy L Thompson   CeedChkBackend(ierr);
553d1d35e2fSjeremylt   CeedQFunctionField *qf_input_fields, *qf_output_fields;
5547e7773b5SJeremy L Thompson   ierr = CeedQFunctionGetFields(qf, NULL, &qf_input_fields, NULL,
5557e7773b5SJeremy L Thompson                                 &qf_output_fields);
556e15f9bd0SJeremy L Thompson   CeedChkBackend(ierr);
5574fc1f125SJeremy L Thompson   CeedVector vec, l_vec = impl->qf_l_vec;
5584fc1f125SJeremy L Thompson   CeedInt num_active_in = impl->num_active_in,
5594fc1f125SJeremy L Thompson           num_active_out = impl->num_active_out;
560bb219a0fSJeremy L Thompson   CeedVector *active_in = impl->qf_active_in;
5611d102b48SJeremy L Thompson   CeedScalar *a, *tmp;
5621d102b48SJeremy L Thompson   Ceed ceed;
563e15f9bd0SJeremy L Thompson   ierr = CeedOperatorGetCeed(op, &ceed); CeedChkBackend(ierr);
5644fc1f125SJeremy L Thompson   CeedScalar *e_data_full[2*CEED_FIELD_MAX] = {0};
5651d102b48SJeremy L Thompson 
5661d102b48SJeremy L Thompson   // Setup
567e15f9bd0SJeremy L Thompson   ierr = CeedOperatorSetup_Blocked(op); CeedChkBackend(ierr);
5681d102b48SJeremy L Thompson 
56916911fdaSjeremylt   // Check for identity
5700b454692Sjeremylt   if (impl->is_identity_qf)
57116911fdaSjeremylt     // LCOV_EXCL_START
572e15f9bd0SJeremy L Thompson     return CeedError(ceed, CEED_ERROR_BACKEND,
573d1d35e2fSjeremylt                      "Assembling identity QFunctions not supported");
57416911fdaSjeremylt   // LCOV_EXCL_STOP
57516911fdaSjeremylt 
5761d102b48SJeremy L Thompson   // Input Evecs and Restriction
577d1d35e2fSjeremylt   ierr = CeedOperatorSetupInputs_Blocked(num_input_fields, qf_input_fields,
5784fc1f125SJeremy L Thompson                                          op_input_fields, NULL, true, e_data_full,
579a0162de9SJeremy L Thompson                                          impl, request); CeedChkBackend(ierr);
5801d102b48SJeremy L Thompson 
5811d102b48SJeremy L Thompson   // Count number of active input fields
582bb219a0fSJeremy L Thompson   if (!num_active_in) {
583d1d35e2fSjeremylt     for (CeedInt i=0; i<num_input_fields; i++) {
5841d102b48SJeremy L Thompson       // Get input vector
585d1d35e2fSjeremylt       ierr = CeedOperatorFieldGetVector(op_input_fields[i], &vec);
586d1d35e2fSjeremylt       CeedChkBackend(ierr);
5871d102b48SJeremy L Thompson       // Check if active input
5881d102b48SJeremy L Thompson       if (vec == CEED_VECTOR_ACTIVE) {
589d1d35e2fSjeremylt         ierr = CeedQFunctionFieldGetSize(qf_input_fields[i], &size);
590e15f9bd0SJeremy L Thompson         CeedChkBackend(ierr);
591d1d35e2fSjeremylt         ierr = CeedVectorSetValue(impl->q_vecs_in[i], 0.0); CeedChkBackend(ierr);
592d1d35e2fSjeremylt         ierr = CeedVectorGetArray(impl->q_vecs_in[i], CEED_MEM_HOST, &tmp);
593d1d35e2fSjeremylt         CeedChkBackend(ierr);
594d1d35e2fSjeremylt         ierr = CeedRealloc(num_active_in + size, &active_in); CeedChkBackend(ierr);
5951d102b48SJeremy L Thompson         for (CeedInt field=0; field<size; field++) {
596d1d35e2fSjeremylt           ierr = CeedVectorCreate(ceed, Q*blk_size, &active_in[num_active_in+field]);
597e15f9bd0SJeremy L Thompson           CeedChkBackend(ierr);
598d1d35e2fSjeremylt           ierr = CeedVectorSetArray(active_in[num_active_in+field], CEED_MEM_HOST,
599d1d35e2fSjeremylt                                     CEED_USE_POINTER, &tmp[field*Q*blk_size]);
600e15f9bd0SJeremy L Thompson           CeedChkBackend(ierr);
6011d102b48SJeremy L Thompson         }
602d1d35e2fSjeremylt         num_active_in += size;
603d1d35e2fSjeremylt         ierr = CeedVectorRestoreArray(impl->q_vecs_in[i], &tmp); CeedChkBackend(ierr);
6041d102b48SJeremy L Thompson       }
6051d102b48SJeremy L Thompson     }
6064fc1f125SJeremy L Thompson     impl->num_active_in = num_active_in;
607bb219a0fSJeremy L Thompson     impl->qf_active_in = active_in;
608bb219a0fSJeremy L Thompson   }
6091d102b48SJeremy L Thompson 
6101d102b48SJeremy L Thompson   // Count number of active output fields
611bb219a0fSJeremy L Thompson   if (!num_active_out) {
612d1d35e2fSjeremylt     for (CeedInt i=0; i<num_output_fields; i++) {
6131d102b48SJeremy L Thompson       // Get output vector
614d1d35e2fSjeremylt       ierr = CeedOperatorFieldGetVector(op_output_fields[i], &vec);
615e15f9bd0SJeremy L Thompson       CeedChkBackend(ierr);
6161d102b48SJeremy L Thompson       // Check if active output
6171d102b48SJeremy L Thompson       if (vec == CEED_VECTOR_ACTIVE) {
618d1d35e2fSjeremylt         ierr = CeedQFunctionFieldGetSize(qf_output_fields[i], &size);
619e15f9bd0SJeremy L Thompson         CeedChkBackend(ierr);
620d1d35e2fSjeremylt         num_active_out += size;
6211d102b48SJeremy L Thompson       }
6221d102b48SJeremy L Thompson     }
6234fc1f125SJeremy L Thompson     impl->num_active_out = num_active_out;
624bb219a0fSJeremy L Thompson   }
6251d102b48SJeremy L Thompson 
6261d102b48SJeremy L Thompson   // Check sizes
627d1d35e2fSjeremylt   if (!num_active_in || !num_active_out)
6281d102b48SJeremy L Thompson     // LCOV_EXCL_START
629e15f9bd0SJeremy L Thompson     return CeedError(ceed, CEED_ERROR_BACKEND,
630e15f9bd0SJeremy L Thompson                      "Cannot assemble QFunction without active inputs "
6311d102b48SJeremy L Thompson                      "and outputs");
6321d102b48SJeremy L Thompson   // LCOV_EXCL_STOP
6331d102b48SJeremy L Thompson 
634d1d35e2fSjeremylt   // Setup Lvec
6354fc1f125SJeremy L Thompson   if (!l_vec) {
636d1d35e2fSjeremylt     ierr = CeedVectorCreate(ceed, num_blks*blk_size*Q*num_active_in*num_active_out,
6374fc1f125SJeremy L Thompson                             &l_vec); CeedChkBackend(ierr);
6384fc1f125SJeremy L Thompson     impl->qf_l_vec = l_vec;
639bb219a0fSJeremy L Thompson   }
6409c774eddSJeremy L Thompson   ierr = CeedVectorGetArrayWrite(l_vec, CEED_MEM_HOST, &a); CeedChkBackend(ierr);
6411d102b48SJeremy L Thompson 
64270a7ffb3SJeremy L Thompson   // Build objects if needed
643d1d35e2fSjeremylt   CeedInt strides[3] = {1, Q, num_active_in *num_active_out*Q};
64470a7ffb3SJeremy L Thompson   if (build_objects) {
64570a7ffb3SJeremy L Thompson     // Create output restriction
646d1d35e2fSjeremylt     ierr = CeedElemRestrictionCreateStrided(ceed, num_elem, Q,
647d1d35e2fSjeremylt                                             num_active_in*num_active_out,
648d1d35e2fSjeremylt                                             num_active_in*num_active_out*num_elem*Q,
649e15f9bd0SJeremy L Thompson                                             strides, rstr); CeedChkBackend(ierr);
6501d102b48SJeremy L Thompson     // Create assembled vector
651d1d35e2fSjeremylt     ierr = CeedVectorCreate(ceed, num_elem*Q*num_active_in*num_active_out,
652e15f9bd0SJeremy L Thompson                             assembled); CeedChkBackend(ierr);
65370a7ffb3SJeremy L Thompson   }
6541d102b48SJeremy L Thompson 
6551d102b48SJeremy L Thompson   // Loop through elements
656d1d35e2fSjeremylt   for (CeedInt e=0; e<num_blks*blk_size; e+=blk_size) {
6571d102b48SJeremy L Thompson     // Input basis apply
658d1d35e2fSjeremylt     ierr = CeedOperatorInputBasis_Blocked(e, Q, qf_input_fields, op_input_fields,
6594fc1f125SJeremy L Thompson                                           num_input_fields, blk_size, true, e_data_full,
660a0162de9SJeremy L Thompson                                           impl); CeedChkBackend(ierr);
6611d102b48SJeremy L Thompson 
6621d102b48SJeremy L Thompson     // Assemble QFunction
663d1d35e2fSjeremylt     for (CeedInt in=0; in<num_active_in; in++) {
6641d102b48SJeremy L Thompson       // Set Inputs
665d1d35e2fSjeremylt       ierr = CeedVectorSetValue(active_in[in], 1.0); CeedChkBackend(ierr);
666d1d35e2fSjeremylt       if (num_active_in > 1) {
667d1d35e2fSjeremylt         ierr = CeedVectorSetValue(active_in[(in+num_active_in-1)%num_active_in],
668e15f9bd0SJeremy L Thompson                                   0.0); CeedChkBackend(ierr);
66942ea3801Sjeremylt       }
6701d102b48SJeremy L Thompson       // Set Outputs
671d1d35e2fSjeremylt       for (CeedInt out=0; out<num_output_fields; out++) {
6721d102b48SJeremy L Thompson         // Get output vector
673d1d35e2fSjeremylt         ierr = CeedOperatorFieldGetVector(op_output_fields[out], &vec);
674e15f9bd0SJeremy L Thompson         CeedChkBackend(ierr);
6751d102b48SJeremy L Thompson         // Check if active output
6761d102b48SJeremy L Thompson         if (vec == CEED_VECTOR_ACTIVE) {
677d1d35e2fSjeremylt           CeedVectorSetArray(impl->q_vecs_out[out], CEED_MEM_HOST,
678e15f9bd0SJeremy L Thompson                              CEED_USE_POINTER, a); CeedChkBackend(ierr);
679d1d35e2fSjeremylt           ierr = CeedQFunctionFieldGetSize(qf_output_fields[out], &size);
680e15f9bd0SJeremy L Thompson           CeedChkBackend(ierr);
681d1d35e2fSjeremylt           a += size*Q*blk_size; // Advance the pointer by the size of the output
6821d102b48SJeremy L Thompson         }
6831d102b48SJeremy L Thompson       }
6841d102b48SJeremy L Thompson       // Apply QFunction
685d1d35e2fSjeremylt       ierr = CeedQFunctionApply(qf, Q*blk_size, impl->q_vecs_in, impl->q_vecs_out);
686e15f9bd0SJeremy L Thompson       CeedChkBackend(ierr);
6874a2e7687Sjeremylt     }
6884a2e7687Sjeremylt   }
6894a2e7687Sjeremylt 
6901d102b48SJeremy L Thompson   // Un-set output Qvecs to prevent accidental overwrite of Assembled
691d1d35e2fSjeremylt   for (CeedInt out=0; out<num_output_fields; out++) {
6921d102b48SJeremy L Thompson     // Get output vector
693d1d35e2fSjeremylt     ierr = CeedOperatorFieldGetVector(op_output_fields[out], &vec);
694e15f9bd0SJeremy L Thompson     CeedChkBackend(ierr);
6951d102b48SJeremy L Thompson     // Check if active output
6961d102b48SJeremy L Thompson     if (vec == CEED_VECTOR_ACTIVE) {
697d1d35e2fSjeremylt       CeedVectorSetArray(impl->q_vecs_out[out], CEED_MEM_HOST, CEED_COPY_VALUES,
698e15f9bd0SJeremy L Thompson                          NULL); CeedChkBackend(ierr);
6991d102b48SJeremy L Thompson     }
7001d102b48SJeremy L Thompson   }
7011d102b48SJeremy L Thompson 
7021d102b48SJeremy L Thompson   // Restore input arrays
703d1d35e2fSjeremylt   ierr = CeedOperatorRestoreInputs_Blocked(num_input_fields, qf_input_fields,
7044fc1f125SJeremy L Thompson          op_input_fields, true, e_data_full, impl); CeedChkBackend(ierr);
7051d102b48SJeremy L Thompson 
7061d102b48SJeremy L Thompson   // Output blocked restriction
7074fc1f125SJeremy L Thompson   ierr = CeedVectorRestoreArray(l_vec, &a); CeedChkBackend(ierr);
708e15f9bd0SJeremy L Thompson   ierr = CeedVectorSetValue(*assembled, 0.0); CeedChkBackend(ierr);
709bb219a0fSJeremy L Thompson   CeedElemRestriction blk_rstr = impl->qf_blk_rstr;
710bb219a0fSJeremy L Thompson   if (!impl->qf_blk_rstr) {
711d1d35e2fSjeremylt     ierr = CeedElemRestrictionCreateBlockedStrided(ceed, num_elem, Q, blk_size,
712d1d35e2fSjeremylt            num_active_in*num_active_out, num_active_in*num_active_out*num_elem*Q,
713d1d35e2fSjeremylt            strides, &blk_rstr); CeedChkBackend(ierr);
714bb219a0fSJeremy L Thompson     impl->qf_blk_rstr = blk_rstr;
715bb219a0fSJeremy L Thompson   }
7164fc1f125SJeremy L Thompson   ierr = CeedElemRestrictionApply(blk_rstr, CEED_TRANSPOSE, l_vec, *assembled,
717e15f9bd0SJeremy L Thompson                                   request); CeedChkBackend(ierr);
7181d102b48SJeremy L Thompson 
719e15f9bd0SJeremy L Thompson   return CEED_ERROR_SUCCESS;
7204a2e7687Sjeremylt }
7214a2e7687Sjeremylt 
722f10650afSjeremylt //------------------------------------------------------------------------------
72370a7ffb3SJeremy L Thompson // Assemble Linear QFunction
72470a7ffb3SJeremy L Thompson //------------------------------------------------------------------------------
72570a7ffb3SJeremy L Thompson static int CeedOperatorLinearAssembleQFunction_Blocked(CeedOperator op,
72670a7ffb3SJeremy L Thompson     CeedVector *assembled, CeedElemRestriction *rstr, CeedRequest *request) {
72770a7ffb3SJeremy L Thompson   return CeedOperatorLinearAssembleQFunctionCore_Blocked(op, true, assembled,
72870a7ffb3SJeremy L Thompson          rstr, request);
72970a7ffb3SJeremy L Thompson }
73070a7ffb3SJeremy L Thompson 
73170a7ffb3SJeremy L Thompson //------------------------------------------------------------------------------
73270a7ffb3SJeremy L Thompson // Update Assembled Linear QFunction
73370a7ffb3SJeremy L Thompson //------------------------------------------------------------------------------
73470a7ffb3SJeremy L Thompson static int CeedOperatorLinearAssembleQFunctionUpdate_Blocked(CeedOperator op,
73570a7ffb3SJeremy L Thompson     CeedVector assembled, CeedElemRestriction rstr, CeedRequest *request) {
73670a7ffb3SJeremy L Thompson   return CeedOperatorLinearAssembleQFunctionCore_Blocked(op, false, &assembled,
73770a7ffb3SJeremy L Thompson          &rstr, request);
73870a7ffb3SJeremy L Thompson }
73970a7ffb3SJeremy L Thompson 
74070a7ffb3SJeremy L Thompson //------------------------------------------------------------------------------
741f10650afSjeremylt // Operator Destroy
742f10650afSjeremylt //------------------------------------------------------------------------------
743f10650afSjeremylt static int CeedOperatorDestroy_Blocked(CeedOperator op) {
744f10650afSjeremylt   int ierr;
745f10650afSjeremylt   CeedOperator_Blocked *impl;
746e15f9bd0SJeremy L Thompson   ierr = CeedOperatorGetData(op, &impl); CeedChkBackend(ierr);
747f10650afSjeremylt 
7484fc1f125SJeremy L Thompson   for (CeedInt i=0; i<impl->num_inputs+impl->num_outputs; i++) {
749d1d35e2fSjeremylt     ierr = CeedElemRestrictionDestroy(&impl->blk_restr[i]); CeedChkBackend(ierr);
7504fc1f125SJeremy L Thompson     ierr = CeedVectorDestroy(&impl->e_vecs_full[i]); CeedChkBackend(ierr);
751f10650afSjeremylt   }
752d1d35e2fSjeremylt   ierr = CeedFree(&impl->blk_restr); CeedChkBackend(ierr);
7534fc1f125SJeremy L Thompson   ierr = CeedFree(&impl->e_vecs_full); CeedChkBackend(ierr);
7544fc1f125SJeremy L Thompson   ierr = CeedFree(&impl->input_states); CeedChkBackend(ierr);
755f10650afSjeremylt 
7564fc1f125SJeremy L Thompson   for (CeedInt i=0; i<impl->num_inputs; i++) {
757d1d35e2fSjeremylt     ierr = CeedVectorDestroy(&impl->e_vecs_in[i]); CeedChkBackend(ierr);
758d1d35e2fSjeremylt     ierr = CeedVectorDestroy(&impl->q_vecs_in[i]); CeedChkBackend(ierr);
759f10650afSjeremylt   }
760d1d35e2fSjeremylt   ierr = CeedFree(&impl->e_vecs_in); CeedChkBackend(ierr);
761d1d35e2fSjeremylt   ierr = CeedFree(&impl->q_vecs_in); CeedChkBackend(ierr);
762f10650afSjeremylt 
7634fc1f125SJeremy L Thompson   for (CeedInt i=0; i<impl->num_outputs; i++) {
764d1d35e2fSjeremylt     ierr = CeedVectorDestroy(&impl->e_vecs_out[i]); CeedChkBackend(ierr);
765d1d35e2fSjeremylt     ierr = CeedVectorDestroy(&impl->q_vecs_out[i]); CeedChkBackend(ierr);
766f10650afSjeremylt   }
767d1d35e2fSjeremylt   ierr = CeedFree(&impl->e_vecs_out); CeedChkBackend(ierr);
768d1d35e2fSjeremylt   ierr = CeedFree(&impl->q_vecs_out); CeedChkBackend(ierr);
769f10650afSjeremylt 
770bb219a0fSJeremy L Thompson   // QFunction assembly data
7714fc1f125SJeremy L Thompson   for (CeedInt i=0; i<impl->num_active_in; i++) {
772bb219a0fSJeremy L Thompson     ierr = CeedVectorDestroy(&impl->qf_active_in[i]); CeedChkBackend(ierr);
773bb219a0fSJeremy L Thompson   }
774bb219a0fSJeremy L Thompson   ierr = CeedFree(&impl->qf_active_in); CeedChkBackend(ierr);
7754fc1f125SJeremy L Thompson   ierr = CeedVectorDestroy(&impl->qf_l_vec); CeedChkBackend(ierr);
776bb219a0fSJeremy L Thompson   ierr = CeedElemRestrictionDestroy(&impl->qf_blk_rstr); CeedChkBackend(ierr);
777bb219a0fSJeremy L Thompson 
778e15f9bd0SJeremy L Thompson   ierr = CeedFree(&impl); CeedChkBackend(ierr);
779e15f9bd0SJeremy L Thompson   return CEED_ERROR_SUCCESS;
780f10650afSjeremylt }
781f10650afSjeremylt 
782f10650afSjeremylt //------------------------------------------------------------------------------
783f10650afSjeremylt // Operator Create
784f10650afSjeremylt //------------------------------------------------------------------------------
7854a2e7687Sjeremylt int CeedOperatorCreate_Blocked(CeedOperator op) {
7864a2e7687Sjeremylt   int ierr;
787fe2413ffSjeremylt   Ceed ceed;
788e15f9bd0SJeremy L Thompson   ierr = CeedOperatorGetCeed(op, &ceed); CeedChkBackend(ierr);
7894ce2993fSjeremylt   CeedOperator_Blocked *impl;
7904a2e7687Sjeremylt 
791e15f9bd0SJeremy L Thompson   ierr = CeedCalloc(1, &impl); CeedChkBackend(ierr);
792e15f9bd0SJeremy L Thompson   ierr = CeedOperatorSetData(op, impl); CeedChkBackend(ierr);
793fe2413ffSjeremylt 
79480ac2e43SJeremy L Thompson   ierr = CeedSetBackendFunction(ceed, "Operator", op, "LinearAssembleQFunction",
79580ac2e43SJeremy L Thompson                                 CeedOperatorLinearAssembleQFunction_Blocked);
796e15f9bd0SJeremy L Thompson   CeedChkBackend(ierr);
79770a7ffb3SJeremy L Thompson   ierr = CeedSetBackendFunction(ceed, "Operator", op,
79870a7ffb3SJeremy L Thompson                                 "LinearAssembleQFunctionUpdate",
79970a7ffb3SJeremy L Thompson                                 CeedOperatorLinearAssembleQFunctionUpdate_Blocked);
80070a7ffb3SJeremy L Thompson   CeedChkBackend(ierr);
801cae8b89aSjeremylt   ierr = CeedSetBackendFunction(ceed, "Operator", op, "ApplyAdd",
802e15f9bd0SJeremy L Thompson                                 CeedOperatorApplyAdd_Blocked); CeedChkBackend(ierr);
803fe2413ffSjeremylt   ierr = CeedSetBackendFunction(ceed, "Operator", op, "Destroy",
804e15f9bd0SJeremy L Thompson                                 CeedOperatorDestroy_Blocked); CeedChkBackend(ierr);
805e15f9bd0SJeremy L Thompson   return CEED_ERROR_SUCCESS;
8064a2e7687Sjeremylt }
807f10650afSjeremylt //------------------------------------------------------------------------------
808