xref: /libCEED/backends/opt/ceed-opt-operator.c (revision cfa13e89a200bdd88bed8f0c57a43555c66a749e)
1 // Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors.
2 // All Rights Reserved. See the top-level LICENSE and NOTICE files for details.
3 //
4 // SPDX-License-Identifier: BSD-2-Clause
5 //
6 // This file is part of CEED:  http://github.com/ceed
7 
8 #include <ceed.h>
9 #include <ceed/backend.h>
10 #include <stdbool.h>
11 #include <stdint.h>
12 #include <string.h>
13 
14 #include "ceed-opt.h"
15 
16 //------------------------------------------------------------------------------
17 // Setup Input/Output Fields
18 //------------------------------------------------------------------------------
19 static int CeedOperatorSetupFields_Opt(CeedQFunction qf, CeedOperator op, bool is_input, const CeedInt block_size, CeedElemRestriction *block_rstr,
20                                        CeedVector *e_vecs_full, CeedVector *e_vecs, CeedVector *q_vecs, CeedInt start_e, CeedInt num_fields,
21                                        CeedInt Q) {
22   Ceed                ceed;
23   CeedSize            e_size, q_size;
24   CeedInt             num_comp, size, P;
25   CeedQFunctionField *qf_fields;
26   CeedOperatorField  *op_fields;
27 
28   CeedCallBackend(CeedOperatorGetCeed(op, &ceed));
29   if (is_input) {
30     CeedCallBackend(CeedOperatorGetFields(op, NULL, &op_fields, NULL, NULL));
31     CeedCallBackend(CeedQFunctionGetFields(qf, NULL, &qf_fields, NULL, NULL));
32   } else {
33     CeedCallBackend(CeedOperatorGetFields(op, NULL, NULL, NULL, &op_fields));
34     CeedCallBackend(CeedQFunctionGetFields(qf, NULL, NULL, NULL, &qf_fields));
35   }
36 
37   // Loop over fields
38   for (CeedInt i = 0; i < num_fields; i++) {
39     CeedEvalMode eval_mode;
40     CeedBasis    basis;
41 
42     CeedCallBackend(CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode));
43     if (eval_mode != CEED_EVAL_WEIGHT) {
44       Ceed                ceed_rstr;
45       CeedSize            l_size;
46       CeedInt             num_elem, elem_size, comp_stride;
47       CeedRestrictionType rstr_type;
48       CeedElemRestriction rstr;
49 
50       CeedCallBackend(CeedOperatorFieldGetElemRestriction(op_fields[i], &rstr));
51       CeedCallBackend(CeedElemRestrictionGetCeed(rstr, &ceed_rstr));
52       CeedCallBackend(CeedElemRestrictionGetNumElements(rstr, &num_elem));
53       CeedCallBackend(CeedElemRestrictionGetElementSize(rstr, &elem_size));
54       CeedCallBackend(CeedElemRestrictionGetLVectorSize(rstr, &l_size));
55       CeedCallBackend(CeedElemRestrictionGetNumComponents(rstr, &num_comp));
56       CeedCallBackend(CeedElemRestrictionGetCompStride(rstr, &comp_stride));
57 
58       CeedCallBackend(CeedElemRestrictionGetType(rstr, &rstr_type));
59       switch (rstr_type) {
60         case CEED_RESTRICTION_STANDARD: {
61           const CeedInt *offsets = NULL;
62 
63           CeedCallBackend(CeedElemRestrictionGetOffsets(rstr, CEED_MEM_HOST, &offsets));
64           CeedCallBackend(CeedElemRestrictionCreateBlocked(ceed_rstr, num_elem, elem_size, block_size, num_comp, comp_stride, l_size, CEED_MEM_HOST,
65                                                            CEED_COPY_VALUES, offsets, &block_rstr[i + start_e]));
66           CeedCallBackend(CeedElemRestrictionRestoreOffsets(rstr, &offsets));
67         } break;
68         case CEED_RESTRICTION_ORIENTED: {
69           const bool    *orients = NULL;
70           const CeedInt *offsets = NULL;
71 
72           CeedCallBackend(CeedElemRestrictionGetOffsets(rstr, CEED_MEM_HOST, &offsets));
73           CeedCallBackend(CeedElemRestrictionGetOrientations(rstr, CEED_MEM_HOST, &orients));
74           CeedCallBackend(CeedElemRestrictionCreateBlockedOriented(ceed_rstr, num_elem, elem_size, block_size, num_comp, comp_stride, l_size,
75                                                                    CEED_MEM_HOST, CEED_COPY_VALUES, offsets, orients, &block_rstr[i + start_e]));
76           CeedCallBackend(CeedElemRestrictionRestoreOffsets(rstr, &offsets));
77           CeedCallBackend(CeedElemRestrictionRestoreOrientations(rstr, &orients));
78         } break;
79         case CEED_RESTRICTION_CURL_ORIENTED: {
80           const CeedInt8 *curl_orients = NULL;
81           const CeedInt  *offsets      = NULL;
82 
83           CeedCallBackend(CeedElemRestrictionGetOffsets(rstr, CEED_MEM_HOST, &offsets));
84           CeedCallBackend(CeedElemRestrictionGetCurlOrientations(rstr, CEED_MEM_HOST, &curl_orients));
85           CeedCallBackend(CeedElemRestrictionCreateBlockedCurlOriented(ceed_rstr, num_elem, elem_size, block_size, num_comp, comp_stride, l_size,
86                                                                        CEED_MEM_HOST, CEED_COPY_VALUES, offsets, curl_orients,
87                                                                        &block_rstr[i + start_e]));
88           CeedCallBackend(CeedElemRestrictionRestoreOffsets(rstr, &offsets));
89           CeedCallBackend(CeedElemRestrictionRestoreCurlOrientations(rstr, &curl_orients));
90         } break;
91         case CEED_RESTRICTION_STRIDED: {
92           CeedInt strides[3];
93 
94           CeedCallBackend(CeedElemRestrictionGetStrides(rstr, &strides));
95           CeedCallBackend(CeedElemRestrictionCreateBlockedStrided(ceed_rstr, num_elem, elem_size, block_size, num_comp, l_size, strides,
96                                                                   &block_rstr[i + start_e]));
97         } break;
98       }
99       CeedCallBackend(CeedElemRestrictionCreateVector(block_rstr[i + start_e], NULL, &e_vecs_full[i + start_e]));
100     }
101 
102     switch (eval_mode) {
103       case CEED_EVAL_NONE:
104         CeedCallBackend(CeedQFunctionFieldGetSize(qf_fields[i], &size));
105         e_size = (CeedSize)Q * size * block_size;
106         CeedCallBackend(CeedVectorCreate(ceed, e_size, &e_vecs[i]));
107         q_size = (CeedSize)Q * size * block_size;
108         CeedCallBackend(CeedVectorCreate(ceed, q_size, &q_vecs[i]));
109         break;
110       case CEED_EVAL_INTERP:
111       case CEED_EVAL_GRAD:
112       case CEED_EVAL_DIV:
113       case CEED_EVAL_CURL:
114         CeedCallBackend(CeedOperatorFieldGetBasis(op_fields[i], &basis));
115         CeedCallBackend(CeedQFunctionFieldGetSize(qf_fields[i], &size));
116         CeedCallBackend(CeedBasisGetNumNodes(basis, &P));
117         CeedCallBackend(CeedBasisGetNumComponents(basis, &num_comp));
118         e_size = (CeedSize)P * num_comp * block_size;
119         CeedCallBackend(CeedVectorCreate(ceed, e_size, &e_vecs[i]));
120         q_size = (CeedSize)Q * size * block_size;
121         CeedCallBackend(CeedVectorCreate(ceed, q_size, &q_vecs[i]));
122         break;
123       case CEED_EVAL_WEIGHT:  // Only on input fields
124         CeedCallBackend(CeedOperatorFieldGetBasis(op_fields[i], &basis));
125         q_size = (CeedSize)Q * block_size;
126         CeedCallBackend(CeedVectorCreate(ceed, q_size, &q_vecs[i]));
127         CeedCallBackend(CeedBasisApply(basis, block_size, CEED_NOTRANSPOSE, CEED_EVAL_WEIGHT, CEED_VECTOR_NONE, q_vecs[i]));
128         break;
129     }
130     if (is_input && e_vecs[i]) {
131       CeedCallBackend(CeedVectorSetArray(e_vecs[i], CEED_MEM_HOST, CEED_COPY_VALUES, NULL));
132     }
133   }
134   return CEED_ERROR_SUCCESS;
135 }
136 
137 //------------------------------------------------------------------------------
138 // Setup Operator
139 //------------------------------------------------------------------------------
140 static int CeedOperatorSetup_Opt(CeedOperator op) {
141   Ceed                ceed;
142   bool                is_setup_done;
143   Ceed_Opt           *ceed_impl;
144   CeedInt             Q, num_input_fields, num_output_fields;
145   CeedQFunctionField *qf_input_fields, *qf_output_fields;
146   CeedQFunction       qf;
147   CeedOperatorField  *op_input_fields, *op_output_fields;
148   CeedOperator_Opt   *impl;
149 
150   CeedCallBackend(CeedOperatorIsSetupDone(op, &is_setup_done));
151   if (is_setup_done) return CEED_ERROR_SUCCESS;
152 
153   CeedCallBackend(CeedOperatorGetCeed(op, &ceed));
154   CeedCallBackend(CeedGetData(ceed, &ceed_impl));
155   CeedCallBackend(CeedOperatorGetData(op, &impl));
156   CeedCallBackend(CeedOperatorGetQFunction(op, &qf));
157   CeedCallBackend(CeedOperatorGetNumQuadraturePoints(op, &Q));
158   CeedCallBackend(CeedQFunctionIsIdentity(qf, &impl->is_identity_qf));
159   CeedCallBackend(CeedOperatorGetFields(op, &num_input_fields, &op_input_fields, &num_output_fields, &op_output_fields));
160   CeedCallBackend(CeedQFunctionGetFields(qf, NULL, &qf_input_fields, NULL, &qf_output_fields));
161   const CeedInt block_size = ceed_impl->block_size;
162 
163   // Allocate
164   CeedCallBackend(CeedCalloc(num_input_fields + num_output_fields, &impl->block_rstr));
165   CeedCallBackend(CeedCalloc(num_input_fields + num_output_fields, &impl->e_vecs_full));
166 
167   CeedCallBackend(CeedCalloc(CEED_FIELD_MAX, &impl->input_states));
168   CeedCallBackend(CeedCalloc(CEED_FIELD_MAX, &impl->e_vecs_in));
169   CeedCallBackend(CeedCalloc(CEED_FIELD_MAX, &impl->e_vecs_out));
170   CeedCallBackend(CeedCalloc(CEED_FIELD_MAX, &impl->q_vecs_in));
171   CeedCallBackend(CeedCalloc(CEED_FIELD_MAX, &impl->q_vecs_out));
172 
173   impl->num_inputs  = num_input_fields;
174   impl->num_outputs = num_output_fields;
175 
176   // Set up infield and outfield pointer arrays
177   // Infields
178   CeedCallBackend(CeedOperatorSetupFields_Opt(qf, op, true, block_size, impl->block_rstr, impl->e_vecs_full, impl->e_vecs_in, impl->q_vecs_in, 0,
179                                               num_input_fields, Q));
180   // Outfields
181   CeedCallBackend(CeedOperatorSetupFields_Opt(qf, op, false, block_size, impl->block_rstr, impl->e_vecs_full, impl->e_vecs_out, impl->q_vecs_out,
182                                               num_input_fields, num_output_fields, Q));
183 
184   // Identity QFunctions
185   if (impl->is_identity_qf) {
186     CeedEvalMode        in_mode, out_mode;
187     CeedQFunctionField *in_fields, *out_fields;
188 
189     CeedCallBackend(CeedQFunctionGetFields(qf, NULL, &in_fields, NULL, &out_fields));
190     CeedCallBackend(CeedQFunctionFieldGetEvalMode(in_fields[0], &in_mode));
191     CeedCallBackend(CeedQFunctionFieldGetEvalMode(out_fields[0], &out_mode));
192 
193     if (in_mode == CEED_EVAL_NONE && out_mode == CEED_EVAL_NONE) {
194       impl->is_identity_rstr_op = true;
195     } else {
196       CeedCallBackend(CeedVectorReferenceCopy(impl->q_vecs_in[0], &impl->q_vecs_out[0]));
197     }
198   }
199 
200   CeedCallBackend(CeedOperatorSetSetupDone(op));
201   return CEED_ERROR_SUCCESS;
202 }
203 
204 //------------------------------------------------------------------------------
205 // Setup Input Fields
206 //------------------------------------------------------------------------------
207 static inline int CeedOperatorSetupInputs_Opt(CeedInt num_input_fields, CeedQFunctionField *qf_input_fields, CeedOperatorField *op_input_fields,
208                                               CeedVector in_vec, CeedScalar *e_data[2 * CEED_FIELD_MAX], CeedOperator_Opt *impl,
209                                               CeedRequest *request) {
210   for (CeedInt i = 0; i < num_input_fields; i++) {
211     uint64_t     state;
212     CeedEvalMode eval_mode;
213     CeedVector   vec;
214 
215     CeedCallBackend(CeedQFunctionFieldGetEvalMode(qf_input_fields[i], &eval_mode));
216     if (eval_mode == CEED_EVAL_WEIGHT) {  // Skip
217     } else {
218       // Get input vector
219       CeedCallBackend(CeedOperatorFieldGetVector(op_input_fields[i], &vec));
220       if (vec != CEED_VECTOR_ACTIVE) {
221         // Restrict
222         CeedCallBackend(CeedVectorGetState(vec, &state));
223         if (state != impl->input_states[i]) {
224           CeedCallBackend(CeedElemRestrictionApply(impl->block_rstr[i], CEED_NOTRANSPOSE, vec, impl->e_vecs_full[i], request));
225           impl->input_states[i] = state;
226         }
227         // Get evec
228         CeedCallBackend(CeedVectorGetArrayRead(impl->e_vecs_full[i], CEED_MEM_HOST, (const CeedScalar **)&e_data[i]));
229       } else {
230         // Set Qvec for CEED_EVAL_NONE
231         if (eval_mode == CEED_EVAL_NONE) {
232           CeedCallBackend(CeedVectorGetArrayRead(impl->e_vecs_in[i], CEED_MEM_HOST, (const CeedScalar **)&e_data[i]));
233           CeedCallBackend(CeedVectorSetArray(impl->q_vecs_in[i], CEED_MEM_HOST, CEED_USE_POINTER, e_data[i]));
234           CeedCallBackend(CeedVectorRestoreArrayRead(impl->e_vecs_in[i], (const CeedScalar **)&e_data[i]));
235         }
236       }
237     }
238   }
239   return CEED_ERROR_SUCCESS;
240 }
241 
242 //------------------------------------------------------------------------------
243 // Input Basis Action
244 //------------------------------------------------------------------------------
245 static inline int CeedOperatorInputBasis_Opt(CeedInt e, CeedInt Q, CeedQFunctionField *qf_input_fields, CeedOperatorField *op_input_fields,
246                                              CeedInt num_input_fields, CeedInt block_size, CeedVector in_vec, bool skip_active,
247                                              CeedScalar *e_data[2 * CEED_FIELD_MAX], CeedOperator_Opt *impl, CeedRequest *request) {
248   for (CeedInt i = 0; i < num_input_fields; i++) {
249     bool                is_active_input = false;
250     CeedInt             elem_size, size, num_comp;
251     CeedEvalMode        eval_mode;
252     CeedVector          vec;
253     CeedElemRestriction elem_rstr;
254     CeedBasis           basis;
255 
256     CeedCallBackend(CeedOperatorFieldGetVector(op_input_fields[i], &vec));
257     // Skip active input
258     is_active_input = vec == CEED_VECTOR_ACTIVE;
259     if (skip_active && is_active_input) continue;
260 
261     // Get elem_size, eval_mode, size
262     CeedCallBackend(CeedOperatorFieldGetElemRestriction(op_input_fields[i], &elem_rstr));
263     CeedCallBackend(CeedElemRestrictionGetElementSize(elem_rstr, &elem_size));
264     CeedCallBackend(CeedQFunctionFieldGetEvalMode(qf_input_fields[i], &eval_mode));
265     CeedCallBackend(CeedQFunctionFieldGetSize(qf_input_fields[i], &size));
266     // Restrict block active input
267     if (is_active_input) {
268       CeedCallBackend(CeedElemRestrictionApplyBlock(impl->block_rstr[i], e / block_size, CEED_NOTRANSPOSE, in_vec, impl->e_vecs_in[i], request));
269     }
270     // Basis action
271     switch (eval_mode) {
272       case CEED_EVAL_NONE:
273         if (!is_active_input) {
274           CeedCallBackend(CeedVectorSetArray(impl->q_vecs_in[i], CEED_MEM_HOST, CEED_USE_POINTER, &e_data[i][e * Q * size]));
275         }
276         break;
277       case CEED_EVAL_INTERP:
278       case CEED_EVAL_GRAD:
279       case CEED_EVAL_DIV:
280       case CEED_EVAL_CURL:
281         CeedCallBackend(CeedOperatorFieldGetBasis(op_input_fields[i], &basis));
282         if (!is_active_input) {
283           CeedCallBackend(CeedBasisGetNumComponents(basis, &num_comp));
284           CeedCallBackend(CeedVectorSetArray(impl->e_vecs_in[i], CEED_MEM_HOST, CEED_USE_POINTER, &e_data[i][e * elem_size * num_comp]));
285         }
286         CeedCallBackend(CeedBasisApply(basis, block_size, CEED_NOTRANSPOSE, eval_mode, impl->e_vecs_in[i], impl->q_vecs_in[i]));
287         break;
288       case CEED_EVAL_WEIGHT:
289         break;  // No action
290     }
291   }
292   return CEED_ERROR_SUCCESS;
293 }
294 
295 //------------------------------------------------------------------------------
296 // Output Basis Action
297 //------------------------------------------------------------------------------
298 static inline int CeedOperatorOutputBasis_Opt(CeedInt e, CeedInt Q, CeedQFunctionField *qf_output_fields, CeedOperatorField *op_output_fields,
299                                               CeedInt block_size, CeedInt num_input_fields, CeedInt num_output_fields, CeedOperator op,
300                                               CeedVector out_vec, CeedOperator_Opt *impl, CeedRequest *request) {
301   for (CeedInt i = 0; i < num_output_fields; i++) {
302     CeedEvalMode        eval_mode;
303     CeedVector          vec;
304     CeedElemRestriction elem_rstr;
305     CeedBasis           basis;
306 
307     // Get elem_size, eval_mode, size
308     CeedCallBackend(CeedOperatorFieldGetElemRestriction(op_output_fields[i], &elem_rstr));
309     CeedCallBackend(CeedQFunctionFieldGetEvalMode(qf_output_fields[i], &eval_mode));
310     // Basis action
311     switch (eval_mode) {
312       case CEED_EVAL_NONE:
313         break;  // No action
314       case CEED_EVAL_INTERP:
315       case CEED_EVAL_GRAD:
316       case CEED_EVAL_DIV:
317       case CEED_EVAL_CURL:
318         CeedCallBackend(CeedOperatorFieldGetBasis(op_output_fields[i], &basis));
319         CeedCallBackend(CeedBasisApply(basis, block_size, CEED_TRANSPOSE, eval_mode, impl->q_vecs_out[i], impl->e_vecs_out[i]));
320         break;
321       // LCOV_EXCL_START
322       case CEED_EVAL_WEIGHT: {
323         Ceed ceed;
324         CeedCallBackend(CeedOperatorGetCeed(op, &ceed));
325         return CeedError(ceed, CEED_ERROR_BACKEND, "CEED_EVAL_WEIGHT cannot be an output evaluation mode");
326         // LCOV_EXCL_STOP
327       }
328     }
329     // Restrict output block
330     // Get output vector
331     CeedCallBackend(CeedOperatorFieldGetVector(op_output_fields[i], &vec));
332     if (vec == CEED_VECTOR_ACTIVE) vec = out_vec;
333     // Restrict
334     CeedCallBackend(
335         CeedElemRestrictionApplyBlock(impl->block_rstr[i + impl->num_inputs], e / block_size, CEED_TRANSPOSE, impl->e_vecs_out[i], vec, request));
336   }
337   return CEED_ERROR_SUCCESS;
338 }
339 
340 //------------------------------------------------------------------------------
341 // Restore Input Vectors
342 //------------------------------------------------------------------------------
343 static inline int CeedOperatorRestoreInputs_Opt(CeedInt num_input_fields, CeedQFunctionField *qf_input_fields, CeedOperatorField *op_input_fields,
344                                                 CeedScalar *e_data[2 * CEED_FIELD_MAX], CeedOperator_Opt *impl) {
345   for (CeedInt i = 0; i < num_input_fields; i++) {
346     CeedEvalMode eval_mode;
347     CeedVector   vec;
348 
349     CeedCallBackend(CeedQFunctionFieldGetEvalMode(qf_input_fields[i], &eval_mode));
350     CeedCallBackend(CeedOperatorFieldGetVector(op_input_fields[i], &vec));
351     if (eval_mode != CEED_EVAL_WEIGHT && vec != CEED_VECTOR_ACTIVE) {
352       CeedCallBackend(CeedVectorRestoreArrayRead(impl->e_vecs_full[i], (const CeedScalar **)&e_data[i]));
353     }
354   }
355   return CEED_ERROR_SUCCESS;
356 }
357 
358 //------------------------------------------------------------------------------
359 // Operator Apply
360 //------------------------------------------------------------------------------
361 static int CeedOperatorApplyAdd_Opt(CeedOperator op, CeedVector in_vec, CeedVector out_vec, CeedRequest *request) {
362   Ceed                ceed;
363   Ceed_Opt           *ceed_impl;
364   CeedInt             Q, num_input_fields, num_output_fields, num_elem;
365   CeedEvalMode        eval_mode;
366   CeedScalar         *e_data[2 * CEED_FIELD_MAX] = {0};
367   CeedQFunctionField *qf_input_fields, *qf_output_fields;
368   CeedQFunction       qf;
369   CeedOperatorField  *op_input_fields, *op_output_fields;
370   CeedOperator_Opt   *impl;
371 
372   CeedCallBackend(CeedOperatorGetCeed(op, &ceed));
373   CeedCallBackend(CeedGetData(ceed, &ceed_impl));
374   CeedCallBackend(CeedOperatorGetData(op, &impl));
375   CeedCallBackend(CeedOperatorGetNumElements(op, &num_elem));
376   CeedCallBackend(CeedOperatorGetNumQuadraturePoints(op, &Q));
377   CeedCallBackend(CeedOperatorGetQFunction(op, &qf));
378   CeedCallBackend(CeedOperatorGetFields(op, &num_input_fields, &op_input_fields, &num_output_fields, &op_output_fields));
379   CeedCallBackend(CeedQFunctionGetFields(qf, NULL, &qf_input_fields, NULL, &qf_output_fields));
380   const CeedInt block_size = ceed_impl->block_size;
381   const CeedInt num_blocks = (num_elem / block_size) + !!(num_elem % block_size);
382 
383   // Setup
384   CeedCallBackend(CeedOperatorSetup_Opt(op));
385 
386   // Restriction only operator
387   if (impl->is_identity_rstr_op) {
388     for (CeedInt b = 0; b < num_blocks; b++) {
389       CeedCallBackend(CeedElemRestrictionApplyBlock(impl->block_rstr[0], b, CEED_NOTRANSPOSE, in_vec, impl->e_vecs_in[0], request));
390       CeedCallBackend(CeedElemRestrictionApplyBlock(impl->block_rstr[1], b, CEED_TRANSPOSE, impl->e_vecs_in[0], out_vec, request));
391     }
392     return CEED_ERROR_SUCCESS;
393   }
394 
395   // Input Evecs and Restriction
396   CeedCallBackend(CeedOperatorSetupInputs_Opt(num_input_fields, qf_input_fields, op_input_fields, in_vec, e_data, impl, request));
397 
398   // Output Lvecs, Evecs, and Qvecs
399   for (CeedInt i = 0; i < num_output_fields; i++) {
400     // Set Qvec if needed
401     CeedCallBackend(CeedQFunctionFieldGetEvalMode(qf_output_fields[i], &eval_mode));
402     if (eval_mode == CEED_EVAL_NONE) {
403       // Set qvec to single block evec
404       CeedCallBackend(CeedVectorGetArrayWrite(impl->e_vecs_out[i], CEED_MEM_HOST, &e_data[i + num_input_fields]));
405       CeedCallBackend(CeedVectorSetArray(impl->q_vecs_out[i], CEED_MEM_HOST, CEED_USE_POINTER, e_data[i + num_input_fields]));
406       CeedCallBackend(CeedVectorRestoreArray(impl->e_vecs_out[i], &e_data[i + num_input_fields]));
407     }
408   }
409 
410   // Loop through elements
411   for (CeedInt e = 0; e < num_blocks * block_size; e += block_size) {
412     // Input basis apply
413     CeedCallBackend(
414         CeedOperatorInputBasis_Opt(e, Q, qf_input_fields, op_input_fields, num_input_fields, block_size, in_vec, false, e_data, impl, request));
415 
416     // Q function
417     if (!impl->is_identity_qf) {
418       CeedCallBackend(CeedQFunctionApply(qf, Q * block_size, impl->q_vecs_in, impl->q_vecs_out));
419     }
420 
421     // Output basis apply and restriction
422     CeedCallBackend(CeedOperatorOutputBasis_Opt(e, Q, qf_output_fields, op_output_fields, block_size, num_input_fields, num_output_fields, op,
423                                                 out_vec, impl, request));
424   }
425 
426   // Restore input arrays
427   CeedCallBackend(CeedOperatorRestoreInputs_Opt(num_input_fields, qf_input_fields, op_input_fields, e_data, impl));
428   return CEED_ERROR_SUCCESS;
429 }
430 
431 //------------------------------------------------------------------------------
432 // Core code for linear QFunction assembly
433 //------------------------------------------------------------------------------
434 static inline int CeedOperatorLinearAssembleQFunctionCore_Opt(CeedOperator op, bool build_objects, CeedVector *assembled, CeedElemRestriction *rstr,
435                                                               CeedRequest *request) {
436   Ceed                ceed;
437   Ceed_Opt           *ceed_impl;
438   CeedSize            q_size;
439   CeedInt             Q, num_input_fields, num_output_fields, num_elem, size;
440   CeedScalar         *l_vec_array, *e_data[2 * CEED_FIELD_MAX] = {0};
441   CeedQFunctionField *qf_input_fields, *qf_output_fields;
442   CeedQFunction       qf;
443   CeedOperatorField  *op_input_fields, *op_output_fields;
444   CeedOperator_Opt   *impl;
445 
446   CeedCallBackend(CeedOperatorGetCeed(op, &ceed));
447   CeedCallBackend(CeedGetData(ceed, &ceed_impl));
448   CeedCallBackend(CeedOperatorGetData(op, &impl));
449   CeedCallBackend(CeedOperatorGetNumElements(op, &num_elem));
450   CeedCallBackend(CeedOperatorGetNumQuadraturePoints(op, &Q));
451   CeedCallBackend(CeedOperatorGetQFunction(op, &qf));
452   CeedCallBackend(CeedOperatorGetFields(op, &num_input_fields, &op_input_fields, &num_output_fields, &op_output_fields));
453   CeedCallBackend(CeedQFunctionGetFields(qf, NULL, &qf_input_fields, NULL, &qf_output_fields));
454   const CeedInt       block_size    = ceed_impl->block_size;
455   const CeedInt       num_blocks    = (num_elem / block_size) + !!(num_elem % block_size);
456   CeedInt             num_active_in = impl->num_active_in, num_active_out = impl->num_active_out;
457   CeedVector          l_vec      = impl->qf_l_vec;
458   CeedVector         *active_in  = impl->qf_active_in;
459   CeedElemRestriction block_rstr = impl->qf_block_rstr;
460 
461   // Setup
462   CeedCallBackend(CeedOperatorSetup_Opt(op));
463 
464   // Check for identity
465   CeedCheck(!impl->is_identity_qf, ceed, CEED_ERROR_BACKEND, "Assembling identity QFunctions not supported");
466 
467   // Input Evecs and Restriction
468   CeedCallBackend(CeedOperatorSetupInputs_Opt(num_input_fields, qf_input_fields, op_input_fields, NULL, e_data, impl, request));
469 
470   // Count number of active input fields
471   if (!num_active_in) {
472     for (CeedInt i = 0; i < num_input_fields; i++) {
473       CeedScalar *q_vec_array;
474       CeedVector  vec;
475 
476       // Get input vector
477       CeedCallBackend(CeedOperatorFieldGetVector(op_input_fields[i], &vec));
478       // Check if active input
479       if (vec == CEED_VECTOR_ACTIVE) {
480         CeedCallBackend(CeedQFunctionFieldGetSize(qf_input_fields[i], &size));
481         CeedCallBackend(CeedVectorSetValue(impl->q_vecs_in[i], 0.0));
482         CeedCallBackend(CeedVectorGetArray(impl->q_vecs_in[i], CEED_MEM_HOST, &q_vec_array));
483         CeedCallBackend(CeedRealloc(num_active_in + size, &active_in));
484         for (CeedInt field = 0; field < size; field++) {
485           q_size = (CeedSize)Q * block_size;
486           CeedCallBackend(CeedVectorCreate(ceed, q_size, &active_in[num_active_in + field]));
487           CeedCallBackend(
488               CeedVectorSetArray(active_in[num_active_in + field], CEED_MEM_HOST, CEED_USE_POINTER, &q_vec_array[field * Q * block_size]));
489         }
490         num_active_in += size;
491         CeedCallBackend(CeedVectorRestoreArray(impl->q_vecs_in[i], &q_vec_array));
492       }
493     }
494     impl->num_active_in = num_active_in;
495     impl->qf_active_in  = active_in;
496   }
497 
498   // Count number of active output fields
499   if (!num_active_out) {
500     for (CeedInt i = 0; i < num_output_fields; i++) {
501       CeedVector vec;
502 
503       // Get output vector
504       CeedCallBackend(CeedOperatorFieldGetVector(op_output_fields[i], &vec));
505       // Check if active output
506       if (vec == CEED_VECTOR_ACTIVE) {
507         CeedCallBackend(CeedQFunctionFieldGetSize(qf_output_fields[i], &size));
508         num_active_out += size;
509       }
510     }
511     impl->num_active_out = num_active_out;
512   }
513 
514   // Check sizes
515   CeedCheck(num_active_in > 0 && num_active_out > 0, ceed, CEED_ERROR_BACKEND, "Cannot assemble QFunction without active inputs and outputs");
516 
517   // Setup l_vec
518   if (!l_vec) {
519     const CeedSize l_size = (CeedSize)block_size * Q * num_active_in * num_active_out;
520 
521     CeedCallBackend(CeedVectorCreate(ceed, l_size, &l_vec));
522     CeedCallBackend(CeedVectorSetValue(l_vec, 0.0));
523     impl->qf_l_vec = l_vec;
524   }
525 
526   // Output blocked restriction
527   if (!block_rstr) {
528     CeedInt strides[3] = {1, Q, num_active_in * num_active_out * Q};
529 
530     CeedCallBackend(CeedElemRestrictionCreateBlockedStrided(ceed, num_elem, Q, block_size, num_active_in * num_active_out,
531                                                             num_active_in * num_active_out * num_elem * Q, strides, &block_rstr));
532     impl->qf_block_rstr = block_rstr;
533   }
534 
535   // Build objects if needed
536   if (build_objects) {
537     const CeedSize l_size     = (CeedSize)num_elem * Q * num_active_in * num_active_out;
538     CeedInt        strides[3] = {1, Q, num_active_in * num_active_out * Q};
539 
540     // Create output restriction
541     CeedCallBackend(CeedElemRestrictionCreateStrided(ceed, num_elem, Q, num_active_in * num_active_out, num_active_in * num_active_out * num_elem * Q,
542                                                      strides, rstr));
543     // Create assembled vector
544 
545     CeedCallBackend(CeedVectorCreate(ceed, l_size, assembled));
546   }
547 
548   // Loop through elements
549   CeedCallBackend(CeedVectorSetValue(*assembled, 0.0));
550   for (CeedInt e = 0; e < num_blocks * block_size; e += block_size) {
551     CeedCallBackend(CeedVectorGetArray(l_vec, CEED_MEM_HOST, &l_vec_array));
552 
553     // Input basis apply
554     CeedCallBackend(
555         CeedOperatorInputBasis_Opt(e, Q, qf_input_fields, op_input_fields, num_input_fields, block_size, NULL, true, e_data, impl, request));
556 
557     // Assemble QFunction
558     for (CeedInt in = 0; in < num_active_in; in++) {
559       // Set Inputs
560       CeedCallBackend(CeedVectorSetValue(active_in[in], 1.0));
561       if (num_active_in > 1) {
562         CeedCallBackend(CeedVectorSetValue(active_in[(in + num_active_in - 1) % num_active_in], 0.0));
563       }
564       // Set Outputs
565       for (CeedInt out = 0; out < num_output_fields; out++) {
566         CeedVector vec;
567 
568         // Get output vector
569         CeedCallBackend(CeedOperatorFieldGetVector(op_output_fields[out], &vec));
570         // Check if active output
571         if (vec == CEED_VECTOR_ACTIVE) {
572           CeedCallBackend(CeedVectorSetArray(impl->q_vecs_out[out], CEED_MEM_HOST, CEED_USE_POINTER, l_vec_array));
573           CeedCallBackend(CeedQFunctionFieldGetSize(qf_output_fields[out], &size));
574           l_vec_array += size * Q * block_size;  // Advance the pointer by the size of the output
575         }
576       }
577       // Apply QFunction
578       CeedCallBackend(CeedQFunctionApply(qf, Q * block_size, impl->q_vecs_in, impl->q_vecs_out));
579     }
580 
581     // Assemble into assembled vector
582     CeedCallBackend(CeedVectorRestoreArray(l_vec, &l_vec_array));
583     CeedCallBackend(CeedElemRestrictionApplyBlock(block_rstr, e / block_size, CEED_TRANSPOSE, l_vec, *assembled, request));
584   }
585 
586   // Un-set output Qvecs to prevent accidental overwrite of Assembled
587   for (CeedInt out = 0; out < num_output_fields; out++) {
588     CeedVector vec;
589 
590     // Get output vector
591     CeedCallBackend(CeedOperatorFieldGetVector(op_output_fields[out], &vec));
592     // Check if active output
593     if (vec == CEED_VECTOR_ACTIVE) {
594       CeedCallBackend(CeedVectorSetArray(impl->q_vecs_out[out], CEED_MEM_HOST, CEED_COPY_VALUES, NULL));
595     }
596   }
597 
598   // Restore input arrays
599   CeedCallBackend(CeedOperatorRestoreInputs_Opt(num_input_fields, qf_input_fields, op_input_fields, e_data, impl));
600   return CEED_ERROR_SUCCESS;
601 }
602 
603 //------------------------------------------------------------------------------
604 // Assemble Linear QFunction
605 //------------------------------------------------------------------------------
606 static int CeedOperatorLinearAssembleQFunction_Opt(CeedOperator op, CeedVector *assembled, CeedElemRestriction *rstr, CeedRequest *request) {
607   return CeedOperatorLinearAssembleQFunctionCore_Opt(op, true, assembled, rstr, request);
608 }
609 
610 //------------------------------------------------------------------------------
611 // Update Assembled Linear QFunction
612 //------------------------------------------------------------------------------
613 static int CeedOperatorLinearAssembleQFunctionUpdate_Opt(CeedOperator op, CeedVector assembled, CeedElemRestriction rstr, CeedRequest *request) {
614   return CeedOperatorLinearAssembleQFunctionCore_Opt(op, false, &assembled, &rstr, request);
615 }
616 
617 //------------------------------------------------------------------------------
618 // Operator Destroy
619 //------------------------------------------------------------------------------
620 static int CeedOperatorDestroy_Opt(CeedOperator op) {
621   CeedOperator_Opt *impl;
622 
623   CeedCallBackend(CeedOperatorGetData(op, &impl));
624   for (CeedInt i = 0; i < impl->num_inputs + impl->num_outputs; i++) {
625     CeedCallBackend(CeedElemRestrictionDestroy(&impl->block_rstr[i]));
626     CeedCallBackend(CeedVectorDestroy(&impl->e_vecs_full[i]));
627   }
628   CeedCallBackend(CeedFree(&impl->block_rstr));
629   CeedCallBackend(CeedFree(&impl->e_vecs_full));
630   CeedCallBackend(CeedFree(&impl->input_states));
631 
632   for (CeedInt i = 0; i < impl->num_inputs; i++) {
633     CeedCallBackend(CeedVectorDestroy(&impl->e_vecs_in[i]));
634     CeedCallBackend(CeedVectorDestroy(&impl->q_vecs_in[i]));
635   }
636   CeedCallBackend(CeedFree(&impl->e_vecs_in));
637   CeedCallBackend(CeedFree(&impl->q_vecs_in));
638 
639   for (CeedInt i = 0; i < impl->num_outputs; i++) {
640     CeedCallBackend(CeedVectorDestroy(&impl->e_vecs_out[i]));
641     CeedCallBackend(CeedVectorDestroy(&impl->q_vecs_out[i]));
642   }
643   CeedCallBackend(CeedFree(&impl->e_vecs_out));
644   CeedCallBackend(CeedFree(&impl->q_vecs_out));
645 
646   // QFunction assembly data
647   for (CeedInt i = 0; i < impl->num_active_in; i++) {
648     CeedCallBackend(CeedVectorDestroy(&impl->qf_active_in[i]));
649   }
650   CeedCallBackend(CeedFree(&impl->qf_active_in));
651   CeedCallBackend(CeedVectorDestroy(&impl->qf_l_vec));
652   CeedCallBackend(CeedElemRestrictionDestroy(&impl->qf_block_rstr));
653 
654   CeedCallBackend(CeedFree(&impl));
655   return CEED_ERROR_SUCCESS;
656 }
657 
658 //------------------------------------------------------------------------------
659 // Operator Create
660 //------------------------------------------------------------------------------
661 int CeedOperatorCreate_Opt(CeedOperator op) {
662   Ceed              ceed;
663   Ceed_Opt         *ceed_impl;
664   CeedOperator_Opt *impl;
665 
666   CeedCallBackend(CeedOperatorGetCeed(op, &ceed));
667   CeedCallBackend(CeedGetData(ceed, &ceed_impl));
668   const CeedInt block_size = ceed_impl->block_size;
669 
670   CeedCallBackend(CeedCalloc(1, &impl));
671   CeedCallBackend(CeedOperatorSetData(op, impl));
672 
673   CeedCheck(block_size == 1 || block_size == 8, ceed, CEED_ERROR_BACKEND, "Opt backend cannot use blocksize: %" CeedInt_FMT, block_size);
674 
675   CeedCallBackend(CeedSetBackendFunction(ceed, "Operator", op, "LinearAssembleQFunction", CeedOperatorLinearAssembleQFunction_Opt));
676   CeedCallBackend(CeedSetBackendFunction(ceed, "Operator", op, "LinearAssembleQFunctionUpdate", CeedOperatorLinearAssembleQFunctionUpdate_Opt));
677   CeedCallBackend(CeedSetBackendFunction(ceed, "Operator", op, "ApplyAdd", CeedOperatorApplyAdd_Opt));
678   CeedCallBackend(CeedSetBackendFunction(ceed, "Operator", op, "Destroy", CeedOperatorDestroy_Opt));
679   return CEED_ERROR_SUCCESS;
680 }
681 
682 //------------------------------------------------------------------------------
683