xref: /libCEED/backends/ref/ceed-ref-operator.c (revision 243afec996543dd9d0cad1d190b7ec15127a478e)
1 // Copyright (c) 2017-2024, Lawrence Livermore National Security, LLC and other CEED contributors.
2 // All Rights Reserved. See the top-level LICENSE and NOTICE files for details.
3 //
4 // SPDX-License-Identifier: BSD-2-Clause
5 //
6 // This file is part of CEED:  http://github.com/ceed
7 
8 #include <ceed.h>
9 #include <ceed/backend.h>
10 #include <stdbool.h>
11 #include <stddef.h>
12 #include <stdint.h>
13 
14 #include "ceed-ref.h"
15 
16 //------------------------------------------------------------------------------
17 // Setup Input/Output Fields
18 //------------------------------------------------------------------------------
19 static int CeedOperatorSetupFields_Ref(CeedQFunction qf, CeedOperator op, bool is_input, bool *skip_rstr, CeedInt *e_data_out_indices,
20                                        bool *apply_add_basis, CeedVector *e_vecs_full, CeedVector *e_vecs, CeedVector *q_vecs, CeedInt start_e,
21                                        CeedInt num_fields, CeedInt Q) {
22   Ceed                ceed;
23   CeedSize            e_size, q_size;
24   CeedInt             num_comp, size, P;
25   CeedQFunctionField *qf_fields;
26   CeedOperatorField  *op_fields;
27 
28   {
29     Ceed ceed_parent;
30 
31     CeedCallBackend(CeedOperatorGetCeed(op, &ceed));
32     CeedCallBackend(CeedGetParent(ceed, &ceed_parent));
33     if (ceed_parent) ceed = ceed_parent;
34   }
35   if (is_input) {
36     CeedCallBackend(CeedOperatorGetFields(op, NULL, &op_fields, NULL, NULL));
37     CeedCallBackend(CeedQFunctionGetFields(qf, NULL, &qf_fields, NULL, NULL));
38   } else {
39     CeedCallBackend(CeedOperatorGetFields(op, NULL, NULL, NULL, &op_fields));
40     CeedCallBackend(CeedQFunctionGetFields(qf, NULL, NULL, NULL, &qf_fields));
41   }
42 
43   // Loop over fields
44   for (CeedInt i = 0; i < num_fields; i++) {
45     CeedEvalMode        eval_mode;
46     CeedElemRestriction elem_rstr;
47     CeedBasis           basis;
48 
49     CeedCallBackend(CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode));
50     if (eval_mode != CEED_EVAL_WEIGHT) {
51       CeedCallBackend(CeedOperatorFieldGetElemRestriction(op_fields[i], &elem_rstr));
52       CeedCallBackend(CeedElemRestrictionCreateVector(elem_rstr, NULL, &e_vecs_full[i + start_e]));
53       CeedCallBackend(CeedElemRestrictionDestroy(&elem_rstr));
54     }
55 
56     switch (eval_mode) {
57       case CEED_EVAL_NONE:
58         CeedCallBackend(CeedQFunctionFieldGetSize(qf_fields[i], &size));
59         q_size = (CeedSize)Q * size;
60         CeedCallBackend(CeedVectorCreate(ceed, q_size, &q_vecs[i]));
61         break;
62       case CEED_EVAL_INTERP:
63       case CEED_EVAL_GRAD:
64       case CEED_EVAL_DIV:
65       case CEED_EVAL_CURL:
66         CeedCallBackend(CeedOperatorFieldGetBasis(op_fields[i], &basis));
67         CeedCallBackend(CeedQFunctionFieldGetSize(qf_fields[i], &size));
68         CeedCallBackend(CeedBasisGetNumNodes(basis, &P));
69         CeedCallBackend(CeedBasisGetNumComponents(basis, &num_comp));
70         e_size = (CeedSize)P * num_comp;
71         CeedCallBackend(CeedVectorCreate(ceed, e_size, &e_vecs[i]));
72         q_size = (CeedSize)Q * size;
73         CeedCallBackend(CeedVectorCreate(ceed, q_size, &q_vecs[i]));
74         CeedCallBackend(CeedBasisDestroy(&basis));
75         break;
76       case CEED_EVAL_WEIGHT:  // Only on input fields
77         CeedCallBackend(CeedOperatorFieldGetBasis(op_fields[i], &basis));
78         q_size = (CeedSize)Q;
79         CeedCallBackend(CeedVectorCreate(ceed, q_size, &q_vecs[i]));
80         CeedCallBackend(CeedBasisApply(basis, 1, CEED_NOTRANSPOSE, CEED_EVAL_WEIGHT, CEED_VECTOR_NONE, q_vecs[i]));
81         CeedCallBackend(CeedBasisDestroy(&basis));
82         break;
83     }
84   }
85   // Drop duplicate restrictions
86   if (is_input) {
87     for (CeedInt i = 0; i < num_fields; i++) {
88       CeedVector          vec_i;
89       CeedElemRestriction rstr_i;
90 
91       CeedCallBackend(CeedOperatorFieldGetVector(op_fields[i], &vec_i));
92       CeedCallBackend(CeedOperatorFieldGetElemRestriction(op_fields[i], &rstr_i));
93       for (CeedInt j = i + 1; j < num_fields; j++) {
94         CeedVector          vec_j;
95         CeedElemRestriction rstr_j;
96 
97         CeedCallBackend(CeedOperatorFieldGetVector(op_fields[j], &vec_j));
98         CeedCallBackend(CeedOperatorFieldGetElemRestriction(op_fields[j], &rstr_j));
99         if (vec_i == vec_j && rstr_i == rstr_j) {
100           CeedCallBackend(CeedVectorReferenceCopy(e_vecs[i], &e_vecs[j]));
101           CeedCallBackend(CeedVectorReferenceCopy(e_vecs_full[i + start_e], &e_vecs_full[j + start_e]));
102           skip_rstr[j] = true;
103         }
104         CeedCallBackend(CeedVectorDestroy(&vec_j));
105         CeedCallBackend(CeedElemRestrictionDestroy(&rstr_j));
106       }
107       CeedCallBackend(CeedVectorDestroy(&vec_i));
108       CeedCallBackend(CeedElemRestrictionDestroy(&rstr_i));
109     }
110   } else {
111     for (CeedInt i = num_fields - 1; i >= 0; i--) {
112       CeedVector          vec_i;
113       CeedElemRestriction rstr_i;
114 
115       CeedCallBackend(CeedOperatorFieldGetVector(op_fields[i], &vec_i));
116       CeedCallBackend(CeedOperatorFieldGetElemRestriction(op_fields[i], &rstr_i));
117       for (CeedInt j = i - 1; j >= 0; j--) {
118         CeedVector          vec_j;
119         CeedElemRestriction rstr_j;
120 
121         CeedCallBackend(CeedOperatorFieldGetVector(op_fields[j], &vec_j));
122         CeedCallBackend(CeedOperatorFieldGetElemRestriction(op_fields[j], &rstr_j));
123         if (vec_i == vec_j && rstr_i == rstr_j) {
124           CeedCallBackend(CeedVectorReferenceCopy(e_vecs[i], &e_vecs[j]));
125           CeedCallBackend(CeedVectorReferenceCopy(e_vecs_full[i + start_e], &e_vecs_full[j + start_e]));
126           skip_rstr[j]          = true;
127           apply_add_basis[i]    = true;
128           e_data_out_indices[j] = i;
129         }
130         CeedCallBackend(CeedVectorDestroy(&vec_j));
131         CeedCallBackend(CeedElemRestrictionDestroy(&rstr_j));
132       }
133       CeedCallBackend(CeedVectorDestroy(&vec_i));
134       CeedCallBackend(CeedElemRestrictionDestroy(&rstr_i));
135     }
136   }
137   return CEED_ERROR_SUCCESS;
138 }
139 
140 //------------------------------------------------------------------------------
141 // Setup Operator
142 //------------------------------------------------------------------------------/*
143 static int CeedOperatorSetup_Ref(CeedOperator op) {
144   bool                is_setup_done;
145   CeedInt             Q, num_input_fields, num_output_fields;
146   CeedQFunctionField *qf_input_fields, *qf_output_fields;
147   CeedQFunction       qf;
148   CeedOperatorField  *op_input_fields, *op_output_fields;
149   CeedOperator_Ref   *impl;
150 
151   CeedCallBackend(CeedOperatorIsSetupDone(op, &is_setup_done));
152   if (is_setup_done) return CEED_ERROR_SUCCESS;
153 
154   CeedCallBackend(CeedOperatorGetData(op, &impl));
155   CeedCallBackend(CeedOperatorGetQFunction(op, &qf));
156   CeedCallBackend(CeedOperatorGetNumQuadraturePoints(op, &Q));
157   CeedCallBackend(CeedQFunctionIsIdentity(qf, &impl->is_identity_qf));
158   CeedCallBackend(CeedOperatorGetFields(op, &num_input_fields, &op_input_fields, &num_output_fields, &op_output_fields));
159   CeedCallBackend(CeedQFunctionGetFields(qf, NULL, &qf_input_fields, NULL, &qf_output_fields));
160 
161   // Allocate
162   CeedCallBackend(CeedCalloc(num_input_fields + num_output_fields, &impl->e_vecs_full));
163 
164   CeedCallBackend(CeedCalloc(CEED_FIELD_MAX, &impl->skip_rstr_in));
165   CeedCallBackend(CeedCalloc(CEED_FIELD_MAX, &impl->skip_rstr_out));
166   CeedCallBackend(CeedCalloc(CEED_FIELD_MAX, &impl->e_data_out_indices));
167   CeedCallBackend(CeedCalloc(CEED_FIELD_MAX, &impl->apply_add_basis_out));
168   CeedCallBackend(CeedCalloc(CEED_FIELD_MAX, &impl->input_states));
169   CeedCallBackend(CeedCalloc(CEED_FIELD_MAX, &impl->e_vecs_in));
170   CeedCallBackend(CeedCalloc(CEED_FIELD_MAX, &impl->e_vecs_out));
171   CeedCallBackend(CeedCalloc(CEED_FIELD_MAX, &impl->q_vecs_in));
172   CeedCallBackend(CeedCalloc(CEED_FIELD_MAX, &impl->q_vecs_out));
173 
174   impl->num_inputs  = num_input_fields;
175   impl->num_outputs = num_output_fields;
176 
177   // Set up infield and outfield e_vecs and q_vecs
178   // Infields
179   CeedCallBackend(CeedOperatorSetupFields_Ref(qf, op, true, impl->skip_rstr_in, NULL, NULL, impl->e_vecs_full, impl->e_vecs_in, impl->q_vecs_in, 0,
180                                               num_input_fields, Q));
181   // Outfields
182   CeedCallBackend(CeedOperatorSetupFields_Ref(qf, op, false, impl->skip_rstr_out, impl->e_data_out_indices, impl->apply_add_basis_out,
183                                               impl->e_vecs_full, impl->e_vecs_out, impl->q_vecs_out, num_input_fields, num_output_fields, Q));
184 
185   // Identity QFunctions
186   if (impl->is_identity_qf) {
187     CeedEvalMode        in_mode, out_mode;
188     CeedQFunctionField *in_fields, *out_fields;
189 
190     CeedCallBackend(CeedQFunctionGetFields(qf, NULL, &in_fields, NULL, &out_fields));
191     CeedCallBackend(CeedQFunctionFieldGetEvalMode(in_fields[0], &in_mode));
192     CeedCallBackend(CeedQFunctionFieldGetEvalMode(out_fields[0], &out_mode));
193 
194     if (in_mode == CEED_EVAL_NONE && out_mode == CEED_EVAL_NONE) {
195       impl->is_identity_rstr_op = true;
196     } else {
197       CeedCallBackend(CeedVectorReferenceCopy(impl->q_vecs_in[0], &impl->q_vecs_out[0]));
198     }
199   }
200 
201   CeedCallBackend(CeedOperatorSetSetupDone(op));
202   return CEED_ERROR_SUCCESS;
203 }
204 
205 //------------------------------------------------------------------------------
206 // Setup Operator Inputs
207 //------------------------------------------------------------------------------
208 static inline int CeedOperatorSetupInputs_Ref(CeedInt num_input_fields, CeedQFunctionField *qf_input_fields, CeedOperatorField *op_input_fields,
209                                               CeedVector in_vec, const bool skip_active, CeedScalar *e_data_full[2 * CEED_FIELD_MAX],
210                                               CeedOperator_Ref *impl, CeedRequest *request) {
211   for (CeedInt i = 0; i < num_input_fields; i++) {
212     bool         is_active;
213     uint64_t     state;
214     CeedEvalMode eval_mode;
215     CeedVector   vec;
216 
217     // Get input vector
218     CeedCallBackend(CeedOperatorFieldGetVector(op_input_fields[i], &vec));
219     is_active = vec == CEED_VECTOR_ACTIVE;
220     if (is_active) {
221       if (skip_active) continue;
222       else vec = in_vec;
223     }
224 
225     CeedCallBackend(CeedQFunctionFieldGetEvalMode(qf_input_fields[i], &eval_mode));
226     // Restrict and Evec
227     if (eval_mode == CEED_EVAL_WEIGHT) {  // Skip
228     } else {
229       // Restrict
230       CeedCallBackend(CeedVectorGetState(vec, &state));
231       // Skip restriction if input is unchanged
232       if ((state != impl->input_states[i] || vec == in_vec) && !impl->skip_rstr_in[i]) {
233         CeedElemRestriction elem_rstr;
234 
235         CeedCallBackend(CeedOperatorFieldGetElemRestriction(op_input_fields[i], &elem_rstr));
236         CeedCallBackend(CeedElemRestrictionApply(elem_rstr, CEED_NOTRANSPOSE, vec, impl->e_vecs_full[i], request));
237         CeedCallBackend(CeedElemRestrictionDestroy(&elem_rstr));
238       }
239       impl->input_states[i] = state;
240       // Get evec
241       CeedCallBackend(CeedVectorGetArrayRead(impl->e_vecs_full[i], CEED_MEM_HOST, (const CeedScalar **)&e_data_full[i]));
242     }
243     if (!is_active) CeedCallBackend(CeedVectorDestroy(&vec));
244   }
245   return CEED_ERROR_SUCCESS;
246 }
247 
248 //------------------------------------------------------------------------------
249 // Input Basis Action
250 //------------------------------------------------------------------------------
251 static inline int CeedOperatorInputBasis_Ref(CeedInt e, CeedInt Q, CeedQFunctionField *qf_input_fields, CeedOperatorField *op_input_fields,
252                                              CeedInt num_input_fields, const bool skip_active, CeedScalar *e_data_full[2 * CEED_FIELD_MAX],
253                                              CeedOperator_Ref *impl) {
254   for (CeedInt i = 0; i < num_input_fields; i++) {
255     CeedInt             elem_size, size, num_comp;
256     CeedEvalMode        eval_mode;
257     CeedElemRestriction elem_rstr;
258     CeedBasis           basis;
259 
260     // Skip active input
261     if (skip_active) {
262       bool       is_active;
263       CeedVector vec;
264 
265       CeedCallBackend(CeedOperatorFieldGetVector(op_input_fields[i], &vec));
266       is_active = vec == CEED_VECTOR_ACTIVE;
267       CeedCallBackend(CeedVectorDestroy(&vec));
268       if (is_active) continue;
269     }
270     // Get elem_size, eval_mode, size
271     CeedCallBackend(CeedOperatorFieldGetElemRestriction(op_input_fields[i], &elem_rstr));
272     CeedCallBackend(CeedElemRestrictionGetElementSize(elem_rstr, &elem_size));
273     CeedCallBackend(CeedElemRestrictionDestroy(&elem_rstr));
274     CeedCallBackend(CeedQFunctionFieldGetEvalMode(qf_input_fields[i], &eval_mode));
275     CeedCallBackend(CeedQFunctionFieldGetSize(qf_input_fields[i], &size));
276     // Basis action
277     switch (eval_mode) {
278       case CEED_EVAL_NONE:
279         CeedCallBackend(CeedVectorSetArray(impl->q_vecs_in[i], CEED_MEM_HOST, CEED_USE_POINTER, &e_data_full[i][(CeedSize)e * Q * size]));
280         break;
281       case CEED_EVAL_INTERP:
282       case CEED_EVAL_GRAD:
283       case CEED_EVAL_DIV:
284       case CEED_EVAL_CURL:
285         CeedCallBackend(CeedOperatorFieldGetBasis(op_input_fields[i], &basis));
286         CeedCallBackend(CeedBasisGetNumComponents(basis, &num_comp));
287         CeedCallBackend(CeedVectorSetArray(impl->e_vecs_in[i], CEED_MEM_HOST, CEED_USE_POINTER, &e_data_full[i][(CeedSize)e * elem_size * num_comp]));
288         CeedCallBackend(CeedBasisApply(basis, 1, CEED_NOTRANSPOSE, eval_mode, impl->e_vecs_in[i], impl->q_vecs_in[i]));
289         CeedCallBackend(CeedBasisDestroy(&basis));
290         break;
291       case CEED_EVAL_WEIGHT:
292         break;  // No action
293     }
294   }
295   return CEED_ERROR_SUCCESS;
296 }
297 
298 //------------------------------------------------------------------------------
299 // Output Basis Action
300 //------------------------------------------------------------------------------
301 static inline int CeedOperatorOutputBasis_Ref(CeedInt e, CeedInt Q, CeedQFunctionField *qf_output_fields, CeedOperatorField *op_output_fields,
302                                               CeedInt num_input_fields, CeedInt num_output_fields, bool *apply_add_basis, CeedOperator op,
303                                               CeedScalar *e_data_full[2 * CEED_FIELD_MAX], CeedOperator_Ref *impl) {
304   for (CeedInt i = 0; i < num_output_fields; i++) {
305     CeedInt             elem_size, num_comp;
306     CeedEvalMode        eval_mode;
307     CeedElemRestriction elem_rstr;
308     CeedBasis           basis;
309 
310     // Get elem_size, eval_mode
311     CeedCallBackend(CeedOperatorFieldGetElemRestriction(op_output_fields[i], &elem_rstr));
312     CeedCallBackend(CeedElemRestrictionGetElementSize(elem_rstr, &elem_size));
313     CeedCallBackend(CeedElemRestrictionDestroy(&elem_rstr));
314     CeedCallBackend(CeedQFunctionFieldGetEvalMode(qf_output_fields[i], &eval_mode));
315     // Basis action
316     switch (eval_mode) {
317       case CEED_EVAL_NONE:
318         break;  // No action
319       case CEED_EVAL_INTERP:
320       case CEED_EVAL_GRAD:
321       case CEED_EVAL_DIV:
322       case CEED_EVAL_CURL:
323         CeedCallBackend(CeedOperatorFieldGetBasis(op_output_fields[i], &basis));
324         CeedCallBackend(CeedBasisGetNumComponents(basis, &num_comp));
325         CeedCallBackend(CeedVectorSetArray(impl->e_vecs_out[i], CEED_MEM_HOST, CEED_USE_POINTER,
326                                            &e_data_full[i + num_input_fields][(CeedSize)e * elem_size * num_comp]));
327         if (apply_add_basis[i]) {
328           CeedCallBackend(CeedBasisApplyAdd(basis, 1, CEED_TRANSPOSE, eval_mode, impl->q_vecs_out[i], impl->e_vecs_out[i]));
329         } else {
330           CeedCallBackend(CeedBasisApply(basis, 1, CEED_TRANSPOSE, eval_mode, impl->q_vecs_out[i], impl->e_vecs_out[i]));
331         }
332         CeedCallBackend(CeedBasisDestroy(&basis));
333         break;
334       // LCOV_EXCL_START
335       case CEED_EVAL_WEIGHT: {
336         return CeedError(CeedOperatorReturnCeed(op), CEED_ERROR_BACKEND, "CEED_EVAL_WEIGHT cannot be an output evaluation mode");
337         // LCOV_EXCL_STOP
338       }
339     }
340   }
341   return CEED_ERROR_SUCCESS;
342 }
343 
344 //------------------------------------------------------------------------------
345 // Restore Input Vectors
346 //------------------------------------------------------------------------------
347 static inline int CeedOperatorRestoreInputs_Ref(CeedInt num_input_fields, CeedQFunctionField *qf_input_fields, CeedOperatorField *op_input_fields,
348                                                 const bool skip_active, CeedScalar *e_data_full[2 * CEED_FIELD_MAX], CeedOperator_Ref *impl) {
349   for (CeedInt i = 0; i < num_input_fields; i++) {
350     CeedEvalMode eval_mode;
351 
352     // Skip active inputs
353     if (skip_active) {
354       bool       is_active;
355       CeedVector vec;
356 
357       CeedCallBackend(CeedOperatorFieldGetVector(op_input_fields[i], &vec));
358       is_active = vec == CEED_VECTOR_ACTIVE;
359       CeedCallBackend(CeedVectorDestroy(&vec));
360       if (is_active) continue;
361     }
362     // Restore input
363     CeedCallBackend(CeedQFunctionFieldGetEvalMode(qf_input_fields[i], &eval_mode));
364     if (eval_mode == CEED_EVAL_WEIGHT) {  // Skip
365     } else {
366       CeedCallBackend(CeedVectorRestoreArrayRead(impl->e_vecs_full[i], (const CeedScalar **)&e_data_full[i]));
367     }
368   }
369   return CEED_ERROR_SUCCESS;
370 }
371 
372 //------------------------------------------------------------------------------
373 // Operator Apply
374 //------------------------------------------------------------------------------
375 static int CeedOperatorApplyAdd_Ref(CeedOperator op, CeedVector in_vec, CeedVector out_vec, CeedRequest *request) {
376   CeedInt             Q, num_elem, num_input_fields, num_output_fields, size;
377   CeedEvalMode        eval_mode;
378   CeedScalar         *e_data_full[2 * CEED_FIELD_MAX] = {NULL};
379   CeedQFunctionField *qf_input_fields, *qf_output_fields;
380   CeedQFunction       qf;
381   CeedOperatorField  *op_input_fields, *op_output_fields;
382   CeedOperator_Ref   *impl;
383 
384   CeedCallBackend(CeedOperatorGetData(op, &impl));
385   CeedCallBackend(CeedOperatorGetQFunction(op, &qf));
386   CeedCallBackend(CeedOperatorGetNumQuadraturePoints(op, &Q));
387   CeedCallBackend(CeedOperatorGetNumElements(op, &num_elem));
388   CeedCallBackend(CeedOperatorGetFields(op, &num_input_fields, &op_input_fields, &num_output_fields, &op_output_fields));
389   CeedCallBackend(CeedQFunctionGetFields(qf, NULL, &qf_input_fields, NULL, &qf_output_fields));
390 
391   // Setup
392   CeedCallBackend(CeedOperatorSetup_Ref(op));
393 
394   // Restriction only operator
395   if (impl->is_identity_rstr_op) {
396     CeedElemRestriction elem_rstr;
397 
398     CeedCallBackend(CeedOperatorFieldGetElemRestriction(op_input_fields[0], &elem_rstr));
399     CeedCallBackend(CeedElemRestrictionApply(elem_rstr, CEED_NOTRANSPOSE, in_vec, impl->e_vecs_full[0], request));
400     CeedCallBackend(CeedElemRestrictionDestroy(&elem_rstr));
401     CeedCallBackend(CeedOperatorFieldGetElemRestriction(op_output_fields[0], &elem_rstr));
402     CeedCallBackend(CeedElemRestrictionApply(elem_rstr, CEED_TRANSPOSE, impl->e_vecs_full[0], out_vec, request));
403     CeedCallBackend(CeedElemRestrictionDestroy(&elem_rstr));
404     return CEED_ERROR_SUCCESS;
405   }
406 
407   // Input Evecs and Restriction
408   CeedCallBackend(CeedOperatorSetupInputs_Ref(num_input_fields, qf_input_fields, op_input_fields, in_vec, false, e_data_full, impl, request));
409 
410   // Output Evecs
411   for (CeedInt i = num_output_fields - 1; i >= 0; i--) {
412     if (impl->skip_rstr_out[i]) {
413       e_data_full[i + num_input_fields] = e_data_full[impl->e_data_out_indices[i] + num_input_fields];
414     } else {
415       CeedCallBackend(CeedVectorGetArrayWrite(impl->e_vecs_full[i + impl->num_inputs], CEED_MEM_HOST, &e_data_full[i + num_input_fields]));
416     }
417   }
418 
419   // Loop through elements
420   for (CeedInt e = 0; e < num_elem; e++) {
421     // Output pointers
422     for (CeedInt i = 0; i < num_output_fields; i++) {
423       CeedCallBackend(CeedQFunctionFieldGetEvalMode(qf_output_fields[i], &eval_mode));
424       if (eval_mode == CEED_EVAL_NONE) {
425         CeedCallBackend(CeedQFunctionFieldGetSize(qf_output_fields[i], &size));
426         CeedCallBackend(
427             CeedVectorSetArray(impl->q_vecs_out[i], CEED_MEM_HOST, CEED_USE_POINTER, &e_data_full[i + num_input_fields][(CeedSize)e * Q * size]));
428       }
429     }
430 
431     // Input basis apply
432     CeedCallBackend(CeedOperatorInputBasis_Ref(e, Q, qf_input_fields, op_input_fields, num_input_fields, false, e_data_full, impl));
433 
434     // Q function
435     if (!impl->is_identity_qf) {
436       CeedCallBackend(CeedQFunctionApply(qf, Q, impl->q_vecs_in, impl->q_vecs_out));
437     }
438 
439     // Output basis apply
440     CeedCallBackend(CeedOperatorOutputBasis_Ref(e, Q, qf_output_fields, op_output_fields, num_input_fields, num_output_fields,
441                                                 impl->apply_add_basis_out, op, e_data_full, impl));
442   }
443 
444   // Output restriction
445   for (CeedInt i = 0; i < num_output_fields; i++) {
446     bool                is_active;
447     CeedVector          vec;
448     CeedElemRestriction elem_rstr;
449 
450     if (impl->skip_rstr_out[i]) continue;
451     // Restore Evec
452     CeedCallBackend(CeedVectorRestoreArray(impl->e_vecs_full[i + impl->num_inputs], &e_data_full[i + num_input_fields]));
453     // Get output vector
454     CeedCallBackend(CeedOperatorFieldGetVector(op_output_fields[i], &vec));
455     // Active
456     is_active = vec == CEED_VECTOR_ACTIVE;
457     if (is_active) vec = out_vec;
458     // Restrict
459     CeedCallBackend(CeedOperatorFieldGetElemRestriction(op_output_fields[i], &elem_rstr));
460     CeedCallBackend(CeedElemRestrictionApply(elem_rstr, CEED_TRANSPOSE, impl->e_vecs_full[i + impl->num_inputs], vec, request));
461     if (!is_active) CeedCallBackend(CeedVectorDestroy(&vec));
462     CeedCallBackend(CeedElemRestrictionDestroy(&elem_rstr));
463   }
464 
465   // Restore input arrays
466   CeedCallBackend(CeedOperatorRestoreInputs_Ref(num_input_fields, qf_input_fields, op_input_fields, false, e_data_full, impl));
467   return CEED_ERROR_SUCCESS;
468 }
469 
470 //------------------------------------------------------------------------------
471 // Core code for assembling linear QFunction
472 //------------------------------------------------------------------------------
473 static inline int CeedOperatorLinearAssembleQFunctionCore_Ref(CeedOperator op, bool build_objects, CeedVector *assembled, CeedElemRestriction *rstr,
474                                                               CeedRequest *request) {
475   Ceed                ceed, ceed_parent;
476   CeedInt             qf_size_in, qf_size_out, Q, num_elem, num_input_fields, num_output_fields;
477   CeedScalar         *assembled_array, *e_data_full[2 * CEED_FIELD_MAX] = {NULL};
478   CeedQFunctionField *qf_input_fields, *qf_output_fields;
479   CeedQFunction       qf;
480   CeedOperatorField  *op_input_fields, *op_output_fields;
481   CeedOperator_Ref   *impl;
482 
483   CeedCallBackend(CeedOperatorGetCeed(op, &ceed));
484   CeedCallBackend(CeedOperatorGetFallbackParentCeed(op, &ceed_parent));
485   CeedCallBackend(CeedOperatorGetData(op, &impl));
486   qf_size_in  = impl->qf_size_in;
487   qf_size_out = impl->qf_size_out;
488   CeedCallBackend(CeedOperatorGetQFunction(op, &qf));
489   CeedCallBackend(CeedOperatorGetNumQuadraturePoints(op, &Q));
490   CeedCallBackend(CeedOperatorGetNumElements(op, &num_elem));
491   CeedCallBackend(CeedQFunctionGetFields(qf, NULL, &qf_input_fields, NULL, &qf_output_fields));
492   CeedCallBackend(CeedOperatorGetFields(op, &num_input_fields, &op_input_fields, &num_output_fields, &op_output_fields));
493 
494   // Setup
495   CeedCallBackend(CeedOperatorSetup_Ref(op));
496 
497   // Check for restriction only operator
498   CeedCheck(!impl->is_identity_rstr_op, ceed, CEED_ERROR_BACKEND, "Assembling restriction only operators is not supported");
499 
500   // Input Evecs and Restriction
501   CeedCallBackend(CeedOperatorSetupInputs_Ref(num_input_fields, qf_input_fields, op_input_fields, NULL, true, e_data_full, impl, request));
502 
503   // Count number of active input fields
504   if (qf_size_in == 0) {
505     for (CeedInt i = 0; i < num_input_fields; i++) {
506       CeedInt    field_size;
507       CeedVector vec;
508 
509       // Get input vector
510       CeedCallBackend(CeedOperatorFieldGetVector(op_input_fields[i], &vec));
511       // Check if active input
512       if (vec == CEED_VECTOR_ACTIVE) {
513         CeedCallBackend(CeedQFunctionFieldGetSize(qf_input_fields[i], &field_size));
514         CeedCallBackend(CeedVectorSetValue(impl->q_vecs_in[i], 0.0));
515         qf_size_in += field_size;
516       }
517       CeedCallBackend(CeedVectorDestroy(&vec));
518     }
519     CeedCheck(qf_size_in > 0, ceed, CEED_ERROR_BACKEND, "Cannot assemble QFunction without active inputs and outputs");
520     impl->qf_size_in = qf_size_in;
521   }
522 
523   // Count number of active output fields
524   if (qf_size_out == 0) {
525     for (CeedInt i = 0; i < num_output_fields; i++) {
526       CeedInt    field_size;
527       CeedVector vec;
528 
529       // Get output vector
530       CeedCallBackend(CeedOperatorFieldGetVector(op_output_fields[i], &vec));
531       // Check if active output
532       if (vec == CEED_VECTOR_ACTIVE) {
533         CeedCallBackend(CeedQFunctionFieldGetSize(qf_output_fields[i], &field_size));
534         qf_size_out += field_size;
535       }
536       CeedCallBackend(CeedVectorDestroy(&vec));
537     }
538     CeedCheck(qf_size_out > 0, ceed, CEED_ERROR_BACKEND, "Cannot assemble QFunction without active inputs and outputs");
539     impl->qf_size_out = qf_size_out;
540   }
541 
542   // Build objects if needed
543   if (build_objects) {
544     const CeedSize l_size     = (CeedSize)num_elem * Q * qf_size_in * qf_size_out;
545     CeedInt        strides[3] = {1, Q, qf_size_in * qf_size_out * Q}; /* *NOPAD* */
546 
547     // Create output restriction
548     CeedCallBackend(CeedElemRestrictionCreateStrided(ceed_parent, num_elem, Q, qf_size_in * qf_size_out,
549                                                      (CeedSize)qf_size_in * (CeedSize)qf_size_out * (CeedSize)num_elem * (CeedSize)Q, strides, rstr));
550     // Create assembled vector
551     CeedCallBackend(CeedVectorCreate(ceed_parent, l_size, assembled));
552   }
553   // Clear output vector
554   CeedCallBackend(CeedVectorSetValue(*assembled, 0.0));
555   CeedCallBackend(CeedVectorGetArray(*assembled, CEED_MEM_HOST, &assembled_array));
556 
557   // Loop through elements
558   for (CeedInt e = 0; e < num_elem; e++) {
559     // Input basis apply
560     CeedCallBackend(CeedOperatorInputBasis_Ref(e, Q, qf_input_fields, op_input_fields, num_input_fields, true, e_data_full, impl));
561 
562     // Assemble QFunction
563 
564     for (CeedInt i = 0; i < num_input_fields; i++) {
565       bool       is_active;
566       CeedInt    field_size;
567       CeedVector vec;
568 
569       // Set Inputs
570       CeedCallBackend(CeedOperatorFieldGetVector(op_input_fields[i], &vec));
571       is_active = vec == CEED_VECTOR_ACTIVE;
572       CeedCallBackend(CeedVectorDestroy(&vec));
573       if (!is_active) continue;
574       CeedCallBackend(CeedQFunctionFieldGetSize(qf_input_fields[i], &field_size));
575       for (CeedInt field = 0; field < field_size; field++) {
576         // Set current portion of input to 1.0
577         {
578           CeedScalar *array;
579 
580           CeedCallBackend(CeedVectorGetArray(impl->q_vecs_in[i], CEED_MEM_HOST, &array));
581           for (CeedInt j = 0; j < Q; j++) array[field * Q + j] = 1.0;
582           CeedCallBackend(CeedVectorRestoreArray(impl->q_vecs_in[i], &array));
583         }
584 
585         if (!impl->is_identity_qf) {
586           // Set Outputs
587           for (CeedInt out = 0; out < num_output_fields; out++) {
588             CeedVector vec;
589 
590             // Get output vector
591             CeedCallBackend(CeedOperatorFieldGetVector(op_output_fields[out], &vec));
592             // Check if active output
593             if (vec == CEED_VECTOR_ACTIVE) {
594               CeedInt field_size;
595 
596               CeedCallBackend(CeedVectorSetArray(impl->q_vecs_out[out], CEED_MEM_HOST, CEED_USE_POINTER, assembled_array));
597               CeedCallBackend(CeedQFunctionFieldGetSize(qf_output_fields[out], &field_size));
598               assembled_array += field_size * Q;  // Advance the pointer by the size of the output
599             }
600             CeedCallBackend(CeedVectorDestroy(&vec));
601           }
602           // Apply QFunction
603           CeedCallBackend(CeedQFunctionApply(qf, Q, impl->q_vecs_in, impl->q_vecs_out));
604         } else {
605           CeedInt           field_size;
606           const CeedScalar *array;
607 
608           // Copy Identity Outputs
609           CeedCallBackend(CeedQFunctionFieldGetSize(qf_output_fields[0], &field_size));
610           CeedCallBackend(CeedVectorGetArrayRead(impl->q_vecs_out[0], CEED_MEM_HOST, &array));
611           for (CeedInt j = 0; j < field_size * Q; j++) assembled_array[j] = array[j];
612           CeedCallBackend(CeedVectorRestoreArrayRead(impl->q_vecs_out[0], &array));
613           assembled_array += field_size * Q;
614         }
615         // Reset input to 0.0
616         {
617           CeedScalar *array;
618 
619           CeedCallBackend(CeedVectorGetArray(impl->q_vecs_in[i], CEED_MEM_HOST, &array));
620           for (CeedInt j = 0; j < Q; j++) array[field * Q + j] = 0.0;
621           CeedCallBackend(CeedVectorRestoreArray(impl->q_vecs_in[i], &array));
622         }
623       }
624     }
625   }
626 
627   // Un-set output Qvecs to prevent accidental overwrite of Assembled
628   if (!impl->is_identity_qf) {
629     for (CeedInt out = 0; out < num_output_fields; out++) {
630       CeedVector vec;
631 
632       // Get output vector
633       CeedCallBackend(CeedOperatorFieldGetVector(op_output_fields[out], &vec));
634       // Check if active output
635       if (vec == CEED_VECTOR_ACTIVE && num_elem > 0) {
636         CeedCallBackend(CeedVectorTakeArray(impl->q_vecs_out[out], CEED_MEM_HOST, NULL));
637       }
638       CeedCallBackend(CeedVectorDestroy(&vec));
639     }
640   }
641 
642   // Restore input arrays
643   CeedCallBackend(CeedOperatorRestoreInputs_Ref(num_input_fields, qf_input_fields, op_input_fields, true, e_data_full, impl));
644 
645   // Restore output
646   CeedCallBackend(CeedVectorRestoreArray(*assembled, &assembled_array));
647   return CEED_ERROR_SUCCESS;
648 }
649 
650 //------------------------------------------------------------------------------
651 // Assemble Linear QFunction
652 //------------------------------------------------------------------------------
653 static int CeedOperatorLinearAssembleQFunction_Ref(CeedOperator op, CeedVector *assembled, CeedElemRestriction *rstr, CeedRequest *request) {
654   return CeedOperatorLinearAssembleQFunctionCore_Ref(op, true, assembled, rstr, request);
655 }
656 
657 //------------------------------------------------------------------------------
658 // Update Assembled Linear QFunction
659 //------------------------------------------------------------------------------
660 static int CeedOperatorLinearAssembleQFunctionUpdate_Ref(CeedOperator op, CeedVector assembled, CeedElemRestriction rstr, CeedRequest *request) {
661   return CeedOperatorLinearAssembleQFunctionCore_Ref(op, false, &assembled, &rstr, request);
662 }
663 
664 //------------------------------------------------------------------------------
665 // Setup Input/Output Fields
666 //------------------------------------------------------------------------------
667 static int CeedOperatorSetupFieldsAtPoints_Ref(CeedQFunction qf, CeedOperator op, bool is_input, bool *skip_rstr, bool *apply_add_basis,
668                                                CeedVector *e_vecs_full, CeedVector *e_vecs, CeedVector *q_vecs, CeedInt start_e, CeedInt num_fields,
669                                                CeedInt Q) {
670   Ceed                ceed;
671   CeedSize            e_size, q_size;
672   CeedInt             max_num_points, num_comp, size, P;
673   CeedQFunctionField *qf_fields;
674   CeedOperatorField  *op_fields;
675 
676   {
677     Ceed ceed_parent;
678 
679     CeedCallBackend(CeedOperatorGetCeed(op, &ceed));
680     CeedCallBackend(CeedGetParent(ceed, &ceed_parent));
681     if (ceed_parent) ceed = ceed_parent;
682   }
683   if (is_input) {
684     CeedCallBackend(CeedOperatorGetFields(op, NULL, &op_fields, NULL, NULL));
685     CeedCallBackend(CeedQFunctionGetFields(qf, NULL, &qf_fields, NULL, NULL));
686   } else {
687     CeedCallBackend(CeedOperatorGetFields(op, NULL, NULL, NULL, &op_fields));
688     CeedCallBackend(CeedQFunctionGetFields(qf, NULL, NULL, NULL, &qf_fields));
689   }
690 
691   // Get max number of points
692   {
693     CeedInt             dim;
694     CeedElemRestriction rstr_points = NULL;
695     CeedOperator_Ref   *impl;
696 
697     CeedCallBackend(CeedOperatorAtPointsGetPoints(op, &rstr_points, NULL));
698     CeedCallBackend(CeedElemRestrictionGetMaxPointsInElement(rstr_points, &max_num_points));
699     CeedCallBackend(CeedElemRestrictionGetNumComponents(rstr_points, &dim));
700     CeedCallBackend(CeedElemRestrictionDestroy(&rstr_points));
701     CeedCallBackend(CeedOperatorGetData(op, &impl));
702     if (is_input) {
703       CeedCallBackend(CeedVectorCreate(ceed, dim * max_num_points, &impl->point_coords_elem));
704       CeedCallBackend(CeedVectorSetValue(impl->point_coords_elem, 0.0));
705     }
706   }
707 
708   // Loop over fields
709   for (CeedInt i = 0; i < num_fields; i++) {
710     CeedEvalMode eval_mode;
711     CeedBasis    basis;
712 
713     CeedCallBackend(CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode));
714     if (eval_mode != CEED_EVAL_WEIGHT) {
715       CeedElemRestriction elem_rstr;
716 
717       CeedCallBackend(CeedOperatorFieldGetElemRestriction(op_fields[i], &elem_rstr));
718       CeedCallBackend(CeedElemRestrictionCreateVector(elem_rstr, NULL, &e_vecs_full[i + start_e]));
719       CeedCallBackend(CeedElemRestrictionDestroy(&elem_rstr));
720       CeedCallBackend(CeedVectorSetValue(e_vecs_full[i + start_e], 0.0));
721     }
722 
723     switch (eval_mode) {
724       case CEED_EVAL_NONE: {
725         CeedVector vec;
726 
727         CeedCallBackend(CeedQFunctionFieldGetSize(qf_fields[i], &size));
728         e_size = (CeedSize)max_num_points * size;
729         CeedCallBackend(CeedVectorCreate(ceed, e_size, &e_vecs[i]));
730         CeedCallBackend(CeedOperatorFieldGetVector(op_fields[i], &vec));
731         if (vec == CEED_VECTOR_ACTIVE || !is_input) {
732           CeedCallBackend(CeedVectorReferenceCopy(e_vecs[i], &q_vecs[i]));
733         } else {
734           q_size = (CeedSize)max_num_points * size;
735           CeedCallBackend(CeedVectorCreate(ceed, q_size, &q_vecs[i]));
736         }
737         CeedCallBackend(CeedVectorDestroy(&vec));
738         break;
739       }
740       case CEED_EVAL_INTERP:
741       case CEED_EVAL_GRAD:
742       case CEED_EVAL_DIV:
743       case CEED_EVAL_CURL:
744         CeedCallBackend(CeedOperatorFieldGetBasis(op_fields[i], &basis));
745         CeedCallBackend(CeedQFunctionFieldGetSize(qf_fields[i], &size));
746         CeedCallBackend(CeedBasisGetNumNodes(basis, &P));
747         CeedCallBackend(CeedBasisGetNumComponents(basis, &num_comp));
748         e_size = (CeedSize)P * num_comp;
749         CeedCallBackend(CeedVectorCreate(ceed, e_size, &e_vecs[i]));
750         q_size = (CeedSize)max_num_points * size;
751         CeedCallBackend(CeedVectorCreate(ceed, q_size, &q_vecs[i]));
752         CeedCallBackend(CeedBasisDestroy(&basis));
753         break;
754       case CEED_EVAL_WEIGHT:  // Only on input fields
755         CeedCallBackend(CeedOperatorFieldGetBasis(op_fields[i], &basis));
756         q_size = (CeedSize)max_num_points;
757         CeedCallBackend(CeedVectorCreate(ceed, q_size, &q_vecs[i]));
758         CeedCallBackend(
759             CeedBasisApplyAtPoints(basis, 1, &max_num_points, CEED_NOTRANSPOSE, CEED_EVAL_WEIGHT, CEED_VECTOR_NONE, CEED_VECTOR_NONE, q_vecs[i]));
760         CeedCallBackend(CeedBasisDestroy(&basis));
761         break;
762     }
763     // Initialize full arrays for E-vectors and Q-vectors
764     if (e_vecs[i]) CeedCallBackend(CeedVectorSetValue(e_vecs[i], 0.0));
765     if (eval_mode != CEED_EVAL_WEIGHT) CeedCallBackend(CeedVectorSetValue(q_vecs[i], 0.0));
766   }
767   // Drop duplicate restrictions
768   if (is_input) {
769     for (CeedInt i = 0; i < num_fields; i++) {
770       CeedVector          vec_i;
771       CeedElemRestriction rstr_i;
772 
773       CeedCallBackend(CeedOperatorFieldGetVector(op_fields[i], &vec_i));
774       CeedCallBackend(CeedOperatorFieldGetElemRestriction(op_fields[i], &rstr_i));
775       for (CeedInt j = i + 1; j < num_fields; j++) {
776         CeedVector          vec_j;
777         CeedElemRestriction rstr_j;
778 
779         CeedCallBackend(CeedOperatorFieldGetVector(op_fields[j], &vec_j));
780         CeedCallBackend(CeedOperatorFieldGetElemRestriction(op_fields[j], &rstr_j));
781         if (vec_i == vec_j && rstr_i == rstr_j) {
782           CeedCallBackend(CeedVectorReferenceCopy(e_vecs[i], &e_vecs[j]));
783           CeedCallBackend(CeedVectorReferenceCopy(e_vecs_full[i + start_e], &e_vecs_full[j + start_e]));
784           skip_rstr[j] = true;
785         }
786         CeedCallBackend(CeedVectorDestroy(&vec_j));
787         CeedCallBackend(CeedElemRestrictionDestroy(&rstr_j));
788       }
789       CeedCallBackend(CeedVectorDestroy(&vec_i));
790       CeedCallBackend(CeedElemRestrictionDestroy(&rstr_i));
791     }
792   } else {
793     for (CeedInt i = num_fields - 1; i >= 0; i--) {
794       CeedVector          vec_i;
795       CeedElemRestriction rstr_i;
796 
797       CeedCallBackend(CeedOperatorFieldGetVector(op_fields[i], &vec_i));
798       CeedCallBackend(CeedOperatorFieldGetElemRestriction(op_fields[i], &rstr_i));
799       for (CeedInt j = i - 1; j >= 0; j--) {
800         CeedVector          vec_j;
801         CeedElemRestriction rstr_j;
802 
803         CeedCallBackend(CeedOperatorFieldGetVector(op_fields[j], &vec_j));
804         CeedCallBackend(CeedOperatorFieldGetElemRestriction(op_fields[j], &rstr_j));
805         if (vec_i == vec_j && rstr_i == rstr_j) {
806           CeedCallBackend(CeedVectorReferenceCopy(e_vecs[i], &e_vecs[j]));
807           CeedCallBackend(CeedVectorReferenceCopy(e_vecs_full[i + start_e], &e_vecs_full[j + start_e]));
808           skip_rstr[j]       = true;
809           apply_add_basis[i] = true;
810         }
811         CeedCallBackend(CeedVectorDestroy(&vec_j));
812         CeedCallBackend(CeedElemRestrictionDestroy(&rstr_j));
813       }
814       CeedCallBackend(CeedVectorDestroy(&vec_i));
815       CeedCallBackend(CeedElemRestrictionDestroy(&rstr_i));
816     }
817   }
818   return CEED_ERROR_SUCCESS;
819 }
820 
821 //------------------------------------------------------------------------------
822 // Setup Operator
823 //------------------------------------------------------------------------------
824 static int CeedOperatorSetupAtPoints_Ref(CeedOperator op) {
825   bool                is_setup_done;
826   CeedInt             Q, num_input_fields, num_output_fields;
827   CeedQFunctionField *qf_input_fields, *qf_output_fields;
828   CeedQFunction       qf;
829   CeedOperatorField  *op_input_fields, *op_output_fields;
830   CeedOperator_Ref   *impl;
831 
832   CeedCallBackend(CeedOperatorIsSetupDone(op, &is_setup_done));
833   if (is_setup_done) return CEED_ERROR_SUCCESS;
834 
835   CeedCallBackend(CeedOperatorGetData(op, &impl));
836   CeedCallBackend(CeedOperatorGetQFunction(op, &qf));
837   CeedCallBackend(CeedOperatorGetNumQuadraturePoints(op, &Q));
838   CeedCallBackend(CeedQFunctionIsIdentity(qf, &impl->is_identity_qf));
839   CeedCallBackend(CeedOperatorGetFields(op, &num_input_fields, &op_input_fields, &num_output_fields, &op_output_fields));
840   CeedCallBackend(CeedQFunctionGetFields(qf, NULL, &qf_input_fields, NULL, &qf_output_fields));
841 
842   // Allocate
843   CeedCallBackend(CeedCalloc(num_input_fields + num_output_fields, &impl->e_vecs_full));
844 
845   CeedCallBackend(CeedCalloc(CEED_FIELD_MAX, &impl->skip_rstr_in));
846   CeedCallBackend(CeedCalloc(CEED_FIELD_MAX, &impl->skip_rstr_out));
847   CeedCallBackend(CeedCalloc(CEED_FIELD_MAX, &impl->apply_add_basis_out));
848   CeedCallBackend(CeedCalloc(CEED_FIELD_MAX, &impl->input_states));
849   CeedCallBackend(CeedCalloc(CEED_FIELD_MAX, &impl->e_vecs_in));
850   CeedCallBackend(CeedCalloc(CEED_FIELD_MAX, &impl->e_vecs_out));
851   CeedCallBackend(CeedCalloc(CEED_FIELD_MAX, &impl->q_vecs_in));
852   CeedCallBackend(CeedCalloc(CEED_FIELD_MAX, &impl->q_vecs_out));
853 
854   impl->num_inputs  = num_input_fields;
855   impl->num_outputs = num_output_fields;
856 
857   // Set up infield and outfield pointer arrays
858   // Infields
859   CeedCallBackend(CeedOperatorSetupFieldsAtPoints_Ref(qf, op, true, impl->skip_rstr_in, NULL, impl->e_vecs_full, impl->e_vecs_in, impl->q_vecs_in, 0,
860                                                       num_input_fields, Q));
861   // Outfields
862   CeedCallBackend(CeedOperatorSetupFieldsAtPoints_Ref(qf, op, false, impl->skip_rstr_out, impl->apply_add_basis_out, impl->e_vecs_full,
863                                                       impl->e_vecs_out, impl->q_vecs_out, num_input_fields, num_output_fields, Q));
864 
865   // Identity QFunctions
866   if (impl->is_identity_qf) {
867     CeedCallBackend(CeedVectorReferenceCopy(impl->q_vecs_in[0], &impl->q_vecs_out[0]));
868     CeedCallBackend(CeedVectorReferenceCopy(impl->q_vecs_in[0], &impl->e_vecs_out[0]));
869   }
870 
871   CeedCallBackend(CeedOperatorSetSetupDone(op));
872   return CEED_ERROR_SUCCESS;
873 }
874 
875 //------------------------------------------------------------------------------
876 // Input Basis Action
877 //------------------------------------------------------------------------------
878 static inline int CeedOperatorInputBasisAtPoints_Ref(CeedInt e, CeedInt num_points_offset, CeedInt num_points, CeedQFunctionField *qf_input_fields,
879                                                      CeedOperatorField *op_input_fields, CeedInt num_input_fields, CeedVector in_vec,
880                                                      CeedVector point_coords_elem, bool skip_active, CeedScalar *e_data[2 * CEED_FIELD_MAX],
881                                                      CeedOperator_Ref *impl, CeedRequest *request) {
882   for (CeedInt i = 0; i < num_input_fields; i++) {
883     bool                is_active;
884     CeedInt             elem_size, size, num_comp;
885     CeedRestrictionType rstr_type;
886     CeedEvalMode        eval_mode;
887     CeedVector          vec;
888     CeedElemRestriction elem_rstr;
889     CeedBasis           basis;
890 
891     // Skip active input
892     CeedCallBackend(CeedOperatorFieldGetVector(op_input_fields[i], &vec));
893     is_active = vec == CEED_VECTOR_ACTIVE;
894     CeedCallBackend(CeedVectorDestroy(&vec));
895     if (skip_active && is_active) continue;
896 
897     // Get elem_size, eval_mode, size
898     CeedCallBackend(CeedOperatorFieldGetElemRestriction(op_input_fields[i], &elem_rstr));
899     CeedCallBackend(CeedElemRestrictionGetType(elem_rstr, &rstr_type));
900     CeedCallBackend(CeedQFunctionFieldGetEvalMode(qf_input_fields[i], &eval_mode));
901     CeedCallBackend(CeedQFunctionFieldGetSize(qf_input_fields[i], &size));
902     // Restrict block active input
903     if (is_active && !impl->skip_rstr_in[i]) {
904       if (rstr_type == CEED_RESTRICTION_POINTS) {
905         CeedCallBackend(CeedElemRestrictionApplyAtPointsInElement(elem_rstr, e, CEED_NOTRANSPOSE, in_vec, impl->e_vecs_in[i], request));
906       } else {
907         CeedCallBackend(CeedElemRestrictionApplyBlock(elem_rstr, e, CEED_NOTRANSPOSE, in_vec, impl->e_vecs_in[i], request));
908       }
909     }
910     // Basis action
911     switch (eval_mode) {
912       case CEED_EVAL_NONE:
913         if (!is_active) {
914           CeedCallBackend(CeedVectorSetArray(impl->q_vecs_in[i], CEED_MEM_HOST, CEED_USE_POINTER, &e_data[i][num_points_offset * size]));
915         }
916         break;
917       // Note - these basis eval modes require FEM fields
918       case CEED_EVAL_INTERP:
919       case CEED_EVAL_GRAD:
920       case CEED_EVAL_DIV:
921       case CEED_EVAL_CURL:
922         CeedCallBackend(CeedOperatorFieldGetBasis(op_input_fields[i], &basis));
923         if (!is_active) {
924           CeedCallBackend(CeedBasisGetNumComponents(basis, &num_comp));
925           CeedCallBackend(CeedElemRestrictionGetElementSize(elem_rstr, &elem_size));
926           CeedCallBackend(CeedVectorSetArray(impl->e_vecs_in[i], CEED_MEM_HOST, CEED_USE_POINTER, &e_data[i][(CeedSize)e * elem_size * num_comp]));
927         }
928         CeedCallBackend(
929             CeedBasisApplyAtPoints(basis, 1, &num_points, CEED_NOTRANSPOSE, eval_mode, point_coords_elem, impl->e_vecs_in[i], impl->q_vecs_in[i]));
930         CeedCallBackend(CeedBasisDestroy(&basis));
931         break;
932       case CEED_EVAL_WEIGHT:
933         break;  // No action
934     }
935     CeedCallBackend(CeedElemRestrictionDestroy(&elem_rstr));
936   }
937   return CEED_ERROR_SUCCESS;
938 }
939 
940 //------------------------------------------------------------------------------
941 // Output Basis Action
942 //------------------------------------------------------------------------------
943 static inline int CeedOperatorOutputBasisAtPoints_Ref(CeedInt e, CeedInt num_points_offset, CeedInt num_points, CeedQFunctionField *qf_output_fields,
944                                                       CeedOperatorField *op_output_fields, CeedInt num_input_fields, CeedInt num_output_fields,
945                                                       bool *apply_add_basis, bool *skip_rstr, CeedOperator op, CeedVector out_vec,
946                                                       CeedVector point_coords_elem, CeedOperator_Ref *impl, CeedRequest *request) {
947   for (CeedInt i = 0; i < num_output_fields; i++) {
948     bool                is_active;
949     CeedRestrictionType rstr_type;
950     CeedEvalMode        eval_mode;
951     CeedVector          vec;
952     CeedElemRestriction elem_rstr;
953     CeedBasis           basis;
954 
955     // Get elem_size, eval_mode, size
956     CeedCallBackend(CeedOperatorFieldGetElemRestriction(op_output_fields[i], &elem_rstr));
957     CeedCallBackend(CeedQFunctionFieldGetEvalMode(qf_output_fields[i], &eval_mode));
958     // Basis action
959     switch (eval_mode) {
960       case CEED_EVAL_NONE:
961         break;  // No action
962       case CEED_EVAL_INTERP:
963       case CEED_EVAL_GRAD:
964       case CEED_EVAL_DIV:
965       case CEED_EVAL_CURL:
966         CeedCallBackend(CeedOperatorFieldGetBasis(op_output_fields[i], &basis));
967         if (apply_add_basis[i]) {
968           CeedCallBackend(CeedBasisApplyAddAtPoints(basis, 1, &num_points, CEED_TRANSPOSE, eval_mode, point_coords_elem, impl->q_vecs_out[i],
969                                                     impl->e_vecs_out[i]));
970         } else {
971           CeedCallBackend(
972               CeedBasisApplyAtPoints(basis, 1, &num_points, CEED_TRANSPOSE, eval_mode, point_coords_elem, impl->q_vecs_out[i], impl->e_vecs_out[i]));
973         }
974         CeedCallBackend(CeedBasisDestroy(&basis));
975         break;
976       // LCOV_EXCL_START
977       case CEED_EVAL_WEIGHT: {
978         return CeedError(CeedOperatorReturnCeed(op), CEED_ERROR_BACKEND, "CEED_EVAL_WEIGHT cannot be an output evaluation mode");
979         // LCOV_EXCL_STOP
980       }
981     }
982     // Restrict output block
983     if (skip_rstr[i]) {
984       CeedCallBackend(CeedElemRestrictionDestroy(&elem_rstr));
985       continue;
986     }
987 
988     // Get output vector
989     CeedCallBackend(CeedElemRestrictionGetType(elem_rstr, &rstr_type));
990     CeedCallBackend(CeedOperatorFieldGetVector(op_output_fields[i], &vec));
991     is_active = vec == CEED_VECTOR_ACTIVE;
992     if (is_active) vec = out_vec;
993     // Restrict
994     if (rstr_type == CEED_RESTRICTION_POINTS) {
995       CeedCallBackend(CeedElemRestrictionApplyAtPointsInElement(elem_rstr, e, CEED_TRANSPOSE, impl->e_vecs_out[i], vec, request));
996     } else {
997       CeedCallBackend(CeedElemRestrictionApplyBlock(elem_rstr, e, CEED_TRANSPOSE, impl->e_vecs_out[i], vec, request));
998     }
999     if (!is_active) CeedCallBackend(CeedVectorDestroy(&vec));
1000     CeedCallBackend(CeedElemRestrictionDestroy(&elem_rstr));
1001   }
1002   return CEED_ERROR_SUCCESS;
1003 }
1004 
1005 //------------------------------------------------------------------------------
1006 // Operator Apply
1007 //------------------------------------------------------------------------------
1008 static int CeedOperatorApplyAddAtPoints_Ref(CeedOperator op, CeedVector in_vec, CeedVector out_vec, CeedRequest *request) {
1009   CeedInt             num_points_offset          = 0, num_input_fields, num_output_fields, num_elem;
1010   CeedScalar         *e_data[2 * CEED_FIELD_MAX] = {0};
1011   CeedVector          point_coords               = NULL;
1012   CeedElemRestriction rstr_points                = NULL;
1013   CeedQFunctionField *qf_input_fields, *qf_output_fields;
1014   CeedQFunction       qf;
1015   CeedOperatorField  *op_input_fields, *op_output_fields;
1016   CeedOperator_Ref   *impl;
1017 
1018   CeedCallBackend(CeedOperatorGetData(op, &impl));
1019   CeedCallBackend(CeedOperatorGetNumElements(op, &num_elem));
1020   CeedCallBackend(CeedOperatorGetQFunction(op, &qf));
1021   CeedCallBackend(CeedOperatorGetFields(op, &num_input_fields, &op_input_fields, &num_output_fields, &op_output_fields));
1022   CeedCallBackend(CeedQFunctionGetFields(qf, NULL, &qf_input_fields, NULL, &qf_output_fields));
1023 
1024   // Setup
1025   CeedCallBackend(CeedOperatorSetupAtPoints_Ref(op));
1026 
1027   // Point coordinates
1028   CeedCallBackend(CeedOperatorAtPointsGetPoints(op, &rstr_points, &point_coords));
1029 
1030   // Input Evecs and Restriction
1031   CeedCallBackend(CeedOperatorSetupInputs_Ref(num_input_fields, qf_input_fields, op_input_fields, NULL, true, e_data, impl, request));
1032 
1033   // Loop through elements
1034   for (CeedInt e = 0; e < num_elem; e++) {
1035     CeedInt num_points;
1036 
1037     // Setup points for element
1038     CeedCallBackend(CeedElemRestrictionApplyAtPointsInElement(rstr_points, e, CEED_NOTRANSPOSE, point_coords, impl->point_coords_elem, request));
1039     CeedCallBackend(CeedElemRestrictionGetNumPointsInElement(rstr_points, e, &num_points));
1040 
1041     // Input basis apply
1042     CeedCallBackend(CeedOperatorInputBasisAtPoints_Ref(e, num_points_offset, num_points, qf_input_fields, op_input_fields, num_input_fields, in_vec,
1043                                                        impl->point_coords_elem, false, e_data, impl, request));
1044 
1045     // Q function
1046     if (!impl->is_identity_qf) {
1047       CeedCallBackend(CeedQFunctionApply(qf, num_points, impl->q_vecs_in, impl->q_vecs_out));
1048     }
1049 
1050     // Output basis apply and restriction
1051     CeedCallBackend(CeedOperatorOutputBasisAtPoints_Ref(e, num_points_offset, num_points, qf_output_fields, op_output_fields, num_input_fields,
1052                                                         num_output_fields, impl->apply_add_basis_out, impl->skip_rstr_out, op, out_vec,
1053                                                         impl->point_coords_elem, impl, request));
1054 
1055     num_points_offset += num_points;
1056   }
1057 
1058   // Restore input arrays
1059   CeedCallBackend(CeedOperatorRestoreInputs_Ref(num_input_fields, qf_input_fields, op_input_fields, true, e_data, impl));
1060 
1061   // Cleanup point coordinates
1062   CeedCallBackend(CeedVectorDestroy(&point_coords));
1063   CeedCallBackend(CeedElemRestrictionDestroy(&rstr_points));
1064   return CEED_ERROR_SUCCESS;
1065 }
1066 
1067 //------------------------------------------------------------------------------
1068 // Core code for assembling linear QFunction
1069 //------------------------------------------------------------------------------
1070 static inline int CeedOperatorLinearAssembleQFunctionAtPointsCore_Ref(CeedOperator op, bool build_objects, CeedVector *assembled,
1071                                                                       CeedElemRestriction *rstr, CeedRequest *request) {
1072   Ceed                ceed;
1073   CeedInt             qf_size_in, qf_size_out, max_num_points, num_elem, num_input_fields, num_output_fields, num_points_offset = 0;
1074   CeedScalar         *assembled_array, *e_data_full[2 * CEED_FIELD_MAX] = {NULL};
1075   CeedVector          point_coords = NULL;
1076   CeedQFunctionField *qf_input_fields, *qf_output_fields;
1077   CeedQFunction       qf;
1078   CeedOperatorField  *op_input_fields, *op_output_fields;
1079   CeedOperator_Ref   *impl;
1080   CeedElemRestriction rstr_points = NULL;
1081 
1082   CeedCallBackend(CeedOperatorGetCeed(op, &ceed));
1083   CeedCallBackend(CeedOperatorGetData(op, &impl));
1084   qf_size_in  = impl->qf_size_in;
1085   qf_size_out = impl->qf_size_out;
1086   CeedCallBackend(CeedOperatorGetQFunction(op, &qf));
1087   CeedCallBackend(CeedOperatorGetNumElements(op, &num_elem));
1088   CeedCallBackend(CeedQFunctionGetFields(qf, NULL, &qf_input_fields, NULL, &qf_output_fields));
1089   CeedCallBackend(CeedOperatorGetFields(op, &num_input_fields, &op_input_fields, &num_output_fields, &op_output_fields));
1090 
1091   // Setup
1092   CeedCallBackend(CeedOperatorSetupAtPoints_Ref(op));
1093 
1094   // Check for restriction only operator
1095   CeedCheck(!impl->is_identity_rstr_op, ceed, CEED_ERROR_BACKEND, "Assembling restriction only operators is not supported");
1096 
1097   // Point coordinates
1098   CeedCallBackend(CeedOperatorAtPointsGetPoints(op, &rstr_points, &point_coords));
1099   CeedCallBackend(CeedElemRestrictionGetMaxPointsInElement(rstr_points, &max_num_points));
1100 
1101   // Input Evecs and Restriction
1102   CeedCallBackend(CeedOperatorSetupInputs_Ref(num_input_fields, qf_input_fields, op_input_fields, NULL, true, e_data_full, impl, request));
1103 
1104   // Count number of active input fields
1105   if (qf_size_in == 0) {
1106     for (CeedInt i = 0; i < num_input_fields; i++) {
1107       CeedInt    field_size;
1108       CeedVector vec;
1109 
1110       // Get input vector
1111       CeedCallBackend(CeedOperatorFieldGetVector(op_input_fields[i], &vec));
1112       // Check if active input
1113       if (vec == CEED_VECTOR_ACTIVE) {
1114         // Check that all active inputs are nodal fields
1115         {
1116           CeedElemRestriction elem_rstr;
1117           bool                is_at_points = false;
1118 
1119           CeedCallBackend(CeedOperatorFieldGetElemRestriction(op_input_fields[i], &elem_rstr));
1120           CeedCallBackend(CeedElemRestrictionIsAtPoints(elem_rstr, &is_at_points));
1121           CeedCallBackend(CeedElemRestrictionDestroy(&elem_rstr));
1122           CeedCheck(!is_at_points, ceed, CEED_ERROR_BACKEND, "Cannot assemble QFunction with active input at points");
1123         }
1124         // Get size of active input
1125         CeedCallBackend(CeedQFunctionFieldGetSize(qf_input_fields[i], &field_size));
1126         qf_size_in += field_size;
1127       }
1128       CeedCallBackend(CeedVectorDestroy(&vec));
1129     }
1130     CeedCheck(qf_size_in, ceed, CEED_ERROR_BACKEND, "Cannot assemble QFunction without active inputs and outputs");
1131     impl->qf_size_in = qf_size_in;
1132   }
1133 
1134   // Count number of active output fields
1135   if (qf_size_out == 0) {
1136     for (CeedInt i = 0; i < num_output_fields; i++) {
1137       CeedInt    field_size;
1138       CeedVector vec;
1139 
1140       // Get output vector
1141       CeedCallBackend(CeedOperatorFieldGetVector(op_output_fields[i], &vec));
1142       // Check if active output
1143       if (vec == CEED_VECTOR_ACTIVE) {
1144         // Check that all active inputs are nodal fields
1145         {
1146           CeedElemRestriction elem_rstr;
1147           bool                is_at_points = false;
1148 
1149           CeedCallBackend(CeedOperatorFieldGetElemRestriction(op_output_fields[i], &elem_rstr));
1150           CeedCallBackend(CeedElemRestrictionIsAtPoints(elem_rstr, &is_at_points));
1151           CeedCallBackend(CeedElemRestrictionDestroy(&elem_rstr));
1152           CeedCheck(!is_at_points, ceed, CEED_ERROR_BACKEND, "Cannot assemble QFunction with active input at points");
1153         }
1154         // Get size of active output
1155         CeedCallBackend(CeedQFunctionFieldGetSize(qf_output_fields[i], &field_size));
1156         CeedCallBackend(CeedVectorSetValue(impl->q_vecs_in[i], 0.0));
1157         qf_size_out += field_size;
1158       }
1159       CeedCallBackend(CeedVectorDestroy(&vec));
1160     }
1161     CeedCheck(qf_size_out > 0, ceed, CEED_ERROR_BACKEND, "Cannot assemble QFunction without active inputs and outputs");
1162     impl->qf_size_out = qf_size_out;
1163   }
1164 
1165   // Build objects if needed
1166   if (build_objects) {
1167     CeedInt        num_points_total;
1168     const CeedInt *offsets;
1169 
1170     CeedCallBackend(CeedElemRestrictionGetNumPoints(rstr_points, &num_points_total));
1171 
1172     // Create output restriction (at points)
1173     CeedCallBackend(CeedElemRestrictionGetOffsets(rstr_points, CEED_MEM_HOST, &offsets));
1174     CeedCallBackend(CeedElemRestrictionCreateAtPoints(ceed, num_elem, num_points_total, qf_size_in * qf_size_out,
1175                                                       qf_size_in * qf_size_out * num_points_total, CEED_MEM_HOST, CEED_COPY_VALUES, offsets, rstr));
1176     CeedCallBackend(CeedElemRestrictionRestoreOffsets(rstr_points, &offsets));
1177 
1178     // Create assembled vector
1179     CeedCallBackend(CeedElemRestrictionCreateVector(*rstr, assembled, NULL));
1180   }
1181   // Clear output vector
1182   CeedCallBackend(CeedVectorSetValue(*assembled, 0.0));
1183   CeedCallBackend(CeedVectorGetArray(*assembled, CEED_MEM_HOST, &assembled_array));
1184 
1185   // Loop through elements
1186   for (CeedInt e = 0; e < num_elem; e++) {
1187     CeedInt num_points;
1188 
1189     // Setup points for element
1190     CeedCallBackend(CeedElemRestrictionApplyAtPointsInElement(rstr_points, e, CEED_NOTRANSPOSE, point_coords, impl->point_coords_elem, request));
1191     CeedCallBackend(CeedElemRestrictionGetNumPointsInElement(rstr_points, e, &num_points));
1192 
1193     // Input basis apply
1194     CeedCallBackend(CeedOperatorInputBasisAtPoints_Ref(e, num_points_offset, num_points, qf_input_fields, op_input_fields, num_input_fields, NULL,
1195                                                        impl->point_coords_elem, true, e_data_full, impl, request));
1196 
1197     // Assemble QFunction
1198     for (CeedInt i = 0; i < num_input_fields; i++) {
1199       bool       is_active;
1200       CeedInt    field_size;
1201       CeedVector vec;
1202 
1203       // Get input vector
1204       CeedCallBackend(CeedOperatorFieldGetVector(op_input_fields[i], &vec));
1205       is_active = vec == CEED_VECTOR_ACTIVE;
1206       CeedCallBackend(CeedVectorDestroy(&vec));
1207       // Check if active input
1208       if (!is_active) continue;
1209       // Get size of active input
1210       CeedCallBackend(CeedQFunctionFieldGetSize(qf_input_fields[i], &field_size));
1211       for (CeedInt field = 0; field < field_size; field++) {
1212         // Set current portion of input to 1.0
1213         {
1214           CeedScalar *array;
1215 
1216           CeedCallBackend(CeedVectorGetArray(impl->q_vecs_in[i], CEED_MEM_HOST, &array));
1217           for (CeedInt j = 0; j < num_points; j++) array[field * num_points + j] = 1.0;
1218           CeedCallBackend(CeedVectorRestoreArray(impl->q_vecs_in[i], &array));
1219         }
1220 
1221         if (!impl->is_identity_qf) {
1222           // Set Outputs
1223           for (CeedInt out = 0; out < num_output_fields; out++) {
1224             CeedVector vec;
1225             CeedInt    field_size;
1226 
1227             // Get output vector
1228             CeedCallBackend(CeedOperatorFieldGetVector(op_output_fields[out], &vec));
1229             // Check if active output
1230             if (vec == CEED_VECTOR_ACTIVE) {
1231               CeedCallBackend(CeedVectorSetArray(impl->q_vecs_out[out], CEED_MEM_HOST, CEED_USE_POINTER, assembled_array));
1232               CeedCallBackend(CeedQFunctionFieldGetSize(qf_output_fields[out], &field_size));
1233               assembled_array += field_size * num_points;  // Advance the pointer by the size of the output
1234             }
1235             CeedCallBackend(CeedVectorDestroy(&vec));
1236           }
1237           // Apply QFunction
1238           CeedCallBackend(CeedQFunctionApply(qf, num_points, impl->q_vecs_in, impl->q_vecs_out));
1239         } else {
1240           const CeedScalar *array;
1241           CeedInt           field_size;
1242 
1243           // Copy Identity Outputs
1244           CeedCallBackend(CeedQFunctionFieldGetSize(qf_output_fields[0], &field_size));
1245           CeedCallBackend(CeedVectorGetArrayRead(impl->q_vecs_out[0], CEED_MEM_HOST, &array));
1246           for (CeedInt j = 0; j < field_size * num_points; j++) assembled_array[j] = array[j];
1247           CeedCallBackend(CeedVectorRestoreArrayRead(impl->q_vecs_out[0], &array));
1248           assembled_array += field_size * num_points;
1249         }
1250         // Reset input to 0.0
1251         {
1252           CeedScalar *array;
1253 
1254           CeedCallBackend(CeedVectorGetArray(impl->q_vecs_in[i], CEED_MEM_HOST, &array));
1255           for (CeedInt j = 0; j < num_points; j++) array[field * num_points + j] = 0.0;
1256           CeedCallBackend(CeedVectorRestoreArray(impl->q_vecs_in[i], &array));
1257         }
1258       }
1259     }
1260     num_points_offset += num_points;
1261   }
1262 
1263   // Un-set output Qvecs to prevent accidental overwrite of Assembled
1264   if (!impl->is_identity_qf) {
1265     for (CeedInt out = 0; out < num_output_fields; out++) {
1266       CeedVector vec;
1267 
1268       // Get output vector
1269       CeedCallBackend(CeedOperatorFieldGetVector(op_output_fields[out], &vec));
1270       // Check if active output
1271       if (vec == CEED_VECTOR_ACTIVE && num_elem > 0) {
1272         CeedCallBackend(CeedVectorTakeArray(impl->q_vecs_out[out], CEED_MEM_HOST, NULL));
1273       }
1274       CeedCallBackend(CeedVectorDestroy(&vec));
1275     }
1276   }
1277 
1278   // Restore input arrays
1279   CeedCallBackend(CeedOperatorRestoreInputs_Ref(num_input_fields, qf_input_fields, op_input_fields, true, e_data_full, impl));
1280 
1281   // Restore output
1282   CeedCallBackend(CeedVectorRestoreArray(*assembled, &assembled_array));
1283 
1284   // Cleanup
1285   CeedCallBackend(CeedVectorDestroy(&point_coords));
1286   CeedCallBackend(CeedElemRestrictionDestroy(&rstr_points));
1287   return CEED_ERROR_SUCCESS;
1288 }
1289 
1290 //------------------------------------------------------------------------------
1291 // Assemble Linear QFunction
1292 //------------------------------------------------------------------------------
1293 static int CeedOperatorLinearAssembleQFunctionAtPoints_Ref(CeedOperator op, CeedVector *assembled, CeedElemRestriction *rstr, CeedRequest *request) {
1294   return CeedOperatorLinearAssembleQFunctionAtPointsCore_Ref(op, true, assembled, rstr, request);
1295 }
1296 
1297 //------------------------------------------------------------------------------
1298 // Update Assembled Linear QFunction
1299 //------------------------------------------------------------------------------
1300 static int CeedOperatorLinearAssembleQFunctionAtPointsUpdate_Ref(CeedOperator op, CeedVector assembled, CeedElemRestriction rstr,
1301                                                                  CeedRequest *request) {
1302   return CeedOperatorLinearAssembleQFunctionAtPointsCore_Ref(op, false, &assembled, &rstr, request);
1303 }
1304 
1305 //------------------------------------------------------------------------------
1306 // Assemble Operator Diagonal AtPoints
1307 //------------------------------------------------------------------------------
1308 static int CeedOperatorLinearAssembleAddDiagonalAtPoints_Ref(CeedOperator op, CeedVector assembled, CeedRequest *request) {
1309   CeedInt             num_points_offset = 0, num_input_fields, num_output_fields, num_elem, num_comp_active = 1;
1310   CeedScalar         *e_data[2 * CEED_FIELD_MAX] = {0};
1311   Ceed                ceed;
1312   CeedVector          point_coords = NULL, in_vec, out_vec;
1313   CeedElemRestriction rstr_points  = NULL;
1314   CeedQFunctionField *qf_input_fields, *qf_output_fields;
1315   CeedQFunction       qf;
1316   CeedOperatorField  *op_input_fields, *op_output_fields;
1317   CeedOperator_Ref   *impl;
1318 
1319   CeedCallBackend(CeedOperatorGetData(op, &impl));
1320   CeedCallBackend(CeedOperatorGetNumElements(op, &num_elem));
1321   CeedCallBackend(CeedOperatorGetQFunction(op, &qf));
1322   CeedCallBackend(CeedOperatorGetFields(op, &num_input_fields, &op_input_fields, &num_output_fields, &op_output_fields));
1323   CeedCallBackend(CeedQFunctionGetFields(qf, NULL, &qf_input_fields, NULL, &qf_output_fields));
1324 
1325   // Setup
1326   CeedCallBackend(CeedOperatorSetupAtPoints_Ref(op));
1327 
1328   // Ceed
1329   {
1330     Ceed ceed_parent;
1331 
1332     CeedCallBackend(CeedOperatorGetCeed(op, &ceed));
1333     CeedCallBackend(CeedGetParent(ceed, &ceed_parent));
1334     if (ceed_parent) ceed = ceed_parent;
1335   }
1336 
1337   // Point coordinates
1338   CeedCallBackend(CeedOperatorAtPointsGetPoints(op, &rstr_points, &point_coords));
1339 
1340   // Input and output vectors
1341   {
1342     CeedSize input_size, output_size;
1343 
1344     CeedCallBackend(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size));
1345     CeedCallBackend(CeedVectorCreate(ceed, input_size, &in_vec));
1346     CeedCallBackend(CeedVectorCreate(ceed, output_size, &out_vec));
1347     CeedCallBackend(CeedVectorSetValue(out_vec, 0.0));
1348   }
1349 
1350   // Clear input Qvecs
1351   for (CeedInt i = 0; i < num_input_fields; i++) {
1352     bool       is_active;
1353     CeedVector vec;
1354 
1355     CeedCallBackend(CeedOperatorFieldGetVector(op_input_fields[i], &vec));
1356     is_active = vec == CEED_VECTOR_ACTIVE;
1357     CeedCallBackend(CeedVectorDestroy(&vec));
1358     if (!is_active) continue;
1359     CeedCallBackend(CeedVectorSetValue(impl->q_vecs_in[i], 0.0));
1360   }
1361 
1362   // Input Evecs and Restriction
1363   CeedCallBackend(CeedOperatorSetupInputs_Ref(num_input_fields, qf_input_fields, op_input_fields, NULL, true, e_data, impl, request));
1364 
1365   // Loop through elements
1366   for (CeedInt e = 0; e < num_elem; e++) {
1367     CeedInt num_points, e_vec_size = 0;
1368 
1369     // Setup points for element
1370     CeedCallBackend(CeedElemRestrictionApplyAtPointsInElement(rstr_points, e, CEED_NOTRANSPOSE, point_coords, impl->point_coords_elem, request));
1371     CeedCallBackend(CeedElemRestrictionGetNumPointsInElement(rstr_points, e, &num_points));
1372 
1373     // Input basis apply for non-active bases
1374     CeedCallBackend(CeedOperatorInputBasisAtPoints_Ref(e, num_points_offset, num_points, qf_input_fields, op_input_fields, num_input_fields, in_vec,
1375                                                        impl->point_coords_elem, true, e_data, impl, request));
1376 
1377     // Loop over points on element
1378     for (CeedInt i = 0; i < num_input_fields; i++) {
1379       bool                is_active_at_points = true, is_active;
1380       CeedInt             elem_size_active    = 1;
1381       CeedRestrictionType rstr_type;
1382       CeedVector          vec;
1383       CeedElemRestriction elem_rstr;
1384 
1385       // -- Skip non-active input
1386       CeedCallBackend(CeedOperatorFieldGetVector(op_input_fields[i], &vec));
1387       is_active = vec == CEED_VECTOR_ACTIVE;
1388       CeedCallBackend(CeedVectorDestroy(&vec));
1389       if (!is_active) continue;
1390 
1391       // -- Get active restriction type
1392       CeedCallBackend(CeedOperatorFieldGetElemRestriction(op_input_fields[i], &elem_rstr));
1393       CeedCallBackend(CeedElemRestrictionGetType(elem_rstr, &rstr_type));
1394       is_active_at_points = rstr_type == CEED_RESTRICTION_POINTS;
1395       if (!is_active_at_points) CeedCallBackend(CeedElemRestrictionGetElementSize(elem_rstr, &elem_size_active));
1396       else elem_size_active = num_points;
1397       CeedCallBackend(CeedElemRestrictionGetNumComponents(elem_rstr, &num_comp_active));
1398       CeedCallBackend(CeedElemRestrictionDestroy(&elem_rstr));
1399 
1400       e_vec_size = elem_size_active * num_comp_active;
1401       for (CeedInt s = 0; s < e_vec_size; s++) {
1402         CeedEvalMode eval_mode;
1403         CeedBasis    basis;
1404 
1405         // -- Update unit vector
1406         {
1407           CeedScalar *array;
1408 
1409           if (s == 0) CeedCallBackend(CeedVectorSetValue(impl->e_vecs_in[i], 0.0));
1410           CeedCallBackend(CeedVectorGetArray(impl->e_vecs_in[i], CEED_MEM_HOST, &array));
1411           array[s] = 1.0;
1412           if (s > 0) array[s - 1] = 0.0;
1413           CeedCallBackend(CeedVectorRestoreArray(impl->e_vecs_in[i], &array));
1414         }
1415         // -- Basis action
1416         CeedCallBackend(CeedQFunctionFieldGetEvalMode(qf_input_fields[i], &eval_mode));
1417         switch (eval_mode) {
1418           case CEED_EVAL_NONE:
1419             break;
1420           // Note - these basis eval modes require FEM fields
1421           case CEED_EVAL_INTERP:
1422           case CEED_EVAL_GRAD:
1423           case CEED_EVAL_DIV:
1424           case CEED_EVAL_CURL:
1425             CeedCallBackend(CeedOperatorFieldGetBasis(op_input_fields[i], &basis));
1426             CeedCallBackend(CeedBasisApplyAtPoints(basis, 1, &num_points, CEED_NOTRANSPOSE, eval_mode, impl->point_coords_elem, impl->e_vecs_in[i],
1427                                                    impl->q_vecs_in[i]));
1428             CeedCallBackend(CeedBasisDestroy(&basis));
1429             break;
1430           case CEED_EVAL_WEIGHT:
1431             break;  // No action
1432         }
1433 
1434         // -- Q function
1435         if (!impl->is_identity_qf) {
1436           CeedCallBackend(CeedQFunctionApply(qf, num_points, impl->q_vecs_in, impl->q_vecs_out));
1437         }
1438 
1439         // -- Output basis apply and restriction
1440         CeedCallBackend(CeedOperatorOutputBasisAtPoints_Ref(e, num_points_offset, num_points, qf_output_fields, op_output_fields, num_input_fields,
1441                                                             num_output_fields, impl->apply_add_basis_out, impl->skip_rstr_out, op, out_vec,
1442                                                             impl->point_coords_elem, impl, request));
1443 
1444         // -- Grab diagonal value
1445         for (CeedInt j = 0; j < num_output_fields; j++) {
1446           bool                is_active;
1447           CeedInt             elem_size = 0;
1448           CeedRestrictionType rstr_type;
1449           CeedEvalMode        eval_mode;
1450           CeedVector          vec;
1451           CeedElemRestriction elem_rstr;
1452           CeedBasis           basis;
1453 
1454           // ---- Skip non-active output
1455           CeedCallBackend(CeedOperatorFieldGetVector(op_output_fields[j], &vec));
1456           is_active = vec == CEED_VECTOR_ACTIVE;
1457           CeedCallBackend(CeedVectorDestroy(&vec));
1458           if (!is_active) continue;
1459 
1460           // ---- Check if elem size matches
1461           CeedCallBackend(CeedOperatorFieldGetElemRestriction(op_output_fields[j], &elem_rstr));
1462           CeedCallBackend(CeedElemRestrictionGetType(elem_rstr, &rstr_type));
1463           if (is_active_at_points && rstr_type != CEED_RESTRICTION_POINTS) {
1464             CeedCallBackend(CeedElemRestrictionDestroy(&elem_rstr));
1465             continue;
1466           }
1467           if (rstr_type == CEED_RESTRICTION_POINTS) {
1468             CeedCallBackend(CeedElemRestrictionGetNumPointsInElement(elem_rstr, e, &elem_size));
1469           } else {
1470             CeedCallBackend(CeedElemRestrictionGetElementSize(elem_rstr, &elem_size));
1471           }
1472           {
1473             CeedInt num_comp = 0;
1474 
1475             CeedCallBackend(CeedElemRestrictionGetNumComponents(elem_rstr, &num_comp));
1476             if (e_vec_size != num_comp * elem_size) {
1477               CeedCallBackend(CeedElemRestrictionDestroy(&elem_rstr));
1478               continue;
1479             }
1480           }
1481 
1482           // ---- Basis action
1483           CeedCallBackend(CeedQFunctionFieldGetEvalMode(qf_output_fields[j], &eval_mode));
1484           switch (eval_mode) {
1485             case CEED_EVAL_NONE:
1486               break;  // No action
1487             case CEED_EVAL_INTERP:
1488             case CEED_EVAL_GRAD:
1489             case CEED_EVAL_DIV:
1490             case CEED_EVAL_CURL:
1491               CeedCallBackend(CeedOperatorFieldGetBasis(op_output_fields[j], &basis));
1492               CeedCallBackend(CeedBasisApplyAtPoints(basis, 1, &num_points, CEED_TRANSPOSE, eval_mode, impl->point_coords_elem, impl->q_vecs_out[j],
1493                                                      impl->e_vecs_out[j]));
1494               CeedCallBackend(CeedBasisDestroy(&basis));
1495               break;
1496             // LCOV_EXCL_START
1497             case CEED_EVAL_WEIGHT: {
1498               return CeedError(CeedOperatorReturnCeed(op), CEED_ERROR_BACKEND, "CEED_EVAL_WEIGHT cannot be an output evaluation mode");
1499               // LCOV_EXCL_STOP
1500             }
1501           }
1502           // ---- Update output vector
1503           {
1504             CeedScalar *array, current_value = 0.0;
1505 
1506             CeedCallBackend(CeedVectorGetArray(impl->e_vecs_out[j], CEED_MEM_HOST, &array));
1507             current_value = array[s];
1508             CeedCallBackend(CeedVectorRestoreArray(impl->e_vecs_out[j], &array));
1509             CeedCallBackend(CeedVectorSetValue(impl->e_vecs_out[j], 0.0));
1510             CeedCallBackend(CeedVectorGetArray(impl->e_vecs_out[j], CEED_MEM_HOST, &array));
1511             array[s] = current_value;
1512             CeedCallBackend(CeedVectorRestoreArray(impl->e_vecs_out[j], &array));
1513           }
1514           // ---- Restrict output block
1515           if (rstr_type == CEED_RESTRICTION_POINTS) {
1516             CeedCallBackend(CeedElemRestrictionApplyAtPointsInElement(elem_rstr, e, CEED_TRANSPOSE, impl->e_vecs_out[j], assembled, request));
1517           } else {
1518             CeedCallBackend(CeedElemRestrictionApplyBlock(elem_rstr, e, CEED_TRANSPOSE, impl->e_vecs_out[j], assembled, request));
1519           }
1520           CeedCallBackend(CeedElemRestrictionDestroy(&elem_rstr));
1521         }
1522         // -- Reset unit vector
1523         if (s == e_vec_size - 1) CeedCallBackend(CeedVectorSetValue(impl->q_vecs_in[i], 0.0));
1524       }
1525     }
1526     num_points_offset += num_points;
1527   }
1528 
1529   // Restore input arrays
1530   CeedCallBackend(CeedOperatorRestoreInputs_Ref(num_input_fields, qf_input_fields, op_input_fields, true, e_data, impl));
1531 
1532   // Cleanup
1533   CeedCallBackend(CeedVectorDestroy(&in_vec));
1534   CeedCallBackend(CeedVectorDestroy(&out_vec));
1535   CeedCallBackend(CeedVectorDestroy(&point_coords));
1536   CeedCallBackend(CeedElemRestrictionDestroy(&rstr_points));
1537   return CEED_ERROR_SUCCESS;
1538 }
1539 
1540 //------------------------------------------------------------------------------
1541 // Operator Destroy
1542 //------------------------------------------------------------------------------
1543 static int CeedOperatorDestroy_Ref(CeedOperator op) {
1544   CeedOperator_Ref *impl;
1545 
1546   CeedCallBackend(CeedOperatorGetData(op, &impl));
1547   CeedCallBackend(CeedFree(&impl->skip_rstr_in));
1548   CeedCallBackend(CeedFree(&impl->skip_rstr_out));
1549   CeedCallBackend(CeedFree(&impl->e_data_out_indices));
1550   CeedCallBackend(CeedFree(&impl->apply_add_basis_out));
1551   for (CeedInt i = 0; i < impl->num_inputs + impl->num_outputs; i++) {
1552     CeedCallBackend(CeedVectorDestroy(&impl->e_vecs_full[i]));
1553   }
1554   CeedCallBackend(CeedFree(&impl->e_vecs_full));
1555   CeedCallBackend(CeedFree(&impl->input_states));
1556 
1557   for (CeedInt i = 0; i < impl->num_inputs; i++) {
1558     CeedCallBackend(CeedVectorDestroy(&impl->e_vecs_in[i]));
1559     CeedCallBackend(CeedVectorDestroy(&impl->q_vecs_in[i]));
1560   }
1561   CeedCallBackend(CeedFree(&impl->e_vecs_in));
1562   CeedCallBackend(CeedFree(&impl->q_vecs_in));
1563 
1564   for (CeedInt i = 0; i < impl->num_outputs; i++) {
1565     CeedCallBackend(CeedVectorDestroy(&impl->e_vecs_out[i]));
1566     CeedCallBackend(CeedVectorDestroy(&impl->q_vecs_out[i]));
1567   }
1568   CeedCallBackend(CeedFree(&impl->e_vecs_out));
1569   CeedCallBackend(CeedFree(&impl->q_vecs_out));
1570   CeedCallBackend(CeedVectorDestroy(&impl->point_coords_elem));
1571 
1572   CeedCallBackend(CeedFree(&impl));
1573   return CEED_ERROR_SUCCESS;
1574 }
1575 
1576 //------------------------------------------------------------------------------
1577 // Operator Create
1578 //------------------------------------------------------------------------------
1579 int CeedOperatorCreate_Ref(CeedOperator op) {
1580   Ceed              ceed;
1581   CeedOperator_Ref *impl;
1582 
1583   CeedCallBackend(CeedOperatorGetCeed(op, &ceed));
1584   CeedCallBackend(CeedCalloc(1, &impl));
1585   CeedCallBackend(CeedOperatorSetData(op, impl));
1586   CeedCallBackend(CeedSetBackendFunction(ceed, "Operator", op, "LinearAssembleQFunction", CeedOperatorLinearAssembleQFunction_Ref));
1587   CeedCallBackend(CeedSetBackendFunction(ceed, "Operator", op, "LinearAssembleQFunctionUpdate", CeedOperatorLinearAssembleQFunctionUpdate_Ref));
1588   CeedCallBackend(CeedSetBackendFunction(ceed, "Operator", op, "ApplyAdd", CeedOperatorApplyAdd_Ref));
1589   CeedCallBackend(CeedSetBackendFunction(ceed, "Operator", op, "Destroy", CeedOperatorDestroy_Ref));
1590   return CEED_ERROR_SUCCESS;
1591 }
1592 
1593 //------------------------------------------------------------------------------
1594 // Operator Create At Points
1595 //------------------------------------------------------------------------------
1596 int CeedOperatorCreateAtPoints_Ref(CeedOperator op) {
1597   Ceed              ceed;
1598   CeedOperator_Ref *impl;
1599 
1600   CeedCallBackend(CeedOperatorGetCeed(op, &ceed));
1601   CeedCallBackend(CeedCalloc(1, &impl));
1602   CeedCallBackend(CeedOperatorSetData(op, impl));
1603   CeedCallBackend(CeedSetBackendFunction(ceed, "Operator", op, "LinearAssembleQFunction", CeedOperatorLinearAssembleQFunctionAtPoints_Ref));
1604   CeedCallBackend(
1605       CeedSetBackendFunction(ceed, "Operator", op, "LinearAssembleQFunctionUpdate", CeedOperatorLinearAssembleQFunctionAtPointsUpdate_Ref));
1606   CeedCallBackend(CeedSetBackendFunction(ceed, "Operator", op, "LinearAssembleAddDiagonal", CeedOperatorLinearAssembleAddDiagonalAtPoints_Ref));
1607   CeedCallBackend(CeedSetBackendFunction(ceed, "Operator", op, "ApplyAdd", CeedOperatorApplyAddAtPoints_Ref));
1608   CeedCallBackend(CeedSetBackendFunction(ceed, "Operator", op, "Destroy", CeedOperatorDestroy_Ref));
1609   return CEED_ERROR_SUCCESS;
1610 }
1611 
1612 //------------------------------------------------------------------------------
1613