xref: /libCEED/backends/ref/ceed-ref-operator.c (revision 89430934918f16657aaec3ade4ac0eeaba2051bd)
1 // Copyright (c) 2017-2018, Lawrence Livermore National Security, LLC.
2 // Produced at the Lawrence Livermore National Laboratory. LLNL-CODE-734707.
3 // All Rights reserved. See files LICENSE and NOTICE for details.
4 //
5 // This file is part of CEED, a collection of benchmarks, miniapps, software
6 // libraries and APIs for efficient high-order finite element and spectral
7 // element discretizations for exascale applications. For more information and
8 // source code availability see http://github.com/ceed.
9 //
10 // The CEED research is supported by the Exascale Computing Project 17-SC-20-SC,
11 // a collaborative effort of two U.S. Department of Energy organizations (Office
12 // of Science and the National Nuclear Security Administration) responsible for
13 // the planning and preparation of a capable exascale ecosystem, including
14 // software, applications, hardware, advanced system engineering and early
15 // testbed platforms, in support of the nation's exascale computing imperative.
16 
17 #include <ceed/ceed.h>
18 #include <ceed/backend.h>
19 #include <math.h>
20 #include <stdbool.h>
21 #include <stddef.h>
22 #include <stdint.h>
23 #include "ceed-ref.h"
24 
25 //------------------------------------------------------------------------------
26 // Setup Input/Output Fields
27 //------------------------------------------------------------------------------
28 static int CeedOperatorSetupFields_Ref(CeedQFunction qf, CeedOperator op,
29                                        bool inOrOut,
30                                        CeedVector *full_evecs, CeedVector *e_vecs,
31                                        CeedVector *q_vecs, CeedInt starte,
32                                        CeedInt num_fields, CeedInt Q) {
33   CeedInt dim, ierr, size, P;
34   Ceed ceed;
35   ierr = CeedOperatorGetCeed(op, &ceed); CeedChkBackend(ierr);
36   CeedBasis basis;
37   CeedElemRestriction elem_restr;
38   CeedOperatorField *op_fields;
39   CeedQFunctionField *qf_fields;
40   if (inOrOut) {
41     ierr = CeedOperatorGetFields(op, NULL, &op_fields); CeedChkBackend(ierr);
42     ierr = CeedQFunctionGetFields(qf, NULL, &qf_fields); CeedChkBackend(ierr);
43   } else {
44     ierr = CeedOperatorGetFields(op, &op_fields, NULL); CeedChkBackend(ierr);
45     ierr = CeedQFunctionGetFields(qf, &qf_fields, NULL); CeedChkBackend(ierr);
46   }
47 
48   // Loop over fields
49   for (CeedInt i=0; i<num_fields; i++) {
50     CeedEvalMode eval_mode;
51     ierr = CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode);
52     CeedChkBackend(ierr);
53 
54     if (eval_mode != CEED_EVAL_WEIGHT) {
55       ierr = CeedOperatorFieldGetElemRestriction(op_fields[i], &elem_restr);
56       CeedChkBackend(ierr);
57       ierr = CeedElemRestrictionCreateVector(elem_restr, NULL,
58                                              &full_evecs[i+starte]);
59       CeedChkBackend(ierr);
60     }
61 
62     switch(eval_mode) {
63     case CEED_EVAL_NONE:
64       ierr = CeedQFunctionFieldGetSize(qf_fields[i], &size); CeedChkBackend(ierr);
65       ierr = CeedVectorCreate(ceed, Q*size, &q_vecs[i]); CeedChkBackend(ierr);
66       break;
67     case CEED_EVAL_INTERP:
68       ierr = CeedQFunctionFieldGetSize(qf_fields[i], &size); CeedChkBackend(ierr);
69       ierr = CeedElemRestrictionGetElementSize(elem_restr, &P);
70       CeedChkBackend(ierr);
71       ierr = CeedVectorCreate(ceed, P*size, &e_vecs[i]); CeedChkBackend(ierr);
72       ierr = CeedVectorCreate(ceed, Q*size, &q_vecs[i]); CeedChkBackend(ierr);
73       break;
74     case CEED_EVAL_GRAD:
75       ierr = CeedOperatorFieldGetBasis(op_fields[i], &basis); CeedChkBackend(ierr);
76       ierr = CeedQFunctionFieldGetSize(qf_fields[i], &size); CeedChkBackend(ierr);
77       ierr = CeedBasisGetDimension(basis, &dim); CeedChkBackend(ierr);
78       ierr = CeedElemRestrictionGetElementSize(elem_restr, &P);
79       CeedChkBackend(ierr);
80       ierr = CeedVectorCreate(ceed, P*size/dim, &e_vecs[i]); CeedChkBackend(ierr);
81       ierr = CeedVectorCreate(ceed, Q*size, &q_vecs[i]); CeedChkBackend(ierr);
82       break;
83     case CEED_EVAL_WEIGHT: // Only on input fields
84       ierr = CeedOperatorFieldGetBasis(op_fields[i], &basis); CeedChkBackend(ierr);
85       ierr = CeedVectorCreate(ceed, Q, &q_vecs[i]); CeedChkBackend(ierr);
86       ierr = CeedBasisApply(basis, 1, CEED_NOTRANSPOSE, CEED_EVAL_WEIGHT,
87                             CEED_VECTOR_NONE, q_vecs[i]); CeedChkBackend(ierr);
88       break;
89     case CEED_EVAL_DIV:
90       break; // Not implemented
91     case CEED_EVAL_CURL:
92       break; // Not implemented
93     }
94   }
95   return CEED_ERROR_SUCCESS;
96 }
97 
98 //------------------------------------------------------------------------------
99 // Setup Operator
100 //------------------------------------------------------------------------------/*
101 static int CeedOperatorSetup_Ref(CeedOperator op) {
102   int ierr;
103   bool setup_done;
104   ierr = CeedOperatorIsSetupDone(op, &setup_done); CeedChkBackend(ierr);
105   if (setup_done) return CEED_ERROR_SUCCESS;
106   Ceed ceed;
107   ierr = CeedOperatorGetCeed(op, &ceed); CeedChkBackend(ierr);
108   CeedOperator_Ref *impl;
109   ierr = CeedOperatorGetData(op, &impl); CeedChkBackend(ierr);
110   CeedQFunction qf;
111   ierr = CeedOperatorGetQFunction(op, &qf); CeedChkBackend(ierr);
112   CeedInt Q, num_input_fields, num_output_fields;
113   ierr = CeedOperatorGetNumQuadraturePoints(op, &Q); CeedChkBackend(ierr);
114   ierr = CeedQFunctionIsIdentity(qf, &impl->is_identity_qf); CeedChkBackend(ierr);
115   ierr = CeedQFunctionGetNumArgs(qf, &num_input_fields, &num_output_fields);
116   CeedChkBackend(ierr);
117   CeedOperatorField *op_input_fields, *op_output_fields;
118   ierr = CeedOperatorGetFields(op, &op_input_fields, &op_output_fields);
119   CeedChkBackend(ierr);
120   CeedQFunctionField *qf_input_fields, *qf_output_fields;
121   ierr = CeedQFunctionGetFields(qf, &qf_input_fields, &qf_output_fields);
122   CeedChkBackend(ierr);
123 
124   // Allocate
125   ierr = CeedCalloc(num_input_fields + num_output_fields, &impl->e_vecs);
126   CeedChkBackend(ierr);
127   ierr = CeedCalloc(num_input_fields + num_output_fields, &impl->e_data);
128   CeedChkBackend(ierr);
129 
130   ierr = CeedCalloc(16, &impl->input_state); CeedChkBackend(ierr);
131   ierr = CeedCalloc(16, &impl->e_vecs_in); CeedChkBackend(ierr);
132   ierr = CeedCalloc(16, &impl->e_vecs_out); CeedChkBackend(ierr);
133   ierr = CeedCalloc(16, &impl->q_vecs_in); CeedChkBackend(ierr);
134   ierr = CeedCalloc(16, &impl->q_vecs_out); CeedChkBackend(ierr);
135 
136   impl->num_e_vecs_in = num_input_fields;
137   impl->num_e_vecs_out = num_output_fields;
138 
139   // Set up infield and outfield e_vecs and q_vecs
140   // Infields
141   ierr = CeedOperatorSetupFields_Ref(qf, op, 0, impl->e_vecs,
142                                      impl->e_vecs_in, impl->q_vecs_in, 0,
143                                      num_input_fields, Q);
144   CeedChkBackend(ierr);
145   // Outfields
146   ierr = CeedOperatorSetupFields_Ref(qf, op, 1, impl->e_vecs,
147                                      impl->e_vecs_out, impl->q_vecs_out,
148                                      num_input_fields, num_output_fields, Q);
149   CeedChkBackend(ierr);
150 
151   // Identity QFunctions
152   if (impl->is_identity_qf) {
153     CeedEvalMode in_mode, out_mode;
154     CeedQFunctionField *in_fields, *out_fields;
155     ierr = CeedQFunctionGetFields(qf, &in_fields, &out_fields);
156     CeedChkBackend(ierr);
157     ierr = CeedQFunctionFieldGetEvalMode(in_fields[0], &in_mode);
158     CeedChkBackend(ierr);
159     ierr = CeedQFunctionFieldGetEvalMode(out_fields[0], &out_mode);
160     CeedChkBackend(ierr);
161 
162     if (in_mode == CEED_EVAL_NONE && out_mode == CEED_EVAL_NONE) {
163       impl->is_identity_restr_op = true;
164     } else {
165       ierr = CeedVectorDestroy(&impl->q_vecs_out[0]); CeedChkBackend(ierr);
166       impl->q_vecs_out[0] = impl->q_vecs_in[0];
167       ierr = CeedVectorAddReference(impl->q_vecs_in[0]); CeedChkBackend(ierr);
168     }
169   }
170 
171   ierr = CeedOperatorSetSetupDone(op); CeedChkBackend(ierr);
172 
173   return CEED_ERROR_SUCCESS;
174 }
175 
176 //------------------------------------------------------------------------------
177 // Setup Operator Inputs
178 //------------------------------------------------------------------------------
179 static inline int CeedOperatorSetupInputs_Ref(CeedInt num_input_fields,
180     CeedQFunctionField *qf_input_fields, CeedOperatorField *op_input_fields,
181     CeedVector in_vec, const bool skip_active, CeedOperator_Ref *impl,
182     CeedRequest *request) {
183   CeedInt ierr;
184   CeedEvalMode eval_mode;
185   CeedVector vec;
186   CeedElemRestriction elem_restr;
187   uint64_t state;
188 
189   for (CeedInt i=0; i<num_input_fields; i++) {
190     // Get input vector
191     ierr = CeedOperatorFieldGetVector(op_input_fields[i], &vec);
192     CeedChkBackend(ierr);
193     if (vec == CEED_VECTOR_ACTIVE) {
194       if (skip_active)
195         continue;
196       else
197         vec = in_vec;
198     }
199 
200     ierr = CeedQFunctionFieldGetEvalMode(qf_input_fields[i], &eval_mode);
201     CeedChkBackend(ierr);
202     // Restrict and Evec
203     if (eval_mode == CEED_EVAL_WEIGHT) { // Skip
204     } else {
205       // Restrict
206       ierr = CeedVectorGetState(vec, &state); CeedChkBackend(ierr);
207       // Skip restriction if input is unchanged
208       if (state != impl->input_state[i] || vec == in_vec) {
209         ierr = CeedOperatorFieldGetElemRestriction(op_input_fields[i], &elem_restr);
210         CeedChkBackend(ierr);
211         ierr = CeedElemRestrictionApply(elem_restr, CEED_NOTRANSPOSE, vec,
212                                         impl->e_vecs[i], request); CeedChkBackend(ierr);
213         impl->input_state[i] = state;
214       }
215       // Get evec
216       ierr = CeedVectorGetArrayRead(impl->e_vecs[i], CEED_MEM_HOST,
217                                     (const CeedScalar **) &impl->e_data[i]);
218       CeedChkBackend(ierr);
219     }
220   }
221   return CEED_ERROR_SUCCESS;
222 }
223 
224 //------------------------------------------------------------------------------
225 // Input Basis Action
226 //------------------------------------------------------------------------------
227 static inline int CeedOperatorInputBasis_Ref(CeedInt e, CeedInt Q,
228     CeedQFunctionField *qf_input_fields, CeedOperatorField *op_input_fields,
229     CeedInt num_input_fields, const bool skip_active, CeedOperator_Ref *impl) {
230   CeedInt ierr;
231   CeedInt dim, elem_size, size;
232   CeedElemRestriction elem_restr;
233   CeedEvalMode eval_mode;
234   CeedBasis basis;
235 
236   for (CeedInt i=0; i<num_input_fields; i++) {
237     // Skip active input
238     if (skip_active) {
239       CeedVector vec;
240       ierr = CeedOperatorFieldGetVector(op_input_fields[i], &vec);
241       CeedChkBackend(ierr);
242       if (vec == CEED_VECTOR_ACTIVE)
243         continue;
244     }
245     // Get elem_size, eval_mode, size
246     ierr = CeedOperatorFieldGetElemRestriction(op_input_fields[i], &elem_restr);
247     CeedChkBackend(ierr);
248     ierr = CeedElemRestrictionGetElementSize(elem_restr, &elem_size);
249     CeedChkBackend(ierr);
250     ierr = CeedQFunctionFieldGetEvalMode(qf_input_fields[i], &eval_mode);
251     CeedChkBackend(ierr);
252     ierr = CeedQFunctionFieldGetSize(qf_input_fields[i], &size);
253     CeedChkBackend(ierr);
254     // Basis action
255     switch(eval_mode) {
256     case CEED_EVAL_NONE:
257       ierr = CeedVectorSetArray(impl->q_vecs_in[i], CEED_MEM_HOST,
258                                 CEED_USE_POINTER, &impl->e_data[i][e*Q*size]);
259       CeedChkBackend(ierr);
260       break;
261     case CEED_EVAL_INTERP:
262       ierr = CeedOperatorFieldGetBasis(op_input_fields[i], &basis);
263       CeedChkBackend(ierr);
264       ierr = CeedVectorSetArray(impl->e_vecs_in[i], CEED_MEM_HOST,
265                                 CEED_USE_POINTER, &impl->e_data[i][e*elem_size*size]);
266       CeedChkBackend(ierr);
267       ierr = CeedBasisApply(basis, 1, CEED_NOTRANSPOSE, CEED_EVAL_INTERP,
268                             impl->e_vecs_in[i], impl->q_vecs_in[i]); CeedChkBackend(ierr);
269       break;
270     case CEED_EVAL_GRAD:
271       ierr = CeedOperatorFieldGetBasis(op_input_fields[i], &basis);
272       CeedChkBackend(ierr);
273       ierr = CeedBasisGetDimension(basis, &dim); CeedChkBackend(ierr);
274       ierr = CeedVectorSetArray(impl->e_vecs_in[i], CEED_MEM_HOST,
275                                 CEED_USE_POINTER, &impl->e_data[i][e*elem_size*size/dim]);
276       CeedChkBackend(ierr);
277       ierr = CeedBasisApply(basis, 1, CEED_NOTRANSPOSE,
278                             CEED_EVAL_GRAD, impl->e_vecs_in[i],
279                             impl->q_vecs_in[i]); CeedChkBackend(ierr);
280       break;
281     case CEED_EVAL_WEIGHT:
282       break;  // No action
283     // LCOV_EXCL_START
284     case CEED_EVAL_DIV:
285     case CEED_EVAL_CURL: {
286       ierr = CeedOperatorFieldGetBasis(op_input_fields[i], &basis);
287       CeedChkBackend(ierr);
288       Ceed ceed;
289       ierr = CeedBasisGetCeed(basis, &ceed); CeedChkBackend(ierr);
290       return CeedError(ceed, CEED_ERROR_BACKEND,
291                        "Ceed evaluation mode not implemented");
292       // LCOV_EXCL_STOP
293     }
294     }
295   }
296   return CEED_ERROR_SUCCESS;
297 }
298 
299 //------------------------------------------------------------------------------
300 // Output Basis Action
301 //------------------------------------------------------------------------------
302 static inline int CeedOperatorOutputBasis_Ref(CeedInt e, CeedInt Q,
303     CeedQFunctionField *qf_output_fields, CeedOperatorField *op_output_fields,
304     CeedInt num_input_fields, CeedInt num_output_fields, CeedOperator op,
305     CeedOperator_Ref *impl) {
306   CeedInt ierr;
307   CeedInt dim, elem_size, size;
308   CeedElemRestriction elem_restr;
309   CeedEvalMode eval_mode;
310   CeedBasis basis;
311 
312   for (CeedInt i=0; i<num_output_fields; i++) {
313     // Get elem_size, eval_mode, size
314     ierr = CeedOperatorFieldGetElemRestriction(op_output_fields[i], &elem_restr);
315     CeedChkBackend(ierr);
316     ierr = CeedElemRestrictionGetElementSize(elem_restr, &elem_size);
317     CeedChkBackend(ierr);
318     ierr = CeedQFunctionFieldGetEvalMode(qf_output_fields[i], &eval_mode);
319     CeedChkBackend(ierr);
320     ierr = CeedQFunctionFieldGetSize(qf_output_fields[i], &size);
321     CeedChkBackend(ierr);
322     // Basis action
323     switch(eval_mode) {
324     case CEED_EVAL_NONE:
325       break; // No action
326     case CEED_EVAL_INTERP:
327       ierr = CeedOperatorFieldGetBasis(op_output_fields[i], &basis);
328       CeedChkBackend(ierr);
329       ierr = CeedVectorSetArray(impl->e_vecs_out[i], CEED_MEM_HOST,
330                                 CEED_USE_POINTER,
331                                 &impl->e_data[i + num_input_fields][e*elem_size*size]);
332       CeedChkBackend(ierr);
333       ierr = CeedBasisApply(basis, 1, CEED_TRANSPOSE,
334                             CEED_EVAL_INTERP, impl->q_vecs_out[i],
335                             impl->e_vecs_out[i]); CeedChkBackend(ierr);
336       break;
337     case CEED_EVAL_GRAD:
338       ierr = CeedOperatorFieldGetBasis(op_output_fields[i], &basis);
339       CeedChkBackend(ierr);
340       ierr = CeedBasisGetDimension(basis, &dim); CeedChkBackend(ierr);
341       ierr = CeedVectorSetArray(impl->e_vecs_out[i], CEED_MEM_HOST,
342                                 CEED_USE_POINTER,
343                                 &impl->e_data[i + num_input_fields][e*elem_size*size/dim]);
344       CeedChkBackend(ierr);
345       ierr = CeedBasisApply(basis, 1, CEED_TRANSPOSE,
346                             CEED_EVAL_GRAD, impl->q_vecs_out[i],
347                             impl->e_vecs_out[i]); CeedChkBackend(ierr);
348       break;
349     // LCOV_EXCL_START
350     case CEED_EVAL_WEIGHT: {
351       Ceed ceed;
352       ierr = CeedOperatorGetCeed(op, &ceed); CeedChkBackend(ierr);
353       return CeedError(ceed, CEED_ERROR_BACKEND,
354                        "CEED_EVAL_WEIGHT cannot be an output "
355                        "evaluation mode");
356     }
357     case CEED_EVAL_DIV:
358     case CEED_EVAL_CURL: {
359       Ceed ceed;
360       ierr = CeedOperatorGetCeed(op, &ceed); CeedChkBackend(ierr);
361       return CeedError(ceed, CEED_ERROR_BACKEND,
362                        "Ceed evaluation mode not implemented");
363       // LCOV_EXCL_STOP
364     }
365     }
366   }
367   return CEED_ERROR_SUCCESS;
368 }
369 
370 //------------------------------------------------------------------------------
371 // Restore Input Vectors
372 //------------------------------------------------------------------------------
373 static inline int CeedOperatorRestoreInputs_Ref(CeedInt num_input_fields,
374     CeedQFunctionField *qf_input_fields, CeedOperatorField *op_input_fields,
375     const bool skip_active, CeedOperator_Ref *impl) {
376   CeedInt ierr;
377   CeedEvalMode eval_mode;
378 
379   for (CeedInt i=0; i<num_input_fields; i++) {
380     // Skip active inputs
381     if (skip_active) {
382       CeedVector vec;
383       ierr = CeedOperatorFieldGetVector(op_input_fields[i], &vec);
384       CeedChkBackend(ierr);
385       if (vec == CEED_VECTOR_ACTIVE)
386         continue;
387     }
388     // Restore input
389     ierr = CeedQFunctionFieldGetEvalMode(qf_input_fields[i], &eval_mode);
390     CeedChkBackend(ierr);
391     if (eval_mode == CEED_EVAL_WEIGHT) { // Skip
392     } else {
393       ierr = CeedVectorRestoreArrayRead(impl->e_vecs[i],
394                                         (const CeedScalar **) &impl->e_data[i]);
395       CeedChkBackend(ierr);
396     }
397   }
398   return CEED_ERROR_SUCCESS;
399 }
400 
401 //------------------------------------------------------------------------------
402 // Operator Apply
403 //------------------------------------------------------------------------------
404 static int CeedOperatorApplyAdd_Ref(CeedOperator op, CeedVector in_vec,
405                                     CeedVector out_vec, CeedRequest *request) {
406   int ierr;
407   CeedOperator_Ref *impl;
408   ierr = CeedOperatorGetData(op, &impl); CeedChkBackend(ierr);
409   CeedQFunction qf;
410   ierr = CeedOperatorGetQFunction(op, &qf); CeedChkBackend(ierr);
411   CeedInt Q, num_elem, num_input_fields, num_output_fields, size;
412   ierr = CeedOperatorGetNumQuadraturePoints(op, &Q); CeedChkBackend(ierr);
413   ierr = CeedOperatorGetNumElements(op, &num_elem); CeedChkBackend(ierr);
414   ierr= CeedQFunctionGetNumArgs(qf, &num_input_fields, &num_output_fields);
415   CeedChkBackend(ierr);
416   CeedOperatorField *op_input_fields, *op_output_fields;
417   ierr = CeedOperatorGetFields(op, &op_input_fields, &op_output_fields);
418   CeedChkBackend(ierr);
419   CeedQFunctionField *qf_input_fields, *qf_output_fields;
420   ierr = CeedQFunctionGetFields(qf, &qf_input_fields, &qf_output_fields);
421   CeedChkBackend(ierr);
422   CeedEvalMode eval_mode;
423   CeedVector vec;
424   CeedElemRestriction elem_restr;
425 
426   // Setup
427   ierr = CeedOperatorSetup_Ref(op); CeedChkBackend(ierr);
428 
429   // Restriction only operator
430   if (impl->is_identity_restr_op) {
431     ierr = CeedOperatorFieldGetElemRestriction(op_input_fields[0], &elem_restr);
432     CeedChkBackend(ierr);
433     ierr = CeedElemRestrictionApply(elem_restr, CEED_NOTRANSPOSE, in_vec,
434                                     impl->e_vecs[0], request); CeedChkBackend(ierr);
435     ierr = CeedOperatorFieldGetElemRestriction(op_output_fields[0], &elem_restr);
436     CeedChkBackend(ierr);
437     ierr = CeedElemRestrictionApply(elem_restr, CEED_TRANSPOSE, impl->e_vecs[0],
438                                     out_vec, request); CeedChkBackend(ierr);
439     return CEED_ERROR_SUCCESS;
440   }
441 
442   // Input Evecs and Restriction
443   ierr = CeedOperatorSetupInputs_Ref(num_input_fields, qf_input_fields,
444                                      op_input_fields, in_vec, false, impl,
445                                      request); CeedChkBackend(ierr);
446 
447   // Output Evecs
448   for (CeedInt i=0; i<num_output_fields; i++) {
449     ierr = CeedVectorGetArray(impl->e_vecs[i+impl->num_e_vecs_in], CEED_MEM_HOST,
450                               &impl->e_data[i + num_input_fields]); CeedChkBackend(ierr);
451   }
452 
453   // Loop through elements
454   for (CeedInt e=0; e<num_elem; e++) {
455     // Output pointers
456     for (CeedInt i=0; i<num_output_fields; i++) {
457       ierr = CeedQFunctionFieldGetEvalMode(qf_output_fields[i], &eval_mode);
458       CeedChkBackend(ierr);
459       if (eval_mode == CEED_EVAL_NONE) {
460         ierr = CeedQFunctionFieldGetSize(qf_output_fields[i], &size);
461         CeedChkBackend(ierr);
462         ierr = CeedVectorSetArray(impl->q_vecs_out[i], CEED_MEM_HOST,
463                                   CEED_USE_POINTER,
464                                   &impl->e_data[i + num_input_fields][e*Q*size]);
465         CeedChkBackend(ierr);
466       }
467     }
468 
469     // Input basis apply
470     ierr = CeedOperatorInputBasis_Ref(e, Q, qf_input_fields, op_input_fields,
471                                       num_input_fields, false, impl);
472     CeedChkBackend(ierr);
473 
474     // Q function
475     if (!impl->is_identity_qf) {
476       ierr = CeedQFunctionApply(qf, Q, impl->q_vecs_in, impl->q_vecs_out);
477       CeedChkBackend(ierr);
478     }
479 
480     // Output basis apply
481     ierr = CeedOperatorOutputBasis_Ref(e, Q, qf_output_fields, op_output_fields,
482                                        num_input_fields, num_output_fields, op, impl);
483     CeedChkBackend(ierr);
484   }
485 
486   // Output restriction
487   for (CeedInt i=0; i<num_output_fields; i++) {
488     // Restore Evec
489     ierr = CeedVectorRestoreArray(impl->e_vecs[i+impl->num_e_vecs_in],
490                                   &impl->e_data[i + num_input_fields]);
491     CeedChkBackend(ierr);
492     // Get output vector
493     ierr = CeedOperatorFieldGetVector(op_output_fields[i], &vec);
494     CeedChkBackend(ierr);
495     // Active
496     if (vec == CEED_VECTOR_ACTIVE)
497       vec = out_vec;
498     // Restrict
499     ierr = CeedOperatorFieldGetElemRestriction(op_output_fields[i], &elem_restr);
500     CeedChkBackend(ierr);
501     ierr = CeedElemRestrictionApply(elem_restr, CEED_TRANSPOSE,
502                                     impl->e_vecs[i+impl->num_e_vecs_in], vec, request);
503     CeedChkBackend(ierr);
504   }
505 
506   // Restore input arrays
507   ierr = CeedOperatorRestoreInputs_Ref(num_input_fields, qf_input_fields,
508                                        op_input_fields, false, impl);
509   CeedChkBackend(ierr);
510 
511   return CEED_ERROR_SUCCESS;
512 }
513 
514 //------------------------------------------------------------------------------
515 // Assemble Linear QFunction
516 //------------------------------------------------------------------------------
517 static int CeedOperatorLinearAssembleQFunction_Ref(CeedOperator op,
518     CeedVector *assembled, CeedElemRestriction *rstr, CeedRequest *request) {
519   int ierr;
520   CeedOperator_Ref *impl;
521   ierr = CeedOperatorGetData(op, &impl); CeedChkBackend(ierr);
522   CeedQFunction qf;
523   ierr = CeedOperatorGetQFunction(op, &qf); CeedChkBackend(ierr);
524   CeedInt Q, num_elem, num_input_fields, num_output_fields, size;
525   ierr = CeedOperatorGetNumQuadraturePoints(op, &Q); CeedChkBackend(ierr);
526   ierr = CeedOperatorGetNumElements(op, &num_elem); CeedChkBackend(ierr);
527   ierr= CeedQFunctionGetNumArgs(qf, &num_input_fields, &num_output_fields);
528   CeedChkBackend(ierr);
529   CeedOperatorField *op_input_fields, *op_output_fields;
530   ierr = CeedOperatorGetFields(op, &op_input_fields, &op_output_fields);
531   CeedChkBackend(ierr);
532   CeedQFunctionField *qf_input_fields, *qf_output_fields;
533   ierr = CeedQFunctionGetFields(qf, &qf_input_fields, &qf_output_fields);
534   CeedChkBackend(ierr);
535   CeedVector vec;
536   CeedInt num_active_in = 0, num_active_out = 0;
537   CeedVector *active_in = NULL;
538   CeedScalar *a, *tmp;
539   Ceed ceed, ceed_parent;
540   ierr = CeedOperatorGetCeed(op, &ceed); CeedChkBackend(ierr);
541   ierr = CeedGetOperatorFallbackParentCeed(ceed, &ceed_parent);
542   CeedChkBackend(ierr);
543   ceed_parent = ceed_parent ? ceed_parent : ceed;
544 
545   // Setup
546   ierr = CeedOperatorSetup_Ref(op); CeedChkBackend(ierr);
547 
548   // Check for identity
549   if (impl->is_identity_qf)
550     // LCOV_EXCL_START
551     return CeedError(ceed, CEED_ERROR_BACKEND,
552                      "Assembling identity QFunctions not supported");
553   // LCOV_EXCL_STOP
554 
555   // Input Evecs and Restriction
556   ierr = CeedOperatorSetupInputs_Ref(num_input_fields, qf_input_fields,
557                                      op_input_fields, NULL, true, impl, request);
558   CeedChkBackend(ierr);
559 
560   // Count number of active input fields
561   for (CeedInt i=0; i<num_input_fields; i++) {
562     // Get input vector
563     ierr = CeedOperatorFieldGetVector(op_input_fields[i], &vec);
564     CeedChkBackend(ierr);
565     // Check if active input
566     if (vec == CEED_VECTOR_ACTIVE) {
567       ierr = CeedQFunctionFieldGetSize(qf_input_fields[i], &size);
568       CeedChkBackend(ierr);
569       ierr = CeedVectorSetValue(impl->q_vecs_in[i], 0.0); CeedChkBackend(ierr);
570       ierr = CeedVectorGetArray(impl->q_vecs_in[i], CEED_MEM_HOST, &tmp);
571       CeedChkBackend(ierr);
572       ierr = CeedRealloc(num_active_in + size, &active_in); CeedChkBackend(ierr);
573       for (CeedInt field=0; field<size; field++) {
574         ierr = CeedVectorCreate(ceed, Q, &active_in[num_active_in+field]);
575         CeedChkBackend(ierr);
576         ierr = CeedVectorSetArray(active_in[num_active_in+field], CEED_MEM_HOST,
577                                   CEED_USE_POINTER, &tmp[field*Q]);
578         CeedChkBackend(ierr);
579       }
580       num_active_in += size;
581       ierr = CeedVectorRestoreArray(impl->q_vecs_in[i], &tmp); CeedChkBackend(ierr);
582     }
583   }
584 
585   // Count number of active output fields
586   for (CeedInt i=0; i<num_output_fields; i++) {
587     // Get output vector
588     ierr = CeedOperatorFieldGetVector(op_output_fields[i], &vec);
589     CeedChkBackend(ierr);
590     // Check if active output
591     if (vec == CEED_VECTOR_ACTIVE) {
592       ierr = CeedQFunctionFieldGetSize(qf_output_fields[i], &size);
593       CeedChkBackend(ierr);
594       num_active_out += size;
595     }
596   }
597 
598   // Check sizes
599   if (!num_active_in || !num_active_out)
600     // LCOV_EXCL_START
601     return CeedError(ceed, CEED_ERROR_BACKEND,
602                      "Cannot assemble QFunction without active inputs "
603                      "and outputs");
604   // LCOV_EXCL_STOP
605 
606   // Create output restriction
607   CeedInt strides[3] = {1, Q, num_active_in*num_active_out*Q}; /* *NOPAD* */
608   ierr = CeedElemRestrictionCreateStrided(ceed_parent, num_elem, Q,
609                                           num_active_in*num_active_out,
610                                           num_active_in*num_active_out*num_elem*Q,
611                                           strides, rstr); CeedChkBackend(ierr);
612   // Create assembled vector
613   ierr = CeedVectorCreate(ceed_parent, num_elem*Q*num_active_in*num_active_out,
614                           assembled); CeedChkBackend(ierr);
615   ierr = CeedVectorSetValue(*assembled, 0.0); CeedChkBackend(ierr);
616   ierr = CeedVectorGetArray(*assembled, CEED_MEM_HOST, &a); CeedChkBackend(ierr);
617 
618   // Loop through elements
619   for (CeedInt e=0; e<num_elem; e++) {
620     // Input basis apply
621     ierr = CeedOperatorInputBasis_Ref(e, Q, qf_input_fields, op_input_fields,
622                                       num_input_fields, true, impl);
623     CeedChkBackend(ierr);
624 
625     // Assemble QFunction
626     for (CeedInt in=0; in<num_active_in; in++) {
627       // Set Inputs
628       ierr = CeedVectorSetValue(active_in[in], 1.0); CeedChkBackend(ierr);
629       if (num_active_in > 1) {
630         ierr = CeedVectorSetValue(active_in[(in+num_active_in-1)%num_active_in],
631                                   0.0); CeedChkBackend(ierr);
632       }
633       // Set Outputs
634       for (CeedInt out=0; out<num_output_fields; out++) {
635         // Get output vector
636         ierr = CeedOperatorFieldGetVector(op_output_fields[out], &vec);
637         CeedChkBackend(ierr);
638         // Check if active output
639         if (vec == CEED_VECTOR_ACTIVE) {
640           CeedVectorSetArray(impl->q_vecs_out[out], CEED_MEM_HOST,
641                              CEED_USE_POINTER, a); CeedChkBackend(ierr);
642           ierr = CeedQFunctionFieldGetSize(qf_output_fields[out], &size);
643           CeedChkBackend(ierr);
644           a += size*Q; // Advance the pointer by the size of the output
645         }
646       }
647       // Apply QFunction
648       ierr = CeedQFunctionApply(qf, Q, impl->q_vecs_in, impl->q_vecs_out);
649       CeedChkBackend(ierr);
650     }
651   }
652 
653   // Un-set output Qvecs to prevent accidental overwrite of Assembled
654   for (CeedInt out=0; out<num_output_fields; out++) {
655     // Get output vector
656     ierr = CeedOperatorFieldGetVector(op_output_fields[out], &vec);
657     CeedChkBackend(ierr);
658     // Check if active output
659     if (vec == CEED_VECTOR_ACTIVE) {
660       CeedVectorTakeArray(impl->q_vecs_out[out], CEED_MEM_HOST, NULL);
661       CeedChkBackend(ierr);
662     }
663   }
664 
665   // Restore input arrays
666   ierr = CeedOperatorRestoreInputs_Ref(num_input_fields, qf_input_fields,
667                                        op_input_fields, true, impl);
668   CeedChkBackend(ierr);
669 
670   // Restore output
671   ierr = CeedVectorRestoreArray(*assembled, &a); CeedChkBackend(ierr);
672 
673   // Cleanup
674   for (CeedInt i=0; i<num_active_in; i++) {
675     ierr = CeedVectorDestroy(&active_in[i]); CeedChkBackend(ierr);
676   }
677   ierr = CeedFree(&active_in); CeedChkBackend(ierr);
678 
679   return CEED_ERROR_SUCCESS;
680 }
681 
682 //------------------------------------------------------------------------------
683 // Get Basis Emode Pointer
684 //------------------------------------------------------------------------------
685 static inline void CeedOperatorGetBasisPointer_Ref(const CeedScalar **basis_ptr,
686     CeedEvalMode eval_mode, const CeedScalar *identity, const CeedScalar *interp,
687     const CeedScalar *grad) {
688   switch (eval_mode) {
689   case CEED_EVAL_NONE:
690     *basis_ptr = identity;
691     break;
692   case CEED_EVAL_INTERP:
693     *basis_ptr = interp;
694     break;
695   case CEED_EVAL_GRAD:
696     *basis_ptr = grad;
697     break;
698   case CEED_EVAL_WEIGHT:
699   case CEED_EVAL_DIV:
700   case CEED_EVAL_CURL:
701     break; // Caught by QF Assembly
702   }
703 }
704 
705 //------------------------------------------------------------------------------
706 // Create point block restriction
707 //------------------------------------------------------------------------------
708 static int CreatePBRestriction_Ref(CeedElemRestriction rstr,
709                                    CeedElemRestriction *pb_rstr) {
710   int ierr;
711   Ceed ceed;
712   ierr = CeedElemRestrictionGetCeed(rstr, &ceed); CeedChkBackend(ierr);
713   const CeedInt *offsets;
714   ierr = CeedElemRestrictionGetOffsets(rstr, CEED_MEM_HOST, &offsets);
715   CeedChkBackend(ierr);
716 
717   // Expand offsets
718   CeedInt num_elem, num_comp, elem_size, comp_stride, max = 1, *pbOffsets;
719   ierr = CeedElemRestrictionGetNumElements(rstr, &num_elem); CeedChkBackend(ierr);
720   ierr = CeedElemRestrictionGetNumComponents(rstr, &num_comp);
721   CeedChkBackend(ierr);
722   ierr = CeedElemRestrictionGetElementSize(rstr, &elem_size);
723   CeedChkBackend(ierr);
724   ierr = CeedElemRestrictionGetCompStride(rstr, &comp_stride);
725   CeedChkBackend(ierr);
726   CeedInt shift = num_comp;
727   if (comp_stride != 1)
728     shift *= num_comp;
729   ierr = CeedCalloc(num_elem*elem_size, &pbOffsets); CeedChkBackend(ierr);
730   for (CeedInt i = 0; i < num_elem*elem_size; i++) {
731     pbOffsets[i] = offsets[i]*shift;
732     if (pbOffsets[i] > max)
733       max = pbOffsets[i];
734   }
735 
736   // Create new restriction
737   ierr = CeedElemRestrictionCreate(ceed, num_elem, elem_size, num_comp*num_comp,
738                                    1,
739                                    max + num_comp*num_comp, CEED_MEM_HOST,
740                                    CEED_OWN_POINTER, pbOffsets, pb_rstr);
741   CeedChkBackend(ierr);
742 
743   // Cleanup
744   ierr = CeedElemRestrictionRestoreOffsets(rstr, &offsets); CeedChkBackend(ierr);
745 
746   return CEED_ERROR_SUCCESS;
747 }
748 
749 //------------------------------------------------------------------------------
750 // Assemble diagonal common code
751 //------------------------------------------------------------------------------
752 static inline int CeedOperatorAssembleAddDiagonalCore_Ref(CeedOperator op,
753     CeedVector assembled, CeedRequest *request, const bool is_pointblock) {
754   int ierr;
755   Ceed ceed;
756   ierr = CeedOperatorGetCeed(op, &ceed); CeedChkBackend(ierr);
757 
758   // Assemble QFunction
759   CeedQFunction qf;
760   ierr = CeedOperatorGetQFunction(op, &qf); CeedChkBackend(ierr);
761   CeedInt num_input_fields, num_output_fields;
762   ierr= CeedQFunctionGetNumArgs(qf, &num_input_fields, &num_output_fields);
763   CeedChkBackend(ierr);
764   CeedVector assembled_qf;
765   CeedElemRestriction rstr;
766   ierr = CeedOperatorLinearAssembleQFunction(op,  &assembled_qf, &rstr, request);
767   CeedChkBackend(ierr);
768   ierr = CeedElemRestrictionDestroy(&rstr); CeedChkBackend(ierr);
769   CeedScalar max_norm = 0;
770   ierr = CeedVectorNorm(assembled_qf, CEED_NORM_MAX, &max_norm);
771   CeedChkBackend(ierr);
772 
773   // Determine active input basis
774   CeedOperatorField *op_fields;
775   CeedQFunctionField *qf_fields;
776   ierr = CeedOperatorGetFields(op, &op_fields, NULL); CeedChkBackend(ierr);
777   ierr = CeedQFunctionGetFields(qf, &qf_fields, NULL); CeedChkBackend(ierr);
778   CeedInt num_eval_mode_in = 0, num_comp, dim = 1;
779   CeedEvalMode *eval_mode_in = NULL;
780   CeedBasis basis_in = NULL;
781   CeedElemRestriction rstr_in = NULL;
782   for (CeedInt i=0; i<num_input_fields; i++) {
783     CeedVector vec;
784     ierr = CeedOperatorFieldGetVector(op_fields[i], &vec); CeedChkBackend(ierr);
785     if (vec == CEED_VECTOR_ACTIVE) {
786       CeedElemRestriction rstr;
787       ierr = CeedOperatorFieldGetBasis(op_fields[i], &basis_in); CeedChkBackend(ierr);
788       ierr = CeedBasisGetNumComponents(basis_in, &num_comp); CeedChkBackend(ierr);
789       ierr = CeedBasisGetDimension(basis_in, &dim); CeedChkBackend(ierr);
790       ierr = CeedOperatorFieldGetElemRestriction(op_fields[i], &rstr);
791       CeedChkBackend(ierr);
792       if (rstr_in && rstr_in != rstr)
793         // LCOV_EXCL_START
794         return CeedError(ceed, CEED_ERROR_BACKEND,
795                          "Multi-field non-composite operator diagonal assembly not supported");
796       // LCOV_EXCL_STOP
797       rstr_in = rstr;
798       CeedEvalMode eval_mode;
799       ierr = CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode);
800       CeedChkBackend(ierr);
801       switch (eval_mode) {
802       case CEED_EVAL_NONE:
803       case CEED_EVAL_INTERP:
804         ierr = CeedRealloc(num_eval_mode_in + 1, &eval_mode_in); CeedChkBackend(ierr);
805         eval_mode_in[num_eval_mode_in] = eval_mode;
806         num_eval_mode_in += 1;
807         break;
808       case CEED_EVAL_GRAD:
809         ierr = CeedRealloc(num_eval_mode_in + dim, &eval_mode_in); CeedChkBackend(ierr);
810         for (CeedInt d=0; d<dim; d++)
811           eval_mode_in[num_eval_mode_in+d] = eval_mode;
812         num_eval_mode_in += dim;
813         break;
814       case CEED_EVAL_WEIGHT:
815       case CEED_EVAL_DIV:
816       case CEED_EVAL_CURL:
817         break; // Caught by QF Assembly
818       }
819     }
820   }
821 
822   // Determine active output basis
823   ierr = CeedOperatorGetFields(op, NULL, &op_fields); CeedChkBackend(ierr);
824   ierr = CeedQFunctionGetFields(qf, NULL, &qf_fields); CeedChkBackend(ierr);
825   CeedInt num_eval_mode_out = 0;
826   CeedEvalMode *eval_mode_out = NULL;
827   CeedBasis basis_out = NULL;
828   CeedElemRestriction rstr_out = NULL;
829   for (CeedInt i=0; i<num_output_fields; i++) {
830     CeedVector vec;
831     ierr = CeedOperatorFieldGetVector(op_fields[i], &vec); CeedChkBackend(ierr);
832     if (vec == CEED_VECTOR_ACTIVE) {
833       CeedElemRestriction rstr;
834       ierr = CeedOperatorFieldGetBasis(op_fields[i], &basis_out);
835       CeedChkBackend(ierr);
836       ierr = CeedOperatorFieldGetElemRestriction(op_fields[i], &rstr);
837       CeedChkBackend(ierr);
838       if (rstr_out && rstr_out != rstr)
839         // LCOV_EXCL_START
840         return CeedError(ceed, CEED_ERROR_BACKEND,
841                          "Multi-field non-composite operator diagonal assembly not supported");
842       // LCOV_EXCL_STOP
843       rstr_out = rstr;
844       CeedEvalMode eval_mode;
845       ierr = CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode);
846       CeedChkBackend(ierr);
847       switch (eval_mode) {
848       case CEED_EVAL_NONE:
849       case CEED_EVAL_INTERP:
850         ierr = CeedRealloc(num_eval_mode_out + 1, &eval_mode_out); CeedChkBackend(ierr);
851         eval_mode_out[num_eval_mode_out] = eval_mode;
852         num_eval_mode_out += 1;
853         break;
854       case CEED_EVAL_GRAD:
855         ierr = CeedRealloc(num_eval_mode_out + dim, &eval_mode_out);
856         CeedChkBackend(ierr);
857         for (CeedInt d=0; d<dim; d++)
858           eval_mode_out[num_eval_mode_out+d] = eval_mode;
859         num_eval_mode_out += dim;
860         break;
861       case CEED_EVAL_WEIGHT:
862       case CEED_EVAL_DIV:
863       case CEED_EVAL_CURL:
864         break; // Caught by QF Assembly
865       }
866     }
867   }
868 
869   // Assemble point-block diagonal restriction, if needed
870   CeedElemRestriction diag_rstr = rstr_out;
871   if (is_pointblock) {
872     ierr = CreatePBRestriction_Ref(rstr_out, &diag_rstr); CeedChkBackend(ierr);
873   }
874 
875   // Create diagonal vector
876   CeedVector elem_diag;
877   ierr = CeedElemRestrictionCreateVector(diag_rstr, NULL, &elem_diag);
878   CeedChkBackend(ierr);
879 
880   // Assemble element operator diagonals
881   CeedScalar *elem_diag_array, *assembled_qf_array;
882   ierr = CeedVectorSetValue(elem_diag, 0.0); CeedChkBackend(ierr);
883   ierr = CeedVectorGetArray(elem_diag, CEED_MEM_HOST, &elem_diag_array);
884   CeedChkBackend(ierr);
885   ierr = CeedVectorGetArray(assembled_qf, CEED_MEM_HOST, &assembled_qf_array);
886   CeedChkBackend(ierr);
887   CeedInt num_elem, num_nodes, num_qpts;
888   ierr = CeedElemRestrictionGetNumElements(diag_rstr, &num_elem);
889   CeedChkBackend(ierr);
890   ierr = CeedBasisGetNumNodes(basis_in, &num_nodes); CeedChkBackend(ierr);
891   ierr = CeedBasisGetNumQuadraturePoints(basis_in, &num_qpts);
892   CeedChkBackend(ierr);
893   // Basis matrices
894   const CeedScalar *interp_in, *interp_out, *grad_in, *grad_out;
895   CeedScalar *identity = NULL;
896   bool evalNone = false;
897   for (CeedInt i=0; i<num_eval_mode_in; i++)
898     evalNone = evalNone || (eval_mode_in[i] == CEED_EVAL_NONE);
899   for (CeedInt i=0; i<num_eval_mode_out; i++)
900     evalNone = evalNone || (eval_mode_out[i] == CEED_EVAL_NONE);
901   if (evalNone) {
902     ierr = CeedCalloc(num_qpts*num_nodes, &identity); CeedChkBackend(ierr);
903     for (CeedInt i=0; i<(num_nodes<num_qpts?num_nodes:num_qpts); i++)
904       identity[i*num_nodes+i] = 1.0;
905   }
906   ierr = CeedBasisGetInterp(basis_in, &interp_in); CeedChkBackend(ierr);
907   ierr = CeedBasisGetInterp(basis_out, &interp_out); CeedChkBackend(ierr);
908   ierr = CeedBasisGetGrad(basis_in, &grad_in); CeedChkBackend(ierr);
909   ierr = CeedBasisGetGrad(basis_out, &grad_out); CeedChkBackend(ierr);
910   // Compute the diagonal of B^T D B
911   // Each element
912   const CeedScalar qf_value_bound = max_norm*100*CEED_EPSILON;
913   for (CeedInt e=0; e<num_elem; e++) {
914     CeedInt d_out = -1;
915     // Each basis eval mode pair
916     for (CeedInt e_out=0; e_out<num_eval_mode_out; e_out++) {
917       const CeedScalar *bt = NULL;
918       if (eval_mode_out[e_out] == CEED_EVAL_GRAD)
919         d_out += 1;
920       CeedOperatorGetBasisPointer_Ref(&bt, eval_mode_out[e_out], identity, interp_out,
921                                       &grad_out[d_out*num_qpts*num_nodes]);
922       CeedInt d_in = -1;
923       for (CeedInt e_in=0; e_in<num_eval_mode_in; e_in++) {
924         const CeedScalar *b = NULL;
925         if (eval_mode_in[e_in] == CEED_EVAL_GRAD)
926           d_in += 1;
927         CeedOperatorGetBasisPointer_Ref(&b, eval_mode_in[e_in], identity, interp_in,
928                                         &grad_in[d_in*num_qpts*num_nodes]);
929         // Each component
930         for (CeedInt c_out=0; c_out<num_comp; c_out++)
931           // Each qpoint/node pair
932           for (CeedInt q=0; q<num_qpts; q++)
933             if (is_pointblock) {
934               // Point Block Diagonal
935               for (CeedInt c_in=0; c_in<num_comp; c_in++) {
936                 const CeedScalar qf_value =
937                   assembled_qf_array[((((e*num_eval_mode_in+e_in)*num_comp+c_in)*
938                                        num_eval_mode_out+e_out)*num_comp+c_out)*num_qpts+q];
939                 if (fabs(qf_value) > qf_value_bound)
940                   for (CeedInt n=0; n<num_nodes; n++)
941                     elem_diag_array[((e*num_comp+c_out)*num_comp+c_in)*num_nodes+n] +=
942                       bt[q*num_nodes+n] * qf_value * b[q*num_nodes+n];
943               }
944             } else {
945               // Diagonal Only
946               const CeedScalar qf_value =
947                 assembled_qf_array[((((e*num_eval_mode_in+e_in)*num_comp+c_out)*
948                                      num_eval_mode_out+e_out)*num_comp+c_out)*num_qpts+q];
949               if (fabs(qf_value) > qf_value_bound)
950                 for (CeedInt n=0; n<num_nodes; n++)
951                   elem_diag_array[(e*num_comp+c_out)*num_nodes+n] +=
952                     bt[q*num_nodes+n] * qf_value * b[q*num_nodes+n];
953             }
954       }
955     }
956   }
957   ierr = CeedVectorRestoreArray(elem_diag, &elem_diag_array);
958   CeedChkBackend(ierr);
959   ierr = CeedVectorRestoreArray(assembled_qf, &assembled_qf_array);
960   CeedChkBackend(ierr);
961 
962   // Assemble local operator diagonal
963   ierr = CeedElemRestrictionApply(diag_rstr, CEED_TRANSPOSE, elem_diag,
964                                   assembled, request); CeedChkBackend(ierr);
965 
966   // Cleanup
967   if (is_pointblock) {
968     ierr = CeedElemRestrictionDestroy(&diag_rstr); CeedChkBackend(ierr);
969   }
970   ierr = CeedVectorDestroy(&assembled_qf); CeedChkBackend(ierr);
971   ierr = CeedVectorDestroy(&elem_diag); CeedChkBackend(ierr);
972   ierr = CeedFree(&eval_mode_in); CeedChkBackend(ierr);
973   ierr = CeedFree(&eval_mode_out); CeedChkBackend(ierr);
974   ierr = CeedFree(&identity); CeedChkBackend(ierr);
975 
976   return CEED_ERROR_SUCCESS;
977 }
978 
979 //------------------------------------------------------------------------------
980 // Assemble composite diagonal common code
981 //------------------------------------------------------------------------------
982 static inline int CeedOperatorLinearAssembleAddDiagonalCompositeCore_Ref(
983   CeedOperator op, CeedVector assembled, CeedRequest *request,
984   const bool is_pointblock) {
985   int ierr;
986   CeedInt num_sub;
987   CeedOperator *suboperators;
988   ierr = CeedOperatorGetNumSub(op, &num_sub); CeedChkBackend(ierr);
989   ierr = CeedOperatorGetSubList(op, &suboperators); CeedChkBackend(ierr);
990   for (CeedInt i = 0; i < num_sub; i++) {
991     ierr = CeedOperatorAssembleAddDiagonalCore_Ref(suboperators[i], assembled,
992            request, is_pointblock); CeedChkBackend(ierr);
993   }
994   return CEED_ERROR_SUCCESS;
995 }
996 
997 //------------------------------------------------------------------------------
998 // Assemble Linear Diagonal
999 //------------------------------------------------------------------------------
1000 static int CeedOperatorLinearAssembleAddDiagonal_Ref(CeedOperator op,
1001     CeedVector assembled, CeedRequest *request) {
1002   int ierr;
1003   bool is_composite;
1004   ierr = CeedOperatorIsComposite(op, &is_composite); CeedChkBackend(ierr);
1005   if (is_composite) {
1006     return CeedOperatorLinearAssembleAddDiagonalCompositeCore_Ref(op, assembled,
1007            request, false);
1008   } else {
1009     return CeedOperatorAssembleAddDiagonalCore_Ref(op, assembled, request, false);
1010   }
1011 }
1012 
1013 //------------------------------------------------------------------------------
1014 // Assemble Linear Point Block Diagonal
1015 //------------------------------------------------------------------------------
1016 static int CeedOperatorLinearAssembleAddPointBlockDiagonal_Ref(CeedOperator op,
1017     CeedVector assembled, CeedRequest *request) {
1018   int ierr;
1019   bool is_composite;
1020   ierr = CeedOperatorIsComposite(op, &is_composite); CeedChkBackend(ierr);
1021   if (is_composite) {
1022     return CeedOperatorLinearAssembleAddDiagonalCompositeCore_Ref(op, assembled,
1023            request, true);
1024   } else {
1025     return CeedOperatorAssembleAddDiagonalCore_Ref(op, assembled, request, true);
1026   }
1027 }
1028 
1029 //------------------------------------------------------------------------------
1030 // Build Mass, Laplacian matrices
1031 //------------------------------------------------------------------------------
1032 CeedPragmaOptimizeOff
1033 static int CeedBuildMassLaplace(const CeedScalar *interp_1d,
1034                                 const CeedScalar *grad_1d,
1035                                 const CeedScalar *q_weight_1d, CeedInt P_1d,
1036                                 CeedInt Q_1d, CeedInt dim,
1037                                 CeedScalar *mass, CeedScalar *laplace) {
1038 
1039   for (CeedInt i=0; i<P_1d; i++)
1040     for (CeedInt j=0; j<P_1d; j++) {
1041       CeedScalar sum = 0.0;
1042       for (CeedInt k=0; k<Q_1d; k++)
1043         sum += interp_1d[k*P_1d+i]*q_weight_1d[k]*interp_1d[k*P_1d+j];
1044       mass[i+j*P_1d] = sum;
1045     }
1046   // -- Laplacian
1047   for (CeedInt i=0; i<P_1d; i++)
1048     for (CeedInt j=0; j<P_1d; j++) {
1049       CeedScalar sum = 0.0;
1050       for (CeedInt k=0; k<Q_1d; k++)
1051         sum += grad_1d[k*P_1d+i]*q_weight_1d[k]*grad_1d[k*P_1d+j];
1052       laplace[i+j*P_1d] = sum;
1053     }
1054   CeedScalar perturbation = dim>2 ? 1e-6 : 1e-4;
1055   for (CeedInt i=0; i<P_1d; i++)
1056     laplace[i+P_1d*i] += perturbation;
1057   return CEED_ERROR_SUCCESS;
1058 }
1059 CeedPragmaOptimizeOn
1060 
1061 //------------------------------------------------------------------------------
1062 // Create FDM Element Inverse
1063 //------------------------------------------------------------------------------
1064 int CeedOperatorCreateFDMElementInverse_Ref(CeedOperator op,
1065     CeedOperator *fdm_inv, CeedRequest *request) {
1066   int ierr;
1067   Ceed ceed, ceed_parent;
1068   ierr = CeedOperatorGetCeed(op, &ceed); CeedChkBackend(ierr);
1069   ierr = CeedGetOperatorFallbackParentCeed(ceed, &ceed_parent);
1070   CeedChkBackend(ierr);
1071   ceed_parent = ceed_parent ? ceed_parent : ceed;
1072   CeedQFunction qf;
1073   ierr = CeedOperatorGetQFunction(op, &qf); CeedChkBackend(ierr);
1074 
1075   // Determine active input basis
1076   bool interp = false, grad = false;
1077   CeedBasis basis = NULL;
1078   CeedElemRestriction rstr = NULL;
1079   CeedOperatorField *op_fields;
1080   CeedQFunctionField *qf_fields;
1081   ierr = CeedOperatorGetFields(op, &op_fields, NULL); CeedChkBackend(ierr);
1082   ierr = CeedQFunctionGetFields(qf, &qf_fields, NULL); CeedChkBackend(ierr);
1083   CeedInt num_input_fields;
1084   ierr = CeedQFunctionGetNumArgs(qf, &num_input_fields, NULL);
1085   CeedChkBackend(ierr);
1086   for (CeedInt i=0; i<num_input_fields; i++) {
1087     CeedVector vec;
1088     ierr = CeedOperatorFieldGetVector(op_fields[i], &vec); CeedChkBackend(ierr);
1089     if (vec == CEED_VECTOR_ACTIVE) {
1090       CeedEvalMode eval_mode;
1091       ierr = CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode);
1092       CeedChkBackend(ierr);
1093       interp = interp || eval_mode == CEED_EVAL_INTERP;
1094       grad = grad || eval_mode == CEED_EVAL_GRAD;
1095       ierr = CeedOperatorFieldGetBasis(op_fields[i], &basis); CeedChkBackend(ierr);
1096       ierr = CeedOperatorFieldGetElemRestriction(op_fields[i], &rstr);
1097       CeedChkBackend(ierr);
1098     }
1099   }
1100   if (!basis)
1101     // LCOV_EXCL_START
1102     return CeedError(ceed, CEED_ERROR_BACKEND, "No active field set");
1103   // LCOV_EXCL_STOP
1104   CeedInt P_1d, Q_1d, elem_size, num_qpts, dim, num_comp = 1, num_elem = 1,
1105                                                 l_size = 1;
1106   ierr = CeedBasisGetNumNodes1D(basis, &P_1d); CeedChkBackend(ierr);
1107   ierr = CeedBasisGetNumNodes(basis, &elem_size); CeedChkBackend(ierr);
1108   ierr = CeedBasisGetNumQuadraturePoints1D(basis, &Q_1d); CeedChkBackend(ierr);
1109   ierr = CeedBasisGetNumQuadraturePoints(basis, &num_qpts); CeedChkBackend(ierr);
1110   ierr = CeedBasisGetDimension(basis, &dim); CeedChkBackend(ierr);
1111   ierr = CeedBasisGetNumComponents(basis, &num_comp); CeedChkBackend(ierr);
1112   ierr = CeedElemRestrictionGetNumElements(rstr, &num_elem); CeedChkBackend(ierr);
1113   ierr = CeedElemRestrictionGetLVectorSize(rstr, &l_size); CeedChkBackend(ierr);
1114 
1115   // Build and diagonalize 1D Mass and Laplacian
1116   bool tensor_basis;
1117   ierr = CeedBasisIsTensor(basis, &tensor_basis); CeedChkBackend(ierr);
1118   if (!tensor_basis)
1119     // LCOV_EXCL_START
1120     return CeedError(ceed, CEED_ERROR_BACKEND,
1121                      "FDMElementInverse only supported for tensor "
1122                      "bases");
1123   // LCOV_EXCL_STOP
1124   CeedScalar *mass, *laplace, *x, *fdm_interp, *lambda;
1125   ierr = CeedCalloc(P_1d*P_1d, &mass); CeedChkBackend(ierr);
1126   ierr = CeedCalloc(P_1d*P_1d, &laplace); CeedChkBackend(ierr);
1127   ierr = CeedCalloc(P_1d*P_1d, &x); CeedChkBackend(ierr);
1128   ierr = CeedCalloc(P_1d*P_1d, &fdm_interp); CeedChkBackend(ierr);
1129   ierr = CeedCalloc(P_1d, &lambda); CeedChkBackend(ierr);
1130   // -- Build matrices
1131   const CeedScalar *interp_1d, *grad_1d, *q_weight_1d;
1132   ierr = CeedBasisGetInterp1D(basis, &interp_1d); CeedChkBackend(ierr);
1133   ierr = CeedBasisGetGrad1D(basis, &grad_1d); CeedChkBackend(ierr);
1134   ierr = CeedBasisGetQWeights(basis, &q_weight_1d); CeedChkBackend(ierr);
1135   ierr = CeedBuildMassLaplace(interp_1d, grad_1d, q_weight_1d, P_1d, Q_1d, dim,
1136                               mass, laplace); CeedChkBackend(ierr);
1137 
1138   // -- Diagonalize
1139   ierr = CeedSimultaneousDiagonalization(ceed, laplace, mass, x, lambda, P_1d);
1140   CeedChkBackend(ierr);
1141   ierr = CeedFree(&mass); CeedChkBackend(ierr);
1142   ierr = CeedFree(&laplace); CeedChkBackend(ierr);
1143   for (CeedInt i=0; i<P_1d; i++)
1144     for (CeedInt j=0; j<P_1d; j++)
1145       fdm_interp[i+j*P_1d] = x[j+i*P_1d];
1146   ierr = CeedFree(&x); CeedChkBackend(ierr);
1147 
1148   // Assemble QFunction
1149   CeedVector assembled;
1150   CeedElemRestriction rstr_qf;
1151   ierr =  CeedOperatorLinearAssembleQFunction(op, &assembled, &rstr_qf,
1152           request); CeedChkBackend(ierr);
1153   ierr = CeedElemRestrictionDestroy(&rstr_qf); CeedChkBackend(ierr);
1154   CeedScalar max_norm = 0;
1155   ierr = CeedVectorNorm(assembled, CEED_NORM_MAX, &max_norm);
1156   CeedChkBackend(ierr);
1157 
1158   // Calculate element averages
1159   CeedInt num_modes = (interp?1:0) + (grad?dim:0);
1160   CeedScalar *elem_avg;
1161   const CeedScalar *assembled_array, *q_weight_array;
1162   CeedVector q_weight;
1163   ierr = CeedVectorCreate(ceed_parent, num_qpts, &q_weight); CeedChkBackend(ierr);
1164   ierr = CeedBasisApply(basis, 1, CEED_NOTRANSPOSE, CEED_EVAL_WEIGHT,
1165                         CEED_VECTOR_NONE, q_weight); CeedChkBackend(ierr);
1166   ierr = CeedVectorGetArrayRead(assembled, CEED_MEM_HOST, &assembled_array);
1167   CeedChkBackend(ierr);
1168   ierr = CeedVectorGetArrayRead(q_weight, CEED_MEM_HOST, &q_weight_array);
1169   CeedChkBackend(ierr);
1170   ierr = CeedCalloc(num_elem, &elem_avg); CeedChkBackend(ierr);
1171   const CeedScalar qf_value_bound = max_norm*100*CEED_EPSILON;
1172   for (CeedInt e=0; e<num_elem; e++) {
1173     CeedInt count = 0;
1174     CeedInt elem_offset = e*num_qpts*num_comp*num_comp*num_modes*num_modes;
1175     for (CeedInt q=0; q<num_qpts; q++)
1176       for (CeedInt i=0; i<num_comp*num_comp*num_modes*num_modes; i++)
1177         if (fabs(assembled_array[elem_offset + i*num_qpts + q]) > qf_value_bound) {
1178           elem_avg[e] += assembled_array[elem_offset + i*num_qpts + q] /
1179                          q_weight_array[q];
1180           count++;
1181         }
1182     if (count) {
1183       elem_avg[e] /= count;
1184     } else {
1185       elem_avg[e] = 1.0;
1186     }
1187   }
1188   ierr = CeedVectorRestoreArrayRead(assembled, &assembled_array);
1189   CeedChkBackend(ierr);
1190   ierr = CeedVectorDestroy(&assembled); CeedChkBackend(ierr);
1191   ierr = CeedVectorRestoreArrayRead(q_weight, &q_weight_array);
1192   CeedChkBackend(ierr);
1193   ierr = CeedVectorDestroy(&q_weight); CeedChkBackend(ierr);
1194 
1195   // Build FDM diagonal
1196   CeedVector q_data;
1197   CeedScalar *q_data_array, *fdm_diagonal;
1198   ierr = CeedCalloc(num_comp*elem_size, &fdm_diagonal); CeedChkBackend(ierr);
1199   const CeedScalar fdm_diagonal_bound = elem_size*CEED_EPSILON;
1200   for (CeedInt c=0; c<num_comp; c++)
1201     for (CeedInt n=0; n<elem_size; n++) {
1202       if (interp)
1203         fdm_diagonal[c*elem_size + n] = 1.0;
1204       if (grad)
1205         for (CeedInt d=0; d<dim; d++) {
1206           CeedInt i = (n / CeedIntPow(P_1d, d)) % P_1d;
1207           fdm_diagonal[c*elem_size + n] += lambda[i];
1208         }
1209       if (fabs(fdm_diagonal[c*elem_size + n]) < fdm_diagonal_bound)
1210         fdm_diagonal[c*elem_size + n] = fdm_diagonal_bound;
1211     }
1212   ierr = CeedVectorCreate(ceed_parent, num_elem*num_comp*elem_size, &q_data);
1213   CeedChkBackend(ierr);
1214   ierr = CeedVectorSetValue(q_data, 0.0);
1215   CeedChkBackend(ierr);
1216   ierr = CeedVectorGetArray(q_data, CEED_MEM_HOST, &q_data_array);
1217   CeedChkBackend(ierr);
1218   for (CeedInt e=0; e<num_elem; e++)
1219     for (CeedInt c=0; c<num_comp; c++)
1220       for (CeedInt n=0; n<elem_size; n++)
1221         q_data_array[(e*num_comp+c)*elem_size+n] = 1. / (elem_avg[e] *
1222             fdm_diagonal[c*elem_size + n]);
1223   ierr = CeedFree(&elem_avg); CeedChkBackend(ierr);
1224   ierr = CeedFree(&fdm_diagonal); CeedChkBackend(ierr);
1225   ierr = CeedVectorRestoreArray(q_data, &q_data_array); CeedChkBackend(ierr);
1226 
1227   // Setup FDM operator
1228   // -- Basis
1229   CeedBasis fdm_basis;
1230   CeedScalar *grad_dummy, *q_ref_dummy, *q_weight_dummy;
1231   ierr = CeedCalloc(P_1d*P_1d, &grad_dummy); CeedChkBackend(ierr);
1232   ierr = CeedCalloc(P_1d, &q_ref_dummy); CeedChkBackend(ierr);
1233   ierr = CeedCalloc(P_1d, &q_weight_dummy); CeedChkBackend(ierr);
1234   ierr = CeedBasisCreateTensorH1(ceed_parent, dim, num_comp, P_1d, P_1d,
1235                                  fdm_interp, grad_dummy, q_ref_dummy,
1236                                  q_weight_dummy, &fdm_basis); CeedChkBackend(ierr);
1237   ierr = CeedFree(&fdm_interp); CeedChkBackend(ierr);
1238   ierr = CeedFree(&grad_dummy); CeedChkBackend(ierr);
1239   ierr = CeedFree(&q_ref_dummy); CeedChkBackend(ierr);
1240   ierr = CeedFree(&q_weight_dummy); CeedChkBackend(ierr);
1241   ierr = CeedFree(&lambda); CeedChkBackend(ierr);
1242 
1243   // -- Restriction
1244   CeedElemRestriction rstr_qd_i;
1245   CeedInt strides[3] = {1, elem_size, elem_size*num_comp};
1246   ierr = CeedElemRestrictionCreateStrided(ceed_parent, num_elem, elem_size,
1247                                           num_comp, num_elem*num_comp*elem_size,
1248                                           strides, &rstr_qd_i);
1249   CeedChkBackend(ierr);
1250   // -- QFunction
1251   CeedQFunction qf_fdm;
1252   ierr = CeedQFunctionCreateInteriorByName(ceed_parent, "Scale", &qf_fdm);
1253   CeedChkBackend(ierr);
1254   ierr = CeedQFunctionAddInput(qf_fdm, "input", num_comp, CEED_EVAL_INTERP);
1255   CeedChkBackend(ierr);
1256   ierr = CeedQFunctionAddInput(qf_fdm, "scale", num_comp, CEED_EVAL_NONE);
1257   CeedChkBackend(ierr);
1258   ierr = CeedQFunctionAddOutput(qf_fdm, "output", num_comp, CEED_EVAL_INTERP);
1259   CeedChkBackend(ierr);
1260   // -- QFunction context
1261   CeedInt *num_comp_data;
1262   ierr = CeedCalloc(1, &num_comp_data); CeedChk(ierr);
1263   num_comp_data[0] = num_comp;
1264   CeedQFunctionContext ctx_fdm;
1265   ierr = CeedQFunctionContextCreate(ceed, &ctx_fdm); CeedChkBackend(ierr);
1266   ierr = CeedQFunctionContextSetData(ctx_fdm, CEED_MEM_HOST, CEED_OWN_POINTER,
1267                                      sizeof(*num_comp_data), num_comp_data);
1268   CeedChkBackend(ierr);
1269   ierr = CeedQFunctionSetContext(qf_fdm, ctx_fdm); CeedChkBackend(ierr);
1270   ierr = CeedQFunctionContextDestroy(&ctx_fdm); CeedChkBackend(ierr);
1271   // -- Operator
1272   ierr = CeedOperatorCreate(ceed_parent, qf_fdm, NULL, NULL, fdm_inv);
1273   CeedChkBackend(ierr);
1274   CeedOperatorSetField(*fdm_inv, "input", rstr, fdm_basis, CEED_VECTOR_ACTIVE);
1275   CeedChkBackend(ierr);
1276   CeedOperatorSetField(*fdm_inv, "scale", rstr_qd_i, CEED_BASIS_COLLOCATED,
1277                        q_data); CeedChkBackend(ierr);
1278   CeedOperatorSetField(*fdm_inv, "output", rstr, fdm_basis, CEED_VECTOR_ACTIVE);
1279   CeedChkBackend(ierr);
1280 
1281   // Cleanup
1282   ierr = CeedVectorDestroy(&q_data); CeedChkBackend(ierr);
1283   ierr = CeedBasisDestroy(&fdm_basis); CeedChkBackend(ierr);
1284   ierr = CeedElemRestrictionDestroy(&rstr_qd_i); CeedChkBackend(ierr);
1285   ierr = CeedQFunctionDestroy(&qf_fdm); CeedChkBackend(ierr);
1286 
1287   return CEED_ERROR_SUCCESS;
1288 }
1289 
1290 //------------------------------------------------------------------------------
1291 // Operator Destroy
1292 //------------------------------------------------------------------------------
1293 static int CeedOperatorDestroy_Ref(CeedOperator op) {
1294   int ierr;
1295   CeedOperator_Ref *impl;
1296   ierr = CeedOperatorGetData(op, &impl); CeedChkBackend(ierr);
1297 
1298   for (CeedInt i=0; i<impl->num_e_vecs_in+impl->num_e_vecs_out; i++) {
1299     ierr = CeedVectorDestroy(&impl->e_vecs[i]); CeedChkBackend(ierr);
1300   }
1301   ierr = CeedFree(&impl->e_vecs); CeedChkBackend(ierr);
1302   ierr = CeedFree(&impl->e_data); CeedChkBackend(ierr);
1303   ierr = CeedFree(&impl->input_state); CeedChkBackend(ierr);
1304 
1305   for (CeedInt i=0; i<impl->num_e_vecs_in; i++) {
1306     ierr = CeedVectorDestroy(&impl->e_vecs_in[i]); CeedChkBackend(ierr);
1307     ierr = CeedVectorDestroy(&impl->q_vecs_in[i]); CeedChkBackend(ierr);
1308   }
1309   ierr = CeedFree(&impl->e_vecs_in); CeedChkBackend(ierr);
1310   ierr = CeedFree(&impl->q_vecs_in); CeedChkBackend(ierr);
1311 
1312   for (CeedInt i=0; i<impl->num_e_vecs_out; i++) {
1313     ierr = CeedVectorDestroy(&impl->e_vecs_out[i]); CeedChkBackend(ierr);
1314     ierr = CeedVectorDestroy(&impl->q_vecs_out[i]); CeedChkBackend(ierr);
1315   }
1316   ierr = CeedFree(&impl->e_vecs_out); CeedChkBackend(ierr);
1317   ierr = CeedFree(&impl->q_vecs_out); CeedChkBackend(ierr);
1318 
1319   ierr = CeedFree(&impl); CeedChkBackend(ierr);
1320   return CEED_ERROR_SUCCESS;
1321 }
1322 
1323 //------------------------------------------------------------------------------
1324 // Operator Create
1325 //------------------------------------------------------------------------------
1326 int CeedOperatorCreate_Ref(CeedOperator op) {
1327   int ierr;
1328   Ceed ceed;
1329   ierr = CeedOperatorGetCeed(op, &ceed); CeedChkBackend(ierr);
1330   CeedOperator_Ref *impl;
1331 
1332   ierr = CeedCalloc(1, &impl); CeedChkBackend(ierr);
1333   ierr = CeedOperatorSetData(op, impl); CeedChkBackend(ierr);
1334 
1335   ierr = CeedSetBackendFunction(ceed, "Operator", op, "LinearAssembleQFunction",
1336                                 CeedOperatorLinearAssembleQFunction_Ref);
1337   CeedChkBackend(ierr);
1338   ierr = CeedSetBackendFunction(ceed, "Operator", op, "LinearAssembleAddDiagonal",
1339                                 CeedOperatorLinearAssembleAddDiagonal_Ref);
1340   CeedChkBackend(ierr);
1341   ierr = CeedSetBackendFunction(ceed, "Operator", op,
1342                                 "LinearAssembleAddPointBlockDiagonal",
1343                                 CeedOperatorLinearAssembleAddPointBlockDiagonal_Ref);
1344   CeedChkBackend(ierr);
1345   ierr = CeedSetBackendFunction(ceed, "Operator", op, "CreateFDMElementInverse",
1346                                 CeedOperatorCreateFDMElementInverse_Ref);
1347   CeedChkBackend(ierr);
1348   ierr = CeedSetBackendFunction(ceed, "Operator", op, "ApplyAdd",
1349                                 CeedOperatorApplyAdd_Ref); CeedChkBackend(ierr);
1350   ierr = CeedSetBackendFunction(ceed, "Operator", op, "Destroy",
1351                                 CeedOperatorDestroy_Ref); CeedChkBackend(ierr);
1352   return CEED_ERROR_SUCCESS;
1353 }
1354 
1355 //------------------------------------------------------------------------------
1356 // Composite Operator Create
1357 //------------------------------------------------------------------------------
1358 int CeedCompositeOperatorCreate_Ref(CeedOperator op) {
1359   int ierr;
1360   Ceed ceed;
1361   ierr = CeedOperatorGetCeed(op, &ceed); CeedChkBackend(ierr);
1362   ierr = CeedSetBackendFunction(ceed, "Operator", op, "LinearAssembleAddDiagonal",
1363                                 CeedOperatorLinearAssembleAddDiagonal_Ref);
1364   CeedChkBackend(ierr);
1365   ierr = CeedSetBackendFunction(ceed, "Operator", op,
1366                                 "LinearAssembleAddPointBlockDiagonal",
1367                                 CeedOperatorLinearAssembleAddPointBlockDiagonal_Ref);
1368   CeedChkBackend(ierr);
1369   return CEED_ERROR_SUCCESS;
1370 }
1371 //------------------------------------------------------------------------------
1372