xref: /libCEED/rust/libceed-sys/c-src/interface/ceed-preconditioning.c (revision 33490f6ef26c584470cb54133cdd71c442f1841b)
1 // Copyright (c) 2017-2026, Lawrence Livermore National Security, LLC and other CEED contributors.
2 // All Rights Reserved. See the top-level LICENSE and NOTICE files for details.
3 //
4 // SPDX-License-Identifier: BSD-2-Clause
5 //
6 // This file is part of CEED:  http://github.com/ceed
7 
8 #include <ceed-impl.h>
9 #include <ceed.h>
10 #include <ceed/backend.h>
11 #include <assert.h>
12 #include <math.h>
13 #include <stdbool.h>
14 #include <stdio.h>
15 #include <string.h>
16 
17 /// @file
18 /// Implementation of CeedOperator preconditioning interfaces
19 
20 /// ----------------------------------------------------------------------------
21 /// CeedOperator Library Internal Preconditioning Functions
22 /// ----------------------------------------------------------------------------
23 /// @addtogroup CeedOperatorDeveloper
24 /// @{
25 
26 /**
27   @brief Duplicate a `CeedQFunction` with a reference `Ceed` to fallback for advanced `CeedOperator` functionality
28 
29   @param[in]  fallback_ceed `Ceed` on which to create fallback `CeedQFunction`
30   @param[in]  qf            `CeedQFunction` to create fallback for
31   @param[out] qf_fallback   Fallback `CeedQFunction`
32 
33   @return An error code: 0 - success, otherwise - failure
34 
35   @ref Developer
36 **/
CeedQFunctionCreateFallback(Ceed fallback_ceed,CeedQFunction qf,CeedQFunction * qf_fallback)37 static int CeedQFunctionCreateFallback(Ceed fallback_ceed, CeedQFunction qf, CeedQFunction *qf_fallback) {
38   char               *source_path_with_name = NULL;
39   CeedInt             num_input_fields, num_output_fields;
40   CeedQFunctionField *input_fields, *output_fields;
41 
42   // Check if NULL qf passed in
43   if (!qf) return CEED_ERROR_SUCCESS;
44 
45   CeedDebug(CeedQFunctionReturnCeed(qf), "Creating fallback CeedQFunction\n");
46 
47   if (qf->source_path) {
48     size_t path_len = strlen(qf->source_path), name_len = strlen(qf->kernel_name);
49 
50     CeedCall(CeedCalloc(path_len + name_len + 2, &source_path_with_name));
51     memcpy(source_path_with_name, qf->source_path, path_len);
52     memcpy(&source_path_with_name[path_len], ":", 1);
53     memcpy(&source_path_with_name[path_len + 1], qf->kernel_name, name_len);
54   } else if (qf->user_source) {
55     CeedCall(CeedStringAllocCopy(qf->user_source, &source_path_with_name));
56   } else {
57     CeedCall(CeedCalloc(1, &source_path_with_name));
58   }
59 
60   {
61     CeedInt           vec_length;
62     CeedQFunctionUser f;
63 
64     CeedCall(CeedQFunctionGetVectorLength(qf, &vec_length));
65     CeedCall(CeedQFunctionGetUserFunction(qf, &f));
66     CeedCall(CeedQFunctionCreateInterior(fallback_ceed, vec_length, f, source_path_with_name, qf_fallback));
67   }
68   {
69     CeedQFunctionContext ctx;
70 
71     CeedCall(CeedQFunctionGetContext(qf, &ctx));
72     CeedCall(CeedQFunctionSetContext(*qf_fallback, ctx));
73     CeedCall(CeedQFunctionContextDestroy(&ctx));
74   }
75   CeedCall(CeedQFunctionGetFields(qf, &num_input_fields, &input_fields, &num_output_fields, &output_fields));
76   for (CeedInt i = 0; i < num_input_fields; i++) {
77     const char  *field_name;
78     CeedInt      size;
79     CeedEvalMode eval_mode;
80 
81     CeedCall(CeedQFunctionFieldGetData(input_fields[i], &field_name, &size, &eval_mode));
82     CeedCall(CeedQFunctionAddInput(*qf_fallback, field_name, size, eval_mode));
83   }
84   for (CeedInt i = 0; i < num_output_fields; i++) {
85     const char  *field_name;
86     CeedInt      size;
87     CeedEvalMode eval_mode;
88 
89     CeedCall(CeedQFunctionFieldGetData(output_fields[i], &field_name, &size, &eval_mode));
90     CeedCall(CeedQFunctionAddOutput(*qf_fallback, field_name, size, eval_mode));
91   }
92   CeedCall(CeedFree(&source_path_with_name));
93   return CEED_ERROR_SUCCESS;
94 }
95 
96 /**
97   @brief Duplicate a `CeedOperator` with a reference `Ceed` to fallback for advanced `CeedOperator` functionality
98 
99   @param[in,out] op `CeedOperator` to create fallback for
100 
101   @return An error code: 0 - success, otherwise - failure
102 
103   @ref Developer
104 **/
CeedOperatorCreateFallback(CeedOperator op)105 static int CeedOperatorCreateFallback(CeedOperator op) {
106   bool         is_composite;
107   Ceed         ceed, ceed_fallback;
108   CeedOperator op_fallback;
109 
110   // Check not already created
111   if (op->op_fallback) return CEED_ERROR_SUCCESS;
112 
113   // Fallback Ceed
114   CeedCall(CeedOperatorGetCeed(op, &ceed));
115   CeedCall(CeedGetOperatorFallbackCeed(ceed, &ceed_fallback));
116   CeedCall(CeedDestroy(&ceed));
117   if (!ceed_fallback) return CEED_ERROR_SUCCESS;
118 
119   CeedDebug(CeedOperatorReturnCeed(op), "Creating fallback CeedOperator\n");
120 
121   // Clone Op
122   CeedCall(CeedOperatorIsComposite(op, &is_composite));
123   if (is_composite) {
124     CeedInt       num_suboperators;
125     CeedOperator *sub_operators;
126 
127     CeedCall(CeedOperatorCreateComposite(ceed_fallback, &op_fallback));
128     CeedCall(CeedOperatorCompositeGetNumSub(op, &num_suboperators));
129     CeedCall(CeedOperatorCompositeGetSubList(op, &sub_operators));
130     for (CeedInt i = 0; i < num_suboperators; i++) {
131       CeedOperator op_sub_fallback;
132 
133       CeedCall(CeedOperatorGetFallback(sub_operators[i], &op_sub_fallback));
134       CeedCall(CeedOperatorCompositeAddSub(op_fallback, op_sub_fallback));
135     }
136   } else {
137     bool               is_at_points = false;
138     CeedInt            num_input_fields, num_output_fields;
139     CeedQFunction      qf_fallback = NULL, dqf_fallback = NULL, dqfT_fallback = NULL;
140     CeedOperatorField *input_fields, *output_fields;
141 
142     CeedCall(CeedQFunctionCreateFallback(ceed_fallback, op->qf, &qf_fallback));
143     CeedCall(CeedQFunctionCreateFallback(ceed_fallback, op->dqf, &dqf_fallback));
144     CeedCall(CeedQFunctionCreateFallback(ceed_fallback, op->dqfT, &dqfT_fallback));
145     CeedCall(CeedOperatorIsAtPoints(op, &is_at_points));
146     if (is_at_points) {
147       CeedVector          points;
148       CeedElemRestriction rstr_points;
149 
150       CeedCall(CeedOperatorCreateAtPoints(ceed_fallback, qf_fallback, dqf_fallback, dqfT_fallback, &op_fallback));
151       CeedCall(CeedOperatorAtPointsGetPoints(op, &rstr_points, &points));
152       CeedCall(CeedOperatorAtPointsSetPoints(op_fallback, rstr_points, points));
153       CeedCall(CeedVectorDestroy(&points));
154       CeedCall(CeedElemRestrictionDestroy(&rstr_points));
155     } else {
156       CeedCall(CeedOperatorCreate(ceed_fallback, qf_fallback, dqf_fallback, dqfT_fallback, &op_fallback));
157     }
158     CeedCall(CeedOperatorGetFields(op, &num_input_fields, &input_fields, &num_output_fields, &output_fields));
159     for (CeedInt i = 0; i < num_input_fields; i++) {
160       const char         *field_name;
161       CeedVector          vec;
162       CeedElemRestriction rstr;
163       CeedBasis           basis;
164 
165       CeedCall(CeedOperatorFieldGetData(input_fields[i], &field_name, &rstr, &basis, &vec));
166       CeedCall(CeedOperatorSetField(op_fallback, field_name, rstr, basis, vec));
167       CeedCall(CeedVectorDestroy(&vec));
168       CeedCall(CeedElemRestrictionDestroy(&rstr));
169       CeedCall(CeedBasisDestroy(&basis));
170     }
171     for (CeedInt i = 0; i < num_output_fields; i++) {
172       const char         *field_name;
173       CeedVector          vec;
174       CeedElemRestriction rstr;
175       CeedBasis           basis;
176 
177       CeedCall(CeedOperatorFieldGetData(output_fields[i], &field_name, &rstr, &basis, &vec));
178       CeedCall(CeedOperatorSetField(op_fallback, field_name, rstr, basis, vec));
179       CeedCall(CeedVectorDestroy(&vec));
180       CeedCall(CeedElemRestrictionDestroy(&rstr));
181       CeedCall(CeedBasisDestroy(&basis));
182     }
183     {
184       CeedQFunctionAssemblyData data;
185 
186       CeedCall(CeedOperatorGetQFunctionAssemblyData(op, &data));
187       CeedCall(CeedQFunctionAssemblyDataReferenceCopy(data, &op_fallback->qf_assembled));
188     }
189     // Cleanup
190     CeedCall(CeedQFunctionDestroy(&qf_fallback));
191     CeedCall(CeedQFunctionDestroy(&dqf_fallback));
192     CeedCall(CeedQFunctionDestroy(&dqfT_fallback));
193   }
194   CeedCall(CeedOperatorSetName(op_fallback, op->name));
195   CeedCall(CeedOperatorCheckReady(op_fallback));
196   // Note: No ref-counting here so we don't get caught in a reference loop.
197   //       The op holds the only reference to op_fallback and is responsible for deleting itself and op_fallback.
198   op->op_fallback                 = op_fallback;
199   op_fallback->op_fallback_parent = op;
200   CeedCall(CeedDestroy(&ceed_fallback));
201   return CEED_ERROR_SUCCESS;
202 }
203 
204 /**
205   @brief Core logic for assembling operator diagonal or point block diagonal
206 
207   @param[in]  op             `CeedOperator` to assemble diagonal or point block diagonal
208   @param[in]  request        Address of @ref CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
209   @param[in]  is_point_block Boolean flag to assemble diagonal or point block diagonal
210   @param[out] assembled      `CeedVector` to store assembled diagonal
211 
212   @return An error code: 0 - success, otherwise - failure
213 
214   @ref Developer
215 **/
CeedOperatorLinearAssembleAddDiagonalSingle_Mesh(CeedOperator op,CeedRequest * request,const bool is_point_block,CeedVector assembled)216 static inline int CeedOperatorLinearAssembleAddDiagonalSingle_Mesh(CeedOperator op, CeedRequest *request, const bool is_point_block,
217                                                                    CeedVector assembled) {
218   bool is_composite;
219 
220   CeedCall(CeedOperatorIsComposite(op, &is_composite));
221   CeedCheck(!is_composite, CeedOperatorReturnCeed(op), CEED_ERROR_UNSUPPORTED, "Composite operator not supported");
222 
223   // Assemble QFunction
224   CeedInt             layout_qf[3];
225   const CeedScalar   *assembled_qf_array;
226   CeedVector          assembled_qf        = NULL;
227   CeedElemRestriction assembled_elem_rstr = NULL;
228 
229   CeedCall(CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op, &assembled_qf, &assembled_elem_rstr, request));
230   CeedCall(CeedElemRestrictionGetELayout(assembled_elem_rstr, layout_qf));
231   CeedCall(CeedElemRestrictionDestroy(&assembled_elem_rstr));
232   CeedCall(CeedVectorGetArrayRead(assembled_qf, CEED_MEM_HOST, &assembled_qf_array));
233 
234   // Get assembly data
235   const CeedEvalMode     **eval_modes_in, **eval_modes_out;
236   CeedInt                  num_active_bases_in, *num_eval_modes_in, num_active_bases_out, *num_eval_modes_out;
237   CeedSize               **eval_mode_offsets_in, **eval_mode_offsets_out, num_output_components;
238   CeedBasis               *active_bases_in, *active_bases_out;
239   CeedElemRestriction     *active_elem_rstrs_in, *active_elem_rstrs_out;
240   CeedOperatorAssemblyData data;
241 
242   CeedCall(CeedOperatorGetOperatorAssemblyData(op, &data));
243   CeedCall(CeedOperatorAssemblyDataGetEvalModes(data, &num_active_bases_in, &num_eval_modes_in, &eval_modes_in, &eval_mode_offsets_in,
244                                                 &num_active_bases_out, &num_eval_modes_out, &eval_modes_out, &eval_mode_offsets_out,
245                                                 &num_output_components));
246   CeedCall(CeedOperatorAssemblyDataGetBases(data, NULL, &active_bases_in, NULL, NULL, &active_bases_out, NULL));
247   CeedCall(CeedOperatorAssemblyDataGetElemRestrictions(data, NULL, &active_elem_rstrs_in, NULL, &active_elem_rstrs_out));
248 
249   // Loop over all active bases (find matching input/output pairs)
250   for (CeedInt b = 0; b < CeedIntMin(num_active_bases_in, num_active_bases_out); b++) {
251     CeedInt             b_in, b_out, num_elem, num_nodes, num_qpts, num_comp;
252     bool                has_eval_none = false;
253     CeedScalar         *elem_diag_array, *identity = NULL;
254     CeedVector          elem_diag;
255     CeedElemRestriction diag_elem_rstr;
256 
257     if (num_active_bases_in <= num_active_bases_out) {
258       b_in = b;
259       for (b_out = 0; b_out < num_active_bases_out; b_out++) {
260         if (active_bases_in[b_in] == active_bases_out[b_out]) {
261           break;
262         }
263       }
264       if (b_out == num_active_bases_out) {
265         continue;
266       }  // No matching output basis found
267     } else {
268       b_out = b;
269       for (b_in = 0; b_in < num_active_bases_in; b_in++) {
270         if (active_bases_in[b_in] == active_bases_out[b_out]) {
271           break;
272         }
273       }
274       if (b_in == num_active_bases_in) {
275         continue;
276       }  // No matching output basis found
277     }
278     CeedCheck(active_elem_rstrs_in[b_in] == active_elem_rstrs_out[b_out], CeedOperatorReturnCeed(op), CEED_ERROR_UNSUPPORTED,
279               "Cannot assemble operator diagonal with different input and output active element restrictions");
280 
281     // Assemble point block diagonal restriction, if needed
282     if (is_point_block) {
283       CeedCall(CeedOperatorCreateActivePointBlockRestriction(active_elem_rstrs_in[b_in], &diag_elem_rstr));
284     } else {
285       CeedCall(CeedElemRestrictionCreateUnsignedCopy(active_elem_rstrs_in[b_in], &diag_elem_rstr));
286     }
287 
288     // Create diagonal vector
289     CeedCall(CeedElemRestrictionCreateVector(diag_elem_rstr, NULL, &elem_diag));
290 
291     // Assemble element operator diagonals
292     CeedCall(CeedVectorSetValue(elem_diag, 0.0));
293     CeedCall(CeedVectorGetArray(elem_diag, CEED_MEM_HOST, &elem_diag_array));
294     CeedCall(CeedElemRestrictionGetNumElements(diag_elem_rstr, &num_elem));
295     CeedCall(CeedBasisGetNumNodes(active_bases_in[b_in], &num_nodes));
296     CeedCall(CeedBasisGetNumComponents(active_bases_in[b_in], &num_comp));
297     if (active_bases_in[b_in] == CEED_BASIS_NONE) num_qpts = num_nodes;
298     else CeedCall(CeedBasisGetNumQuadraturePoints(active_bases_in[b_in], &num_qpts));
299 
300     // Construct identity matrix for basis if required
301     for (CeedInt i = 0; i < num_eval_modes_in[b_in]; i++) {
302       has_eval_none = has_eval_none || (eval_modes_in[b_in][i] == CEED_EVAL_NONE);
303     }
304     for (CeedInt i = 0; i < num_eval_modes_out[b_out]; i++) {
305       has_eval_none = has_eval_none || (eval_modes_out[b_out][i] == CEED_EVAL_NONE);
306     }
307     if (has_eval_none) {
308       CeedCall(CeedCalloc(num_qpts * num_nodes, &identity));
309       for (CeedInt i = 0; i < (num_nodes < num_qpts ? num_nodes : num_qpts); i++) identity[i * num_nodes + i] = 1.0;
310     }
311 
312     // Compute the diagonal of B^T D B
313     // Each element
314     for (CeedSize e = 0; e < num_elem; e++) {
315       // Each basis eval mode pair
316       CeedInt      d_out              = 0, q_comp_out;
317       CeedEvalMode eval_mode_out_prev = CEED_EVAL_NONE;
318 
319       for (CeedInt e_out = 0; e_out < num_eval_modes_out[b_out]; e_out++) {
320         CeedInt           d_in              = 0, q_comp_in;
321         const CeedScalar *B_t               = NULL;
322         CeedEvalMode      eval_mode_in_prev = CEED_EVAL_NONE;
323 
324         CeedCall(CeedOperatorGetBasisPointer(active_bases_out[b_out], eval_modes_out[b_out][e_out], identity, &B_t));
325         CeedCall(CeedBasisGetNumQuadratureComponents(active_bases_out[b_out], eval_modes_out[b_out][e_out], &q_comp_out));
326         if (q_comp_out > 1) {
327           if (e_out == 0 || eval_modes_out[b_out][e_out] != eval_mode_out_prev) d_out = 0;
328           else B_t = &B_t[(++d_out) * num_qpts * num_nodes];
329         }
330         eval_mode_out_prev = eval_modes_out[b_out][e_out];
331 
332         for (CeedInt e_in = 0; e_in < num_eval_modes_in[b_in]; e_in++) {
333           const CeedScalar *B = NULL;
334 
335           CeedCall(CeedOperatorGetBasisPointer(active_bases_in[b_in], eval_modes_in[b_in][e_in], identity, &B));
336           CeedCall(CeedBasisGetNumQuadratureComponents(active_bases_in[b_in], eval_modes_in[b_in][e_in], &q_comp_in));
337           if (q_comp_in > 1) {
338             if (e_in == 0 || eval_modes_in[b_in][e_in] != eval_mode_in_prev) d_in = 0;
339             else B = &B[(++d_in) * num_qpts * num_nodes];
340           }
341           eval_mode_in_prev = eval_modes_in[b_in][e_in];
342 
343           // Each component
344           for (CeedInt c_out = 0; c_out < num_comp; c_out++) {
345             // Each qpt/node pair
346             for (CeedInt q = 0; q < num_qpts; q++) {
347               if (is_point_block) {
348                 // Point Block Diagonal
349                 for (CeedInt c_in = 0; c_in < num_comp; c_in++) {
350                   const CeedSize c_offset =
351                       (eval_mode_offsets_in[b_in][e_in] + c_in) * num_output_components + eval_mode_offsets_out[b_out][e_out] + c_out;
352                   const CeedScalar qf_value = assembled_qf_array[q * layout_qf[0] + c_offset * layout_qf[1] + e * layout_qf[2]];
353 
354                   for (CeedInt n = 0; n < num_nodes; n++) {
355                     elem_diag_array[((e * num_comp + c_out) * num_comp + c_in) * num_nodes + n] +=
356                         B_t[q * num_nodes + n] * qf_value * B[q * num_nodes + n];
357                   }
358                 }
359               } else {
360                 // Diagonal Only
361                 const CeedInt c_offset =
362                     (eval_mode_offsets_in[b_in][e_in] + c_out) * num_output_components + eval_mode_offsets_out[b_out][e_out] + c_out;
363                 const CeedScalar qf_value = assembled_qf_array[q * layout_qf[0] + c_offset * layout_qf[1] + e * layout_qf[2]];
364 
365                 for (CeedInt n = 0; n < num_nodes; n++) {
366                   elem_diag_array[(e * num_comp + c_out) * num_nodes + n] += B_t[q * num_nodes + n] * qf_value * B[q * num_nodes + n];
367                 }
368               }
369             }
370           }
371         }
372       }
373     }
374     CeedCall(CeedVectorRestoreArray(elem_diag, &elem_diag_array));
375 
376     // Assemble local operator diagonal
377     CeedCall(CeedElemRestrictionApply(diag_elem_rstr, CEED_TRANSPOSE, elem_diag, assembled, request));
378 
379     // Cleanup
380     CeedCall(CeedElemRestrictionDestroy(&diag_elem_rstr));
381     CeedCall(CeedVectorDestroy(&elem_diag));
382     CeedCall(CeedFree(&identity));
383   }
384   CeedCall(CeedVectorRestoreArrayRead(assembled_qf, &assembled_qf_array));
385   CeedCall(CeedVectorDestroy(&assembled_qf));
386   return CEED_ERROR_SUCCESS;
387 }
388 
389 /**
390   @brief Core logic for assembling operator diagonal or point block diagonal
391 
392   @param[in]  op             `CeedOperator` to assemble diagonal or point block diagonal
393   @param[in]  request        Address of @ref CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
394   @param[in]  is_point_block Boolean flag to assemble diagonal or point block diagonal
395   @param[out] assembled      `CeedVector` to store assembled diagonal
396 
397   @return An error code: 0 - success, otherwise - failure
398 
399   @ref Developer
400 **/
CeedOperatorLinearAssembleAddDiagonalSingle(CeedOperator op,CeedRequest * request,const bool is_point_block,CeedVector assembled)401 static inline int CeedOperatorLinearAssembleAddDiagonalSingle(CeedOperator op, CeedRequest *request, const bool is_point_block,
402                                                               CeedVector assembled) {
403   bool is_at_points;
404 
405   CeedCall(CeedOperatorIsAtPoints(op, &is_at_points));
406   CeedCheck(!is_at_points, CeedOperatorReturnCeed(op), CEED_ERROR_UNSUPPORTED, "AtPoints operator not supported");
407   CeedCall(CeedOperatorLinearAssembleAddDiagonalSingle_Mesh(op, request, is_point_block, assembled));
408   return CEED_ERROR_SUCCESS;
409 }
410 
411 /**
412   @brief Core logic for assembling composite operator diagonal
413 
414   @param[in]  op             `CeedOperator` to assemble point block diagonal
415   @param[in]  request        Address of @ref CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
416   @param[in]  is_point_block Boolean flag to assemble diagonal or point block diagonal
417   @param[out] assembled      `CeedVector` to store assembled diagonal
418 
419   @return An error code: 0 - success, otherwise - failure
420 
421   @ref Developer
422 **/
CeedOperatorLinearAssembleAddDiagonalComposite(CeedOperator op,CeedRequest * request,const bool is_point_block,CeedVector assembled)423 static inline int CeedOperatorLinearAssembleAddDiagonalComposite(CeedOperator op, CeedRequest *request, const bool is_point_block,
424                                                                  CeedVector assembled) {
425   CeedInt       num_sub;
426   CeedOperator *suboperators;
427 
428   CeedCall(CeedOperatorCompositeGetNumSub(op, &num_sub));
429   CeedCall(CeedOperatorCompositeGetSubList(op, &suboperators));
430   for (CeedInt i = 0; i < num_sub; i++) {
431     if (is_point_block) {
432       CeedCall(CeedOperatorLinearAssembleAddPointBlockDiagonal(suboperators[i], assembled, request));
433     } else {
434       CeedCall(CeedOperatorLinearAssembleAddDiagonal(suboperators[i], assembled, request));
435     }
436   }
437   return CEED_ERROR_SUCCESS;
438 }
439 
440 /**
441   @brief Build nonzero pattern for non-composite CeedOperator`.
442 
443   Users should generally use @ref CeedOperatorLinearAssembleSymbolic().
444 
445   @param[in]  op     `CeedOperator` to assemble nonzero pattern
446   @param[in]  offset Offset for number of entries
447   @param[out] rows   Row number for each entry
448   @param[out] cols   Column number for each entry
449 
450   @return An error code: 0 - success, otherwise - failure
451 
452   @ref Developer
453 **/
CeedOperatorAssembleSymbolicSingle(CeedOperator op,CeedInt offset,CeedInt * rows,CeedInt * cols)454 static int CeedOperatorAssembleSymbolicSingle(CeedOperator op, CeedInt offset, CeedInt *rows, CeedInt *cols) {
455   Ceed                ceed;
456   bool                is_composite;
457   CeedSize            num_nodes_in, num_nodes_out, local_num_entries, count = 0;
458   CeedInt             num_elem_in, elem_size_in, num_comp_in, layout_er_in[3];
459   CeedInt             num_elem_out, elem_size_out, num_comp_out, layout_er_out[3];
460   CeedScalar         *array;
461   const CeedScalar   *elem_dof_a_in, *elem_dof_a_out;
462   CeedVector          index_vec_in, index_vec_out, elem_dof_in, elem_dof_out;
463   CeedElemRestriction elem_rstr_in, elem_rstr_out, index_elem_rstr_in, index_elem_rstr_out;
464 
465   CeedCall(CeedOperatorIsComposite(op, &is_composite));
466   CeedCall(CeedOperatorGetCeed(op, &ceed));
467   CeedCheck(!is_composite, ceed, CEED_ERROR_UNSUPPORTED, "Composite operator not supported");
468 
469   CeedCall(CeedOperatorGetActiveVectorLengths(op, &num_nodes_in, &num_nodes_out));
470   CeedCall(CeedOperatorGetActiveElemRestrictions(op, &elem_rstr_in, &elem_rstr_out));
471   CeedCall(CeedElemRestrictionGetNumElements(elem_rstr_in, &num_elem_in));
472   CeedCall(CeedElemRestrictionGetElementSize(elem_rstr_in, &elem_size_in));
473   CeedCall(CeedElemRestrictionGetNumComponents(elem_rstr_in, &num_comp_in));
474   CeedCall(CeedElemRestrictionGetELayout(elem_rstr_in, layout_er_in));
475 
476   // Determine elem_dof relation for input
477   CeedCall(CeedVectorCreate(ceed, num_nodes_in, &index_vec_in));
478   CeedCall(CeedVectorGetArrayWrite(index_vec_in, CEED_MEM_HOST, &array));
479   for (CeedSize i = 0; i < num_nodes_in; i++) array[i] = i;
480   CeedCall(CeedVectorRestoreArray(index_vec_in, &array));
481   CeedCall(CeedVectorCreate(ceed, num_elem_in * elem_size_in * num_comp_in, &elem_dof_in));
482   CeedCall(CeedVectorSetValue(elem_dof_in, 0.0));
483   CeedCall(CeedElemRestrictionCreateUnorientedCopy(elem_rstr_in, &index_elem_rstr_in));
484   CeedCall(CeedElemRestrictionApply(index_elem_rstr_in, CEED_NOTRANSPOSE, index_vec_in, elem_dof_in, CEED_REQUEST_IMMEDIATE));
485   CeedCall(CeedVectorGetArrayRead(elem_dof_in, CEED_MEM_HOST, &elem_dof_a_in));
486   CeedCall(CeedVectorDestroy(&index_vec_in));
487   CeedCall(CeedElemRestrictionDestroy(&index_elem_rstr_in));
488 
489   if (elem_rstr_in != elem_rstr_out) {
490     CeedCall(CeedElemRestrictionGetNumElements(elem_rstr_out, &num_elem_out));
491     CeedCheck(num_elem_in == num_elem_out, ceed, CEED_ERROR_UNSUPPORTED,
492               "Active input and output operator restrictions must have the same number of elements."
493               " Input has %" CeedInt_FMT " elements; output has %" CeedInt_FMT "elements.",
494               num_elem_in, num_elem_out);
495     CeedCall(CeedElemRestrictionGetElementSize(elem_rstr_out, &elem_size_out));
496     CeedCall(CeedElemRestrictionGetNumComponents(elem_rstr_out, &num_comp_out));
497     CeedCall(CeedElemRestrictionGetELayout(elem_rstr_out, layout_er_out));
498 
499     // Determine elem_dof relation for output
500     CeedCall(CeedVectorCreate(ceed, num_nodes_out, &index_vec_out));
501     CeedCall(CeedVectorGetArrayWrite(index_vec_out, CEED_MEM_HOST, &array));
502     for (CeedSize i = 0; i < num_nodes_out; i++) array[i] = i;
503     CeedCall(CeedVectorRestoreArray(index_vec_out, &array));
504     CeedCall(CeedVectorCreate(ceed, num_elem_out * elem_size_out * num_comp_out, &elem_dof_out));
505     CeedCall(CeedVectorSetValue(elem_dof_out, 0.0));
506     CeedCall(CeedElemRestrictionCreateUnorientedCopy(elem_rstr_out, &index_elem_rstr_out));
507     CeedCall(CeedElemRestrictionApply(index_elem_rstr_out, CEED_NOTRANSPOSE, index_vec_out, elem_dof_out, CEED_REQUEST_IMMEDIATE));
508     CeedCall(CeedVectorGetArrayRead(elem_dof_out, CEED_MEM_HOST, &elem_dof_a_out));
509     CeedCall(CeedVectorDestroy(&index_vec_out));
510     CeedCall(CeedElemRestrictionDestroy(&index_elem_rstr_out));
511   } else {
512     num_elem_out     = num_elem_in;
513     elem_size_out    = elem_size_in;
514     num_comp_out     = num_comp_in;
515     layout_er_out[0] = layout_er_in[0];
516     layout_er_out[1] = layout_er_in[1];
517     layout_er_out[2] = layout_er_in[2];
518     elem_dof_a_out   = elem_dof_a_in;
519   }
520   local_num_entries = (CeedSize)elem_size_out * num_comp_out * elem_size_in * num_comp_in * num_elem_in;
521 
522   // Determine i, j locations for element matrices
523   for (CeedInt e = 0; e < num_elem_in; e++) {
524     for (CeedInt comp_in = 0; comp_in < num_comp_in; comp_in++) {
525       for (CeedInt comp_out = 0; comp_out < num_comp_out; comp_out++) {
526         for (CeedInt i = 0; i < elem_size_out; i++) {
527           for (CeedInt j = 0; j < elem_size_in; j++) {
528             const CeedInt elem_dof_index_row = i * layout_er_out[0] + comp_out * layout_er_out[1] + e * layout_er_out[2];
529             const CeedInt elem_dof_index_col = j * layout_er_in[0] + comp_in * layout_er_in[1] + e * layout_er_in[2];
530             const CeedInt row                = elem_dof_a_out[elem_dof_index_row];
531             const CeedInt col                = elem_dof_a_in[elem_dof_index_col];
532 
533             rows[offset + count] = row;
534             cols[offset + count] = col;
535             count++;
536           }
537         }
538       }
539     }
540   }
541   CeedCheck(count == local_num_entries, ceed, CEED_ERROR_MAJOR, "Error computing assembled entries");
542   CeedCall(CeedVectorRestoreArrayRead(elem_dof_in, &elem_dof_a_in));
543   CeedCall(CeedVectorDestroy(&elem_dof_in));
544   if (elem_rstr_in != elem_rstr_out) {
545     CeedCall(CeedVectorRestoreArrayRead(elem_dof_out, &elem_dof_a_out));
546     CeedCall(CeedVectorDestroy(&elem_dof_out));
547   }
548   CeedCall(CeedElemRestrictionDestroy(&elem_rstr_in));
549   CeedCall(CeedElemRestrictionDestroy(&elem_rstr_out));
550   CeedCall(CeedDestroy(&ceed));
551   return CEED_ERROR_SUCCESS;
552 }
553 
554 /**
555   @brief Core logic to assemble `CeedQFunction` and store result internally.
556 
557   Return copied references of stored data to the caller.
558   Caller is responsible for ownership and destruction of the copied references.
559   See also @ref CeedOperatorLinearAssembleQFunction().
560 
561   Note: If the value of `assembled` or `rstr` passed to this function are non-`NULL` , then it is assumed that they hold valid pointers.
562         These objects will be destroyed if `*assembled` or `*rstr` is the only reference to the object.
563 
564   @param[in]  op         `CeedOperator` to assemble `CeedQFunction`
565   @param[in]  use_parent Boolean flag to check for fallback parent implementation
566   @param[out] assembled  `CeedVector` to store assembled `CeedQFunction` at quadrature points
567   @param[out] rstr       `CeedElemRestriction` for `CeedVector` containing assembled `CeedQFunction`
568   @param[in]  request    Address of @ref CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
569 
570   @return An error code: 0 - success, otherwise - failure
571 
572   @ref User
573 **/
CeedOperatorLinearAssembleQFunctionBuildOrUpdate_Core(CeedOperator op,bool use_parent,CeedVector * assembled,CeedElemRestriction * rstr,CeedRequest * request)574 static int CeedOperatorLinearAssembleQFunctionBuildOrUpdate_Core(CeedOperator op, bool use_parent, CeedVector *assembled, CeedElemRestriction *rstr,
575                                                                  CeedRequest *request) {
576   int (*LinearAssembleQFunctionUpdate)(CeedOperator, CeedVector, CeedElemRestriction, CeedRequest *) = NULL;
577   CeedOperator op_assemble                                                                           = NULL;
578   CeedOperator op_fallback_parent                                                                    = NULL;
579 
580   CeedCall(CeedOperatorCheckReady(op));
581 
582   // Determine if fallback parent or operator has implementation
583   CeedCall(CeedOperatorGetFallbackParent(op, &op_fallback_parent));
584   if (op_fallback_parent && use_parent && op_fallback_parent->LinearAssembleQFunctionUpdate) {
585     // -- Backend version for op fallback parent is faster, if it exists
586     CeedDebug(CeedOperatorReturnCeed(op), "Using fallback parent for CeedOperatorLinearAssembleQFunctionBuildOrUpdate\n");
587     LinearAssembleQFunctionUpdate = op_fallback_parent->LinearAssembleQFunctionUpdate;
588     op_assemble                   = op_fallback_parent;
589   } else if (op->LinearAssembleQFunctionUpdate) {
590     // -- Backend version for op
591     LinearAssembleQFunctionUpdate = op->LinearAssembleQFunctionUpdate;
592     op_assemble                   = op;
593   }
594 
595   // Assemble QFunction
596   if (LinearAssembleQFunctionUpdate) {
597     // Backend or fallback parent version
598     CeedQFunctionAssemblyData data;
599     bool                      data_is_setup;
600     CeedVector                assembled_vec  = NULL;
601     CeedElemRestriction       assembled_rstr = NULL;
602 
603     CeedCall(CeedOperatorGetQFunctionAssemblyData(op, &data));
604     CeedCall(CeedQFunctionAssemblyDataIsSetup(data, &data_is_setup));
605     if (data_is_setup) {
606       bool update_needed;
607 
608       CeedCall(CeedQFunctionAssemblyDataGetObjects(data, &assembled_vec, &assembled_rstr));
609       CeedCall(CeedQFunctionAssemblyDataIsUpdateNeeded(data, &update_needed));
610       if (update_needed) CeedCall(LinearAssembleQFunctionUpdate(op_assemble, assembled_vec, assembled_rstr, request));
611     } else {
612       CeedCall(CeedOperatorLinearAssembleQFunction(op_assemble, &assembled_vec, &assembled_rstr, request));
613       CeedCall(CeedQFunctionAssemblyDataSetObjects(data, assembled_vec, assembled_rstr));
614     }
615     CeedCall(CeedQFunctionAssemblyDataSetUpdateNeeded(data, false));
616 
617     // Copy reference from internally held copy
618     CeedCall(CeedVectorReferenceCopy(assembled_vec, assembled));
619     CeedCall(CeedElemRestrictionReferenceCopy(assembled_rstr, rstr));
620     CeedCall(CeedVectorDestroy(&assembled_vec));
621     CeedCall(CeedElemRestrictionDestroy(&assembled_rstr));
622   } else {
623     // Operator fallback
624     CeedOperator op_fallback;
625 
626     CeedDebug(CeedOperatorReturnCeed(op), "\nFalling back for CeedOperatorLinearAssembleQFunctionBuildOrUpdate\n");
627     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
628     if (op_fallback) CeedCall(CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op_fallback, assembled, rstr, request));
629     else return CeedError(CeedOperatorReturnCeed(op), CEED_ERROR_UNSUPPORTED, "Backend does not support CeedOperatorLinearAssembleQFunctionUpdate");
630   }
631   return CEED_ERROR_SUCCESS;
632 }
633 
634 /**
635   @brief Assemble `CeedQFunction` and store result internally, but do not use fallback parent.
636 
637   Return copied references of stored data to the caller.
638   Caller is responsible for ownership and destruction of the copied references.
639   See also @ref CeedOperatorLinearAssembleQFunction().
640 
641   Note: If the value of `assembled` or `rstr` passed to this function are non-`NULL` , then it is assumed that they hold valid pointers.
642         These objects will be destroyed if `*assembled` or `*rstr` is the only reference to the object.
643 
644   @param[in]  op        `CeedOperator` to assemble `CeedQFunction`
645   @param[out] assembled `CeedVector` to store assembled `CeedQFunction` at quadrature points
646   @param[out] rstr      `CeedElemRestriction` for `CeedVector` containing assembled `CeedQFunction`
647   @param[in]  request   Address of @ref CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
648 
649   @return An error code: 0 - success, otherwise - failure
650 
651   @ref Developer
652 **/
CeedOperatorLinearAssembleQFunctionBuildOrUpdateFallback(CeedOperator op,CeedVector * assembled,CeedElemRestriction * rstr,CeedRequest * request)653 int CeedOperatorLinearAssembleQFunctionBuildOrUpdateFallback(CeedOperator op, CeedVector *assembled, CeedElemRestriction *rstr,
654                                                              CeedRequest *request) {
655   return CeedOperatorLinearAssembleQFunctionBuildOrUpdate_Core(op, false, assembled, rstr, request);
656 }
657 
658 /**
659   @brief Assemble nonzero entries for non-composite `CeedOperator`.
660 
661   Users should generally use @ref CeedOperatorLinearAssemble().
662 
663   @param[in]  op     `CeedOperator` to assemble
664   @param[in]  offset Offset for number of entries
665   @param[out] values Values to assemble into matrix
666 
667   @return An error code: 0 - success, otherwise - failure
668 
669   @ref Developer
670 **/
CeedOperatorAssembleSingle(CeedOperator op,CeedInt offset,CeedVector values)671 int CeedOperatorAssembleSingle(CeedOperator op, CeedInt offset, CeedVector values) {
672   bool is_composite, is_at_points;
673 
674   CeedCall(CeedOperatorIsComposite(op, &is_composite));
675   CeedCheck(!is_composite, CeedOperatorReturnCeed(op), CEED_ERROR_UNSUPPORTED, "Composite operator not supported");
676 
677   // Early exit for empty operator
678   {
679     CeedInt num_elem = 0;
680 
681     CeedCall(CeedOperatorGetNumElements(op, &num_elem));
682     if (num_elem == 0) return CEED_ERROR_SUCCESS;
683   }
684 
685   if (op->LinearAssembleSingle) {
686     // Backend version
687     CeedCall(op->LinearAssembleSingle(op, offset, values));
688     return CEED_ERROR_SUCCESS;
689   } else {
690     // Operator fallback
691     CeedOperator op_fallback;
692 
693     CeedDebug(CeedOperatorReturnCeed(op), "\nFalling back for CeedOperatorAssembleSingle\n");
694     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
695     if (op_fallback) {
696       CeedCall(CeedOperatorAssembleSingle(op_fallback, offset, values));
697       return CEED_ERROR_SUCCESS;
698     }
699   }
700 
701   CeedCall(CeedOperatorIsAtPoints(op, &is_at_points));
702   CeedCheck(!is_at_points, CeedOperatorReturnCeed(op), CEED_ERROR_UNSUPPORTED,
703             "Backend does not implement CeedOperatorLinearAssemble for AtPoints operator");
704 
705   // Assemble QFunction
706   CeedInt             layout_qf[3];
707   const CeedScalar   *assembled_qf_array;
708   CeedVector          assembled_qf        = NULL;
709   CeedElemRestriction assembled_elem_rstr = NULL;
710 
711   CeedCall(CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op, &assembled_qf, &assembled_elem_rstr, CEED_REQUEST_IMMEDIATE));
712   CeedCall(CeedElemRestrictionGetELayout(assembled_elem_rstr, layout_qf));
713   CeedCall(CeedElemRestrictionDestroy(&assembled_elem_rstr));
714   CeedCall(CeedVectorGetArrayRead(assembled_qf, CEED_MEM_HOST, &assembled_qf_array));
715 
716   // Get assembly data
717   CeedInt                  num_elem_in, elem_size_in, num_comp_in, num_qpts_in;
718   CeedInt                  num_elem_out, elem_size_out, num_comp_out, num_qpts_out;
719   CeedSize                 local_num_entries, count = 0;
720   const CeedEvalMode     **eval_modes_in, **eval_modes_out;
721   CeedInt                  num_active_bases_in, *num_eval_modes_in, num_active_bases_out, *num_eval_modes_out;
722   CeedBasis               *active_bases_in, *active_bases_out, basis_in, basis_out;
723   const CeedScalar       **B_mats_in, **B_mats_out, *B_mat_in, *B_mat_out;
724   CeedElemRestriction      elem_rstr_in, elem_rstr_out;
725   CeedRestrictionType      elem_rstr_type_in, elem_rstr_type_out;
726   const bool              *elem_rstr_orients_in = NULL, *elem_rstr_orients_out = NULL;
727   const CeedInt8          *elem_rstr_curl_orients_in = NULL, *elem_rstr_curl_orients_out = NULL;
728   CeedOperatorAssemblyData data;
729 
730   CeedCall(CeedOperatorGetOperatorAssemblyData(op, &data));
731   CeedCall(CeedOperatorAssemblyDataGetEvalModes(data, &num_active_bases_in, &num_eval_modes_in, &eval_modes_in, NULL, &num_active_bases_out,
732                                                 &num_eval_modes_out, &eval_modes_out, NULL, NULL));
733 
734   CeedCheck(num_active_bases_in == 1 && num_active_bases_out == 1, CeedOperatorReturnCeed(op), CEED_ERROR_UNSUPPORTED,
735             "Cannot assemble operator with multiple active bases");
736   CeedCheck(num_eval_modes_in[0] > 0 && num_eval_modes_out[0] > 0, CeedOperatorReturnCeed(op), CEED_ERROR_UNSUPPORTED,
737             "Cannot assemble operator without inputs/outputs");
738 
739   CeedCall(CeedOperatorAssemblyDataGetBases(data, NULL, &active_bases_in, &B_mats_in, NULL, &active_bases_out, &B_mats_out));
740   CeedCall(CeedOperatorGetActiveElemRestrictions(op, &elem_rstr_in, &elem_rstr_out));
741   basis_in  = active_bases_in[0];
742   basis_out = active_bases_out[0];
743   B_mat_in  = B_mats_in[0];
744   B_mat_out = B_mats_out[0];
745 
746   CeedCall(CeedElemRestrictionGetNumElements(elem_rstr_in, &num_elem_in));
747   CeedCall(CeedElemRestrictionGetElementSize(elem_rstr_in, &elem_size_in));
748   CeedCall(CeedElemRestrictionGetNumComponents(elem_rstr_in, &num_comp_in));
749   if (basis_in == CEED_BASIS_NONE) num_qpts_in = elem_size_in;
750   else CeedCall(CeedBasisGetNumQuadraturePoints(basis_in, &num_qpts_in));
751 
752   CeedCall(CeedElemRestrictionGetType(elem_rstr_in, &elem_rstr_type_in));
753   if (elem_rstr_type_in == CEED_RESTRICTION_ORIENTED) {
754     CeedCall(CeedElemRestrictionGetOrientations(elem_rstr_in, CEED_MEM_HOST, &elem_rstr_orients_in));
755   } else if (elem_rstr_type_in == CEED_RESTRICTION_CURL_ORIENTED) {
756     CeedCall(CeedElemRestrictionGetCurlOrientations(elem_rstr_in, CEED_MEM_HOST, &elem_rstr_curl_orients_in));
757   }
758 
759   if (elem_rstr_in != elem_rstr_out) {
760     CeedCall(CeedElemRestrictionGetNumElements(elem_rstr_out, &num_elem_out));
761     CeedCheck(num_elem_in == num_elem_out, CeedOperatorReturnCeed(op), CEED_ERROR_UNSUPPORTED,
762               "Active input and output operator restrictions must have the same number of elements."
763               " Input has %" CeedInt_FMT " elements; output has %" CeedInt_FMT "elements.",
764               num_elem_in, num_elem_out);
765     CeedCall(CeedElemRestrictionGetElementSize(elem_rstr_out, &elem_size_out));
766     CeedCall(CeedElemRestrictionGetNumComponents(elem_rstr_out, &num_comp_out));
767     if (basis_out == CEED_BASIS_NONE) num_qpts_out = elem_size_out;
768     else CeedCall(CeedBasisGetNumQuadraturePoints(basis_out, &num_qpts_out));
769     CeedCheck(num_qpts_in == num_qpts_out, CeedOperatorReturnCeed(op), CEED_ERROR_UNSUPPORTED,
770               "Active input and output bases must have the same number of quadrature points."
771               " Input has %" CeedInt_FMT " points; output has %" CeedInt_FMT "points.",
772               num_qpts_in, num_qpts_out);
773 
774     CeedCall(CeedElemRestrictionGetType(elem_rstr_out, &elem_rstr_type_out));
775     if (elem_rstr_type_out == CEED_RESTRICTION_ORIENTED) {
776       CeedCall(CeedElemRestrictionGetOrientations(elem_rstr_out, CEED_MEM_HOST, &elem_rstr_orients_out));
777     } else if (elem_rstr_type_out == CEED_RESTRICTION_CURL_ORIENTED) {
778       CeedCall(CeedElemRestrictionGetCurlOrientations(elem_rstr_out, CEED_MEM_HOST, &elem_rstr_curl_orients_out));
779     }
780   } else {
781     num_elem_out  = num_elem_in;
782     elem_size_out = elem_size_in;
783     num_comp_out  = num_comp_in;
784     num_qpts_out  = num_qpts_in;
785 
786     elem_rstr_orients_out      = elem_rstr_orients_in;
787     elem_rstr_curl_orients_out = elem_rstr_curl_orients_in;
788   }
789   local_num_entries = (CeedSize)elem_size_out * num_comp_out * elem_size_in * num_comp_in * num_elem_in;
790 
791   // Loop over elements and put in data structure
792   // We store B_mat_in, B_mat_out, BTD, elem_mat in row-major order
793   CeedTensorContract contract;
794   CeedScalar        *vals, *BTD_mat = NULL, *elem_mat = NULL, *elem_mat_b = NULL;
795 
796   CeedCall(CeedBasisGetTensorContract(basis_in, &contract));
797   CeedCall(CeedCalloc(elem_size_out * num_qpts_in * num_eval_modes_in[0], &BTD_mat));
798   CeedCall(CeedCalloc(elem_size_out * elem_size_in, &elem_mat));
799   if (elem_rstr_curl_orients_in || elem_rstr_curl_orients_out) CeedCall(CeedCalloc(elem_size_out * elem_size_in, &elem_mat_b));
800 
801   CeedCall(CeedVectorGetArray(values, CEED_MEM_HOST, &vals));
802   for (CeedSize e = 0; e < num_elem_in; e++) {
803     for (CeedInt comp_in = 0; comp_in < num_comp_in; comp_in++) {
804       for (CeedInt comp_out = 0; comp_out < num_comp_out; comp_out++) {
805         // Compute B^T*D
806         for (CeedSize n = 0; n < elem_size_out; n++) {
807           for (CeedSize q = 0; q < num_qpts_in; q++) {
808             for (CeedInt e_in = 0; e_in < num_eval_modes_in[0]; e_in++) {
809               const CeedSize btd_index = n * (num_qpts_in * num_eval_modes_in[0]) + q * num_eval_modes_in[0] + e_in;
810               CeedScalar     sum       = 0.0;
811 
812               for (CeedInt e_out = 0; e_out < num_eval_modes_out[0]; e_out++) {
813                 const CeedSize b_out_index     = (q * num_eval_modes_out[0] + e_out) * elem_size_out + n;
814                 const CeedSize eval_mode_index = ((e_in * num_comp_in + comp_in) * num_eval_modes_out[0] + e_out) * num_comp_out + comp_out;
815                 const CeedSize qf_index        = q * layout_qf[0] + eval_mode_index * layout_qf[1] + e * layout_qf[2];
816 
817                 sum += B_mat_out[b_out_index] * assembled_qf_array[qf_index];
818               }
819               BTD_mat[btd_index] = sum;
820             }
821           }
822         }
823 
824         // Form element matrix itself (for each block component)
825         if (contract) {
826           CeedCall(CeedTensorContractApply(contract, 1, num_qpts_in * num_eval_modes_in[0], elem_size_in, elem_size_out, BTD_mat, CEED_NOTRANSPOSE,
827                                            false, B_mat_in, elem_mat));
828         } else {
829           Ceed ceed;
830 
831           CeedCall(CeedOperatorGetCeed(op, &ceed));
832           CeedCall(CeedMatrixMatrixMultiply(ceed, BTD_mat, B_mat_in, elem_mat, elem_size_out, elem_size_in, num_qpts_in * num_eval_modes_in[0]));
833           CeedCall(CeedDestroy(&ceed));
834         }
835 
836         // Transform the element matrix if required
837         if (elem_rstr_orients_out) {
838           const bool *elem_orients = &elem_rstr_orients_out[e * elem_size_out];
839 
840           for (CeedInt i = 0; i < elem_size_out; i++) {
841             const double orient = elem_orients[i] ? -1.0 : 1.0;
842 
843             for (CeedInt j = 0; j < elem_size_in; j++) {
844               elem_mat[i * elem_size_in + j] *= orient;
845             }
846           }
847         } else if (elem_rstr_curl_orients_out) {
848           const CeedInt8 *elem_curl_orients = &elem_rstr_curl_orients_out[e * 3 * elem_size_out];
849 
850           // T^T*(B^T*D*B)
851           memcpy(elem_mat_b, elem_mat, elem_size_out * elem_size_in * sizeof(CeedScalar));
852           for (CeedInt i = 0; i < elem_size_out; i++) {
853             for (CeedInt j = 0; j < elem_size_in; j++) {
854               elem_mat[i * elem_size_in + j] = elem_mat_b[i * elem_size_in + j] * elem_curl_orients[3 * i + 1] +
855                                                (i > 0 ? elem_mat_b[(i - 1) * elem_size_in + j] * elem_curl_orients[3 * i - 1] : 0.0) +
856                                                (i < elem_size_out - 1 ? elem_mat_b[(i + 1) * elem_size_in + j] * elem_curl_orients[3 * i + 3] : 0.0);
857             }
858           }
859         }
860         if (elem_rstr_orients_in) {
861           const bool *elem_orients = &elem_rstr_orients_in[e * elem_size_in];
862 
863           for (CeedInt i = 0; i < elem_size_out; i++) {
864             for (CeedInt j = 0; j < elem_size_in; j++) {
865               elem_mat[i * elem_size_in + j] *= elem_orients[j] ? -1.0 : 1.0;
866             }
867           }
868         } else if (elem_rstr_curl_orients_in) {
869           const CeedInt8 *elem_curl_orients = &elem_rstr_curl_orients_in[e * 3 * elem_size_in];
870 
871           // (B^T*D*B)*T
872           memcpy(elem_mat_b, elem_mat, elem_size_out * elem_size_in * sizeof(CeedScalar));
873           for (CeedInt i = 0; i < elem_size_out; i++) {
874             for (CeedInt j = 0; j < elem_size_in; j++) {
875               elem_mat[i * elem_size_in + j] = elem_mat_b[i * elem_size_in + j] * elem_curl_orients[3 * j + 1] +
876                                                (j > 0 ? elem_mat_b[i * elem_size_in + j - 1] * elem_curl_orients[3 * j - 1] : 0.0) +
877                                                (j < elem_size_in - 1 ? elem_mat_b[i * elem_size_in + j + 1] * elem_curl_orients[3 * j + 3] : 0.0);
878             }
879           }
880         }
881 
882         // Put element matrix in coordinate data structure
883         for (CeedInt i = 0; i < elem_size_out; i++) {
884           for (CeedInt j = 0; j < elem_size_in; j++) {
885             vals[offset + count] = elem_mat[i * elem_size_in + j];
886             count++;
887           }
888         }
889       }
890     }
891   }
892   CeedCheck(count == local_num_entries, CeedOperatorReturnCeed(op), CEED_ERROR_MAJOR, "Error computing entries");
893   CeedCall(CeedVectorRestoreArray(values, &vals));
894 
895   // Cleanup
896   CeedCall(CeedFree(&BTD_mat));
897   CeedCall(CeedFree(&elem_mat));
898   CeedCall(CeedFree(&elem_mat_b));
899   if (elem_rstr_type_in == CEED_RESTRICTION_ORIENTED) {
900     CeedCall(CeedElemRestrictionRestoreOrientations(elem_rstr_in, &elem_rstr_orients_in));
901   } else if (elem_rstr_type_in == CEED_RESTRICTION_CURL_ORIENTED) {
902     CeedCall(CeedElemRestrictionRestoreCurlOrientations(elem_rstr_in, &elem_rstr_curl_orients_in));
903   }
904   if (elem_rstr_in != elem_rstr_out) {
905     if (elem_rstr_type_out == CEED_RESTRICTION_ORIENTED) {
906       CeedCall(CeedElemRestrictionRestoreOrientations(elem_rstr_out, &elem_rstr_orients_out));
907     } else if (elem_rstr_type_out == CEED_RESTRICTION_CURL_ORIENTED) {
908       CeedCall(CeedElemRestrictionRestoreCurlOrientations(elem_rstr_out, &elem_rstr_curl_orients_out));
909     }
910   }
911   CeedCall(CeedVectorRestoreArrayRead(assembled_qf, &assembled_qf_array));
912   CeedCall(CeedVectorDestroy(&assembled_qf));
913   CeedCall(CeedElemRestrictionDestroy(&elem_rstr_in));
914   CeedCall(CeedElemRestrictionDestroy(&elem_rstr_out));
915   return CEED_ERROR_SUCCESS;
916 }
917 
918 /**
919   @brief Count number of entries for assembled `CeedOperator`
920 
921   @param[in]  op          `CeedOperator` to assemble
922   @param[out] num_entries Number of entries in assembled representation
923 
924   @return An error code: 0 - success, otherwise - failure
925 
926   @ref Utility
927 **/
CeedOperatorAssemblyCountEntriesSingle(CeedOperator op,CeedSize * num_entries)928 static int CeedOperatorAssemblyCountEntriesSingle(CeedOperator op, CeedSize *num_entries) {
929   bool                is_composite;
930   CeedInt             num_elem_in, elem_size_in, num_comp_in, num_elem_out, elem_size_out, num_comp_out;
931   CeedElemRestriction rstr_in, rstr_out;
932 
933   CeedCall(CeedOperatorIsComposite(op, &is_composite));
934   CeedCheck(!is_composite, CeedOperatorReturnCeed(op), CEED_ERROR_UNSUPPORTED, "Composite operator not supported");
935 
936   CeedCall(CeedOperatorGetActiveElemRestrictions(op, &rstr_in, &rstr_out));
937   CeedCall(CeedElemRestrictionGetNumElements(rstr_in, &num_elem_in));
938   CeedCall(CeedElemRestrictionGetElementSize(rstr_in, &elem_size_in));
939   CeedCall(CeedElemRestrictionGetNumComponents(rstr_in, &num_comp_in));
940   if (rstr_in != rstr_out) {
941     CeedCall(CeedElemRestrictionGetNumElements(rstr_out, &num_elem_out));
942     CeedCheck(num_elem_in == num_elem_out, CeedOperatorReturnCeed(op), CEED_ERROR_UNSUPPORTED,
943               "Active input and output operator restrictions must have the same number of elements."
944               " Input has %" CeedInt_FMT " elements; output has %" CeedInt_FMT "elements.",
945               num_elem_in, num_elem_out);
946     CeedCall(CeedElemRestrictionGetElementSize(rstr_out, &elem_size_out));
947     CeedCall(CeedElemRestrictionGetNumComponents(rstr_out, &num_comp_out));
948   } else {
949     num_elem_out  = num_elem_in;
950     elem_size_out = elem_size_in;
951     num_comp_out  = num_comp_in;
952   }
953   CeedCall(CeedElemRestrictionDestroy(&rstr_in));
954   CeedCall(CeedElemRestrictionDestroy(&rstr_out));
955   *num_entries = (CeedSize)elem_size_in * num_comp_in * elem_size_out * num_comp_out * num_elem_in;
956   return CEED_ERROR_SUCCESS;
957 }
958 
959 /**
960   @brief Count number of entries for assembled `CeedOperator`
961 
962   @param[in]  op          `CeedOperator` to assemble
963   @param[out] num_entries Number of entries in assembled representation
964 
965   @return An error code: 0 - success, otherwise - failure
966 
967   @ref Utility
968 **/
CeedOperatorLinearAssembleGetNumEntries(CeedOperator op,CeedSize * num_entries)969 int CeedOperatorLinearAssembleGetNumEntries(CeedOperator op, CeedSize *num_entries) {
970   bool is_composite;
971 
972   CeedCall(CeedOperatorCheckReady(op));
973   CeedCall(CeedOperatorIsComposite(op, &is_composite));
974 
975   if (is_composite) {
976     CeedInt       num_suboperators;
977     CeedOperator *sub_operators;
978 
979     CeedCall(CeedOperatorCompositeGetNumSub(op, &num_suboperators));
980     CeedCall(CeedOperatorCompositeGetSubList(op, &sub_operators));
981 
982     *num_entries = 0;
983     for (CeedInt k = 0; k < num_suboperators; ++k) {
984       CeedSize single_entries;
985 
986       CeedCall(CeedOperatorAssemblyCountEntriesSingle(sub_operators[k], &single_entries));
987       *num_entries += single_entries;
988     }
989   } else {
990     CeedCall(CeedOperatorAssemblyCountEntriesSingle(op, num_entries));
991   }
992   return CEED_ERROR_SUCCESS;
993 }
994 
995 /**
996   @brief Common code for creating a multigrid coarse `CeedOperator` and level transfer `CeedOperator` for a `CeedOperator`
997 
998   @param[in]  op_fine      Fine grid `CeedOperator`
999   @param[in]  p_mult_fine  L-vector multiplicity in parallel gather/scatter, or `NULL` if not creating prolongation/restriction `CeedOperator`
1000   @param[in]  rstr_coarse  Coarse grid `CeedElemRestriction`
1001   @param[in]  basis_coarse Coarse grid active vector `CeedBasis`
1002   @param[in]  basis_c_to_f `CeedBasis` for coarse to fine interpolation, or `NULL` if not creating prolongation/restriction operators
1003   @param[out] op_coarse    Coarse grid `CeedOperator`
1004   @param[out] op_prolong   Coarse to fine `CeedOperator`, or `NULL`
1005   @param[out] op_restrict  Fine to coarse `CeedOperator`, or `NULL`
1006 
1007   @return An error code: 0 - success, otherwise - failure
1008 
1009   @ref Developer
1010 **/
CeedOperatorMultigridLevelCreateSingle_Core(CeedOperator op_fine,CeedVector p_mult_fine,CeedElemRestriction rstr_coarse,CeedBasis basis_coarse,CeedBasis basis_c_to_f,CeedOperator * op_coarse,CeedOperator * op_prolong,CeedOperator * op_restrict)1011 static int CeedOperatorMultigridLevelCreateSingle_Core(CeedOperator op_fine, CeedVector p_mult_fine, CeedElemRestriction rstr_coarse,
1012                                                        CeedBasis basis_coarse, CeedBasis basis_c_to_f, CeedOperator *op_coarse,
1013                                                        CeedOperator *op_prolong, CeedOperator *op_restrict) {
1014   bool                is_composite;
1015   Ceed                ceed;
1016   CeedInt             dim              = 0, num_comp, num_input_fields, num_output_fields;
1017   CeedVector          mult_vec         = NULL;
1018   CeedElemRestriction rstr_p_mult_fine = NULL, rstr_fine = NULL;
1019   CeedOperatorField  *input_fields, *output_fields;
1020 
1021   CeedCall(CeedOperatorGetCeed(op_fine, &ceed));
1022 
1023   // Check for composite operator
1024   CeedCall(CeedOperatorIsComposite(op_fine, &is_composite));
1025   CeedCheck(!is_composite, ceed, CEED_ERROR_UNSUPPORTED, "Automatic multigrid setup for composite operators not supported");
1026 
1027   // Coarse Grid
1028   {
1029     bool is_at_points;
1030 
1031     CeedCall(CeedOperatorIsAtPoints(op_fine, &is_at_points));
1032     if (is_at_points) {
1033       CeedVector          point_coords;
1034       CeedElemRestriction rstr_points;
1035 
1036       CeedCall(CeedOperatorCreateAtPoints(ceed, op_fine->qf, op_fine->dqf, op_fine->dqfT, op_coarse));
1037       CeedCall(CeedOperatorAtPointsGetPoints(op_fine, &rstr_points, &point_coords));
1038       CeedCall(CeedOperatorAtPointsSetPoints(*op_coarse, rstr_points, point_coords));
1039       CeedCall(CeedVectorDestroy(&point_coords));
1040       CeedCall(CeedElemRestrictionDestroy(&rstr_points));
1041     } else {
1042       CeedCall(CeedOperatorCreate(ceed, op_fine->qf, op_fine->dqf, op_fine->dqfT, op_coarse));
1043     }
1044   }
1045   CeedCall(CeedOperatorGetFields(op_fine, &num_input_fields, &input_fields, &num_output_fields, &output_fields));
1046   // -- Clone input fields
1047   for (CeedInt i = 0; i < num_input_fields; i++) {
1048     const char         *field_name;
1049     CeedVector          vec;
1050     CeedElemRestriction rstr  = NULL;
1051     CeedBasis           basis = NULL;
1052 
1053     CeedCall(CeedOperatorFieldGetName(input_fields[i], &field_name));
1054     CeedCall(CeedOperatorFieldGetVector(input_fields[i], &vec));
1055     if (vec == CEED_VECTOR_ACTIVE) {
1056       CeedCall(CeedElemRestrictionReferenceCopy(rstr_coarse, &rstr));
1057       CeedCall(CeedBasisReferenceCopy(basis_coarse, &basis));
1058       if (!rstr_fine) CeedCall(CeedOperatorFieldGetElemRestriction(input_fields[i], &rstr_fine));
1059     } else {
1060       CeedCall(CeedOperatorFieldGetElemRestriction(input_fields[i], &rstr));
1061       CeedCall(CeedOperatorFieldGetBasis(input_fields[i], &basis));
1062     }
1063     if (dim == 0) CeedCall(CeedBasisGetDimension(basis, &dim));
1064     CeedCall(CeedOperatorSetField(*op_coarse, field_name, rstr, basis, vec));
1065     CeedCall(CeedVectorDestroy(&vec));
1066     CeedCall(CeedElemRestrictionDestroy(&rstr));
1067     CeedCall(CeedBasisDestroy(&basis));
1068   }
1069   // -- Clone output fields
1070   for (CeedInt i = 0; i < num_output_fields; i++) {
1071     const char         *field_name;
1072     CeedVector          vec;
1073     CeedElemRestriction rstr  = NULL;
1074     CeedBasis           basis = NULL;
1075 
1076     CeedCall(CeedOperatorFieldGetName(output_fields[i], &field_name));
1077     CeedCall(CeedOperatorFieldGetVector(output_fields[i], &vec));
1078     if (vec == CEED_VECTOR_ACTIVE) {
1079       CeedCall(CeedElemRestrictionReferenceCopy(rstr_coarse, &rstr));
1080       CeedCall(CeedBasisReferenceCopy(basis_coarse, &basis));
1081       if (!rstr_fine) CeedCall(CeedOperatorFieldGetElemRestriction(output_fields[i], &rstr_fine));
1082     } else {
1083       CeedCall(CeedOperatorFieldGetElemRestriction(output_fields[i], &rstr));
1084       CeedCall(CeedOperatorFieldGetBasis(output_fields[i], &basis));
1085     }
1086     if (dim == 0) CeedCall(CeedBasisGetDimension(basis, &dim));
1087     CeedCall(CeedOperatorSetField(*op_coarse, field_name, rstr, basis, vec));
1088     CeedCall(CeedVectorDestroy(&vec));
1089     CeedCall(CeedElemRestrictionDestroy(&rstr));
1090     CeedCall(CeedBasisDestroy(&basis));
1091   }
1092   dim = dim ? dim : 1;
1093   // -- Clone QFunctionAssemblyData
1094   {
1095     CeedQFunctionAssemblyData fine_data;
1096 
1097     CeedCall(CeedOperatorGetQFunctionAssemblyData(op_fine, &fine_data));
1098     CeedCall(CeedQFunctionAssemblyDataReferenceCopy(fine_data, &(*op_coarse)->qf_assembled));
1099   }
1100 
1101   // Multiplicity vector
1102   bool use_scalar_mult = true;
1103 
1104   if (op_restrict || op_prolong) {
1105     CeedInt             num_elem, num_comp, elem_size;
1106     CeedVector          mult_l_vec, mult_e_vec;
1107     CeedRestrictionType rstr_type;
1108     CeedElemRestriction rstr_p_mult_full;
1109 
1110     CeedCall(CeedElemRestrictionGetType(rstr_fine, &rstr_type));
1111     CeedCheck(rstr_type != CEED_RESTRICTION_CURL_ORIENTED, ceed, CEED_ERROR_UNSUPPORTED,
1112               "Element restrictions created with CeedElemRestrictionCreateCurlOriented are not supported");
1113     CeedCheck(p_mult_fine, ceed, CEED_ERROR_INCOMPATIBLE, "Prolongation or restriction operator creation requires fine grid multiplicity vector");
1114 
1115     // Create multiplicity multi-component l-vector
1116     CeedCall(CeedElemRestrictionCreateUnsignedCopy(rstr_fine, &rstr_p_mult_full));
1117     CeedCall(CeedElemRestrictionCreateVector(rstr_fine, &mult_l_vec, &mult_e_vec));
1118     CeedCall(CeedVectorSetValue(mult_e_vec, 0.0));
1119     CeedCall(CeedElemRestrictionApply(rstr_p_mult_full, CEED_NOTRANSPOSE, p_mult_fine, mult_e_vec, CEED_REQUEST_IMMEDIATE));
1120     CeedCall(CeedVectorSetValue(mult_l_vec, 0.0));
1121     CeedCall(CeedElemRestrictionApply(rstr_p_mult_full, CEED_TRANSPOSE, mult_e_vec, mult_l_vec, CEED_REQUEST_IMMEDIATE));
1122     CeedCall(CeedVectorReciprocal(mult_l_vec));
1123 
1124     // Determine to use scalar multiplicity or not
1125     {
1126       const CeedInt p = pow(elem_size, 1.0 / dim);
1127 
1128       use_scalar_mult = num_comp > 1 && (dim < 3 || num_comp - 1 > (3 * (pow(p, dim - 1) - pow(p, dim - 2)) + 1) / pow(p - 1, dim));
1129     }
1130 
1131     if (use_scalar_mult) {
1132       // Create multiplicity single component e-vector
1133       CeedCall(CeedElemRestrictionGetNumElements(rstr_p_mult_full, &num_elem));
1134       CeedCall(CeedElemRestrictionGetNumComponents(rstr_p_mult_full, &num_comp));
1135       CeedCall(CeedElemRestrictionGetElementSize(rstr_p_mult_full, &elem_size));
1136       CeedCall(CeedElemRestrictionCreateStrided(ceed, num_elem, elem_size, 1, num_elem * elem_size, CEED_STRIDES_BACKEND, &rstr_p_mult_fine));
1137       CeedCall(CeedElemRestrictionCreateVector(rstr_p_mult_fine, &mult_vec, NULL));
1138       {
1139         CeedQFunction qf_to_scalar;
1140         CeedOperator  op_to_scalar;
1141 
1142         CeedCall(CeedQFunctionCreateInteriorByName(ceed, "Identity to scalar", &qf_to_scalar));
1143         CeedCall(CeedQFunctionAddInput(qf_to_scalar, "input", num_comp, CEED_EVAL_NONE));
1144         CeedCall(CeedQFunctionAddOutput(qf_to_scalar, "output", 1, CEED_EVAL_NONE));
1145 
1146         CeedCall(CeedOperatorCreate(ceed, qf_to_scalar, CEED_QFUNCTION_NONE, CEED_QFUNCTION_NONE, &op_to_scalar));
1147         CeedCall(CeedOperatorSetField(op_to_scalar, "input", rstr_p_mult_full, CEED_BASIS_NONE, CEED_VECTOR_ACTIVE));
1148         CeedCall(CeedOperatorSetField(op_to_scalar, "output", rstr_p_mult_fine, CEED_BASIS_NONE, CEED_VECTOR_ACTIVE));
1149 
1150         CeedCall(CeedOperatorApply(op_to_scalar, mult_l_vec, mult_vec, CEED_REQUEST_IMMEDIATE));
1151 
1152         // Clean-up
1153         CeedCall(CeedQFunctionDestroy(&qf_to_scalar));
1154         CeedCall(CeedOperatorDestroy(&op_to_scalar));
1155       }
1156     } else {
1157       mult_vec = NULL;
1158       CeedCall(CeedVectorReferenceCopy(mult_l_vec, &mult_vec));
1159       rstr_p_mult_fine = NULL;
1160       CeedCall(CeedElemRestrictionReferenceCopy(rstr_p_mult_full, &rstr_p_mult_fine));
1161     }
1162     // Clean-up
1163     CeedCall(CeedVectorDestroy(&mult_e_vec));
1164     CeedCall(CeedVectorDestroy(&mult_l_vec));
1165     CeedCall(CeedElemRestrictionDestroy(&rstr_p_mult_full));
1166   }
1167 
1168   // Clone name
1169   bool   has_name = op_fine->name;
1170   size_t name_len = op_fine->name ? strlen(op_fine->name) : 0;
1171   CeedCall(CeedOperatorSetName(*op_coarse, op_fine->name));
1172 
1173   // Check that coarse to fine basis is provided if prolong/restrict operators are requested
1174   CeedCheck(basis_c_to_f || (!op_restrict && !op_prolong), ceed, CEED_ERROR_INCOMPATIBLE,
1175             "Prolongation or restriction operator creation requires coarse-to-fine basis");
1176 
1177   // Restriction/Prolongation Operators
1178   CeedCall(CeedBasisGetNumComponents(basis_coarse, &num_comp));
1179 
1180   // Restriction
1181   if (op_restrict) {
1182     CeedInt             *num_comp_r_data;
1183     CeedQFunctionContext ctx_r;
1184     CeedQFunction        qf_restrict;
1185 
1186     CeedCall(CeedQFunctionCreateInteriorByName(ceed, use_scalar_mult ? "Scale (scalar)" : "Scale", &qf_restrict));
1187     CeedCall(CeedCalloc(1, &num_comp_r_data));
1188     num_comp_r_data[0] = num_comp;
1189     CeedCall(CeedQFunctionContextCreate(ceed, &ctx_r));
1190     CeedCall(CeedQFunctionContextSetData(ctx_r, CEED_MEM_HOST, CEED_OWN_POINTER, sizeof(*num_comp_r_data), num_comp_r_data));
1191     CeedCall(CeedQFunctionSetContext(qf_restrict, ctx_r));
1192     CeedCall(CeedQFunctionContextDestroy(&ctx_r));
1193     CeedCall(CeedQFunctionAddInput(qf_restrict, "input", num_comp, CEED_EVAL_NONE));
1194     CeedCall(CeedQFunctionAddInput(qf_restrict, "scale", use_scalar_mult ? 1 : num_comp, CEED_EVAL_NONE));
1195     CeedCall(CeedQFunctionAddOutput(qf_restrict, "output", num_comp, CEED_EVAL_INTERP));
1196     CeedCall(CeedQFunctionSetUserFlopsEstimate(qf_restrict, num_comp));
1197 
1198     CeedCall(CeedOperatorCreate(ceed, qf_restrict, CEED_QFUNCTION_NONE, CEED_QFUNCTION_NONE, op_restrict));
1199     CeedCall(CeedOperatorSetField(*op_restrict, "input", rstr_fine, CEED_BASIS_NONE, CEED_VECTOR_ACTIVE));
1200     CeedCall(CeedOperatorSetField(*op_restrict, "scale", rstr_p_mult_fine, CEED_BASIS_NONE, mult_vec));
1201     CeedCall(CeedOperatorSetField(*op_restrict, "output", rstr_coarse, basis_c_to_f, CEED_VECTOR_ACTIVE));
1202 
1203     // Set name
1204     char *restriction_name;
1205 
1206     CeedCall(CeedCalloc(17 + name_len, &restriction_name));
1207     sprintf(restriction_name, "restriction%s%s", has_name ? " for " : "", has_name ? op_fine->name : "");
1208     CeedCall(CeedOperatorSetName(*op_restrict, restriction_name));
1209     CeedCall(CeedFree(&restriction_name));
1210 
1211     // Check
1212     CeedCall(CeedOperatorCheckReady(*op_restrict));
1213 
1214     // Cleanup
1215     CeedCall(CeedQFunctionDestroy(&qf_restrict));
1216   }
1217 
1218   // Prolongation
1219   if (op_prolong) {
1220     CeedInt             *num_comp_p_data;
1221     CeedQFunctionContext ctx_p;
1222     CeedQFunction        qf_prolong;
1223 
1224     CeedCall(CeedQFunctionCreateInteriorByName(ceed, use_scalar_mult ? "Scale (scalar)" : "Scale", &qf_prolong));
1225     CeedCall(CeedCalloc(1, &num_comp_p_data));
1226     num_comp_p_data[0] = num_comp;
1227     CeedCall(CeedQFunctionContextCreate(ceed, &ctx_p));
1228     CeedCall(CeedQFunctionContextSetData(ctx_p, CEED_MEM_HOST, CEED_OWN_POINTER, sizeof(*num_comp_p_data), num_comp_p_data));
1229     CeedCall(CeedQFunctionSetContext(qf_prolong, ctx_p));
1230     CeedCall(CeedQFunctionContextDestroy(&ctx_p));
1231     CeedCall(CeedQFunctionAddInput(qf_prolong, "input", num_comp, CEED_EVAL_INTERP));
1232     CeedCall(CeedQFunctionAddInput(qf_prolong, "scale", use_scalar_mult ? 1 : num_comp, CEED_EVAL_NONE));
1233     CeedCall(CeedQFunctionAddOutput(qf_prolong, "output", num_comp, CEED_EVAL_NONE));
1234     CeedCall(CeedQFunctionSetUserFlopsEstimate(qf_prolong, num_comp));
1235 
1236     CeedCall(CeedOperatorCreate(ceed, qf_prolong, CEED_QFUNCTION_NONE, CEED_QFUNCTION_NONE, op_prolong));
1237     CeedCall(CeedOperatorSetField(*op_prolong, "input", rstr_coarse, basis_c_to_f, CEED_VECTOR_ACTIVE));
1238     CeedCall(CeedOperatorSetField(*op_prolong, "scale", rstr_p_mult_fine, CEED_BASIS_NONE, mult_vec));
1239     CeedCall(CeedOperatorSetField(*op_prolong, "output", rstr_fine, CEED_BASIS_NONE, CEED_VECTOR_ACTIVE));
1240 
1241     // Set name
1242     char *prolongation_name;
1243 
1244     CeedCall(CeedCalloc(18 + name_len, &prolongation_name));
1245     sprintf(prolongation_name, "prolongation%s%s", has_name ? " for " : "", has_name ? op_fine->name : "");
1246     CeedCall(CeedOperatorSetName(*op_prolong, prolongation_name));
1247     CeedCall(CeedFree(&prolongation_name));
1248 
1249     // Check
1250     CeedCall(CeedOperatorCheckReady(*op_prolong));
1251 
1252     // Cleanup
1253     CeedCall(CeedQFunctionDestroy(&qf_prolong));
1254   }
1255 
1256   // Check
1257   CeedCall(CeedOperatorCheckReady(*op_coarse));
1258 
1259   // Cleanup
1260   CeedCall(CeedDestroy(&ceed));
1261   CeedCall(CeedVectorDestroy(&mult_vec));
1262   CeedCall(CeedElemRestrictionDestroy(&rstr_fine));
1263   CeedCall(CeedElemRestrictionDestroy(&rstr_p_mult_fine));
1264   CeedCall(CeedBasisDestroy(&basis_c_to_f));
1265   return CEED_ERROR_SUCCESS;
1266 }
1267 
1268 /**
1269   @brief Build 1D mass matrix and Laplacian with perturbation
1270 
1271   @param[in]  interp_1d   Interpolation matrix in one dimension
1272   @param[in]  grad_1d     Gradient matrix in one dimension
1273   @param[in]  q_weight_1d Quadrature weights in one dimension
1274   @param[in]  P_1d        Number of basis nodes in one dimension
1275   @param[in]  Q_1d        Number of quadrature points in one dimension
1276   @param[in]  dim         Dimension of basis
1277   @param[out] mass        Assembled mass matrix in one dimension
1278   @param[out] laplace     Assembled perturbed Laplacian in one dimension
1279 
1280   @return An error code: 0 - success, otherwise - failure
1281 
1282   @ref Developer
1283 **/
1284 CeedPragmaOptimizeOff
CeedBuildMassLaplace(const CeedScalar * interp_1d,const CeedScalar * grad_1d,const CeedScalar * q_weight_1d,CeedInt P_1d,CeedInt Q_1d,CeedInt dim,CeedScalar * mass,CeedScalar * laplace)1285 static int CeedBuildMassLaplace(const CeedScalar *interp_1d, const CeedScalar *grad_1d, const CeedScalar *q_weight_1d, CeedInt P_1d, CeedInt Q_1d,
1286                                 CeedInt dim, CeedScalar *mass, CeedScalar *laplace) {
1287   for (CeedInt i = 0; i < P_1d; i++) {
1288     for (CeedInt j = 0; j < P_1d; j++) {
1289       CeedScalar sum = 0.0;
1290       for (CeedInt k = 0; k < Q_1d; k++) sum += interp_1d[k * P_1d + i] * q_weight_1d[k] * interp_1d[k * P_1d + j];
1291       mass[i + j * P_1d] = sum;
1292     }
1293   }
1294   // -- Laplacian
1295   for (CeedInt i = 0; i < P_1d; i++) {
1296     for (CeedInt j = 0; j < P_1d; j++) {
1297       CeedScalar sum = 0.0;
1298 
1299       for (CeedInt k = 0; k < Q_1d; k++) sum += grad_1d[k * P_1d + i] * q_weight_1d[k] * grad_1d[k * P_1d + j];
1300       laplace[i + j * P_1d] = sum;
1301     }
1302   }
1303   CeedScalar perturbation = dim > 2 ? 1e-6 : 1e-4;
1304   for (CeedInt i = 0; i < P_1d; i++) laplace[i + P_1d * i] += perturbation;
1305   return CEED_ERROR_SUCCESS;
1306 }
1307 CeedPragmaOptimizeOn
1308 
1309 /// @}
1310 
1311 /// ----------------------------------------------------------------------------
1312 /// CeedOperator Backend API
1313 /// ----------------------------------------------------------------------------
1314 /// @addtogroup CeedOperatorBackend
1315 /// @{
1316 
1317 /**
1318   @brief Select correct basis matrix pointer based on @ref CeedEvalMode
1319 
1320   @param[in]  basis     `CeedBasis` from which to get the basis matrix
1321   @param[in]  eval_mode Current basis evaluation mode
1322   @param[in]  identity  Pointer to identity matrix
1323   @param[out] basis_ptr `CeedBasis` pointer to set
1324 
1325   @ref Backend
1326 **/
CeedOperatorGetBasisPointer(CeedBasis basis,CeedEvalMode eval_mode,const CeedScalar * identity,const CeedScalar ** basis_ptr)1327 int CeedOperatorGetBasisPointer(CeedBasis basis, CeedEvalMode eval_mode, const CeedScalar *identity, const CeedScalar **basis_ptr) {
1328   switch (eval_mode) {
1329     case CEED_EVAL_NONE:
1330       *basis_ptr = identity;
1331       break;
1332     case CEED_EVAL_INTERP:
1333       CeedCall(CeedBasisGetInterp(basis, basis_ptr));
1334       break;
1335     case CEED_EVAL_GRAD:
1336       CeedCall(CeedBasisGetGrad(basis, basis_ptr));
1337       break;
1338     case CEED_EVAL_DIV:
1339       CeedCall(CeedBasisGetDiv(basis, basis_ptr));
1340       break;
1341     case CEED_EVAL_CURL:
1342       CeedCall(CeedBasisGetCurl(basis, basis_ptr));
1343       break;
1344     case CEED_EVAL_WEIGHT:
1345       break;  // Caught by QF Assembly
1346   }
1347   assert(*basis_ptr != NULL);
1348   return CEED_ERROR_SUCCESS;
1349 }
1350 
1351 /**
1352   @brief Create point block restriction for active `CeedOperatorField`
1353 
1354   @param[in]  rstr             Original `CeedElemRestriction` for active field
1355   @param[out] point_block_rstr Address of the variable where the newly created `CeedElemRestriction` will be stored
1356 
1357   @return An error code: 0 - success, otherwise - failure
1358 
1359   @ref Backend
1360 **/
CeedOperatorCreateActivePointBlockRestriction(CeedElemRestriction rstr,CeedElemRestriction * point_block_rstr)1361 int CeedOperatorCreateActivePointBlockRestriction(CeedElemRestriction rstr, CeedElemRestriction *point_block_rstr) {
1362   Ceed           ceed;
1363   CeedInt        num_elem, num_comp, shift, elem_size, comp_stride, *point_block_offsets;
1364   CeedSize       l_size;
1365   const CeedInt *offsets;
1366 
1367   CeedCall(CeedElemRestrictionGetCeed(rstr, &ceed));
1368   CeedCall(CeedElemRestrictionGetOffsets(rstr, CEED_MEM_HOST, &offsets));
1369 
1370   // Expand offsets
1371   CeedCall(CeedElemRestrictionGetNumElements(rstr, &num_elem));
1372   CeedCall(CeedElemRestrictionGetNumComponents(rstr, &num_comp));
1373   CeedCall(CeedElemRestrictionGetElementSize(rstr, &elem_size));
1374   CeedCall(CeedElemRestrictionGetCompStride(rstr, &comp_stride));
1375   CeedCall(CeedElemRestrictionGetLVectorSize(rstr, &l_size));
1376   shift = num_comp;
1377   if (comp_stride != 1) shift *= num_comp;
1378   CeedCall(CeedCalloc(num_elem * elem_size, &point_block_offsets));
1379   for (CeedInt i = 0; i < num_elem * elem_size; i++) {
1380     point_block_offsets[i] = offsets[i] * shift;
1381   }
1382 
1383   // Create new restriction
1384   CeedCall(CeedElemRestrictionCreate(ceed, num_elem, elem_size, num_comp * num_comp, 1, l_size * num_comp, CEED_MEM_HOST, CEED_OWN_POINTER,
1385                                      point_block_offsets, point_block_rstr));
1386 
1387   // Cleanup
1388   CeedCall(CeedElemRestrictionRestoreOffsets(rstr, &offsets));
1389   CeedCall(CeedDestroy(&ceed));
1390   return CEED_ERROR_SUCCESS;
1391 }
1392 
1393 /**
1394   @brief Get `CeedQFunctionAssemblyData`
1395 
1396   @param[in]  op   `CeedOperator` to assemble
1397   @param[out] data `CeedQFunctionAssemblyData`
1398 
1399   @return An error code: 0 - success, otherwise - failure
1400 
1401   @ref Backend
1402 **/
CeedOperatorGetQFunctionAssemblyData(CeedOperator op,CeedQFunctionAssemblyData * data)1403 int CeedOperatorGetQFunctionAssemblyData(CeedOperator op, CeedQFunctionAssemblyData *data) {
1404   if (!op->qf_assembled) {
1405     CeedQFunctionAssemblyData data;
1406 
1407     CeedCall(CeedQFunctionAssemblyDataCreate(CeedOperatorReturnCeed(op), &data));
1408     op->qf_assembled = data;
1409   }
1410   *data = op->qf_assembled;
1411   return CEED_ERROR_SUCCESS;
1412 }
1413 
1414 /**
1415   @brief Create object holding `CeedQFunction` assembly data for `CeedOperator`
1416 
1417   @param[in]  ceed `Ceed` object used to create the `CeedQFunctionAssemblyData`
1418   @param[out] data Address of the variable where the newly created `CeedQFunctionAssemblyData` will be stored
1419 
1420   @return An error code: 0 - success, otherwise - failure
1421 
1422   @ref Backend
1423 **/
CeedQFunctionAssemblyDataCreate(Ceed ceed,CeedQFunctionAssemblyData * data)1424 int CeedQFunctionAssemblyDataCreate(Ceed ceed, CeedQFunctionAssemblyData *data) {
1425   CeedCall(CeedCalloc(1, data));
1426   (*data)->ref_count = 1;
1427   CeedCall(CeedReferenceCopy(ceed, &(*data)->ceed));
1428   return CEED_ERROR_SUCCESS;
1429 }
1430 
1431 /**
1432   @brief Increment the reference counter for a `CeedQFunctionAssemblyData`
1433 
1434   @param[in,out] data `CeedQFunctionAssemblyData` to increment the reference counter
1435 
1436   @return An error code: 0 - success, otherwise - failure
1437 
1438   @ref Backend
1439 **/
CeedQFunctionAssemblyDataReference(CeedQFunctionAssemblyData data)1440 int CeedQFunctionAssemblyDataReference(CeedQFunctionAssemblyData data) {
1441   data->ref_count++;
1442   return CEED_ERROR_SUCCESS;
1443 }
1444 
1445 /**
1446   @brief Set re-use of `CeedQFunctionAssemblyData`
1447 
1448   @param[in,out] data       `CeedQFunctionAssemblyData` to mark for reuse
1449   @param[in]     reuse_data Boolean flag indicating data re-use
1450 
1451   @return An error code: 0 - success, otherwise - failure
1452 
1453   @ref Backend
1454 **/
CeedQFunctionAssemblyDataSetReuse(CeedQFunctionAssemblyData data,bool reuse_data)1455 int CeedQFunctionAssemblyDataSetReuse(CeedQFunctionAssemblyData data, bool reuse_data) {
1456   data->reuse_data        = reuse_data;
1457   data->needs_data_update = true;
1458   return CEED_ERROR_SUCCESS;
1459 }
1460 
1461 /**
1462   @brief Mark `CeedQFunctionAssemblyData` as stale
1463 
1464   @param[in,out] data              `CeedQFunctionAssemblyData` to mark as stale
1465   @param[in]     needs_data_update Boolean flag indicating if update is needed or completed
1466 
1467   @return An error code: 0 - success, otherwise - failure
1468 
1469   @ref Backend
1470 **/
CeedQFunctionAssemblyDataSetUpdateNeeded(CeedQFunctionAssemblyData data,bool needs_data_update)1471 int CeedQFunctionAssemblyDataSetUpdateNeeded(CeedQFunctionAssemblyData data, bool needs_data_update) {
1472   data->needs_data_update = needs_data_update;
1473   return CEED_ERROR_SUCCESS;
1474 }
1475 
1476 /**
1477   @brief Determine if `CeedQFunctionAssemblyData` needs update
1478 
1479   @param[in]  data             `CeedQFunctionAssemblyData` to mark as stale
1480   @param[out] is_update_needed Boolean flag indicating if re-assembly is required
1481 
1482   @return An error code: 0 - success, otherwise - failure
1483 
1484   @ref Backend
1485 **/
CeedQFunctionAssemblyDataIsUpdateNeeded(CeedQFunctionAssemblyData data,bool * is_update_needed)1486 int CeedQFunctionAssemblyDataIsUpdateNeeded(CeedQFunctionAssemblyData data, bool *is_update_needed) {
1487   *is_update_needed = !data->reuse_data || data->needs_data_update;
1488   return CEED_ERROR_SUCCESS;
1489 }
1490 
1491 /**
1492   @brief Copy the pointer to a `CeedQFunctionAssemblyData`.
1493 
1494   Both pointers should be destroyed with @ref CeedQFunctionAssemblyDataDestroy().
1495 
1496   Note: If the value of ` *data_copy` passed to this function is non-`NULL` , then it is assumed that ` *data_copy` is a pointer to a `CeedQFunctionAssemblyData`.
1497         This `CeedQFunctionAssemblyData` will be destroyed if ` *data_copy` is the only reference to this `CeedQFunctionAssemblyData`.
1498 
1499   @param[in]     data      `CeedQFunctionAssemblyData` to copy reference to
1500   @param[in,out] data_copy Variable to store copied reference
1501 
1502   @return An error code: 0 - success, otherwise - failure
1503 
1504   @ref Backend
1505 **/
CeedQFunctionAssemblyDataReferenceCopy(CeedQFunctionAssemblyData data,CeedQFunctionAssemblyData * data_copy)1506 int CeedQFunctionAssemblyDataReferenceCopy(CeedQFunctionAssemblyData data, CeedQFunctionAssemblyData *data_copy) {
1507   CeedCall(CeedQFunctionAssemblyDataReference(data));
1508   CeedCall(CeedQFunctionAssemblyDataDestroy(data_copy));
1509   *data_copy = data;
1510   return CEED_ERROR_SUCCESS;
1511 }
1512 
1513 /**
1514   @brief Get setup status for internal objects for `CeedQFunctionAssemblyData`
1515 
1516   @param[in]  data     `CeedQFunctionAssemblyData` to retrieve status
1517   @param[out] is_setup Boolean flag for setup status
1518 
1519   @return An error code: 0 - success, otherwise - failure
1520 
1521   @ref Backend
1522 **/
CeedQFunctionAssemblyDataIsSetup(CeedQFunctionAssemblyData data,bool * is_setup)1523 int CeedQFunctionAssemblyDataIsSetup(CeedQFunctionAssemblyData data, bool *is_setup) {
1524   *is_setup = data->is_setup;
1525   return CEED_ERROR_SUCCESS;
1526 }
1527 
1528 /**
1529   @brief Set internal objects for `CeedQFunctionAssemblyData`
1530 
1531   @param[in,out] data `CeedQFunctionAssemblyData` to set objects
1532   @param[in]     vec  `CeedVector` to store assembled `CeedQFunction` at quadrature points
1533   @param[in]     rstr `CeedElemRestriction` for `CeedVector` containing assembled `CeedQFunction`
1534 
1535   @return An error code: 0 - success, otherwise - failure
1536 
1537   @ref Backend
1538 **/
CeedQFunctionAssemblyDataSetObjects(CeedQFunctionAssemblyData data,CeedVector vec,CeedElemRestriction rstr)1539 int CeedQFunctionAssemblyDataSetObjects(CeedQFunctionAssemblyData data, CeedVector vec, CeedElemRestriction rstr) {
1540   CeedCall(CeedVectorReferenceCopy(vec, &data->vec));
1541   CeedCall(CeedElemRestrictionReferenceCopy(rstr, &data->rstr));
1542 
1543   data->is_setup = true;
1544   return CEED_ERROR_SUCCESS;
1545 }
1546 
1547 /**
1548   @brief Get internal objects for `CeedQFunctionAssemblyData`
1549 
1550   @param[in,out] data `CeedQFunctionAssemblyData` to set objects
1551   @param[out]    vec  `CeedVector` to store assembled `CeedQFunction` at quadrature points
1552   @param[out]    rstr `CeedElemRestriction` for `CeedVector` containing assembled `CeedQFunction`
1553 
1554   @return An error code: 0 - success, otherwise - failure
1555 
1556   @ref Backend
1557 **/
CeedQFunctionAssemblyDataGetObjects(CeedQFunctionAssemblyData data,CeedVector * vec,CeedElemRestriction * rstr)1558 int CeedQFunctionAssemblyDataGetObjects(CeedQFunctionAssemblyData data, CeedVector *vec, CeedElemRestriction *rstr) {
1559   CeedCheck(data->is_setup, data->ceed, CEED_ERROR_INCOMPLETE, "Internal objects not set; must call CeedQFunctionAssemblyDataSetObjects first.");
1560 
1561   CeedCall(CeedVectorReferenceCopy(data->vec, vec));
1562   CeedCall(CeedElemRestrictionReferenceCopy(data->rstr, rstr));
1563   return CEED_ERROR_SUCCESS;
1564 }
1565 
1566 /**
1567   @brief Destroy `CeedQFunctionAssemblyData`
1568 
1569   @param[in,out] data  `CeedQFunctionAssemblyData` to destroy
1570 
1571   @return An error code: 0 - success, otherwise - failure
1572 
1573   @ref Backend
1574 **/
CeedQFunctionAssemblyDataDestroy(CeedQFunctionAssemblyData * data)1575 int CeedQFunctionAssemblyDataDestroy(CeedQFunctionAssemblyData *data) {
1576   if (!*data || --(*data)->ref_count > 0) {
1577     *data = NULL;
1578     return CEED_ERROR_SUCCESS;
1579   }
1580   CeedCall(CeedDestroy(&(*data)->ceed));
1581   CeedCall(CeedVectorDestroy(&(*data)->vec));
1582   CeedCall(CeedElemRestrictionDestroy(&(*data)->rstr));
1583 
1584   CeedCall(CeedFree(data));
1585   return CEED_ERROR_SUCCESS;
1586 }
1587 
1588 /**
1589   @brief Get `CeedOperatorAssemblyData`
1590 
1591   @param[in]  op   `CeedOperator` to assemble
1592   @param[out] data `CeedOperatorAssemblyData`
1593 
1594   @return An error code: 0 - success, otherwise - failure
1595 
1596   @ref Backend
1597 **/
CeedOperatorGetOperatorAssemblyData(CeedOperator op,CeedOperatorAssemblyData * data)1598 int CeedOperatorGetOperatorAssemblyData(CeedOperator op, CeedOperatorAssemblyData *data) {
1599   if (!op->op_assembled) {
1600     CeedOperatorAssemblyData data;
1601 
1602     CeedCall(CeedOperatorAssemblyDataCreate(CeedOperatorReturnCeed(op), op, &data));
1603     op->op_assembled = data;
1604   }
1605   *data = op->op_assembled;
1606   return CEED_ERROR_SUCCESS;
1607 }
1608 
1609 /**
1610   @brief Create object holding `CeedOperator` assembly data.
1611 
1612   The `CeedOperatorAssemblyData` holds an array with references to every active `CeedBasis` used in the `CeedOperator`.
1613   An array with references to the corresponding active `CeedElemRestriction` is also stored.
1614   For each active `CeedBasis, the `CeedOperatorAssemblyData` holds an array of all input and output @ref CeedEvalMode for this `CeedBasis`.
1615   The `CeedOperatorAssemblyData` holds an array of offsets for indexing into the assembled `CeedQFunction` arrays to the row representing each @ref CeedEvalMode.
1616   The number of input columns across all active bases for the assembled `CeedQFunction` is also stored.
1617   Lastly, the `CeedOperatorAssembly` data holds assembled matrices representing the full action of the `CeedBasis` for all @ref CeedEvalMode.
1618 
1619   @param[in]  ceed `Ceed` object used to create the `CeedOperatorAssemblyData`
1620   @param[in]  op   `CeedOperator` to be assembled
1621   @param[out] data Address of the variable where the newly created `CeedOperatorAssemblyData` will be stored
1622 
1623   @return An error code: 0 - success, otherwise - failure
1624 
1625   @ref Backend
1626 **/
CeedOperatorAssemblyDataCreate(Ceed ceed,CeedOperator op,CeedOperatorAssemblyData * data)1627 int CeedOperatorAssemblyDataCreate(Ceed ceed, CeedOperator op, CeedOperatorAssemblyData *data) {
1628   CeedInt             num_active_bases_in = 0, num_active_bases_out = 0, offset = 0;
1629   CeedInt             num_input_fields, *num_eval_modes_in = NULL, num_output_fields, *num_eval_modes_out = NULL;
1630   CeedSize          **eval_mode_offsets_in = NULL, **eval_mode_offsets_out = NULL;
1631   CeedEvalMode      **eval_modes_in = NULL, **eval_modes_out = NULL;
1632   CeedQFunctionField *qf_fields;
1633   CeedQFunction       qf;
1634   CeedOperatorField  *op_fields;
1635   bool                is_composite;
1636 
1637   CeedCall(CeedOperatorIsComposite(op, &is_composite));
1638   CeedCheck(!is_composite, ceed, CEED_ERROR_INCOMPATIBLE, "Can only create CeedOperator assembly data for non-composite operators.");
1639 
1640   // Allocate
1641   CeedCall(CeedCalloc(1, data));
1642   CeedCall(CeedReferenceCopy(ceed, &(*data)->ceed));
1643 
1644   // Build OperatorAssembly data
1645   CeedCall(CeedOperatorGetQFunction(op, &qf));
1646 
1647   // Determine active input basis
1648   CeedCall(CeedQFunctionGetFields(qf, &num_input_fields, &qf_fields, NULL, NULL));
1649   CeedCall(CeedOperatorGetFields(op, NULL, &op_fields, NULL, NULL));
1650   for (CeedInt i = 0; i < num_input_fields; i++) {
1651     CeedVector vec;
1652 
1653     CeedCall(CeedOperatorFieldGetVector(op_fields[i], &vec));
1654     if (vec == CEED_VECTOR_ACTIVE) {
1655       CeedInt      index = -1, num_comp, q_comp;
1656       CeedEvalMode eval_mode;
1657       CeedBasis    basis_in = NULL;
1658 
1659       CeedCall(CeedOperatorFieldGetBasis(op_fields[i], &basis_in));
1660       CeedCall(CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode));
1661       CeedCall(CeedBasisGetNumComponents(basis_in, &num_comp));
1662       CeedCall(CeedBasisGetNumQuadratureComponents(basis_in, eval_mode, &q_comp));
1663       for (CeedInt i = 0; i < num_active_bases_in; i++) {
1664         if ((*data)->active_bases_in[i] == basis_in) index = i;
1665       }
1666       if (index == -1) {
1667         CeedElemRestriction elem_rstr_in;
1668 
1669         index = num_active_bases_in;
1670         CeedCall(CeedRealloc(num_active_bases_in + 1, &(*data)->active_bases_in));
1671         (*data)->active_bases_in[num_active_bases_in] = NULL;
1672         CeedCall(CeedBasisReferenceCopy(basis_in, &(*data)->active_bases_in[num_active_bases_in]));
1673         CeedCall(CeedRealloc(num_active_bases_in + 1, &(*data)->active_elem_rstrs_in));
1674         (*data)->active_elem_rstrs_in[num_active_bases_in] = NULL;
1675         CeedCall(CeedOperatorFieldGetElemRestriction(op_fields[i], &elem_rstr_in));
1676         CeedCall(CeedElemRestrictionReferenceCopy(elem_rstr_in, &(*data)->active_elem_rstrs_in[num_active_bases_in]));
1677         CeedCall(CeedElemRestrictionDestroy(&elem_rstr_in));
1678         CeedCall(CeedRealloc(num_active_bases_in + 1, &num_eval_modes_in));
1679         num_eval_modes_in[index] = 0;
1680         CeedCall(CeedRealloc(num_active_bases_in + 1, &eval_modes_in));
1681         eval_modes_in[index] = NULL;
1682         CeedCall(CeedRealloc(num_active_bases_in + 1, &eval_mode_offsets_in));
1683         eval_mode_offsets_in[index] = NULL;
1684         CeedCall(CeedRealloc(num_active_bases_in + 1, &(*data)->assembled_bases_in));
1685         (*data)->assembled_bases_in[index] = NULL;
1686         num_active_bases_in++;
1687       }
1688       if (eval_mode != CEED_EVAL_WEIGHT) {
1689         // q_comp = 1 if CEED_EVAL_NONE, CEED_EVAL_WEIGHT caught by QF Assembly
1690         CeedCall(CeedRealloc(num_eval_modes_in[index] + q_comp, &eval_modes_in[index]));
1691         CeedCall(CeedRealloc(num_eval_modes_in[index] + q_comp, &eval_mode_offsets_in[index]));
1692         for (CeedInt d = 0; d < q_comp; d++) {
1693           eval_modes_in[index][num_eval_modes_in[index] + d]        = eval_mode;
1694           eval_mode_offsets_in[index][num_eval_modes_in[index] + d] = offset;
1695           offset += num_comp;
1696         }
1697         num_eval_modes_in[index] += q_comp;
1698       }
1699       CeedCall(CeedBasisDestroy(&basis_in));
1700     }
1701     CeedCall(CeedVectorDestroy(&vec));
1702   }
1703 
1704   // Determine active output basis
1705   CeedCall(CeedQFunctionGetFields(qf, NULL, NULL, &num_output_fields, &qf_fields));
1706   CeedCall(CeedOperatorGetFields(op, NULL, NULL, NULL, &op_fields));
1707   offset = 0;
1708   for (CeedInt i = 0; i < num_output_fields; i++) {
1709     CeedVector vec;
1710 
1711     CeedCall(CeedOperatorFieldGetVector(op_fields[i], &vec));
1712     if (vec == CEED_VECTOR_ACTIVE) {
1713       CeedInt      index = -1, num_comp, q_comp;
1714       CeedEvalMode eval_mode;
1715       CeedBasis    basis_out = NULL;
1716 
1717       CeedCall(CeedOperatorFieldGetBasis(op_fields[i], &basis_out));
1718       CeedCall(CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode));
1719       CeedCall(CeedBasisGetNumComponents(basis_out, &num_comp));
1720       CeedCall(CeedBasisGetNumQuadratureComponents(basis_out, eval_mode, &q_comp));
1721       for (CeedInt i = 0; i < num_active_bases_out; i++) {
1722         if ((*data)->active_bases_out[i] == basis_out) index = i;
1723       }
1724       if (index == -1) {
1725         CeedElemRestriction elem_rstr_out;
1726 
1727         index = num_active_bases_out;
1728         CeedCall(CeedRealloc(num_active_bases_out + 1, &(*data)->active_bases_out));
1729         (*data)->active_bases_out[num_active_bases_out] = NULL;
1730         CeedCall(CeedBasisReferenceCopy(basis_out, &(*data)->active_bases_out[num_active_bases_out]));
1731         CeedCall(CeedRealloc(num_active_bases_out + 1, &(*data)->active_elem_rstrs_out));
1732         (*data)->active_elem_rstrs_out[num_active_bases_out] = NULL;
1733         CeedCall(CeedOperatorFieldGetElemRestriction(op_fields[i], &elem_rstr_out));
1734         CeedCall(CeedElemRestrictionReferenceCopy(elem_rstr_out, &(*data)->active_elem_rstrs_out[num_active_bases_out]));
1735         CeedCall(CeedElemRestrictionDestroy(&elem_rstr_out));
1736         CeedCall(CeedRealloc(num_active_bases_out + 1, &num_eval_modes_out));
1737         num_eval_modes_out[index] = 0;
1738         CeedCall(CeedRealloc(num_active_bases_out + 1, &eval_modes_out));
1739         eval_modes_out[index] = NULL;
1740         CeedCall(CeedRealloc(num_active_bases_out + 1, &eval_mode_offsets_out));
1741         eval_mode_offsets_out[index] = NULL;
1742         CeedCall(CeedRealloc(num_active_bases_out + 1, &(*data)->assembled_bases_out));
1743         (*data)->assembled_bases_out[index] = NULL;
1744         num_active_bases_out++;
1745       }
1746       if (eval_mode != CEED_EVAL_WEIGHT) {
1747         // q_comp = 1 if CEED_EVAL_NONE, CEED_EVAL_WEIGHT caught by QF Assembly
1748         CeedCall(CeedRealloc(num_eval_modes_out[index] + q_comp, &eval_modes_out[index]));
1749         CeedCall(CeedRealloc(num_eval_modes_out[index] + q_comp, &eval_mode_offsets_out[index]));
1750         for (CeedInt d = 0; d < q_comp; d++) {
1751           eval_modes_out[index][num_eval_modes_out[index] + d]        = eval_mode;
1752           eval_mode_offsets_out[index][num_eval_modes_out[index] + d] = offset;
1753           offset += num_comp;
1754         }
1755         num_eval_modes_out[index] += q_comp;
1756       }
1757       CeedCall(CeedBasisDestroy(&basis_out));
1758     }
1759     CeedCall(CeedVectorDestroy(&vec));
1760   }
1761   CeedCall(CeedQFunctionDestroy(&qf));
1762   (*data)->num_active_bases_in   = num_active_bases_in;
1763   (*data)->num_eval_modes_in     = num_eval_modes_in;
1764   (*data)->eval_modes_in         = eval_modes_in;
1765   (*data)->eval_mode_offsets_in  = eval_mode_offsets_in;
1766   (*data)->num_active_bases_out  = num_active_bases_out;
1767   (*data)->num_eval_modes_out    = num_eval_modes_out;
1768   (*data)->eval_modes_out        = eval_modes_out;
1769   (*data)->eval_mode_offsets_out = eval_mode_offsets_out;
1770   (*data)->num_output_components = offset;
1771   return CEED_ERROR_SUCCESS;
1772 }
1773 
1774 /**
1775   @brief Get `CeedOperator` @ref CeedEvalMode for assembly.
1776 
1777   Note: See @ref CeedOperatorAssemblyDataCreate() for a full description of the data stored in this object.
1778 
1779   @param[in]  data                  `CeedOperatorAssemblyData`
1780   @param[out] num_active_bases_in   Total number of active bases for input
1781   @param[out] num_eval_modes_in     Pointer to hold array of numbers of input @ref CeedEvalMode, or `NULL`.
1782                                       `eval_modes_in[0]` holds an array of eval modes for the first active `CeedBasis`.
1783   @param[out] eval_modes_in         Pointer to hold arrays of input @ref CeedEvalMode, or `NULL`
1784   @param[out] eval_mode_offsets_in  Pointer to hold arrays of input offsets at each quadrature point
1785   @param[out] num_active_bases_out  Total number of active bases for output
1786   @param[out] num_eval_modes_out    Pointer to hold array of numbers of output @ref CeedEvalMode, or `NULL`
1787   @param[out] eval_modes_out        Pointer to hold arrays of output @ref CeedEvalMode, or `NULL`
1788   @param[out] eval_mode_offsets_out Pointer to hold arrays of output offsets at each quadrature point
1789   @param[out] num_output_components The number of columns in the assembled `CeedQFunction` matrix for each quadrature point, including contributions of all active bases
1790 
1791   @return An error code: 0 - success, otherwise - failure
1792 
1793   @ref Backend
1794 **/
CeedOperatorAssemblyDataGetEvalModes(CeedOperatorAssemblyData data,CeedInt * num_active_bases_in,CeedInt ** num_eval_modes_in,const CeedEvalMode *** eval_modes_in,CeedSize *** eval_mode_offsets_in,CeedInt * num_active_bases_out,CeedInt ** num_eval_modes_out,const CeedEvalMode *** eval_modes_out,CeedSize *** eval_mode_offsets_out,CeedSize * num_output_components)1795 int CeedOperatorAssemblyDataGetEvalModes(CeedOperatorAssemblyData data, CeedInt *num_active_bases_in, CeedInt **num_eval_modes_in,
1796                                          const CeedEvalMode ***eval_modes_in, CeedSize ***eval_mode_offsets_in, CeedInt *num_active_bases_out,
1797                                          CeedInt **num_eval_modes_out, const CeedEvalMode ***eval_modes_out, CeedSize ***eval_mode_offsets_out,
1798                                          CeedSize *num_output_components) {
1799   if (num_active_bases_in) *num_active_bases_in = data->num_active_bases_in;
1800   if (num_eval_modes_in) *num_eval_modes_in = data->num_eval_modes_in;
1801   if (eval_modes_in) *eval_modes_in = (const CeedEvalMode **)data->eval_modes_in;
1802   if (eval_mode_offsets_in) *eval_mode_offsets_in = data->eval_mode_offsets_in;
1803   if (num_active_bases_out) *num_active_bases_out = data->num_active_bases_out;
1804   if (num_eval_modes_out) *num_eval_modes_out = data->num_eval_modes_out;
1805   if (eval_modes_out) *eval_modes_out = (const CeedEvalMode **)data->eval_modes_out;
1806   if (eval_mode_offsets_out) *eval_mode_offsets_out = data->eval_mode_offsets_out;
1807   if (num_output_components) *num_output_components = data->num_output_components;
1808   return CEED_ERROR_SUCCESS;
1809 }
1810 
1811 /**
1812   @brief Get `CeedOperator` `CeedBasis` data for assembly.
1813 
1814   Note: See @ref CeedOperatorAssemblyDataCreate() for a full description of the data stored in this object.
1815 
1816   @param[in]  data                 `CeedOperatorAssemblyData`
1817   @param[out] num_active_bases_in  Number of active input bases, or `NULL`
1818   @param[out] active_bases_in      Pointer to hold active input `CeedBasis`, or `NULL`
1819   @param[out] assembled_bases_in   Pointer to hold assembled active input `B` , or `NULL`
1820   @param[out] num_active_bases_out Number of active output bases, or `NULL`
1821   @param[out] active_bases_out     Pointer to hold active output `CeedBasis`, or `NULL`
1822   @param[out] assembled_bases_out  Pointer to hold assembled active output `B` , or `NULL`
1823 
1824   @return An error code: 0 - success, otherwise - failure
1825 
1826   @ref Backend
1827 **/
CeedOperatorAssemblyDataGetBases(CeedOperatorAssemblyData data,CeedInt * num_active_bases_in,CeedBasis ** active_bases_in,const CeedScalar *** assembled_bases_in,CeedInt * num_active_bases_out,CeedBasis ** active_bases_out,const CeedScalar *** assembled_bases_out)1828 int CeedOperatorAssemblyDataGetBases(CeedOperatorAssemblyData data, CeedInt *num_active_bases_in, CeedBasis **active_bases_in,
1829                                      const CeedScalar ***assembled_bases_in, CeedInt *num_active_bases_out, CeedBasis **active_bases_out,
1830                                      const CeedScalar ***assembled_bases_out) {
1831   // Assemble B_in, B_out if needed
1832   if (assembled_bases_in && !data->assembled_bases_in[0]) {
1833     CeedInt num_qpts;
1834 
1835     if (data->active_bases_in[0] == CEED_BASIS_NONE) CeedCall(CeedElemRestrictionGetElementSize(data->active_elem_rstrs_in[0], &num_qpts));
1836     else CeedCall(CeedBasisGetNumQuadraturePoints(data->active_bases_in[0], &num_qpts));
1837     for (CeedInt b = 0; b < data->num_active_bases_in; b++) {
1838       bool        has_eval_none = false;
1839       CeedInt     num_nodes;
1840       CeedScalar *B_in = NULL, *identity = NULL;
1841 
1842       CeedCall(CeedElemRestrictionGetElementSize(data->active_elem_rstrs_in[b], &num_nodes));
1843       CeedCall(CeedCalloc(num_qpts * num_nodes * data->num_eval_modes_in[b], &B_in));
1844 
1845       for (CeedInt i = 0; i < data->num_eval_modes_in[b]; i++) {
1846         has_eval_none = has_eval_none || (data->eval_modes_in[b][i] == CEED_EVAL_NONE);
1847       }
1848       if (has_eval_none) {
1849         CeedCall(CeedCalloc(num_qpts * num_nodes, &identity));
1850         for (CeedInt i = 0; i < (num_nodes < num_qpts ? num_nodes : num_qpts); i++) {
1851           identity[i * num_nodes + i] = 1.0;
1852         }
1853       }
1854 
1855       for (CeedInt q = 0; q < num_qpts; q++) {
1856         for (CeedInt n = 0; n < num_nodes; n++) {
1857           CeedInt      d_in              = 0, q_comp_in;
1858           CeedEvalMode eval_mode_in_prev = CEED_EVAL_NONE;
1859 
1860           for (CeedInt e_in = 0; e_in < data->num_eval_modes_in[b]; e_in++) {
1861             const CeedInt     qq = data->num_eval_modes_in[b] * q;
1862             const CeedScalar *B  = NULL;
1863 
1864             CeedCall(CeedOperatorGetBasisPointer(data->active_bases_in[b], data->eval_modes_in[b][e_in], identity, &B));
1865             CeedCall(CeedBasisGetNumQuadratureComponents(data->active_bases_in[b], data->eval_modes_in[b][e_in], &q_comp_in));
1866             if (q_comp_in > 1) {
1867               if (e_in == 0 || data->eval_modes_in[b][e_in] != eval_mode_in_prev) d_in = 0;
1868               else B = &B[(++d_in) * num_qpts * num_nodes];
1869             }
1870             eval_mode_in_prev                 = data->eval_modes_in[b][e_in];
1871             B_in[(qq + e_in) * num_nodes + n] = B[q * num_nodes + n];
1872           }
1873         }
1874       }
1875       if (identity) CeedCall(CeedFree(&identity));
1876       data->assembled_bases_in[b] = B_in;
1877     }
1878   }
1879 
1880   if (assembled_bases_out && !data->assembled_bases_out[0]) {
1881     CeedInt num_qpts;
1882 
1883     if (data->active_bases_out[0] == CEED_BASIS_NONE) CeedCall(CeedElemRestrictionGetElementSize(data->active_elem_rstrs_out[0], &num_qpts));
1884     else CeedCall(CeedBasisGetNumQuadraturePoints(data->active_bases_out[0], &num_qpts));
1885     for (CeedInt b = 0; b < data->num_active_bases_out; b++) {
1886       bool        has_eval_none = false;
1887       CeedInt     num_nodes;
1888       CeedScalar *B_out = NULL, *identity = NULL;
1889 
1890       CeedCall(CeedElemRestrictionGetElementSize(data->active_elem_rstrs_out[b], &num_nodes));
1891       CeedCall(CeedCalloc(num_qpts * num_nodes * data->num_eval_modes_out[b], &B_out));
1892 
1893       for (CeedInt i = 0; i < data->num_eval_modes_out[b]; i++) {
1894         has_eval_none = has_eval_none || (data->eval_modes_out[b][i] == CEED_EVAL_NONE);
1895       }
1896       if (has_eval_none) {
1897         CeedCall(CeedCalloc(num_qpts * num_nodes, &identity));
1898         for (CeedInt i = 0; i < (num_nodes < num_qpts ? num_nodes : num_qpts); i++) {
1899           identity[i * num_nodes + i] = 1.0;
1900         }
1901       }
1902 
1903       for (CeedInt q = 0; q < num_qpts; q++) {
1904         for (CeedInt n = 0; n < num_nodes; n++) {
1905           CeedInt      d_out              = 0, q_comp_out;
1906           CeedEvalMode eval_mode_out_prev = CEED_EVAL_NONE;
1907 
1908           for (CeedInt e_out = 0; e_out < data->num_eval_modes_out[b]; e_out++) {
1909             const CeedInt     qq = data->num_eval_modes_out[b] * q;
1910             const CeedScalar *B  = NULL;
1911 
1912             CeedCall(CeedOperatorGetBasisPointer(data->active_bases_out[b], data->eval_modes_out[b][e_out], identity, &B));
1913             CeedCall(CeedBasisGetNumQuadratureComponents(data->active_bases_out[b], data->eval_modes_out[b][e_out], &q_comp_out));
1914             if (q_comp_out > 1) {
1915               if (e_out == 0 || data->eval_modes_out[b][e_out] != eval_mode_out_prev) d_out = 0;
1916               else B = &B[(++d_out) * num_qpts * num_nodes];
1917             }
1918             eval_mode_out_prev                  = data->eval_modes_out[b][e_out];
1919             B_out[(qq + e_out) * num_nodes + n] = B[q * num_nodes + n];
1920           }
1921         }
1922       }
1923       if (identity) CeedCall(CeedFree(&identity));
1924       data->assembled_bases_out[b] = B_out;
1925     }
1926   }
1927 
1928   // Pass out assembled data
1929   if (num_active_bases_in) *num_active_bases_in = data->num_active_bases_in;
1930   if (active_bases_in) *active_bases_in = data->active_bases_in;
1931   if (assembled_bases_in) *assembled_bases_in = (const CeedScalar **)data->assembled_bases_in;
1932   if (num_active_bases_out) *num_active_bases_out = data->num_active_bases_out;
1933   if (active_bases_out) *active_bases_out = data->active_bases_out;
1934   if (assembled_bases_out) *assembled_bases_out = (const CeedScalar **)data->assembled_bases_out;
1935   return CEED_ERROR_SUCCESS;
1936 }
1937 
1938 /**
1939   @brief Get `CeedOperator` `CeedBasis` data for assembly.
1940 
1941   Note: See @ref CeedOperatorAssemblyDataCreate() for a full description of the data stored in this object.
1942 
1943   @param[in]  data                      `CeedOperatorAssemblyData`
1944   @param[out] num_active_elem_rstrs_in  Number of active input element restrictions, or `NULL`
1945   @param[out] active_elem_rstrs_in      Pointer to hold active input `CeedElemRestriction`, or `NULL`
1946   @param[out] num_active_elem_rstrs_out Number of active output element restrictions, or `NULL`
1947   @param[out] active_elem_rstrs_out     Pointer to hold active output `CeedElemRestriction`, or `NULL`
1948 
1949   @return An error code: 0 - success, otherwise - failure
1950 
1951   @ref Backend
1952 **/
CeedOperatorAssemblyDataGetElemRestrictions(CeedOperatorAssemblyData data,CeedInt * num_active_elem_rstrs_in,CeedElemRestriction ** active_elem_rstrs_in,CeedInt * num_active_elem_rstrs_out,CeedElemRestriction ** active_elem_rstrs_out)1953 int CeedOperatorAssemblyDataGetElemRestrictions(CeedOperatorAssemblyData data, CeedInt *num_active_elem_rstrs_in,
1954                                                 CeedElemRestriction **active_elem_rstrs_in, CeedInt *num_active_elem_rstrs_out,
1955                                                 CeedElemRestriction **active_elem_rstrs_out) {
1956   if (num_active_elem_rstrs_in) *num_active_elem_rstrs_in = data->num_active_bases_in;
1957   if (active_elem_rstrs_in) *active_elem_rstrs_in = data->active_elem_rstrs_in;
1958   if (num_active_elem_rstrs_out) *num_active_elem_rstrs_out = data->num_active_bases_out;
1959   if (active_elem_rstrs_out) *active_elem_rstrs_out = data->active_elem_rstrs_out;
1960   return CEED_ERROR_SUCCESS;
1961 }
1962 
1963 /**
1964   @brief Destroy `CeedOperatorAssemblyData`
1965 
1966   @param[in,out] data `CeedOperatorAssemblyData` to destroy
1967 
1968   @return An error code: 0 - success, otherwise - failure
1969 
1970   @ref Backend
1971 **/
CeedOperatorAssemblyDataDestroy(CeedOperatorAssemblyData * data)1972 int CeedOperatorAssemblyDataDestroy(CeedOperatorAssemblyData *data) {
1973   if (!*data) {
1974     *data = NULL;
1975     return CEED_ERROR_SUCCESS;
1976   }
1977   CeedCall(CeedDestroy(&(*data)->ceed));
1978   for (CeedInt b = 0; b < (*data)->num_active_bases_in; b++) {
1979     CeedCall(CeedBasisDestroy(&(*data)->active_bases_in[b]));
1980     CeedCall(CeedElemRestrictionDestroy(&(*data)->active_elem_rstrs_in[b]));
1981     CeedCall(CeedFree(&(*data)->eval_modes_in[b]));
1982     CeedCall(CeedFree(&(*data)->eval_mode_offsets_in[b]));
1983     CeedCall(CeedFree(&(*data)->assembled_bases_in[b]));
1984   }
1985   for (CeedInt b = 0; b < (*data)->num_active_bases_out; b++) {
1986     CeedCall(CeedBasisDestroy(&(*data)->active_bases_out[b]));
1987     CeedCall(CeedElemRestrictionDestroy(&(*data)->active_elem_rstrs_out[b]));
1988     CeedCall(CeedFree(&(*data)->eval_modes_out[b]));
1989     CeedCall(CeedFree(&(*data)->eval_mode_offsets_out[b]));
1990     CeedCall(CeedFree(&(*data)->assembled_bases_out[b]));
1991   }
1992   CeedCall(CeedFree(&(*data)->active_bases_in));
1993   CeedCall(CeedFree(&(*data)->active_bases_out));
1994   CeedCall(CeedFree(&(*data)->active_elem_rstrs_in));
1995   CeedCall(CeedFree(&(*data)->active_elem_rstrs_out));
1996   CeedCall(CeedFree(&(*data)->num_eval_modes_in));
1997   CeedCall(CeedFree(&(*data)->num_eval_modes_out));
1998   CeedCall(CeedFree(&(*data)->eval_modes_in));
1999   CeedCall(CeedFree(&(*data)->eval_modes_out));
2000   CeedCall(CeedFree(&(*data)->eval_mode_offsets_in));
2001   CeedCall(CeedFree(&(*data)->eval_mode_offsets_out));
2002   CeedCall(CeedFree(&(*data)->assembled_bases_in));
2003   CeedCall(CeedFree(&(*data)->assembled_bases_out));
2004 
2005   CeedCall(CeedFree(data));
2006   return CEED_ERROR_SUCCESS;
2007 }
2008 
2009 /**
2010   @brief Retrieve fallback `CeedOperator` with a reference `Ceed` for advanced `CeedOperator` functionality
2011 
2012   @param[in]  op          `CeedOperator` to retrieve fallback for
2013   @param[out] op_fallback Fallback `CeedOperator`
2014 
2015   @return An error code: 0 - success, otherwise - failure
2016 
2017   @ref Backend
2018 **/
CeedOperatorGetFallback(CeedOperator op,CeedOperator * op_fallback)2019 int CeedOperatorGetFallback(CeedOperator op, CeedOperator *op_fallback) {
2020   // Create if needed
2021   if (!op->op_fallback) CeedCall(CeedOperatorCreateFallback(op));
2022   if (op->op_fallback) {
2023     bool is_debug;
2024     Ceed ceed;
2025 
2026     CeedCall(CeedOperatorGetCeed(op, &ceed));
2027     CeedCall(CeedIsDebug(ceed, &is_debug));
2028     if (is_debug) {
2029       Ceed        ceed_fallback;
2030       const char *resource, *resource_fallback, *op_name;
2031 
2032       CeedCall(CeedGetOperatorFallbackCeed(ceed, &ceed_fallback));
2033       CeedCall(CeedGetResource(ceed, &resource));
2034       CeedCall(CeedGetResource(ceed_fallback, &resource_fallback));
2035       CeedCall(CeedOperatorGetName(op, &op_name));
2036 
2037       CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "---------- CeedOperator Fallback ----------\n");
2038       CeedDebug(ceed, "Falling back from Operator with backend %s at address %p to Operator with backend %s at address %p for CeedOperator \"%s\"\n",
2039                 resource, op, resource_fallback, op->op_fallback, op_name);
2040       CeedCall(CeedDestroy(&ceed_fallback));
2041     }
2042     CeedCall(CeedDestroy(&ceed));
2043   }
2044   *op_fallback = op->op_fallback;
2045   return CEED_ERROR_SUCCESS;
2046 }
2047 
2048 /**
2049   @brief Get the parent `CeedOperator` for a fallback `CeedOperator`
2050 
2051   @param[in]  op     `CeedOperator` context
2052   @param[out] parent Variable to store parent `CeedOperator` context
2053 
2054   @return An error code: 0 - success, otherwise - failure
2055 
2056   @ref Backend
2057 **/
CeedOperatorGetFallbackParent(CeedOperator op,CeedOperator * parent)2058 int CeedOperatorGetFallbackParent(CeedOperator op, CeedOperator *parent) {
2059   *parent = op->op_fallback_parent ? op->op_fallback_parent : NULL;
2060   return CEED_ERROR_SUCCESS;
2061 }
2062 
2063 /**
2064   @brief Get the `Ceed` context of the parent `CeedOperator` for a fallback `CeedOperator`
2065 
2066   @param[in]  op     `CeedOperator` context
2067   @param[out] parent Variable to store parent `Ceed` context
2068 
2069   @return An error code: 0 - success, otherwise - failure
2070 
2071   @ref Backend
2072 **/
CeedOperatorGetFallbackParentCeed(CeedOperator op,Ceed * parent)2073 int CeedOperatorGetFallbackParentCeed(CeedOperator op, Ceed *parent) {
2074   *parent = NULL;
2075   if (op->op_fallback_parent) CeedCall(CeedReferenceCopy(CeedOperatorReturnCeed(op->op_fallback_parent), parent));
2076   else CeedCall(CeedReferenceCopy(CeedOperatorReturnCeed(op), parent));
2077   return CEED_ERROR_SUCCESS;
2078 }
2079 
2080 /// @}
2081 
2082 /// ----------------------------------------------------------------------------
2083 /// CeedOperator Public API
2084 /// ----------------------------------------------------------------------------
2085 /// @addtogroup CeedOperatorUser
2086 /// @{
2087 
2088 /**
2089   @brief Assemble a linear `CeedQFunction` associated with a `CeedOperator`.
2090 
2091   This returns a `CeedVector` containing a matrix at each quadrature point providing the action of the `CeedQFunction` associated with the `CeedOperator`.
2092   The vector `assembled` is of shape `[num_elements, num_input_fields, num_output_fields, num_quad_points]` and contains column-major matrices representing the action of the `CeedQFunction` for a corresponding quadrature point on an element.
2093 
2094   Inputs and outputs are in the order provided by the user when adding `CeedOperator` fields.
2095   For example, a `CeedQFunction` with inputs `u` and `gradu` and outputs `gradv` and `v` , provided in that order, would result in an assembled `CeedQFunction` that consists of `(1 + dim) x (dim + 1)` matrices at each quadrature point acting on the input ` [u, du_0, du_1]` and producing the output `[dv_0, dv_1, v]`.
2096 
2097   Note: Calling this function asserts that setup is complete and sets the `CeedOperator` as immutable.
2098 
2099   @param[in]  op        `CeedOperator` to assemble `CeedQFunction`
2100   @param[out] assembled `CeedVector` to store assembled `CeedQFunction` at quadrature points
2101   @param[out] rstr      `CeedElemRestriction` for `CeedVector` containing assembled `CeedQFunction`
2102   @param[in]  request   Address of @ref CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
2103 
2104   @return An error code: 0 - success, otherwise - failure
2105 
2106   @ref User
2107 **/
CeedOperatorLinearAssembleQFunction(CeedOperator op,CeedVector * assembled,CeedElemRestriction * rstr,CeedRequest * request)2108 int CeedOperatorLinearAssembleQFunction(CeedOperator op, CeedVector *assembled, CeedElemRestriction *rstr, CeedRequest *request) {
2109   CeedCall(CeedOperatorCheckReady(op));
2110 
2111   if (op->LinearAssembleQFunction) {
2112     // Backend version
2113     CeedCall(op->LinearAssembleQFunction(op, assembled, rstr, request));
2114   } else {
2115     // Operator fallback
2116     CeedOperator op_fallback;
2117 
2118     CeedDebug(CeedOperatorReturnCeed(op), "\nFalling back for CeedOperatorLinearAssembleQFunction\n");
2119     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
2120     if (op_fallback) CeedCall(CeedOperatorLinearAssembleQFunction(op_fallback, assembled, rstr, request));
2121     else return CeedError(CeedOperatorReturnCeed(op), CEED_ERROR_UNSUPPORTED, "Backend does not support CeedOperatorLinearAssembleQFunction");
2122   }
2123   return CEED_ERROR_SUCCESS;
2124 }
2125 
2126 /**
2127   @brief Assemble `CeedQFunction` and store result internally.
2128 
2129   Return copied references of stored data to the caller.
2130   Caller is responsible for ownership and destruction of the copied references.
2131   See also @ref CeedOperatorLinearAssembleQFunction().
2132 
2133   Note: If the value of `assembled` or `rstr` passed to this function are non-`NULL` , then it is assumed that they hold valid pointers.
2134         These objects will be destroyed if `*assembled` or `*rstr` is the only reference to the object.
2135 
2136   @param[in]  op        `CeedOperator` to assemble `CeedQFunction`
2137   @param[out] assembled `CeedVector` to store assembled `CeedQFunction` at quadrature points
2138   @param[out] rstr      `CeedElemRestriction` for `CeedVector` containing assembled `CeedQFunction`
2139   @param[in]  request   Address of @ref CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
2140 
2141   @return An error code: 0 - success, otherwise - failure
2142 
2143   @ref User
2144 **/
CeedOperatorLinearAssembleQFunctionBuildOrUpdate(CeedOperator op,CeedVector * assembled,CeedElemRestriction * rstr,CeedRequest * request)2145 int CeedOperatorLinearAssembleQFunctionBuildOrUpdate(CeedOperator op, CeedVector *assembled, CeedElemRestriction *rstr, CeedRequest *request) {
2146   return CeedOperatorLinearAssembleQFunctionBuildOrUpdate_Core(op, true, assembled, rstr, request);
2147 }
2148 
2149 /**
2150   @brief Assemble the diagonal of a square linear `CeedOperator`
2151 
2152   This overwrites a `CeedVector` with the diagonal of a linear `CeedOperator`.
2153 
2154   Note: Currently only non-composite `CeedOperator` with a single field and composite `CeedOperator` with single field sub-operators are supported.
2155 
2156   Note: Calling this function asserts that setup is complete and sets the `CeedOperator` as immutable.
2157 
2158   @param[in]  op        `CeedOperator` to assemble `CeedQFunction`
2159   @param[out] assembled `CeedVector` to store assembled `CeedOperator` diagonal
2160   @param[in]  request   Address of @ref CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
2161 
2162   @return An error code: 0 - success, otherwise - failure
2163 
2164   @ref User
2165 **/
CeedOperatorLinearAssembleDiagonal(CeedOperator op,CeedVector assembled,CeedRequest * request)2166 int CeedOperatorLinearAssembleDiagonal(CeedOperator op, CeedVector assembled, CeedRequest *request) {
2167   bool     is_composite;
2168   CeedSize input_size = 0, output_size = 0;
2169 
2170   CeedCall(CeedOperatorCheckReady(op));
2171   CeedCall(CeedOperatorIsComposite(op, &is_composite));
2172 
2173   CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size));
2174   CeedCheck(input_size == output_size, CeedOperatorReturnCeed(op), CEED_ERROR_DIMENSION, "Operator must be square");
2175 
2176   // Early exit for empty operator
2177   if (!is_composite) {
2178     CeedInt num_elem = 0;
2179 
2180     CeedCall(CeedOperatorGetNumElements(op, &num_elem));
2181     if (num_elem == 0) return CEED_ERROR_SUCCESS;
2182   }
2183 
2184   if (op->LinearAssembleDiagonal) {
2185     // Backend version
2186     CeedCall(op->LinearAssembleDiagonal(op, assembled, request));
2187     return CEED_ERROR_SUCCESS;
2188   } else if (op->LinearAssembleAddDiagonal) {
2189     // Backend version with zeroing first
2190     CeedCall(CeedVectorSetValue(assembled, 0.0));
2191     CeedCall(op->LinearAssembleAddDiagonal(op, assembled, request));
2192     return CEED_ERROR_SUCCESS;
2193   } else if (is_composite) {
2194     // Default to summing contributions of suboperators
2195     CeedCall(CeedVectorSetValue(assembled, 0.0));
2196     CeedCall(CeedOperatorLinearAssembleAddDiagonalComposite(op, request, false, assembled));
2197     return CEED_ERROR_SUCCESS;
2198   } else {
2199     // Operator fallback
2200     CeedOperator op_fallback;
2201 
2202     CeedDebug(CeedOperatorReturnCeed(op), "\nFalling back for CeedOperatorLinearAssembleDiagonal\n");
2203     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
2204     if (op_fallback) {
2205       CeedCall(CeedOperatorLinearAssembleDiagonal(op_fallback, assembled, request));
2206       return CEED_ERROR_SUCCESS;
2207     }
2208   }
2209   // Default interface implementation
2210   CeedCall(CeedVectorSetValue(assembled, 0.0));
2211   CeedCall(CeedOperatorLinearAssembleAddDiagonal(op, assembled, request));
2212   return CEED_ERROR_SUCCESS;
2213 }
2214 
2215 /**
2216   @brief Assemble the diagonal of a square linear `CeedOperator`.
2217 
2218   This sums into a `CeedVector` the diagonal of a linear `CeedOperator`.
2219 
2220   Note: Currently only non-composite `CeedOperator` with a single field and composite `CeedOperator` with single field sub-operators are supported.
2221 
2222   Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
2223 
2224   @param[in]  op        `CeedOperator` to assemble `CeedQFunction`
2225   @param[out] assembled `CeedVector` to store assembled `CeedOperator` diagonal
2226   @param[in]  request   Address of @ref CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
2227 
2228   @return An error code: 0 - success, otherwise - failure
2229 
2230   @ref User
2231 **/
CeedOperatorLinearAssembleAddDiagonal(CeedOperator op,CeedVector assembled,CeedRequest * request)2232 int CeedOperatorLinearAssembleAddDiagonal(CeedOperator op, CeedVector assembled, CeedRequest *request) {
2233   bool     is_composite;
2234   CeedSize input_size = 0, output_size = 0;
2235 
2236   CeedCall(CeedOperatorCheckReady(op));
2237   CeedCall(CeedOperatorIsComposite(op, &is_composite));
2238 
2239   CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size));
2240   CeedCheck(input_size == output_size, CeedOperatorReturnCeed(op), CEED_ERROR_DIMENSION, "Operator must be square");
2241 
2242   // Early exit for empty operator
2243   if (!is_composite) {
2244     CeedInt num_elem = 0;
2245 
2246     CeedCall(CeedOperatorGetNumElements(op, &num_elem));
2247     if (num_elem == 0) return CEED_ERROR_SUCCESS;
2248   }
2249 
2250   if (op->LinearAssembleAddDiagonal) {
2251     // Backend version
2252     CeedCall(op->LinearAssembleAddDiagonal(op, assembled, request));
2253     return CEED_ERROR_SUCCESS;
2254   } else if (is_composite) {
2255     // Default to summing contributions of suboperators
2256     CeedCall(CeedOperatorLinearAssembleAddDiagonalComposite(op, request, false, assembled));
2257     return CEED_ERROR_SUCCESS;
2258   } else {
2259     // Operator fallback
2260     CeedOperator op_fallback;
2261 
2262     CeedDebug(CeedOperatorReturnCeed(op), "\nFalling back for CeedOperatorLinearAssembleAddDiagonal\n");
2263     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
2264     if (op_fallback) {
2265       CeedCall(CeedOperatorLinearAssembleAddDiagonal(op_fallback, assembled, request));
2266       return CEED_ERROR_SUCCESS;
2267     }
2268   }
2269   // Default interface implementation
2270   CeedCall(CeedOperatorLinearAssembleAddDiagonalSingle(op, request, false, assembled));
2271   return CEED_ERROR_SUCCESS;
2272 }
2273 
2274 /**
2275    @brief Fully assemble the point-block diagonal pattern of a linear `CeedOperator`.
2276 
2277    Expected to be used in conjunction with @ref CeedOperatorLinearAssemblePointBlockDiagonal().
2278 
2279    The assembly routines use coordinate format, with `num_entries` tuples of the form `(i, j, value)` which indicate that value should be added to the matrix in entry `(i, j)`.
2280    Note that the `(i, j)` pairs are unique.
2281    This function returns the number of entries and their `(i, j)` locations, while @ref CeedOperatorLinearAssemblePointBlockDiagonal() provides the values in the same ordering.
2282 
2283    This will generally be slow unless your operator is low-order.
2284 
2285    Note: Calling this function asserts that setup is complete and sets the `CeedOperator` as immutable.
2286 
2287    @param[in]  op          `CeedOperator` to assemble
2288    @param[out] num_entries Number of entries in coordinate nonzero pattern
2289    @param[out] rows        Row number for each entry
2290    @param[out] cols        Column number for each entry
2291 
2292    @ref User
2293 **/
CeedOperatorLinearAssemblePointBlockDiagonalSymbolic(CeedOperator op,CeedSize * num_entries,CeedInt ** rows,CeedInt ** cols)2294 int CeedOperatorLinearAssemblePointBlockDiagonalSymbolic(CeedOperator op, CeedSize *num_entries, CeedInt **rows, CeedInt **cols) {
2295   bool          is_composite;
2296   CeedInt       num_active_components, num_sub_operators;
2297   CeedOperator *sub_operators;
2298 
2299   CeedCall(CeedOperatorIsComposite(op, &is_composite));
2300 
2301   CeedSize input_size = 0, output_size = 0;
2302   CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size));
2303   CeedCheck(input_size == output_size, CeedOperatorReturnCeed(op), CEED_ERROR_DIMENSION, "Operator must be square");
2304 
2305   if (is_composite) {
2306     CeedCall(CeedOperatorCompositeGetNumSub(op, &num_sub_operators));
2307     CeedCall(CeedOperatorCompositeGetSubList(op, &sub_operators));
2308   } else {
2309     sub_operators     = &op;
2310     num_sub_operators = 1;
2311   }
2312 
2313   // Verify operator can be assembled correctly
2314   {
2315     CeedOperatorAssemblyData data;
2316     CeedInt                  num_active_elem_rstrs, comp_stride;
2317     CeedElemRestriction     *active_elem_rstrs;
2318 
2319     // Get initial values to check against
2320     CeedCall(CeedOperatorGetOperatorAssemblyData(sub_operators[0], &data));
2321     CeedCall(CeedOperatorAssemblyDataGetElemRestrictions(data, &num_active_elem_rstrs, &active_elem_rstrs, NULL, NULL));
2322     CeedCall(CeedElemRestrictionGetCompStride(active_elem_rstrs[0], &comp_stride));
2323     CeedCall(CeedElemRestrictionGetNumComponents(active_elem_rstrs[0], &num_active_components));
2324 
2325     // Verify that all active element restrictions have same component stride and number of components
2326     for (CeedInt k = 0; k < num_sub_operators; k++) {
2327       CeedCall(CeedOperatorGetOperatorAssemblyData(sub_operators[k], &data));
2328       CeedCall(CeedOperatorAssemblyDataGetElemRestrictions(data, &num_active_elem_rstrs, &active_elem_rstrs, NULL, NULL));
2329       for (CeedInt i = 0; i < num_active_elem_rstrs; i++) {
2330         CeedInt comp_stride_sub, num_active_components_sub;
2331 
2332         CeedCall(CeedElemRestrictionGetCompStride(active_elem_rstrs[i], &comp_stride_sub));
2333         CeedCheck(comp_stride == comp_stride_sub, CeedOperatorReturnCeed(op), CEED_ERROR_DIMENSION,
2334                   "Active element restrictions must have the same component stride: %d vs %d", comp_stride, comp_stride_sub);
2335         CeedCall(CeedElemRestrictionGetNumComponents(active_elem_rstrs[i], &num_active_components_sub));
2336         CeedCheck(num_active_components == num_active_components_sub, CeedOperatorReturnCeed(op), CEED_ERROR_INCOMPATIBLE,
2337                   "All suboperators must have the same number of output components."
2338                   " Previous: %" CeedInt_FMT " Current: %" CeedInt_FMT,
2339                   num_active_components, num_active_components_sub);
2340       }
2341     }
2342   }
2343   *num_entries = input_size * num_active_components;
2344   CeedCall(CeedCalloc(*num_entries, rows));
2345   CeedCall(CeedCalloc(*num_entries, cols));
2346 
2347   for (CeedInt o = 0; o < num_sub_operators; o++) {
2348     CeedElemRestriction active_elem_rstr, point_block_active_elem_rstr;
2349     CeedInt             comp_stride, num_elem, elem_size;
2350     const CeedInt      *offsets, *point_block_offsets;
2351 
2352     CeedCall(CeedOperatorGetActiveElemRestriction(sub_operators[o], &active_elem_rstr));
2353     CeedCall(CeedElemRestrictionGetCompStride(active_elem_rstr, &comp_stride));
2354     CeedCall(CeedElemRestrictionGetNumElements(active_elem_rstr, &num_elem));
2355     CeedCall(CeedElemRestrictionGetElementSize(active_elem_rstr, &elem_size));
2356     CeedCall(CeedElemRestrictionGetOffsets(active_elem_rstr, CEED_MEM_HOST, &offsets));
2357 
2358     CeedCall(CeedOperatorCreateActivePointBlockRestriction(active_elem_rstr, &point_block_active_elem_rstr));
2359     CeedCall(CeedElemRestrictionGetOffsets(point_block_active_elem_rstr, CEED_MEM_HOST, &point_block_offsets));
2360 
2361     for (CeedSize i = 0; i < num_elem * elem_size; i++) {
2362       for (CeedInt c_out = 0; c_out < num_active_components; c_out++) {
2363         for (CeedInt c_in = 0; c_in < num_active_components; c_in++) {
2364           (*rows)[point_block_offsets[i] + c_out * num_active_components + c_in] = offsets[i] + c_out * comp_stride;
2365           (*cols)[point_block_offsets[i] + c_out * num_active_components + c_in] = offsets[i] + c_in * comp_stride;
2366         }
2367       }
2368     }
2369 
2370     CeedCall(CeedElemRestrictionRestoreOffsets(active_elem_rstr, &offsets));
2371     CeedCall(CeedElemRestrictionRestoreOffsets(point_block_active_elem_rstr, &point_block_offsets));
2372     CeedCall(CeedElemRestrictionDestroy(&active_elem_rstr));
2373     CeedCall(CeedElemRestrictionDestroy(&point_block_active_elem_rstr));
2374   }
2375   return CEED_ERROR_SUCCESS;
2376 }
2377 
2378 /**
2379   @brief Assemble the point block diagonal of a square linear `CeedOperator`.
2380 
2381   This overwrites a `CeedVector` with the point block diagonal of a linear `CeedOperator`.
2382 
2383   Note: Currently only non-composite `CeedOperator` with a single field and composite `CeedOperator` with single field sub-operators are supported.
2384 
2385   Note: Calling this function asserts that setup is complete and sets the `CeedOperator` as immutable.
2386 
2387   @param[in]  op        `CeedOperator` to assemble `CeedQFunction`
2388   @param[out] assembled `CeedVector` to store assembled `CeedOperator` point block diagonal, provided in row-major form with an `num_comp * num_comp` block at each node.
2389                           The dimensions of this vector are derived from the active vector for the `CeedOperator`.
2390                           The array has shape `[nodes, component out, component in]`.
2391   @param[in]  request   Address of @ref CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
2392 
2393   @return An error code: 0 - success, otherwise - failure
2394 
2395   @ref User
2396 **/
CeedOperatorLinearAssemblePointBlockDiagonal(CeedOperator op,CeedVector assembled,CeedRequest * request)2397 int CeedOperatorLinearAssemblePointBlockDiagonal(CeedOperator op, CeedVector assembled, CeedRequest *request) {
2398   bool     is_composite;
2399   CeedSize input_size = 0, output_size = 0;
2400 
2401   CeedCall(CeedOperatorCheckReady(op));
2402   CeedCall(CeedOperatorIsComposite(op, &is_composite));
2403 
2404   CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size));
2405   CeedCheck(input_size == output_size, CeedOperatorReturnCeed(op), CEED_ERROR_DIMENSION, "Operator must be square");
2406 
2407   // Early exit for empty operator
2408   if (!is_composite) {
2409     CeedInt num_elem = 0;
2410 
2411     CeedCall(CeedOperatorGetNumElements(op, &num_elem));
2412     if (num_elem == 0) return CEED_ERROR_SUCCESS;
2413   }
2414 
2415   if (op->LinearAssemblePointBlockDiagonal) {
2416     // Backend version
2417     CeedCall(op->LinearAssemblePointBlockDiagonal(op, assembled, request));
2418     return CEED_ERROR_SUCCESS;
2419   } else if (op->LinearAssembleAddPointBlockDiagonal) {
2420     // Backend version with zeroing first
2421     CeedCall(CeedVectorSetValue(assembled, 0.0));
2422     CeedCall(CeedOperatorLinearAssembleAddPointBlockDiagonal(op, assembled, request));
2423     return CEED_ERROR_SUCCESS;
2424   } else {
2425     // Operator fallback
2426     CeedOperator op_fallback;
2427 
2428     CeedDebug(CeedOperatorReturnCeed(op), "\nFalling back for CeedOperatorLinearAssemblePointBlockDiagonal\n");
2429     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
2430     if (op_fallback) {
2431       CeedCall(CeedOperatorLinearAssemblePointBlockDiagonal(op_fallback, assembled, request));
2432       return CEED_ERROR_SUCCESS;
2433     }
2434   }
2435   // Default interface implementation
2436   CeedCall(CeedVectorSetValue(assembled, 0.0));
2437   CeedCall(CeedOperatorLinearAssembleAddPointBlockDiagonal(op, assembled, request));
2438   return CEED_ERROR_SUCCESS;
2439 }
2440 
2441 /**
2442   @brief Assemble the point block diagonal of a square linear `CeedOperator`.
2443 
2444   This sums into a `CeedVector` with the point block diagonal of a linear `CeedOperator`.
2445 
2446   Note: Currently only non-composite `CeedOperator` with a single field and composite `CeedOperator` with single field sub-operators are supported.
2447 
2448   Note: Calling this function asserts that setup is complete and sets the `CeedOperator` as immutable.
2449 
2450   @param[in]  op        `CeedOperator` to assemble `CeedQFunction`
2451   @param[out] assembled `CeedVector` to store assembled CeedOperator point block diagonal, provided in row-major form with an `num_comp * num_comp` block at each node.
2452                           The dimensions of this vector are derived from the active vector for the `CeedOperator`.
2453                           The array has shape `[nodes, component out, component in]`.
2454   @param[in]  request   Address of @ref CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
2455 
2456   @return An error code: 0 - success, otherwise - failure
2457 
2458   @ref User
2459 **/
CeedOperatorLinearAssembleAddPointBlockDiagonal(CeedOperator op,CeedVector assembled,CeedRequest * request)2460 int CeedOperatorLinearAssembleAddPointBlockDiagonal(CeedOperator op, CeedVector assembled, CeedRequest *request) {
2461   bool     is_composite;
2462   CeedSize input_size = 0, output_size = 0;
2463 
2464   CeedCall(CeedOperatorCheckReady(op));
2465   CeedCall(CeedOperatorIsComposite(op, &is_composite));
2466 
2467   CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size));
2468   CeedCheck(input_size == output_size, CeedOperatorReturnCeed(op), CEED_ERROR_DIMENSION, "Operator must be square");
2469 
2470   // Early exit for empty operator
2471   if (!is_composite) {
2472     CeedInt num_elem = 0;
2473 
2474     CeedCall(CeedOperatorGetNumElements(op, &num_elem));
2475     if (num_elem == 0) return CEED_ERROR_SUCCESS;
2476   }
2477 
2478   if (op->LinearAssembleAddPointBlockDiagonal) {
2479     // Backend version
2480     CeedCall(op->LinearAssembleAddPointBlockDiagonal(op, assembled, request));
2481     return CEED_ERROR_SUCCESS;
2482   } else {
2483     // Operator fallback
2484     CeedOperator op_fallback;
2485 
2486     CeedDebug(CeedOperatorReturnCeed(op), "\nFalling back for CeedOperatorLinearAssembleAddPointBlockDiagonal\n");
2487     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
2488     if (op_fallback) {
2489       CeedCall(CeedOperatorLinearAssembleAddPointBlockDiagonal(op_fallback, assembled, request));
2490       return CEED_ERROR_SUCCESS;
2491     }
2492   }
2493   // Default interface implementation
2494   if (is_composite) {
2495     CeedCall(CeedOperatorLinearAssembleAddDiagonalComposite(op, request, true, assembled));
2496   } else {
2497     CeedCall(CeedOperatorLinearAssembleAddDiagonalSingle(op, request, true, assembled));
2498   }
2499   return CEED_ERROR_SUCCESS;
2500 }
2501 
2502 /**
2503    @brief Fully assemble the nonzero pattern of a linear `CeedOperator`.
2504 
2505    Expected to be used in conjunction with @ref CeedOperatorLinearAssemble().
2506 
2507    The assembly routines use coordinate format, with `num_entries` tuples of the form `(i, j, value)` which indicate that value should be added to the matrix in entry `(i, j)`.
2508    Note that the `(i, j)` pairs are not unique and may repeat.
2509    This function returns the number of entries and their `(i, j)` locations, while @ref CeedOperatorLinearAssemble() provides the values in the same ordering.
2510 
2511    This will generally be slow unless your operator is low-order.
2512 
2513    Note: Calling this function asserts that setup is complete and sets the `CeedOperator` as immutable.
2514 
2515    @param[in]  op          `CeedOperator` to assemble
2516    @param[out] num_entries Number of entries in coordinate nonzero pattern
2517    @param[out] rows        Row number for each entry
2518    @param[out] cols        Column number for each entry
2519 
2520    @ref User
2521 **/
CeedOperatorLinearAssembleSymbolic(CeedOperator op,CeedSize * num_entries,CeedInt ** rows,CeedInt ** cols)2522 int CeedOperatorLinearAssembleSymbolic(CeedOperator op, CeedSize *num_entries, CeedInt **rows, CeedInt **cols) {
2523   bool          is_composite;
2524   CeedInt       num_suboperators, offset = 0;
2525   CeedSize      single_entries;
2526   CeedOperator *sub_operators;
2527 
2528   CeedCall(CeedOperatorCheckReady(op));
2529   CeedCall(CeedOperatorIsComposite(op, &is_composite));
2530 
2531   if (op->LinearAssembleSymbolic) {
2532     // Backend version
2533     CeedCall(op->LinearAssembleSymbolic(op, num_entries, rows, cols));
2534     return CEED_ERROR_SUCCESS;
2535   } else {
2536     // Operator fallback
2537     CeedOperator op_fallback;
2538 
2539     CeedDebug(CeedOperatorReturnCeed(op), "\nFalling back for CeedOperatorLinearAssembleSymbolic\n");
2540     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
2541     if (op_fallback) {
2542       CeedCall(CeedOperatorLinearAssembleSymbolic(op_fallback, num_entries, rows, cols));
2543       return CEED_ERROR_SUCCESS;
2544     }
2545   }
2546 
2547   // Default interface implementation
2548 
2549   // Count entries and allocate rows, cols arrays
2550   CeedCall(CeedOperatorLinearAssembleGetNumEntries(op, num_entries));
2551   CeedCall(CeedCalloc(*num_entries, rows));
2552   CeedCall(CeedCalloc(*num_entries, cols));
2553 
2554   // Assemble nonzero locations
2555   if (is_composite) {
2556     CeedCall(CeedOperatorCompositeGetNumSub(op, &num_suboperators));
2557     CeedCall(CeedOperatorCompositeGetSubList(op, &sub_operators));
2558     for (CeedInt k = 0; k < num_suboperators; ++k) {
2559       CeedCall(CeedOperatorAssembleSymbolicSingle(sub_operators[k], offset, *rows, *cols));
2560       CeedCall(CeedOperatorAssemblyCountEntriesSingle(sub_operators[k], &single_entries));
2561       offset += single_entries;
2562     }
2563   } else {
2564     CeedCall(CeedOperatorAssembleSymbolicSingle(op, offset, *rows, *cols));
2565   }
2566   return CEED_ERROR_SUCCESS;
2567 }
2568 
2569 /**
2570    @brief Fully assemble the nonzero entries of a linear operator.
2571 
2572    Expected to be used in conjunction with @ref CeedOperatorLinearAssembleSymbolic().
2573 
2574    The assembly routines use coordinate format, with `num_entries` tuples of the form `(i, j, value)` which indicate that value should be added to the matrix in entry `(i, j)`.
2575    Note that the `(i, j)` pairs are not unique and may repeat.
2576    This function returns the values of the nonzero entries to be added, their `(i, j)` locations are provided by @ref CeedOperatorLinearAssembleSymbolic().
2577 
2578    This will generally be slow unless your operator is low-order.
2579 
2580    Note: Calling this function asserts that setup is complete and sets the `CeedOperator` as immutable.
2581 
2582    @param[in]  op     `CeedOperator` to assemble
2583    @param[out] values Values to assemble into matrix
2584 
2585    @ref User
2586 **/
CeedOperatorLinearAssemble(CeedOperator op,CeedVector values)2587 int CeedOperatorLinearAssemble(CeedOperator op, CeedVector values) {
2588   bool          is_composite;
2589   CeedInt       num_suboperators, offset = 0;
2590   CeedSize      single_entries = 0;
2591   CeedOperator *sub_operators;
2592 
2593   CeedCall(CeedOperatorCheckReady(op));
2594   CeedCall(CeedOperatorIsComposite(op, &is_composite));
2595 
2596   // Early exit for empty operator
2597   if (!is_composite) {
2598     CeedInt num_elem = 0;
2599 
2600     CeedCall(CeedOperatorGetNumElements(op, &num_elem));
2601     if (num_elem == 0) return CEED_ERROR_SUCCESS;
2602   }
2603 
2604   if (op->LinearAssemble) {
2605     // Backend version
2606     CeedCall(op->LinearAssemble(op, values));
2607     return CEED_ERROR_SUCCESS;
2608   } else if (is_composite) {
2609     // Default to summing contributions of suboperators
2610     CeedCall(CeedVectorSetValue(values, 0.0));
2611     CeedCall(CeedOperatorCompositeGetNumSub(op, &num_suboperators));
2612     CeedCall(CeedOperatorCompositeGetSubList(op, &sub_operators));
2613     for (CeedInt k = 0; k < num_suboperators; k++) {
2614       CeedCall(CeedOperatorAssembleSingle(sub_operators[k], offset, values));
2615       CeedCall(CeedOperatorAssemblyCountEntriesSingle(sub_operators[k], &single_entries));
2616       offset += single_entries;
2617     }
2618     return CEED_ERROR_SUCCESS;
2619   } else if (op->LinearAssembleSingle) {
2620     CeedCall(CeedVectorSetValue(values, 0.0));
2621     CeedCall(CeedOperatorAssembleSingle(op, offset, values));
2622     return CEED_ERROR_SUCCESS;
2623   } else {
2624     // Operator fallback
2625     CeedOperator op_fallback;
2626 
2627     CeedDebug(CeedOperatorReturnCeed(op), "\nFalling back for CeedOperatorLinearAssemble\n");
2628     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
2629     if (op_fallback) {
2630       CeedCall(CeedOperatorLinearAssemble(op_fallback, values));
2631       return CEED_ERROR_SUCCESS;
2632     }
2633   }
2634 
2635   // Default to interface version if non-composite and no fallback
2636   CeedCall(CeedVectorSetValue(values, 0.0));
2637   CeedCall(CeedOperatorAssembleSingle(op, offset, values));
2638   return CEED_ERROR_SUCCESS;
2639 }
2640 
2641 /**
2642   @brief Get the multiplicity of nodes across sub-operators in a composite `CeedOperator`.
2643 
2644   Note: Calling this function asserts that setup is complete and sets the `CeedOperator` as immutable.
2645 
2646   @param[in]  op               Composite `CeedOperator`
2647   @param[in]  num_skip_indices Number of sub-operators to skip
2648   @param[in]  skip_indices     Array of indices of sub-operators to skip
2649   @param[out] mult             Vector to store multiplicity (of size `l_size` )
2650 
2651   @return An error code: 0 - success, otherwise - failure
2652 
2653   @ref User
2654 **/
CeedOperatorCompositeGetMultiplicity(CeedOperator op,CeedInt num_skip_indices,CeedInt * skip_indices,CeedVector mult)2655 int CeedOperatorCompositeGetMultiplicity(CeedOperator op, CeedInt num_skip_indices, CeedInt *skip_indices, CeedVector mult) {
2656   Ceed                ceed;
2657   CeedInt             num_suboperators;
2658   CeedSize            l_vec_len;
2659   CeedScalar         *mult_array;
2660   CeedVector          ones_l_vec;
2661   CeedElemRestriction elem_rstr, mult_elem_rstr;
2662   CeedOperator       *sub_operators;
2663 
2664   CeedCall(CeedOperatorCheckReady(op));
2665 
2666   // Zero mult vector
2667   CeedCall(CeedVectorSetValue(mult, 0.0));
2668 
2669   // Get suboperators
2670   CeedCall(CeedOperatorCompositeGetNumSub(op, &num_suboperators));
2671   if (num_suboperators == 0) return CEED_ERROR_SUCCESS;
2672   CeedCall(CeedOperatorCompositeGetSubList(op, &sub_operators));
2673 
2674   // Work vector
2675   CeedCall(CeedVectorGetLength(mult, &l_vec_len));
2676   CeedCall(CeedOperatorGetCeed(op, &ceed));
2677   CeedCall(CeedVectorCreate(ceed, l_vec_len, &ones_l_vec));
2678   CeedCall(CeedDestroy(&ceed));
2679   CeedCall(CeedVectorSetValue(ones_l_vec, 1.0));
2680   CeedCall(CeedVectorGetArray(mult, CEED_MEM_HOST, &mult_array));
2681 
2682   // Compute multiplicity across suboperators
2683   for (CeedInt i = 0; i < num_suboperators; i++) {
2684     const CeedScalar *sub_mult_array;
2685     CeedVector        sub_mult_l_vec, ones_e_vec;
2686 
2687     // -- Check for suboperator to skip
2688     for (CeedInt j = 0; j < num_skip_indices; j++) {
2689       if (skip_indices[j] == i) continue;
2690     }
2691 
2692     // -- Sub operator multiplicity
2693     CeedCall(CeedOperatorGetActiveElemRestriction(sub_operators[i], &elem_rstr));
2694     CeedCall(CeedElemRestrictionCreateUnorientedCopy(elem_rstr, &mult_elem_rstr));
2695     CeedCall(CeedElemRestrictionDestroy(&elem_rstr));
2696     CeedCall(CeedElemRestrictionCreateVector(mult_elem_rstr, &sub_mult_l_vec, &ones_e_vec));
2697     CeedCall(CeedVectorSetValue(sub_mult_l_vec, 0.0));
2698     CeedCall(CeedElemRestrictionApply(mult_elem_rstr, CEED_NOTRANSPOSE, ones_l_vec, ones_e_vec, CEED_REQUEST_IMMEDIATE));
2699     CeedCall(CeedElemRestrictionApply(mult_elem_rstr, CEED_TRANSPOSE, ones_e_vec, sub_mult_l_vec, CEED_REQUEST_IMMEDIATE));
2700     CeedCall(CeedVectorGetArrayRead(sub_mult_l_vec, CEED_MEM_HOST, &sub_mult_array));
2701     // ---- Flag every node present in the current suboperator
2702     for (CeedSize j = 0; j < l_vec_len; j++) {
2703       if (sub_mult_array[j] > 0.0) mult_array[j] += 1.0;
2704     }
2705     CeedCall(CeedVectorRestoreArrayRead(sub_mult_l_vec, &sub_mult_array));
2706     CeedCall(CeedVectorDestroy(&sub_mult_l_vec));
2707     CeedCall(CeedVectorDestroy(&ones_e_vec));
2708     CeedCall(CeedElemRestrictionDestroy(&mult_elem_rstr));
2709   }
2710   CeedCall(CeedVectorRestoreArray(mult, &mult_array));
2711   CeedCall(CeedVectorDestroy(&ones_l_vec));
2712   return CEED_ERROR_SUCCESS;
2713 }
2714 
2715 /**
2716   @brief Create a multigrid coarse `CeedOperator` and level transfer `CeedOperator` for a `CeedOperator`, creating the prolongation basis from the fine and coarse grid interpolation.
2717 
2718   Note: Calling this function asserts that setup is complete and sets all four `CeedOperator` as immutable.
2719 
2720   @param[in]  op_fine      Fine grid `CeedOperator`
2721   @param[in]  p_mult_fine  L-vector multiplicity in parallel gather/scatter, or `NULL` if not creating prolongation/restriction `CeedOperator`
2722   @param[in]  rstr_coarse  Coarse grid `CeedElemRestriction`
2723   @param[in]  basis_coarse Coarse grid active vector `CeedBasis`
2724   @param[out] op_coarse    Coarse grid `CeedOperator`
2725   @param[out] op_prolong   Coarse to fine `CeedOperator`, or `NULL`
2726   @param[out] op_restrict  Fine to coarse `CeedOperator`, or `NULL`
2727 
2728   @return An error code: 0 - success, otherwise - failure
2729 
2730   @ref User
2731 **/
CeedOperatorMultigridLevelCreate(CeedOperator op_fine,CeedVector p_mult_fine,CeedElemRestriction rstr_coarse,CeedBasis basis_coarse,CeedOperator * op_coarse,CeedOperator * op_prolong,CeedOperator * op_restrict)2732 int CeedOperatorMultigridLevelCreate(CeedOperator op_fine, CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse,
2733                                      CeedOperator *op_coarse, CeedOperator *op_prolong, CeedOperator *op_restrict) {
2734   CeedBasis basis_c_to_f = NULL;
2735 
2736   CeedCall(CeedOperatorCheckReady(op_fine));
2737 
2738   // Build prolongation matrix, if required
2739   if (op_prolong || op_restrict) {
2740     CeedBasis basis_fine;
2741 
2742     CeedCall(CeedOperatorGetActiveBasis(op_fine, &basis_fine));
2743     CeedCall(CeedBasisCreateProjection(basis_coarse, basis_fine, &basis_c_to_f));
2744     CeedCall(CeedBasisDestroy(&basis_fine));
2745   }
2746 
2747   // Core code
2748   CeedCall(CeedOperatorMultigridLevelCreateSingle_Core(op_fine, p_mult_fine, rstr_coarse, basis_coarse, basis_c_to_f, op_coarse, op_prolong,
2749                                                        op_restrict));
2750   return CEED_ERROR_SUCCESS;
2751 }
2752 
2753 /**
2754   @brief Create a multigrid coarse `CeedOperator` and level transfer `CeedOperator` for a `CeedOperator` with a tensor basis for the active basis.
2755 
2756   Note: Calling this function asserts that setup is complete and sets all four `CeedOperator` as immutable.
2757 
2758   @param[in]  op_fine       Fine grid `CeedOperator`
2759   @param[in]  p_mult_fine   L-vector multiplicity in parallel gather/scatter, or `NULL` if not creating prolongation/restriction `CeedOperator`
2760   @param[in]  rstr_coarse   Coarse grid `CeedElemRestriction`
2761   @param[in]  basis_coarse  Coarse grid active vector `CeedBasis`
2762   @param[in]  interp_c_to_f Matrix for coarse to fine interpolation, or `NULL` if not creating prolongation/restriction `CeedOperator`
2763   @param[out] op_coarse     Coarse grid `CeedOperator`
2764   @param[out] op_prolong    Coarse to fine `CeedOperator`, or `NULL`
2765   @param[out] op_restrict   Fine to coarse `CeedOperator`, or `NULL`
2766 
2767   @return An error code: 0 - success, otherwise - failure
2768 
2769   @ref User
2770 **/
CeedOperatorMultigridLevelCreateTensorH1(CeedOperator op_fine,CeedVector p_mult_fine,CeedElemRestriction rstr_coarse,CeedBasis basis_coarse,const CeedScalar * interp_c_to_f,CeedOperator * op_coarse,CeedOperator * op_prolong,CeedOperator * op_restrict)2771 int CeedOperatorMultigridLevelCreateTensorH1(CeedOperator op_fine, CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse,
2772                                              const CeedScalar *interp_c_to_f, CeedOperator *op_coarse, CeedOperator *op_prolong,
2773                                              CeedOperator *op_restrict) {
2774   Ceed      ceed;
2775   CeedInt   Q_f, Q_c;
2776   CeedBasis basis_fine, basis_c_to_f = NULL;
2777 
2778   CeedCall(CeedOperatorCheckReady(op_fine));
2779   CeedCall(CeedOperatorGetCeed(op_fine, &ceed));
2780 
2781   // Check for compatible quadrature spaces
2782   CeedCall(CeedOperatorGetActiveBasis(op_fine, &basis_fine));
2783   CeedCall(CeedBasisGetNumQuadraturePoints(basis_fine, &Q_f));
2784   CeedCall(CeedBasisGetNumQuadraturePoints(basis_coarse, &Q_c));
2785   CeedCheck(Q_f == Q_c, ceed, CEED_ERROR_DIMENSION,
2786             "Bases must have compatible quadrature spaces."
2787             " Fine grid: %" CeedInt_FMT " points, Coarse grid: %" CeedInt_FMT " points",
2788             Q_f, Q_c);
2789 
2790   // Create coarse to fine basis, if required
2791   if (op_prolong || op_restrict) {
2792     CeedInt     dim, num_comp, num_nodes_c, P_1d_f, P_1d_c;
2793     CeedScalar *q_ref, *q_weight, *grad;
2794 
2795     // Check if interpolation matrix is provided
2796     CeedCheck(interp_c_to_f, ceed, CEED_ERROR_INCOMPATIBLE,
2797               "Prolongation or restriction operator creation requires coarse-to-fine interpolation matrix");
2798     CeedCall(CeedBasisGetDimension(basis_fine, &dim));
2799     CeedCall(CeedBasisGetNumComponents(basis_fine, &num_comp));
2800     CeedCall(CeedBasisGetNumNodes1D(basis_fine, &P_1d_f));
2801     CeedCall(CeedBasisDestroy(&basis_fine));
2802     CeedCall(CeedElemRestrictionGetElementSize(rstr_coarse, &num_nodes_c));
2803     P_1d_c = dim == 1 ? num_nodes_c : dim == 2 ? sqrt(num_nodes_c) : cbrt(num_nodes_c);
2804     CeedCall(CeedCalloc(P_1d_f, &q_ref));
2805     CeedCall(CeedCalloc(P_1d_f, &q_weight));
2806     CeedCall(CeedCalloc(P_1d_f * P_1d_c * dim, &grad));
2807     CeedCall(CeedBasisCreateTensorH1(ceed, dim, num_comp, P_1d_c, P_1d_f, interp_c_to_f, grad, q_ref, q_weight, &basis_c_to_f));
2808     CeedCall(CeedFree(&q_ref));
2809     CeedCall(CeedFree(&q_weight));
2810     CeedCall(CeedFree(&grad));
2811   }
2812 
2813   // Core code
2814   CeedCall(CeedOperatorMultigridLevelCreateSingle_Core(op_fine, p_mult_fine, rstr_coarse, basis_coarse, basis_c_to_f, op_coarse, op_prolong,
2815                                                        op_restrict));
2816   CeedCall(CeedDestroy(&ceed));
2817   return CEED_ERROR_SUCCESS;
2818 }
2819 
2820 /**
2821   @brief Create a multigrid coarse `CeedOperator` and level transfer `CeedOperator` for a `CeedOperator` with a non-tensor basis for the active vector
2822 
2823   Note: Calling this function asserts that setup is complete and sets all four `CeedOperator` as immutable.
2824 
2825   @param[in]  op_fine       Fine grid `CeedOperator`
2826   @param[in]  p_mult_fine   L-vector multiplicity in parallel gather/scatter, or `NULL` if not creating prolongation/restriction `CeedOperator`
2827   @param[in]  rstr_coarse   Coarse grid `CeedElemRestriction`
2828   @param[in]  basis_coarse  Coarse grid active vector `CeedBasis`
2829   @param[in]  interp_c_to_f Matrix for coarse to fine interpolation, or `NULL` if not creating prolongation/restriction `CeedOperator`
2830   @param[out] op_coarse     Coarse grid `CeedOperator`
2831   @param[out] op_prolong    Coarse to fine `CeedOperator`, or `NULL`
2832   @param[out] op_restrict   Fine to coarse `CeedOperator`, or `NULL`
2833 
2834   @return An error code: 0 - success, otherwise - failure
2835 
2836   @ref User
2837 **/
CeedOperatorMultigridLevelCreateH1(CeedOperator op_fine,CeedVector p_mult_fine,CeedElemRestriction rstr_coarse,CeedBasis basis_coarse,const CeedScalar * interp_c_to_f,CeedOperator * op_coarse,CeedOperator * op_prolong,CeedOperator * op_restrict)2838 int CeedOperatorMultigridLevelCreateH1(CeedOperator op_fine, CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse,
2839                                        const CeedScalar *interp_c_to_f, CeedOperator *op_coarse, CeedOperator *op_prolong,
2840                                        CeedOperator *op_restrict) {
2841   Ceed      ceed;
2842   CeedInt   Q_f, Q_c;
2843   CeedBasis basis_fine, basis_c_to_f = NULL;
2844 
2845   CeedCall(CeedOperatorCheckReady(op_fine));
2846   CeedCall(CeedOperatorGetCeed(op_fine, &ceed));
2847 
2848   // Check for compatible quadrature spaces
2849   CeedCall(CeedOperatorGetActiveBasis(op_fine, &basis_fine));
2850   CeedCall(CeedBasisGetNumQuadraturePoints(basis_fine, &Q_f));
2851   CeedCall(CeedBasisGetNumQuadraturePoints(basis_coarse, &Q_c));
2852   CeedCheck(Q_f == Q_c, ceed, CEED_ERROR_DIMENSION, "Bases must have compatible quadrature spaces");
2853 
2854   // Coarse to fine basis
2855   if (op_prolong || op_restrict) {
2856     CeedInt          dim, num_comp, num_nodes_c, num_nodes_f;
2857     CeedScalar      *q_ref, *q_weight, *grad;
2858     CeedElemTopology topo;
2859 
2860     // Check if interpolation matrix is provided
2861     CeedCheck(interp_c_to_f, ceed, CEED_ERROR_INCOMPATIBLE,
2862               "Prolongation or restriction operator creation requires coarse-to-fine interpolation matrix");
2863     CeedCall(CeedBasisGetTopology(basis_fine, &topo));
2864     CeedCall(CeedBasisGetDimension(basis_fine, &dim));
2865     CeedCall(CeedBasisGetNumComponents(basis_fine, &num_comp));
2866     CeedCall(CeedBasisGetNumNodes(basis_fine, &num_nodes_f));
2867     CeedCall(CeedBasisDestroy(&basis_fine));
2868     CeedCall(CeedElemRestrictionGetElementSize(rstr_coarse, &num_nodes_c));
2869     CeedCall(CeedCalloc(num_nodes_f * dim, &q_ref));
2870     CeedCall(CeedCalloc(num_nodes_f, &q_weight));
2871     CeedCall(CeedCalloc(num_nodes_f * num_nodes_c * dim, &grad));
2872     CeedCall(CeedBasisCreateH1(ceed, topo, num_comp, num_nodes_c, num_nodes_f, interp_c_to_f, grad, q_ref, q_weight, &basis_c_to_f));
2873     CeedCall(CeedFree(&q_ref));
2874     CeedCall(CeedFree(&q_weight));
2875     CeedCall(CeedFree(&grad));
2876   }
2877 
2878   // Core code
2879   CeedCall(CeedOperatorMultigridLevelCreateSingle_Core(op_fine, p_mult_fine, rstr_coarse, basis_coarse, basis_c_to_f, op_coarse, op_prolong,
2880                                                        op_restrict));
2881   CeedCall(CeedDestroy(&ceed));
2882   return CEED_ERROR_SUCCESS;
2883 }
2884 
2885 /**
2886   @brief Build a FDM based approximate inverse for each element for a `CeedOperator`.
2887 
2888   This returns a `CeedOperator` and `CeedVector` to apply a Fast Diagonalization Method based approximate inverse.
2889   This function obtains the simultaneous diagonalization for the 1D mass and Laplacian operators, \f$M = V^T V, K = V^T S V\f$.
2890   The assembled `CeedQFunction` is used to modify the eigenvalues from simultaneous diagonalization and obtain an approximate inverse of the form \f$V^T \hat S V\f$.
2891   The `CeedOperator` must be linear and non-composite.
2892   The associated `CeedQFunction` must therefore also be linear.
2893 
2894   Note: Calling this function asserts that setup is complete and sets the `CeedOperator` as immutable.
2895 
2896   @param[in]  op      `CeedOperator` to create element inverses
2897   @param[out] fdm_inv `CeedOperator` to apply the action of a FDM based inverse for each element
2898   @param[in]  request Address of @ref CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
2899 
2900   @return An error code: 0 - success, otherwise - failure
2901 
2902   @ref User
2903 **/
CeedOperatorCreateFDMElementInverse(CeedOperator op,CeedOperator * fdm_inv,CeedRequest * request)2904 int CeedOperatorCreateFDMElementInverse(CeedOperator op, CeedOperator *fdm_inv, CeedRequest *request) {
2905   Ceed                 ceed, ceed_parent;
2906   bool                 interp = false, grad = false, is_tensor_basis = true;
2907   CeedInt              num_input_fields, P_1d, Q_1d, num_nodes, num_qpts, dim, num_comp = 1, num_elem = 1;
2908   CeedScalar          *mass, *laplace, *x, *fdm_interp, *lambda, *elem_avg;
2909   const CeedScalar    *interp_1d, *grad_1d, *q_weight_1d;
2910   CeedVector           q_data;
2911   CeedElemRestriction  rstr  = NULL, rstr_qd_i;
2912   CeedBasis            basis = NULL, fdm_basis;
2913   CeedQFunctionContext ctx_fdm;
2914   CeedQFunctionField  *qf_fields;
2915   CeedQFunction        qf, qf_fdm;
2916   CeedOperatorField   *op_fields;
2917 
2918   CeedCall(CeedOperatorCheckReady(op));
2919 
2920   if (op->CreateFDMElementInverse) {
2921     // Backend version
2922     CeedCall(op->CreateFDMElementInverse(op, fdm_inv, request));
2923     return CEED_ERROR_SUCCESS;
2924   } else {
2925     // Operator fallback
2926     CeedOperator op_fallback;
2927 
2928     CeedDebug(CeedOperatorReturnCeed(op), "\nFalling back for CeedOperatorCreateFDMElementInverse\n");
2929     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
2930     if (op_fallback) {
2931       CeedCall(CeedOperatorCreateFDMElementInverse(op_fallback, fdm_inv, request));
2932       return CEED_ERROR_SUCCESS;
2933     }
2934   }
2935 
2936   // Default interface implementation
2937   CeedCall(CeedOperatorGetCeed(op, &ceed));
2938   CeedCall(CeedOperatorGetFallbackParentCeed(op, &ceed_parent));
2939   CeedCall(CeedOperatorGetQFunction(op, &qf));
2940 
2941   // Determine active input basis
2942   CeedCall(CeedOperatorGetFields(op, &num_input_fields, &op_fields, NULL, NULL));
2943   CeedCall(CeedQFunctionGetFields(qf, NULL, &qf_fields, NULL, NULL));
2944   for (CeedInt i = 0; i < num_input_fields; i++) {
2945     CeedVector vec;
2946 
2947     CeedCall(CeedOperatorFieldGetVector(op_fields[i], &vec));
2948     if (vec == CEED_VECTOR_ACTIVE) {
2949       CeedEvalMode eval_mode;
2950 
2951       CeedCall(CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode));
2952       interp = interp || eval_mode == CEED_EVAL_INTERP;
2953       grad   = grad || eval_mode == CEED_EVAL_GRAD;
2954       if (!basis) CeedCall(CeedOperatorFieldGetBasis(op_fields[i], &basis));
2955       if (!rstr) CeedCall(CeedOperatorFieldGetElemRestriction(op_fields[i], &rstr));
2956     }
2957     CeedCall(CeedVectorDestroy(&vec));
2958   }
2959   CeedCheck(basis, ceed, CEED_ERROR_BACKEND, "No active field set");
2960   CeedCall(CeedBasisGetNumNodes1D(basis, &P_1d));
2961   CeedCall(CeedBasisGetNumNodes(basis, &num_nodes));
2962   CeedCall(CeedBasisGetNumQuadraturePoints1D(basis, &Q_1d));
2963   CeedCall(CeedBasisGetNumQuadraturePoints(basis, &num_qpts));
2964   CeedCall(CeedBasisGetDimension(basis, &dim));
2965   CeedCall(CeedBasisGetNumComponents(basis, &num_comp));
2966   CeedCall(CeedElemRestrictionGetNumElements(rstr, &num_elem));
2967 
2968   // Build and diagonalize 1D Mass and Laplacian
2969   CeedCall(CeedBasisIsTensor(basis, &is_tensor_basis));
2970   CeedCheck(is_tensor_basis, ceed, CEED_ERROR_BACKEND, "FDMElementInverse only supported for tensor bases");
2971   CeedCall(CeedCalloc(P_1d * P_1d, &mass));
2972   CeedCall(CeedCalloc(P_1d * P_1d, &laplace));
2973   CeedCall(CeedCalloc(P_1d * P_1d, &x));
2974   CeedCall(CeedCalloc(P_1d * P_1d, &fdm_interp));
2975   CeedCall(CeedCalloc(P_1d, &lambda));
2976   // -- Build matrices
2977   CeedCall(CeedBasisGetInterp1D(basis, &interp_1d));
2978   CeedCall(CeedBasisGetGrad1D(basis, &grad_1d));
2979   CeedCall(CeedBasisGetQWeights(basis, &q_weight_1d));
2980   CeedCall(CeedBuildMassLaplace(interp_1d, grad_1d, q_weight_1d, P_1d, Q_1d, dim, mass, laplace));
2981 
2982   // -- Diagonalize
2983   CeedCall(CeedSimultaneousDiagonalization(ceed, laplace, mass, x, lambda, P_1d));
2984   CeedCall(CeedFree(&mass));
2985   CeedCall(CeedFree(&laplace));
2986   for (CeedInt i = 0; i < P_1d; i++) {
2987     for (CeedInt j = 0; j < P_1d; j++) fdm_interp[i + j * P_1d] = x[j + i * P_1d];
2988   }
2989   CeedCall(CeedFree(&x));
2990 
2991   {
2992     CeedInt             layout[3], num_modes = (interp ? 1 : 0) + (grad ? dim : 0);
2993     CeedScalar          max_norm = 0;
2994     const CeedScalar   *assembled_array, *q_weight_array;
2995     CeedVector          assembled = NULL, q_weight;
2996     CeedElemRestriction rstr_qf   = NULL;
2997 
2998     // Assemble QFunction
2999     CeedCall(CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op, &assembled, &rstr_qf, request));
3000     CeedCall(CeedElemRestrictionGetELayout(rstr_qf, layout));
3001     CeedCall(CeedElemRestrictionDestroy(&rstr_qf));
3002     CeedCall(CeedVectorNorm(assembled, CEED_NORM_MAX, &max_norm));
3003 
3004     // Calculate element averages
3005     CeedCall(CeedVectorCreate(ceed_parent, num_qpts, &q_weight));
3006     CeedCall(CeedBasisApply(basis, 1, CEED_NOTRANSPOSE, CEED_EVAL_WEIGHT, CEED_VECTOR_NONE, q_weight));
3007     CeedCall(CeedVectorGetArrayRead(assembled, CEED_MEM_HOST, &assembled_array));
3008     CeedCall(CeedVectorGetArrayRead(q_weight, CEED_MEM_HOST, &q_weight_array));
3009     CeedCall(CeedCalloc(num_elem, &elem_avg));
3010     const CeedScalar qf_value_bound = max_norm * 100 * CEED_EPSILON;
3011 
3012     for (CeedInt e = 0; e < num_elem; e++) {
3013       CeedInt count = 0;
3014 
3015       for (CeedInt q = 0; q < num_qpts; q++) {
3016         for (CeedInt i = 0; i < num_comp * num_comp * num_modes * num_modes; i++) {
3017           if (fabs(assembled_array[q * layout[0] + i * layout[1] + e * layout[2]]) > qf_value_bound) {
3018             elem_avg[e] += assembled_array[q * layout[0] + i * layout[1] + e * layout[2]] / q_weight_array[q];
3019             count++;
3020           }
3021         }
3022       }
3023       if (count) {
3024         elem_avg[e] /= count;
3025       } else {
3026         elem_avg[e] = 1.0;
3027       }
3028     }
3029     CeedCall(CeedVectorRestoreArrayRead(assembled, &assembled_array));
3030     CeedCall(CeedVectorDestroy(&assembled));
3031     CeedCall(CeedVectorRestoreArrayRead(q_weight, &q_weight_array));
3032     CeedCall(CeedVectorDestroy(&q_weight));
3033   }
3034 
3035   // Build FDM diagonal
3036   {
3037     CeedScalar *q_data_array, *fdm_diagonal;
3038 
3039     CeedCall(CeedCalloc(num_comp * num_nodes, &fdm_diagonal));
3040     const CeedScalar fdm_diagonal_bound = num_nodes * CEED_EPSILON;
3041     for (CeedInt c = 0; c < num_comp; c++) {
3042       for (CeedInt n = 0; n < num_nodes; n++) {
3043         if (interp) fdm_diagonal[c * num_nodes + n] = 1.0;
3044         if (grad) {
3045           for (CeedInt d = 0; d < dim; d++) {
3046             CeedInt i = (n / CeedIntPow(P_1d, d)) % P_1d;
3047             fdm_diagonal[c * num_nodes + n] += lambda[i];
3048           }
3049         }
3050         if (fabs(fdm_diagonal[c * num_nodes + n]) < fdm_diagonal_bound) fdm_diagonal[c * num_nodes + n] = fdm_diagonal_bound;
3051       }
3052     }
3053     CeedCall(CeedVectorCreate(ceed_parent, num_elem * num_comp * num_nodes, &q_data));
3054     CeedCall(CeedVectorSetValue(q_data, 0.0));
3055     CeedCall(CeedVectorGetArrayWrite(q_data, CEED_MEM_HOST, &q_data_array));
3056     for (CeedInt e = 0; e < num_elem; e++) {
3057       for (CeedInt c = 0; c < num_comp; c++) {
3058         for (CeedInt n = 0; n < num_nodes; n++) {
3059           q_data_array[(e * num_comp + c) * num_nodes + n] = 1. / (elem_avg[e] * fdm_diagonal[c * num_nodes + n]);
3060         }
3061       }
3062     }
3063     CeedCall(CeedFree(&elem_avg));
3064     CeedCall(CeedFree(&fdm_diagonal));
3065     CeedCall(CeedVectorRestoreArray(q_data, &q_data_array));
3066   }
3067 
3068   // Setup FDM operator
3069   // -- Basis
3070   {
3071     CeedScalar *grad_dummy, *q_ref_dummy, *q_weight_dummy;
3072 
3073     CeedCall(CeedCalloc(P_1d * P_1d, &grad_dummy));
3074     CeedCall(CeedCalloc(P_1d, &q_ref_dummy));
3075     CeedCall(CeedCalloc(P_1d, &q_weight_dummy));
3076     CeedCall(CeedBasisCreateTensorH1(ceed_parent, dim, num_comp, P_1d, P_1d, fdm_interp, grad_dummy, q_ref_dummy, q_weight_dummy, &fdm_basis));
3077     CeedCall(CeedFree(&fdm_interp));
3078     CeedCall(CeedFree(&grad_dummy));
3079     CeedCall(CeedFree(&q_ref_dummy));
3080     CeedCall(CeedFree(&q_weight_dummy));
3081     CeedCall(CeedFree(&lambda));
3082   }
3083 
3084   // -- Restriction
3085   {
3086     CeedInt strides[3] = {1, num_nodes, num_nodes * num_comp};
3087     CeedCall(CeedElemRestrictionCreateStrided(ceed_parent, num_elem, num_nodes, num_comp,
3088                                               (CeedSize)num_elem * (CeedSize)num_comp * (CeedSize)num_nodes, strides, &rstr_qd_i));
3089   }
3090 
3091   // -- QFunction
3092   CeedCall(CeedQFunctionCreateInteriorByName(ceed_parent, "Scale", &qf_fdm));
3093   CeedCall(CeedQFunctionAddInput(qf_fdm, "input", num_comp, CEED_EVAL_INTERP));
3094   CeedCall(CeedQFunctionAddInput(qf_fdm, "scale", num_comp, CEED_EVAL_NONE));
3095   CeedCall(CeedQFunctionAddOutput(qf_fdm, "output", num_comp, CEED_EVAL_INTERP));
3096   CeedCall(CeedQFunctionSetUserFlopsEstimate(qf_fdm, num_comp));
3097 
3098   // -- QFunction context
3099   {
3100     CeedInt *num_comp_data;
3101 
3102     CeedCall(CeedCalloc(1, &num_comp_data));
3103     num_comp_data[0] = num_comp;
3104     CeedCall(CeedQFunctionContextCreate(ceed, &ctx_fdm));
3105     CeedCall(CeedQFunctionContextSetData(ctx_fdm, CEED_MEM_HOST, CEED_OWN_POINTER, sizeof(*num_comp_data), num_comp_data));
3106   }
3107   CeedCall(CeedQFunctionSetContext(qf_fdm, ctx_fdm));
3108   CeedCall(CeedQFunctionContextDestroy(&ctx_fdm));
3109 
3110   // -- Operator
3111   CeedCall(CeedOperatorCreate(ceed_parent, qf_fdm, NULL, NULL, fdm_inv));
3112   CeedCall(CeedOperatorSetField(*fdm_inv, "input", rstr, fdm_basis, CEED_VECTOR_ACTIVE));
3113   CeedCall(CeedOperatorSetField(*fdm_inv, "scale", rstr_qd_i, CEED_BASIS_NONE, q_data));
3114   CeedCall(CeedOperatorSetField(*fdm_inv, "output", rstr, fdm_basis, CEED_VECTOR_ACTIVE));
3115 
3116   // Cleanup
3117   CeedCall(CeedDestroy(&ceed));
3118   CeedCall(CeedDestroy(&ceed_parent));
3119   CeedCall(CeedVectorDestroy(&q_data));
3120   CeedCall(CeedElemRestrictionDestroy(&rstr));
3121   CeedCall(CeedElemRestrictionDestroy(&rstr_qd_i));
3122   CeedCall(CeedBasisDestroy(&basis));
3123   CeedCall(CeedBasisDestroy(&fdm_basis));
3124   CeedCall(CeedQFunctionDestroy(&qf));
3125   CeedCall(CeedQFunctionDestroy(&qf_fdm));
3126   return CEED_ERROR_SUCCESS;
3127 }
3128 
3129 /// @}
3130