xref: /honee/src/mat-ceed.c (revision 000d20329dbbfe7106be37dc83eb49e6b9acfabe)
1 /// @file
2 /// MatCEED implementation
3 
4 #include <ceed.h>
5 #include <ceed/backend.h>
6 #include <mat-ceed-impl.h>
7 #include <mat-ceed.h>
8 #include <petsc-ceed-utils.h>
9 #include <petsc-ceed.h>
10 #include <petscdm.h>
11 #include <petscmat.h>
12 #include <stdbool.h>
13 #include <stdio.h>
14 #include <stdlib.h>
15 #include <string.h>
16 #include <petsc/private/petscimpl.h>
17 
18 PetscClassId  MATCEED_CLASSID;
19 PetscLogEvent MATCEED_MULT, MATCEED_MULT_CEEDOP, MATCEED_MULT_TRANSPOSE, MATCEED_MULT_TRANSPOSE_CEEDOP, MATCEED_ASSEMBLE_DIAGONAL,
20     MATCEED_ASSEMBLE_DIAGONAL_CEEDOP, MATCEED_SETUP_PBDIAGONAL, MATCEED_SETUP_PBDIAGONAL_CEEDOP, MATCEED_ASSEMBLE_PBDIAGONAL,
21     MATCEED_ASSEMBLE_PBDIAGONAL_CEEDOP, MATCEED_SETUP_FULL, MATCEED_SETUP_FULL_CEEDOP, MATCEED_ASSEMBLE_FULL, MATCEED_ASSEMBLE_FULL_CEEDOP;
22 
23 /**
24   @brief Register MATCEED log events.
25 
26   Not collective across MPI processes.
27 
28   @return An error code: 0 - success, otherwise - failure
29 **/
30 static PetscErrorCode MatCeedRegisterLogEvents() {
31   static PetscBool registered = PETSC_FALSE;
32 
33   PetscFunctionBeginUser;
34   if (registered) PetscFunctionReturn(PETSC_SUCCESS);
35   PetscCall(PetscClassIdRegister("MatCEED", &MATCEED_CLASSID));
36   PetscCall(PetscLogEventRegister("MatCEEDMul", MATCEED_CLASSID, &MATCEED_MULT));
37   PetscCall(PetscLogEventRegister("MatCEEDMulCeed", MATCEED_CLASSID, &MATCEED_MULT_CEEDOP));
38   PetscCall(PetscLogEventRegister("MatCEEDMulT", MATCEED_CLASSID, &MATCEED_MULT_TRANSPOSE));
39   PetscCall(PetscLogEventRegister("MatCEEDMulTCeed", MATCEED_CLASSID, &MATCEED_MULT_TRANSPOSE_CEEDOP));
40   PetscCall(PetscLogEventRegister("MatCEEDAsmDiag", MATCEED_CLASSID, &MATCEED_ASSEMBLE_DIAGONAL));
41   PetscCall(PetscLogEventRegister("MatCEEDAsmPBDSU", MATCEED_CLASSID, &MATCEED_SETUP_PBDIAGONAL));
42   PetscCall(PetscLogEventRegister("MatCEEDAsmPBDSUCeed", MATCEED_CLASSID, &MATCEED_SETUP_PBDIAGONAL_CEEDOP));
43   PetscCall(PetscLogEventRegister("MatCEEDAsmPBD", MATCEED_CLASSID, &MATCEED_ASSEMBLE_PBDIAGONAL));
44   PetscCall(PetscLogEventRegister("MatCEEDAsmPBDCeed", MATCEED_CLASSID, &MATCEED_ASSEMBLE_PBDIAGONAL_CEEDOP));
45   PetscCall(PetscLogEventRegister("MatCEEDAsmSU", MATCEED_CLASSID, &MATCEED_SETUP_FULL));
46   PetscCall(PetscLogEventRegister("MatCEEDAsmSUCeed", MATCEED_CLASSID, &MATCEED_SETUP_FULL_CEEDOP));
47   PetscCall(PetscLogEventRegister("MatCEEDAsm", MATCEED_CLASSID, &MATCEED_ASSEMBLE_FULL));
48   PetscCall(PetscLogEventRegister("MatCEEDAsmCeed", MATCEED_CLASSID, &MATCEED_ASSEMBLE_FULL_CEEDOP));
49   registered = PETSC_TRUE;
50   PetscFunctionReturn(PETSC_SUCCESS);
51 }
52 
53 /**
54   @brief Assemble the point block diagonal of a `MATCEED` into a `MATAIJ` or similar.
55          The `mat_coo` preallocation is set to match the sparsity pattern of `mat_ceed`.
56          The caller is responsible for assuring the global and local sizes are compatible, otherwise this function will fail.
57 
58   Collective across MPI processes.
59 
60   @param[in]      mat_ceed  `MATCEED` to assemble
61   @param[in,out]  mat_coo   `MATAIJ` or similar to assemble into
62 
63   @return An error code: 0 - success, otherwise - failure
64 **/
static PetscErrorCode MatCeedAssemblePointBlockDiagonalCOO(Mat mat_ceed, Mat mat_coo) {
  MatCeedContext ctx;

  PetscFunctionBeginUser;
  PetscCall(MatShellGetContext(mat_ceed, &ctx));

  // Check if COO pattern set
  // The context caches every Mat whose PBD sparsity pattern was already preallocated,
  // so the (expensive) symbolic assembly runs at most once per target Mat
  {
    PetscInt index = -1;

    for (PetscInt i = 0; i < ctx->num_mats_assembled_pbd; i++) {
      if (ctx->mats_assembled_pbd[i] == mat_coo) index = i;
    }
    if (index == -1) {
      PetscInt     *rows_petsc = NULL, *cols_petsc = NULL;
      CeedInt      *rows_ceed, *cols_ceed;
      PetscCount    num_entries;
      PetscLogStage stage_amg_setup;

      // -- Assemble sparsity pattern if mat hasn't been assembled before
      // Register the log stage lazily; GetId returns -1 when it does not exist yet
      PetscCall(PetscLogStageGetId("MatCEED Asm Setup", &stage_amg_setup));
      if (stage_amg_setup == -1) {
        PetscCall(PetscLogStageRegister("MatCEED Asm Setup", &stage_amg_setup));
      }
      PetscCall(PetscLogStagePush(stage_amg_setup));
      PetscCall(PetscLogEventBegin(MATCEED_SETUP_PBDIAGONAL, mat_ceed, mat_coo, NULL, NULL));
      PetscCall(PetscLogEventBegin(MATCEED_SETUP_PBDIAGONAL_CEEDOP, mat_ceed, mat_coo, NULL, NULL));
      // libCEED returns malloc'd CeedInt index arrays; ownership transfers to us
      PetscCallCeed(ctx->ceed, CeedOperatorLinearAssemblePointBlockDiagonalSymbolic(ctx->op_mult, &num_entries, &rows_ceed, &cols_ceed));
      PetscCall(PetscLogEventEnd(MATCEED_SETUP_PBDIAGONAL_CEEDOP, mat_ceed, mat_coo, NULL, NULL));
      // Convert CeedInt -> PetscInt (may alias or reallocate; consumes the Ceed arrays)
      PetscCall(IntArrayCeedToPetsc(num_entries, &rows_ceed, &rows_petsc));
      PetscCall(IntArrayCeedToPetsc(num_entries, &cols_ceed, &cols_petsc));
      PetscCall(MatSetPreallocationCOOLocal(mat_coo, num_entries, rows_petsc, cols_petsc));
      // Arrays originate from libCEED's malloc, hence free() rather than PetscFree()
      free(rows_petsc);
      free(cols_petsc);
      // Reusable CeedVector holding the assembled COO values across calls
      if (!ctx->coo_values_pbd) PetscCallCeed(ctx->ceed, CeedVectorCreate(ctx->ceed, num_entries, &ctx->coo_values_pbd));
      PetscCall(PetscRealloc(++ctx->num_mats_assembled_pbd * sizeof(Mat), &ctx->mats_assembled_pbd));
      ctx->mats_assembled_pbd[ctx->num_mats_assembled_pbd - 1] = mat_coo;
      PetscCall(PetscLogEventEnd(MATCEED_SETUP_PBDIAGONAL, mat_ceed, mat_coo, NULL, NULL));
      PetscCall(PetscLogStagePop());
    }
  }

  // Assemble mat_ceed
  PetscCall(PetscLogEventBegin(MATCEED_ASSEMBLE_PBDIAGONAL, mat_ceed, mat_coo, NULL, NULL));
  PetscCall(MatAssemblyBegin(mat_coo, MAT_FINAL_ASSEMBLY));
  {
    const CeedScalar *values;
    MatType           mat_type;
    CeedMemType       mem_type = CEED_MEM_HOST;
    PetscBool         is_spd, is_spd_known;

    // Read the assembled values from device memory when the target Mat is a GPU type
    PetscCall(MatGetType(mat_coo, &mat_type));
    if (strstr(mat_type, "cusparse")) mem_type = CEED_MEM_DEVICE;
    else if (strstr(mat_type, "kokkos")) mem_type = CEED_MEM_DEVICE;
    else mem_type = CEED_MEM_HOST;

    PetscCall(PetscLogEventBegin(MATCEED_ASSEMBLE_PBDIAGONAL_CEEDOP, mat_ceed, mat_coo, NULL, NULL));
    PetscCallCeed(ctx->ceed, CeedOperatorLinearAssemblePointBlockDiagonal(ctx->op_mult, ctx->coo_values_pbd, CEED_REQUEST_IMMEDIATE));
    PetscCall(PetscLogEventEnd(MATCEED_ASSEMBLE_PBDIAGONAL_CEEDOP, mat_ceed, mat_coo, NULL, NULL));
    PetscCallCeed(ctx->ceed, CeedVectorGetArrayRead(ctx->coo_values_pbd, mem_type, &values));
    PetscCall(MatSetValuesCOO(mat_coo, values, INSERT_VALUES));
    // Propagate the SPD flag from the shell matrix if it was ever set by the caller
    PetscCall(MatIsSPDKnown(mat_ceed, &is_spd_known, &is_spd));
    if (is_spd_known) PetscCall(MatSetOption(mat_coo, MAT_SPD, is_spd));
    PetscCallCeed(ctx->ceed, CeedVectorRestoreArrayRead(ctx->coo_values_pbd, &values));
  }
  PetscCall(MatAssemblyEnd(mat_coo, MAT_FINAL_ASSEMBLY));
  PetscCall(PetscLogEventEnd(MATCEED_ASSEMBLE_PBDIAGONAL, mat_ceed, mat_coo, NULL, NULL));
  PetscFunctionReturn(PETSC_SUCCESS);
}
134 
135 /**
136   @brief Assemble inner `Mat` for diagonal `PC` operations
137 
138   Collective across MPI processes.
139 
140   @param[in]   mat_ceed      `MATCEED` to invert
141   @param[in]   use_ceed_pbd  Boolean flag to use libCEED PBD assembly
142   @param[out]  mat_inner     Inner `Mat` for diagonal operations
143 
144   @return An error code: 0 - success, otherwise - failure
145 **/
146 static PetscErrorCode MatCeedAssembleInnerBlockDiagonalMat(Mat mat_ceed, PetscBool use_ceed_pbd, Mat *mat_inner) {
147   MatCeedContext ctx;
148 
149   PetscFunctionBeginUser;
150   PetscCall(MatShellGetContext(mat_ceed, &ctx));
151   if (use_ceed_pbd) {
152     // Check if COO pattern set
153     if (!ctx->mat_assembled_pbd_internal) PetscCall(MatCeedCreateMatCOO(mat_ceed, &ctx->mat_assembled_pbd_internal));
154 
155     // Assemble mat_assembled_full_internal
156     PetscCall(MatCeedAssemblePointBlockDiagonalCOO(mat_ceed, ctx->mat_assembled_pbd_internal));
157     if (mat_inner) *mat_inner = ctx->mat_assembled_pbd_internal;
158   } else {
159     // Check if COO pattern set
160     if (!ctx->mat_assembled_full_internal) PetscCall(MatCeedCreateMatCOO(mat_ceed, &ctx->mat_assembled_full_internal));
161 
162     // Assemble mat_assembled_full_internal
163     PetscCall(MatCeedAssembleCOO(mat_ceed, ctx->mat_assembled_full_internal));
164     if (mat_inner) *mat_inner = ctx->mat_assembled_full_internal;
165   }
166   PetscFunctionReturn(PETSC_SUCCESS);
167 }
168 
169 /**
170   @brief Get `MATCEED` diagonal block for Jacobi.
171 
172   Collective across MPI processes.
173 
174   @param[in]   mat_ceed   `MATCEED` to invert
175   @param[out]  mat_block  The diagonal block matrix
176 
177   @return An error code: 0 - success, otherwise - failure
178 **/
179 static PetscErrorCode MatGetDiagonalBlock_Ceed(Mat mat_ceed, Mat *mat_block) {
180   Mat            mat_inner = NULL;
181   MatCeedContext ctx;
182 
183   PetscFunctionBeginUser;
184   PetscCall(MatShellGetContext(mat_ceed, &ctx));
185 
186   // Assemble inner mat if needed
187   PetscCall(MatCeedAssembleInnerBlockDiagonalMat(mat_ceed, ctx->is_ceed_pbd_valid, &mat_inner));
188 
189   // Get block diagonal
190   PetscCall(MatGetDiagonalBlock(mat_inner, mat_block));
191   PetscFunctionReturn(PETSC_SUCCESS);
192 }
193 
194 /**
195   @brief Invert `MATCEED` diagonal block for Jacobi.
196 
197   Collective across MPI processes.
198 
199   @param[in]   mat_ceed  `MATCEED` to invert
200   @param[out]  values    The block inverses in column major order
201 
202   @return An error code: 0 - success, otherwise - failure
203 **/
204 static PetscErrorCode MatInvertBlockDiagonal_Ceed(Mat mat_ceed, const PetscScalar **values) {
205   Mat            mat_inner = NULL;
206   MatCeedContext ctx;
207 
208   PetscFunctionBeginUser;
209   PetscCall(MatShellGetContext(mat_ceed, &ctx));
210 
211   // Assemble inner mat if needed
212   PetscCall(MatCeedAssembleInnerBlockDiagonalMat(mat_ceed, ctx->is_ceed_pbd_valid, &mat_inner));
213 
214   // Invert PB diagonal
215   PetscCall(MatInvertBlockDiagonal(mat_inner, values));
216   PetscFunctionReturn(PETSC_SUCCESS);
217 }
218 
219 /**
220   @brief Invert `MATCEED` variable diagonal block for Jacobi.
221 
222   Collective across MPI processes.
223 
224   @param[in]   mat_ceed     `MATCEED` to invert
225   @param[in]   num_blocks   The number of blocks on the process
226   @param[in]   block_sizes  The size of each block on the process
227   @param[out]  values       The block inverses in column major order
228 
229   @return An error code: 0 - success, otherwise - failure
230 **/
231 static PetscErrorCode MatInvertVariableBlockDiagonal_Ceed(Mat mat_ceed, PetscInt num_blocks, const PetscInt *block_sizes, PetscScalar *values) {
232   Mat            mat_inner = NULL;
233   MatCeedContext ctx;
234 
235   PetscFunctionBeginUser;
236   PetscCall(MatShellGetContext(mat_ceed, &ctx));
237 
238   // Assemble inner mat if needed
239   PetscCall(MatCeedAssembleInnerBlockDiagonalMat(mat_ceed, ctx->is_ceed_vpbd_valid, &mat_inner));
240 
241   // Invert PB diagonal
242   PetscCall(MatInvertVariableBlockDiagonal(mat_inner, num_blocks, block_sizes, values));
243   PetscFunctionReturn(PETSC_SUCCESS);
244 }
245 
246 /**
247   @brief View `MATCEED`.
248 
249   Collective across MPI processes.
250 
251   @param[in]   mat_ceed  `MATCEED` to view
252   @param[in]   viewer    The visualization context
253 
254   @return An error code: 0 - success, otherwise - failure
255 **/
256 static PetscErrorCode MatView_Ceed(Mat mat_ceed, PetscViewer viewer) {
257   PetscBool         is_ascii;
258   PetscViewerFormat format;
259   PetscMPIInt       size, rank;
260   MatCeedContext    ctx;
261 
262   PetscFunctionBeginUser;
263   PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2);
264   PetscCall(MatShellGetContext(mat_ceed, &ctx));
265   if (!viewer) PetscCall(PetscViewerASCIIGetStdout(PetscObjectComm((PetscObject)mat_ceed), &viewer));
266 
267   PetscCall(PetscViewerGetFormat(viewer, &format));
268   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)mat_ceed), &size));
269   if (size == 1 && format == PETSC_VIEWER_LOAD_BALANCE) PetscFunctionReturn(PETSC_SUCCESS);
270 
271   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat_ceed), &rank));
272   if (rank != 0) PetscFunctionReturn(PETSC_SUCCESS);
273 
274   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &is_ascii));
275   {
276     PetscBool is_detailed     = format == PETSC_VIEWER_ASCII_INFO_DETAIL;
277     char      rank_string[16] = {'\0'};
278     FILE     *file;
279 
280     PetscCall(PetscViewerASCIIPrintf(viewer, "MatCEED:\n  Default COO MatType:%s\n", ctx->coo_mat_type));
281     PetscCall(PetscViewerASCIIGetPointer(viewer, &file));
282     PetscCall(PetscSNPrintf(rank_string, 16, "on Rank %d", rank));
283     PetscCall(PetscViewerASCIIPrintf(viewer, " CeedOperator Apply %s:\n", is_detailed ? rank_string : "Summary"));
284     if (is_detailed) PetscCallCeed(ctx->ceed, CeedOperatorView(ctx->op_mult, file));
285     else PetscCallCeed(ctx->ceed, CeedOperatorViewTerse(ctx->op_mult, file));
286     if (ctx->op_mult_transpose) {
287       PetscCall(PetscViewerASCIIPrintf(viewer, "  CeedOperator ApplyTranspose %s:\n", is_detailed ? rank_string : "Summary"));
288       if (is_detailed) PetscCallCeed(ctx->ceed, CeedOperatorView(ctx->op_mult_transpose, file));
289       else PetscCallCeed(ctx->ceed, CeedOperatorViewTerse(ctx->op_mult_transpose, file));
290     }
291   }
292   PetscFunctionReturn(PETSC_SUCCESS);
293 }
294 
295 // -----------------------------------------------------------------------------
296 // MatCeed
297 // -----------------------------------------------------------------------------
298 
299 /**
300   @brief Create PETSc `Mat` from libCEED operators.
301 
302   Collective across MPI processes.
303 
304   @param[in]   dm_x                      Input `DM`
305   @param[in]   dm_y                      Output `DM`
306   @param[in]   op_mult                   `CeedOperator` for forward evaluation
307   @param[in]   op_mult_transpose         `CeedOperator` for transpose evaluation
308   @param[out]  mat                        New MatCeed
309 
310   @return An error code: 0 - success, otherwise - failure
311 **/
PetscErrorCode MatCreateCeed(DM dm_x, DM dm_y, CeedOperator op_mult, CeedOperator op_mult_transpose, Mat *mat) {
  PetscInt       X_l_size, X_g_size, Y_l_size, Y_g_size;
  VecType        vec_type;
  MatCeedContext ctx;

  PetscFunctionBeginUser;
  PetscCall(MatCeedRegisterLogEvents());

  // Collect context data
  // Global/local sizes come from temporary global vectors of each DM
  PetscCall(DMGetVecType(dm_x, &vec_type));
  {
    Vec X;

    PetscCall(DMGetGlobalVector(dm_x, &X));
    PetscCall(VecGetSize(X, &X_g_size));
    PetscCall(VecGetLocalSize(X, &X_l_size));
    PetscCall(DMRestoreGlobalVector(dm_x, &X));
  }
  if (dm_y) {
    Vec Y;

    PetscCall(DMGetGlobalVector(dm_y, &Y));
    PetscCall(VecGetSize(Y, &Y_g_size));
    PetscCall(VecGetLocalSize(Y, &Y_l_size));
    PetscCall(DMRestoreGlobalVector(dm_y, &Y));
  } else {
    // Square operator: output space defaults to the input space
    dm_y     = dm_x;
    Y_g_size = X_g_size;
    Y_l_size = X_l_size;
  }

  // Create context
  // The context takes its own references to the work vectors, so the locals are destroyed here
  {
    Vec X_loc, Y_loc_transpose = NULL;

    PetscCall(DMCreateLocalVector(dm_x, &X_loc));
    PetscCall(VecZeroEntries(X_loc));
    if (op_mult_transpose) {
      PetscCall(DMCreateLocalVector(dm_y, &Y_loc_transpose));
      PetscCall(VecZeroEntries(Y_loc_transpose));
    }
    PetscCall(MatCeedContextCreate(dm_x, dm_y, X_loc, Y_loc_transpose, op_mult, op_mult_transpose, MATCEED_MULT, MATCEED_MULT_TRANSPOSE,
                                   MATCEED_MULT_CEEDOP, MATCEED_MULT_TRANSPOSE_CEEDOP, &ctx));
    PetscCall(VecDestroy(&X_loc));
    PetscCall(VecDestroy(&Y_loc_transpose));
  }

  // Create mat
  PetscCall(MatCreateShell(PetscObjectComm((PetscObject)dm_x), Y_l_size, X_l_size, Y_g_size, X_g_size, ctx, mat));
  PetscCall(PetscObjectChangeTypeName((PetscObject)*mat, MATCEED));
  // -- Set block and variable block sizes
  // Block size information is only meaningful for square operators on a single DM;
  // it is harvested from a throwaway AIJ matrix created by the DM
  if (dm_x == dm_y) {
    MatType dm_mat_type, dm_mat_type_copy;
    Mat     temp_mat;

    // Temporarily force MATAIJ so DMCreateMatrix yields a sparse Mat with block metadata,
    // then restore the DM's original MatType (copied because DMSetMatType may free it)
    PetscCall(DMGetMatType(dm_x, &dm_mat_type));
    PetscCall(PetscStrallocpy(dm_mat_type, (char **)&dm_mat_type_copy));
    PetscCall(DMSetMatType(dm_x, MATAIJ));
    PetscCall(DMCreateMatrix(dm_x, &temp_mat));
    PetscCall(DMSetMatType(dm_x, dm_mat_type_copy));
    PetscCall(PetscFree(dm_mat_type_copy));

    {
      PetscInt        block_size, num_blocks, max_vblock_size = PETSC_INT_MAX;
      const PetscInt *vblock_sizes;

      // -- Get block sizes
      PetscCall(MatGetBlockSize(temp_mat, &block_size));
      PetscCall(MatGetVariableBlockSizes(temp_mat, &num_blocks, &vblock_sizes));
      {
        // Global maximum variable block size across all ranks
        PetscInt local_min_max[2] = {0}, global_min_max[2] = {0, PETSC_INT_MAX};

        for (PetscInt i = 0; i < num_blocks; i++) local_min_max[1] = PetscMax(local_min_max[1], vblock_sizes[i]);
        PetscCall(PetscGlobalMinMaxInt(PetscObjectComm((PetscObject)dm_x), local_min_max, global_min_max));
        max_vblock_size = global_min_max[1];
      }

      // -- Copy block sizes
      if (block_size > 1) PetscCall(MatSetBlockSize(*mat, block_size));
      if (num_blocks) PetscCall(MatSetVariableBlockSizes(*mat, num_blocks, (PetscInt *)vblock_sizes));

      // -- Check libCEED compatibility
      // PBD assembly requires a single active basis whose component count matches the
      // block size; variable PBD requires at least max_vblock_size components
      {
        bool is_composite;

        ctx->is_ceed_pbd_valid  = PETSC_TRUE;
        ctx->is_ceed_vpbd_valid = PETSC_TRUE;
        PetscCallCeed(ctx->ceed, CeedOperatorIsComposite(op_mult, &is_composite));
        if (is_composite) {
          CeedInt       num_sub_operators;
          CeedOperator *sub_operators;

          PetscCallCeed(ctx->ceed, CeedCompositeOperatorGetNumSub(op_mult, &num_sub_operators));
          PetscCallCeed(ctx->ceed, CeedCompositeOperatorGetSubList(op_mult, &sub_operators));
          for (CeedInt i = 0; i < num_sub_operators; i++) {
            CeedInt                  num_bases, num_comp;
            CeedBasis               *active_bases;
            CeedOperatorAssemblyData assembly_data;

            PetscCallCeed(ctx->ceed, CeedOperatorGetOperatorAssemblyData(sub_operators[i], &assembly_data));
            PetscCallCeed(ctx->ceed, CeedOperatorAssemblyDataGetBases(assembly_data, &num_bases, &active_bases, NULL, NULL, NULL, NULL));
            PetscCallCeed(ctx->ceed, CeedBasisGetNumComponents(active_bases[0], &num_comp));
            if (num_bases > 1) {
              ctx->is_ceed_pbd_valid  = PETSC_FALSE;
              ctx->is_ceed_vpbd_valid = PETSC_FALSE;
            }
            if (num_comp != block_size) ctx->is_ceed_pbd_valid = PETSC_FALSE;
            if (num_comp < max_vblock_size) ctx->is_ceed_vpbd_valid = PETSC_FALSE;
          }
        } else {
          // LCOV_EXCL_START
          // Same checks as above for a non-composite operator
          CeedInt                  num_bases, num_comp;
          CeedBasis               *active_bases;
          CeedOperatorAssemblyData assembly_data;

          PetscCallCeed(ctx->ceed, CeedOperatorGetOperatorAssemblyData(op_mult, &assembly_data));
          PetscCallCeed(ctx->ceed, CeedOperatorAssemblyDataGetBases(assembly_data, &num_bases, &active_bases, NULL, NULL, NULL, NULL));
          PetscCallCeed(ctx->ceed, CeedBasisGetNumComponents(active_bases[0], &num_comp));
          if (num_bases > 1) {
            ctx->is_ceed_pbd_valid  = PETSC_FALSE;
            ctx->is_ceed_vpbd_valid = PETSC_FALSE;
          }
          if (num_comp != block_size) ctx->is_ceed_pbd_valid = PETSC_FALSE;
          if (num_comp < max_vblock_size) ctx->is_ceed_vpbd_valid = PETSC_FALSE;
          // LCOV_EXCL_STOP
        }
        {
          // Validity flags must agree across ranks; take the global minimum (logical AND)
          PetscInt local_is_valid[2], global_is_valid[2];

          local_is_valid[0] = local_is_valid[1] = ctx->is_ceed_pbd_valid;
          PetscCall(PetscGlobalMinMaxInt(PetscObjectComm((PetscObject)dm_x), local_is_valid, global_is_valid));
          ctx->is_ceed_pbd_valid = global_is_valid[0];
          local_is_valid[0] = local_is_valid[1] = ctx->is_ceed_vpbd_valid;
          PetscCall(PetscGlobalMinMaxInt(PetscObjectComm((PetscObject)dm_x), local_is_valid, global_is_valid));
          ctx->is_ceed_vpbd_valid = global_is_valid[0];
        }
      }
    }
    PetscCall(MatDestroy(&temp_mat));
  }
  // -- Set internal mat type
  // Choose the default COO assembly MatType from the work vector's backend
  {
    // NOTE(review): this inner vec_type shadows the outer one read from dm_x — confirm intentional
    VecType vec_type;
    MatType coo_mat_type;

    PetscCall(VecGetType(ctx->X_loc, &vec_type));
    if (strstr(vec_type, VECCUDA)) coo_mat_type = MATAIJCUSPARSE;
    else if (strstr(vec_type, VECKOKKOS)) coo_mat_type = MATAIJKOKKOS;
    else coo_mat_type = MATAIJ;
    PetscCall(PetscStrallocpy(coo_mat_type, &ctx->coo_mat_type));
  }
  // -- Set mat operations
  PetscCall(MatShellSetContextDestroy(*mat, (PetscErrorCode(*)(void *))MatCeedContextDestroy));
  PetscCall(MatShellSetOperation(*mat, MATOP_VIEW, (void (*)(void))MatView_Ceed));
  PetscCall(MatShellSetOperation(*mat, MATOP_MULT, (void (*)(void))MatMult_Ceed));
  if (op_mult_transpose) PetscCall(MatShellSetOperation(*mat, MATOP_MULT_TRANSPOSE, (void (*)(void))MatMultTranspose_Ceed));
  PetscCall(MatShellSetOperation(*mat, MATOP_GET_DIAGONAL, (void (*)(void))MatGetDiagonal_Ceed));
  PetscCall(MatShellSetOperation(*mat, MATOP_GET_DIAGONAL_BLOCK, (void (*)(void))MatGetDiagonalBlock_Ceed));
  PetscCall(MatShellSetOperation(*mat, MATOP_INVERT_BLOCK_DIAGONAL, (void (*)(void))MatInvertBlockDiagonal_Ceed));
  PetscCall(MatShellSetOperation(*mat, MATOP_INVERT_VBLOCK_DIAGONAL, (void (*)(void))MatInvertVariableBlockDiagonal_Ceed));
  PetscCall(MatShellSetVecType(*mat, vec_type));
  PetscFunctionReturn(PETSC_SUCCESS);
}
475 
476 /**
477   @brief Copy `MATCEED` into a compatible `Mat` with type `MatShell` or `MATCEED`.
478 
479   Collective across MPI processes.
480 
481   @param[in]   mat_ceed   `MATCEED` to copy from
482   @param[out]  mat_other  `MatShell` or `MATCEED` to copy into
483 
484   @return An error code: 0 - success, otherwise - failure
485 **/
486 PetscErrorCode MatCeedCopy(Mat mat_ceed, Mat mat_other) {
487   PetscFunctionBeginUser;
488   PetscCall(MatCeedRegisterLogEvents());
489 
490   // Check type compatibility
491   {
492     PetscBool is_matceed = PETSC_FALSE, is_matshell = PETSC_FALSE;
493     MatType   mat_type_ceed, mat_type_other;
494 
495     PetscCall(MatGetType(mat_ceed, &mat_type_ceed));
496     PetscCall(PetscStrcmp(mat_type_ceed, MATCEED, &is_matceed));
497     PetscCheck(is_matceed, PETSC_COMM_SELF, PETSC_ERR_LIB, "mat_ceed must have type " MATCEED);
498     PetscCall(MatGetType(mat_other, &mat_type_other));
499     PetscCall(PetscStrcmp(mat_type_other, MATCEED, &is_matceed));
500     PetscCall(PetscStrcmp(mat_type_other, MATSHELL, &is_matceed));
501     PetscCheck(is_matceed || is_matshell, PETSC_COMM_SELF, PETSC_ERR_LIB, "mat_other must have type " MATCEED " or " MATSHELL);
502   }
503 
504   // Check dimension compatibility
505   {
506     PetscInt X_l_ceed_size, X_g_ceed_size, Y_l_ceed_size, Y_g_ceed_size, X_l_other_size, X_g_other_size, Y_l_other_size, Y_g_other_size;
507 
508     PetscCall(MatGetSize(mat_ceed, &Y_g_ceed_size, &X_g_ceed_size));
509     PetscCall(MatGetLocalSize(mat_ceed, &Y_l_ceed_size, &X_l_ceed_size));
510     PetscCall(MatGetSize(mat_ceed, &Y_g_other_size, &X_g_other_size));
511     PetscCall(MatGetLocalSize(mat_ceed, &Y_l_other_size, &X_l_other_size));
512     PetscCheck((Y_g_ceed_size == Y_g_other_size) && (X_g_ceed_size == X_g_other_size) && (Y_l_ceed_size == Y_l_other_size) &&
513                    (X_l_ceed_size == X_l_other_size),
514                PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ,
515                "mat_ceed and mat_other must have compatible sizes; found mat_ceed (Global: %" PetscInt_FMT ", %" PetscInt_FMT
516                "; Local: %" PetscInt_FMT ", %" PetscInt_FMT ") mat_other (Global: %" PetscInt_FMT ", %" PetscInt_FMT "; Local: %" PetscInt_FMT
517                ", %" PetscInt_FMT ")",
518                Y_g_ceed_size, X_g_ceed_size, Y_l_ceed_size, X_l_ceed_size, Y_g_other_size, X_g_other_size, Y_l_other_size, X_l_other_size);
519   }
520 
521   // Convert
522   {
523     VecType        vec_type;
524     MatCeedContext ctx;
525 
526     PetscCall(PetscObjectChangeTypeName((PetscObject)mat_other, MATCEED));
527     PetscCall(MatShellGetContext(mat_ceed, &ctx));
528     PetscCall(MatCeedContextReference(ctx));
529     PetscCall(MatShellSetContext(mat_other, ctx));
530     PetscCall(MatShellSetContextDestroy(mat_other, (PetscErrorCode(*)(void *))MatCeedContextDestroy));
531     PetscCall(MatShellSetOperation(mat_other, MATOP_VIEW, (void (*)(void))MatView_Ceed));
532     PetscCall(MatShellSetOperation(mat_other, MATOP_MULT, (void (*)(void))MatMult_Ceed));
533     if (ctx->op_mult_transpose) PetscCall(MatShellSetOperation(mat_other, MATOP_MULT_TRANSPOSE, (void (*)(void))MatMultTranspose_Ceed));
534     PetscCall(MatShellSetOperation(mat_other, MATOP_GET_DIAGONAL, (void (*)(void))MatGetDiagonal_Ceed));
535     PetscCall(MatShellSetOperation(mat_other, MATOP_GET_DIAGONAL_BLOCK, (void (*)(void))MatGetDiagonalBlock_Ceed));
536     PetscCall(MatShellSetOperation(mat_other, MATOP_INVERT_BLOCK_DIAGONAL, (void (*)(void))MatInvertBlockDiagonal_Ceed));
537     PetscCall(MatShellSetOperation(mat_other, MATOP_INVERT_VBLOCK_DIAGONAL, (void (*)(void))MatInvertVariableBlockDiagonal_Ceed));
538     {
539       PetscInt block_size;
540 
541       PetscCall(MatGetBlockSize(mat_ceed, &block_size));
542       if (block_size > 1) PetscCall(MatSetBlockSize(mat_other, block_size));
543     }
544     {
545       PetscInt        num_blocks;
546       const PetscInt *block_sizes;
547 
548       PetscCall(MatGetVariableBlockSizes(mat_ceed, &num_blocks, &block_sizes));
549       if (num_blocks) PetscCall(MatSetVariableBlockSizes(mat_other, num_blocks, (PetscInt *)block_sizes));
550     }
551     PetscCall(DMGetVecType(ctx->dm_x, &vec_type));
552     PetscCall(MatShellSetVecType(mat_other, vec_type));
553   }
554   PetscFunctionReturn(PETSC_SUCCESS);
555 }
556 
557 /**
  @brief Mark `CeedQFunction` data as updated and the `CeedQFunction` as requiring re-assembly for a `MatCEED`.
559 
560   Collective across MPI processes.
561 
562   @param[in]   mat_ceed       `MATCEED`
  @param[in]   update_needed  Boolean flag indicating `CeedQFunction` update needed
564 
565   @return An error code: 0 - success, otherwise - failure
566 **/
567 PetscErrorCode MatCeedSetAssemblyDataUpdateNeeded(Mat mat_ceed, PetscBool update_needed) {
568   MatCeedContext ctx;
569 
570   PetscFunctionBeginUser;
571   PetscCall(MatShellGetContext(mat_ceed, &ctx));
572   PetscCallCeed(ctx->ceed, CeedOperatorSetQFunctionAssemblyDataUpdateNeeded(ctx->op_mult, update_needed));
573   if (ctx->op_mult_transpose) {
574     PetscCallCeed(ctx->ceed, CeedOperatorSetQFunctionAssemblyDataUpdateNeeded(ctx->op_mult_transpose, update_needed));
575   }
576   PetscFunctionReturn(PETSC_SUCCESS);
577 }
578 
579 /**
580   @brief Setup a `Mat` with the same COO pattern as a `MatCEED`.
581 
582   Collective across MPI processes.
583 
584   @param[in]   mat_ceed  `MATCEED`
585   @param[out]  mat_coo   Sparse `Mat` with same COO pattern
586 
587   @return An error code: 0 - success, otherwise - failure
588 **/
PetscErrorCode MatCeedCreateMatCOO(Mat mat_ceed, Mat *mat_coo) {
  MatCeedContext ctx;

  PetscFunctionBeginUser;
  PetscCall(MatShellGetContext(mat_ceed, &ctx));

  PetscCheck(ctx->dm_x == ctx->dm_y, PetscObjectComm((PetscObject)mat_ceed), PETSC_ERR_SUP, "COO assembly only supported for MATCEED on a single DM");

  // Check cl mat type
  // The command-line option (if given) overrides the context's default COO MatType
  {
    PetscBool is_coo_mat_type_cl = PETSC_FALSE;
    char      coo_mat_type_cl[64];

    // Check for specific CL coo mat type for this Mat
    {
      const char *mat_ceed_prefix = NULL;

      PetscCall(MatGetOptionsPrefix(mat_ceed, &mat_ceed_prefix));
      // PetscOptionsBegin/End is a macro pair; the query must stay between them
      PetscOptionsBegin(PetscObjectComm((PetscObject)mat_ceed), mat_ceed_prefix, "", NULL);
      PetscCall(PetscOptionsFList("-ceed_coo_mat_type", "Default MATCEED COO assembly MatType", NULL, MatList, coo_mat_type_cl, coo_mat_type_cl,
                                  sizeof(coo_mat_type_cl), &is_coo_mat_type_cl));
      PetscOptionsEnd();
      if (is_coo_mat_type_cl) {
        // Replace the cached default (ctx owns the string)
        PetscCall(PetscFree(ctx->coo_mat_type));
        PetscCall(PetscStrallocpy(coo_mat_type_cl, &ctx->coo_mat_type));
      }
    }
  }

  // Create sparse matrix
  // Temporarily switch the DM's MatType to the COO type, create, then restore
  // (the original type string is copied because DMSetMatType may free it)
  {
    MatType dm_mat_type, dm_mat_type_copy;

    PetscCall(DMGetMatType(ctx->dm_x, &dm_mat_type));
    PetscCall(PetscStrallocpy(dm_mat_type, (char **)&dm_mat_type_copy));
    PetscCall(DMSetMatType(ctx->dm_x, ctx->coo_mat_type));
    PetscCall(DMCreateMatrix(ctx->dm_x, mat_coo));
    PetscCall(DMSetMatType(ctx->dm_x, dm_mat_type_copy));
    PetscCall(PetscFree(dm_mat_type_copy));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
631 
632 /**
633   @brief Setup the COO preallocation `MATCEED` into a `MATAIJ` or similar.
634          The caller is responsible for assuring the global and local sizes are compatible, otherwise this function will fail.
635 
636   Collective across MPI processes.
637 
638   @param[in]      mat_ceed  `MATCEED` to assemble
639   @param[in,out]  mat_coo   `MATAIJ` or similar to assemble into
640 
641   @return An error code: 0 - success, otherwise - failure
642 **/
PetscErrorCode MatCeedSetPreallocationCOO(Mat mat_ceed, Mat mat_coo) {
  MatCeedContext ctx;

  PetscFunctionBeginUser;
  PetscCall(MatShellGetContext(mat_ceed, &ctx));

  {
    PetscInt     *rows_petsc = NULL, *cols_petsc = NULL;
    CeedInt      *rows_ceed, *cols_ceed;
    PetscCount    num_entries;
    PetscLogStage stage_amg_setup;

    // -- Assemble sparsity pattern if mat hasn't been assembled before
    // Register the log stage lazily; GetId returns -1 when it does not exist yet
    PetscCall(PetscLogStageGetId("MatCEED Asm Setup", &stage_amg_setup));
    if (stage_amg_setup == -1) {
      PetscCall(PetscLogStageRegister("MatCEED Asm Setup", &stage_amg_setup));
    }
    PetscCall(PetscLogStagePush(stage_amg_setup));
    PetscCall(PetscLogEventBegin(MATCEED_SETUP_FULL, mat_ceed, mat_coo, NULL, NULL));
    PetscCall(PetscLogEventBegin(MATCEED_SETUP_FULL_CEEDOP, mat_ceed, mat_coo, NULL, NULL));
    // libCEED returns malloc'd CeedInt index arrays; ownership transfers to us
    PetscCallCeed(ctx->ceed, CeedOperatorLinearAssembleSymbolic(ctx->op_mult, &num_entries, &rows_ceed, &cols_ceed));
    PetscCall(PetscLogEventEnd(MATCEED_SETUP_FULL_CEEDOP, mat_ceed, mat_coo, NULL, NULL));
    // Convert CeedInt -> PetscInt (consumes the Ceed arrays)
    PetscCall(IntArrayCeedToPetsc(num_entries, &rows_ceed, &rows_petsc));
    PetscCall(IntArrayCeedToPetsc(num_entries, &cols_ceed, &cols_petsc));
    PetscCall(MatSetPreallocationCOOLocal(mat_coo, num_entries, rows_petsc, cols_petsc));
    // Arrays originate from libCEED's malloc, hence free() rather than PetscFree()
    free(rows_petsc);
    free(cols_petsc);
    // Reusable CeedVector holding the assembled COO values across calls
    if (!ctx->coo_values_full) PetscCallCeed(ctx->ceed, CeedVectorCreate(ctx->ceed, num_entries, &ctx->coo_values_full));
    // Record mat_coo in the cache so MatCeedAssembleCOO skips re-preallocation
    PetscCall(PetscRealloc(++ctx->num_mats_assembled_full * sizeof(Mat), &ctx->mats_assembled_full));
    ctx->mats_assembled_full[ctx->num_mats_assembled_full - 1] = mat_coo;
    PetscCall(PetscLogEventEnd(MATCEED_SETUP_FULL, mat_ceed, mat_coo, NULL, NULL));
    PetscCall(PetscLogStagePop());
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
678 
679 /**
680   @brief Assemble a `MATCEED` into a `MATAIJ` or similar.
681          The `mat_coo` preallocation is set to match the sparsity pattern of `mat_ceed`.
682          The caller is responsible for assuring the global and local sizes are compatible, otherwise this function will fail.
683 
684   Collective across MPI processes.
685 
686   @param[in]      mat_ceed  `MATCEED` to assemble
687   @param[in,out]  mat_coo   `MATAIJ` or similar to assemble into
688 
689   @return An error code: 0 - success, otherwise - failure
690 **/
691 PetscErrorCode MatCeedAssembleCOO(Mat mat_ceed, Mat mat_coo) {
692   MatCeedContext ctx;
693 
694   PetscFunctionBeginUser;
695   PetscCall(MatShellGetContext(mat_ceed, &ctx));
696 
697   // Set COO pattern if needed
698   {
699     CeedInt index = -1;
700 
701     for (PetscInt i = 0; i < ctx->num_mats_assembled_full; i++) {
702       if (ctx->mats_assembled_full[i] == mat_coo) index = i;
703     }
704     if (index == -1) PetscCall(MatCeedSetPreallocationCOO(mat_ceed, mat_coo));
705   }
706 
707   // Assemble mat_ceed
708   PetscCall(PetscLogEventBegin(MATCEED_ASSEMBLE_FULL, mat_ceed, mat_coo, NULL, NULL));
709   PetscCall(MatAssemblyBegin(mat_coo, MAT_FINAL_ASSEMBLY));
710   {
711     const CeedScalar *values;
712     MatType           mat_type;
713     CeedMemType       mem_type = CEED_MEM_HOST;
714     PetscBool         is_spd, is_spd_known;
715 
716     PetscCall(MatGetType(mat_coo, &mat_type));
717     if (strstr(mat_type, "cusparse")) mem_type = CEED_MEM_DEVICE;
718     else if (strstr(mat_type, "kokkos")) mem_type = CEED_MEM_DEVICE;
719     else mem_type = CEED_MEM_HOST;
720 
721     PetscCall(PetscLogEventBegin(MATCEED_ASSEMBLE_FULL_CEEDOP, mat_ceed, mat_coo, NULL, NULL));
722     PetscCallCeed(ctx->ceed, CeedOperatorLinearAssemble(ctx->op_mult, ctx->coo_values_full));
723     PetscCall(PetscLogEventEnd(MATCEED_ASSEMBLE_FULL_CEEDOP, mat_ceed, mat_coo, NULL, NULL));
724     PetscCallCeed(ctx->ceed, CeedVectorGetArrayRead(ctx->coo_values_full, mem_type, &values));
725     PetscCall(MatSetValuesCOO(mat_coo, values, INSERT_VALUES));
726     PetscCall(MatIsSPDKnown(mat_ceed, &is_spd_known, &is_spd));
727     if (is_spd_known) PetscCall(MatSetOption(mat_coo, MAT_SPD, is_spd));
728     PetscCallCeed(ctx->ceed, CeedVectorRestoreArrayRead(ctx->coo_values_full, &values));
729   }
730   PetscCall(MatAssemblyEnd(mat_coo, MAT_FINAL_ASSEMBLY));
731   PetscCall(PetscLogEventEnd(MATCEED_ASSEMBLE_FULL, mat_ceed, mat_coo, NULL, NULL));
732   PetscFunctionReturn(PETSC_SUCCESS);
733 }
734 
735 /**
736   @brief Set the current value of a context field for a `MatCEED`.
737 
738   Not collective across MPI processes.
739 
740   @param[in,out]  mat    `MatCEED`
741   @param[in]      name   Name of the context field
742   @param[in]      value  New context field value
743 
744   @return An error code: 0 - success, otherwise - failure
745 **/
PetscErrorCode MatCeedSetContextDouble(Mat mat, const char *name, double value) {
  PetscBool      was_updated = PETSC_FALSE;
  MatCeedContext ctx;

  PetscFunctionBeginUser;
  PetscCall(MatShellGetContext(mat, &ctx));
  {
    CeedContextFieldLabel label = NULL;

    // Forward operator: write the field only when the stored value differs
    PetscCallCeed(ctx->ceed, CeedOperatorGetContextFieldLabel(ctx->op_mult, name, &label));
    if (label) {
      // Seed with a value that differs from `value` (2v + 1 == v only at v == -1.0);
      // the read below replaces it with the currently stored field value
      double set_value = 2 * value + 1.0;

      PetscCall(MatCeedGetContextDouble(mat, name, &set_value));
      if (set_value != value) {
        PetscCallCeed(ctx->ceed, CeedOperatorSetContextDouble(ctx->op_mult, label, &value));
        was_updated = PETSC_TRUE;
      }
    }
    // Transpose operator, when present
    if (ctx->op_mult_transpose) {
      label = NULL;
      PetscCallCeed(ctx->ceed, CeedOperatorGetContextFieldLabel(ctx->op_mult_transpose, name, &label));
      if (label) {
        double set_value = 2 * value + 1.0;

        // NOTE(review): MatCeedGetContextDouble() reads from op_mult first, which may have just been
        // updated above; skipping the write here is only correct if both operators share the same
        // context data — confirm that assumption holds for all MatCEED constructions
        PetscCall(MatCeedGetContextDouble(mat, name, &set_value));
        if (set_value != value) {
          PetscCallCeed(ctx->ceed, CeedOperatorSetContextDouble(ctx->op_mult_transpose, label, &value));
          was_updated = PETSC_TRUE;
        }
      }
    }
  }
  // NOTE(review): the assembly calls appear to signal that operator state changed — confirm what
  // downstream consumers rely on this for
  if (was_updated) {
    PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
785 
786 /**
787   @brief Get the current value of a context field for a `MatCEED`.
788 
789   Not collective across MPI processes.
790 
791   @param[in]   mat    `MatCEED`
792   @param[in]   name   Name of the context field
793   @param[out]  value  Current context field value
794 
795   @return An error code: 0 - success, otherwise - failure
796 **/
797 PetscErrorCode MatCeedGetContextDouble(Mat mat, const char *name, double *value) {
798   MatCeedContext ctx;
799 
800   PetscFunctionBeginUser;
801   PetscCall(MatShellGetContext(mat, &ctx));
802   {
803     CeedContextFieldLabel label = NULL;
804     CeedOperator          op    = ctx->op_mult;
805 
806     PetscCallCeed(ctx->ceed, CeedOperatorGetContextFieldLabel(op, name, &label));
807     if (!label && ctx->op_mult_transpose) {
808       op = ctx->op_mult_transpose;
809       PetscCallCeed(ctx->ceed, CeedOperatorGetContextFieldLabel(op, name, &label));
810     }
811     if (label) {
812       PetscSizeT    num_values;
813       const double *values_ceed;
814 
815       PetscCallCeed(ctx->ceed, CeedOperatorGetContextDoubleRead(op, label, &num_values, &values_ceed));
816       *value = values_ceed[0];
817       PetscCallCeed(ctx->ceed, CeedOperatorRestoreContextDoubleRead(op, label, &values_ceed));
818     }
819   }
820   PetscFunctionReturn(PETSC_SUCCESS);
821 }
822 
823 /**
824   @brief Set the current `PetscReal` value of a context field for a `MatCEED`.
825 
826   Not collective across MPI processes.
827 
828   @param[in,out]  mat    `MatCEED`
829   @param[in]      name   Name of the context field
830   @param[in]      value  New context field value
831 
832   @return An error code: 0 - success, otherwise - failure
833 **/
834 PetscErrorCode MatCeedSetContextReal(Mat mat, const char *name, PetscReal value) {
835   double value_double = value;
836 
837   PetscFunctionBeginUser;
838   PetscCall(MatCeedSetContextDouble(mat, name, value_double));
839   PetscFunctionReturn(PETSC_SUCCESS);
840 }
841 
842 /**
843   @brief Get the current `PetscReal` value of a context field for a `MatCEED`.
844 
845   Not collective across MPI processes.
846 
847   @param[in]   mat    `MatCEED`
848   @param[in]   name   Name of the context field
849   @param[out]  value  Current context field value
850 
851   @return An error code: 0 - success, otherwise - failure
852 **/
853 PetscErrorCode MatCeedGetContextReal(Mat mat, const char *name, PetscReal *value) {
854   double value_double = 0.0;
855 
856   PetscFunctionBeginUser;
857   PetscCall(MatCeedGetContextDouble(mat, name, &value_double));
858   *value = value_double;
859   PetscFunctionReturn(PETSC_SUCCESS);
860 }
861 
862 /**
863   @brief Set the current time for a `MatCEED`.
864 
865   Not collective across MPI processes.
866 
867   @param[in,out]  mat   `MatCEED`
868   @param[in]      time  Current time
869 
870   @return An error code: 0 - success, otherwise - failure
871 **/
872 PetscErrorCode MatCeedSetTime(Mat mat, PetscReal time) {
873   PetscFunctionBeginUser;
874   {
875     double time_ceed = time;
876 
877     PetscCall(MatCeedSetContextDouble(mat, "time", time_ceed));
878   }
879   PetscFunctionReturn(PETSC_SUCCESS);
880 }
881 
882 /**
883   @brief Get the current time for a `MatCEED`.
884 
885   Not collective across MPI processes.
886 
887   @param[in]   mat   `MatCEED`
  @param[out]  time  Current time, or -1.0 if the operator has no time context field
889 
890   @return An error code: 0 - success, otherwise - failure
891 **/
892 PetscErrorCode MatCeedGetTime(Mat mat, PetscReal *time) {
893   PetscFunctionBeginUser;
894   *time = -1.0;
895   {
896     double time_ceed = -1.0;
897 
898     PetscCall(MatCeedGetContextDouble(mat, "time", &time_ceed));
899     *time = time_ceed;
900   }
901   PetscFunctionReturn(PETSC_SUCCESS);
902 }
903 
904 /**
905   @brief Set the current time step for a `MatCEED`.
906 
907   Not collective across MPI processes.
908 
909   @param[in,out]  mat  `MatCEED`
910   @param[in]      dt   Current time step
911 
912   @return An error code: 0 - success, otherwise - failure
913 **/
914 PetscErrorCode MatCeedSetDt(Mat mat, PetscReal dt) {
915   PetscFunctionBeginUser;
916   {
917     double dt_ceed = dt;
918 
919     PetscCall(MatCeedSetContextDouble(mat, "dt", dt_ceed));
920   }
921   PetscFunctionReturn(PETSC_SUCCESS);
922 }
923 
924 /**
925   @brief Set the Jacobian shifts for a `MatCEED`.
926 
927   Not collective across MPI processes.
928 
929   @param[in,out]  mat      `MatCEED`
930   @param[in]      shift_v  Velocity shift
931   @param[in]      shift_a  Acceleration shift
932 
933   @return An error code: 0 - success, otherwise - failure
934 **/
PetscErrorCode MatCeedSetShifts(Mat mat, PetscReal shift_v, PetscReal shift_a) {
  PetscFunctionBeginUser;
  // The velocity shift is always written
  {
    double shift_v_ceed = shift_v;

    PetscCall(MatCeedSetContextDouble(mat, "shift v", shift_v_ceed));
  }
  // The acceleration shift is only written when nonzero
  // NOTE(review): a caller cannot reset "shift a" back to 0.0 through this path — confirm the
  // zero-skip is intended (e.g. for first-order systems with no acceleration term)
  if (shift_a) {
    double shift_a_ceed = shift_a;

    PetscCall(MatCeedSetContextDouble(mat, "shift a", shift_a_ceed));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
949 
950 /**
951   @brief Set user context for a `MATCEED`.
952 
953   Collective across MPI processes.
954 
955   @param[in,out]  mat  `MATCEED`
956   @param[in]      f    The context destroy function, or NULL
957   @param[in]      ctx  User context, or NULL to unset
958 
959   @return An error code: 0 - success, otherwise - failure
960 **/
961 PetscErrorCode MatCeedSetContext(Mat mat, PetscErrorCode (*f)(void *), void *ctx) {
962   PetscContainer user_ctx = NULL;
963 
964   PetscFunctionBeginUser;
965   if (ctx) {
966     PetscCall(PetscContainerCreate(PetscObjectComm((PetscObject)mat), &user_ctx));
967     PetscCall(PetscContainerSetPointer(user_ctx, ctx));
968     PetscCall(PetscContainerSetUserDestroy(user_ctx, f));
969   }
970   PetscCall(PetscObjectCompose((PetscObject)mat, "MatCeed user context", (PetscObject)user_ctx));
971   PetscCall(PetscContainerDestroy(&user_ctx));
972   PetscFunctionReturn(PETSC_SUCCESS);
973 }
974 
975 /**
976   @brief Retrieve the user context for a `MATCEED`.
977 
978   Collective across MPI processes.
979 
980   @param[in,out]  mat  `MATCEED`
  @param[out]     ctx  User context, or NULL if no user context is set
982 
983   @return An error code: 0 - success, otherwise - failure
984 **/
985 PetscErrorCode MatCeedGetContext(Mat mat, void *ctx) {
986   PetscContainer user_ctx;
987 
988   PetscFunctionBeginUser;
989   PetscCall(PetscObjectQuery((PetscObject)mat, "MatCeed user context", (PetscObject *)&user_ctx));
990   if (user_ctx) PetscCall(PetscContainerGetPointer(user_ctx, (void **)ctx));
991   else *(void **)ctx = NULL;
992   PetscFunctionReturn(PETSC_SUCCESS);
993 }
994 /**
995   @brief Set a user defined matrix operation for a `MATCEED` matrix.
996 
997   Within each user-defined routine, the user should call `MatCeedGetContext()` to obtain the user-defined context that was set by
998 `MatCeedSetContext()`.
999 
1000   Collective across MPI processes.
1001 
1002   @param[in,out]  mat  `MATCEED`
1003   @param[in]      op   Name of the `MatOperation`
1004   @param[in]      g    Function that provides the operation
1005 
1006   @return An error code: 0 - success, otherwise - failure
1007 **/
1008 PetscErrorCode MatCeedSetOperation(Mat mat, MatOperation op, void (*g)(void)) {
1009   PetscFunctionBeginUser;
1010   PetscCall(MatShellSetOperation(mat, op, g));
1011   PetscFunctionReturn(PETSC_SUCCESS);
1012 }
1013 
1014 /**
  @brief Sets the default COO matrix type as a string for the `MATCEED`.
1016 
1017   Collective across MPI processes.
1018 
1019   @param[in,out]  mat   `MATCEED`
1020   @param[in]      type  COO `MatType` to set
1021 
1022   @return An error code: 0 - success, otherwise - failure
1023 **/
PetscErrorCode MatCeedSetCOOMatType(Mat mat, MatType type) {
  MatCeedContext ctx;

  PetscFunctionBeginUser;
  PetscCall(MatShellGetContext(mat, &ctx));
  // Check if same; no work needed when the requested type matches the current one
  {
    size_t    len_old, len_new;
    PetscBool is_same = PETSC_FALSE;

    PetscCall(PetscStrlen(ctx->coo_mat_type, &len_old));
    PetscCall(PetscStrlen(type, &len_new));
    if (len_old == len_new) PetscCall(PetscStrncmp(ctx->coo_mat_type, type, len_old, &is_same));
    if (is_same) PetscFunctionReturn(PETSC_SUCCESS);
  }
  // Clean up old mats in different format
  // LCOV_EXCL_START
  if (ctx->mat_assembled_full_internal) {
    // Remove the internal full matrix from the assembled list by shifting later entries down,
    // then destroy it (it appears at most once, so the outer loop stays valid)
    for (PetscInt i = 0; i < ctx->num_mats_assembled_full; i++) {
      if (ctx->mats_assembled_full[i] == ctx->mat_assembled_full_internal) {
        for (PetscInt j = i + 1; j < ctx->num_mats_assembled_full; j++) {
          ctx->mats_assembled_full[j - 1] = ctx->mats_assembled_full[j];
        }
        ctx->num_mats_assembled_full--;
        // Note: we'll realloc this array again, so no need to shrink the allocation
        PetscCall(MatDestroy(&ctx->mat_assembled_full_internal));
      }
    }
  }
  if (ctx->mat_assembled_pbd_internal) {
    // Same removal for the internal point-block diagonal matrix list
    for (PetscInt i = 0; i < ctx->num_mats_assembled_pbd; i++) {
      if (ctx->mats_assembled_pbd[i] == ctx->mat_assembled_pbd_internal) {
        for (PetscInt j = i + 1; j < ctx->num_mats_assembled_pbd; j++) {
          ctx->mats_assembled_pbd[j - 1] = ctx->mats_assembled_pbd[j];
        }
        // Note: we'll realloc this array again, so no need to shrink the allocation
        ctx->num_mats_assembled_pbd--;
        PetscCall(MatDestroy(&ctx->mat_assembled_pbd_internal));
      }
    }
  }
  // Store a private copy of the new type string
  PetscCall(PetscFree(ctx->coo_mat_type));
  PetscCall(PetscStrallocpy(type, &ctx->coo_mat_type));
  PetscFunctionReturn(PETSC_SUCCESS);
  // LCOV_EXCL_STOP
}
1070 
1071 /**
1072   @brief Gets the default COO matrix type as a string from the `MATCEED`.
1073 
1074   Collective across MPI processes.
1075 
1076   @param[in,out]  mat   `MATCEED`
  @param[out]     type  COO `MatType`
1078 
1079   @return An error code: 0 - success, otherwise - failure
1080 **/
1081 PetscErrorCode MatCeedGetCOOMatType(Mat mat, MatType *type) {
1082   MatCeedContext ctx;
1083 
1084   PetscFunctionBeginUser;
1085   PetscCall(MatShellGetContext(mat, &ctx));
1086   *type = ctx->coo_mat_type;
1087   PetscFunctionReturn(PETSC_SUCCESS);
1088 }
1089 
1090 /**
1091   @brief Set input local vectors for `MATCEED` `MatMult()` and `MatMultTranspose()` operations.
1092 
1093   Not collective across MPI processes.
1094 
1095   @param[in,out]  mat              `MATCEED`
1096   @param[in]      X_loc            Input PETSc local vector, or NULL
1097   @param[in]      Y_loc_transpose  Input PETSc local vector for transpose operation, or NULL
1098 
1099   @return An error code: 0 - success, otherwise - failure
1100 **/
1101 PetscErrorCode MatCeedSetLocalVectors(Mat mat, Vec X_loc, Vec Y_loc_transpose) {
1102   MatCeedContext ctx;
1103 
1104   PetscFunctionBeginUser;
1105   PetscCall(MatShellGetContext(mat, &ctx));
1106   if (X_loc) {
1107     PetscInt len_old, len_new;
1108 
1109     PetscCall(VecGetSize(ctx->X_loc, &len_old));
1110     PetscCall(VecGetSize(X_loc, &len_new));
1111     PetscCheck(len_old == len_new, PETSC_COMM_SELF, PETSC_ERR_LIB, "new X_loc length %" PetscInt_FMT " should match old X_loc length %" PetscInt_FMT,
1112                len_new, len_old);
1113     PetscCall(VecReferenceCopy(X_loc, &ctx->X_loc));
1114   }
1115   if (Y_loc_transpose) {
1116     PetscInt len_old, len_new;
1117 
1118     PetscCall(VecGetSize(ctx->Y_loc_transpose, &len_old));
1119     PetscCall(VecGetSize(Y_loc_transpose, &len_new));
1120     PetscCheck(len_old == len_new, PETSC_COMM_SELF, PETSC_ERR_LIB,
1121                "new Y_loc_transpose length %" PetscInt_FMT " should match old Y_loc_transpose length %" PetscInt_FMT, len_new, len_old);
1122     PetscCall(VecReferenceCopy(Y_loc_transpose, &ctx->Y_loc_transpose));
1123   }
1124   PetscFunctionReturn(PETSC_SUCCESS);
1125 }
1126 
1127 /**
1128   @brief Get input local vectors for `MATCEED` `MatMult()` and `MatMultTranspose()` operations.
1129 
1130   Not collective across MPI processes.
1131 
1132   @param[in,out]  mat              `MATCEED`
1133   @param[out]     X_loc            Input PETSc local vector, or NULL
1134   @param[out]     Y_loc_transpose  Input PETSc local vector for transpose operation, or NULL
1135 
1136   @return An error code: 0 - success, otherwise - failure
1137 **/
1138 PetscErrorCode MatCeedGetLocalVectors(Mat mat, Vec *X_loc, Vec *Y_loc_transpose) {
1139   MatCeedContext ctx;
1140 
1141   PetscFunctionBeginUser;
1142   PetscCall(MatShellGetContext(mat, &ctx));
1143   if (X_loc) {
1144     *X_loc = NULL;
1145     PetscCall(VecReferenceCopy(ctx->X_loc, X_loc));
1146   }
1147   if (Y_loc_transpose) {
1148     *Y_loc_transpose = NULL;
1149     PetscCall(VecReferenceCopy(ctx->Y_loc_transpose, Y_loc_transpose));
1150   }
1151   PetscFunctionReturn(PETSC_SUCCESS);
1152 }
1153 
1154 /**
1155   @brief Restore input local vectors for `MATCEED` `MatMult()` and `MatMultTranspose()` operations.
1156 
1157   Not collective across MPI processes.
1158 
1159   @param[in,out]  mat              MatCeed
1160   @param[out]     X_loc            Input PETSc local vector, or NULL
1161   @param[out]     Y_loc_transpose  Input PETSc local vector for transpose operation, or NULL
1162 
1163   @return An error code: 0 - success, otherwise - failure
1164 **/
1165 PetscErrorCode MatCeedRestoreLocalVectors(Mat mat, Vec *X_loc, Vec *Y_loc_transpose) {
1166   PetscFunctionBeginUser;
1167   if (X_loc) PetscCall(VecDestroy(X_loc));
1168   if (Y_loc_transpose) PetscCall(VecDestroy(Y_loc_transpose));
1169   PetscFunctionReturn(PETSC_SUCCESS);
1170 }
1171 
1172 /**
1173   @brief Get libCEED `CeedOperator` for `MATCEED` `MatMult()` and `MatMultTranspose()` operations.
1174 
1175   Not collective across MPI processes.
1176 
1177   @param[in,out]  mat                MatCeed
1178   @param[out]     op_mult            libCEED `CeedOperator` for `MatMult()`, or NULL
1179   @param[out]     op_mult_transpose  libCEED `CeedOperator` for `MatMultTranspose()`, or NULL
1180 
1181   @return An error code: 0 - success, otherwise - failure
1182 **/
1183 PetscErrorCode MatCeedGetCeedOperators(Mat mat, CeedOperator *op_mult, CeedOperator *op_mult_transpose) {
1184   MatCeedContext ctx;
1185 
1186   PetscFunctionBeginUser;
1187   PetscCall(MatShellGetContext(mat, &ctx));
1188   if (op_mult) {
1189     *op_mult = NULL;
1190     PetscCallCeed(ctx->ceed, CeedOperatorReferenceCopy(ctx->op_mult, op_mult));
1191   }
1192   if (op_mult_transpose) {
1193     *op_mult_transpose = NULL;
1194     PetscCallCeed(ctx->ceed, CeedOperatorReferenceCopy(ctx->op_mult_transpose, op_mult_transpose));
1195   }
1196   PetscFunctionReturn(PETSC_SUCCESS);
1197 }
1198 
1199 /**
1200   @brief Restore libCEED `CeedOperator` for `MATCEED` `MatMult()` and `MatMultTranspose()` operations.
1201 
1202   Not collective across MPI processes.
1203 
1204   @param[in,out]  mat                MatCeed
1205   @param[out]     op_mult            libCEED `CeedOperator` for `MatMult()`, or NULL
1206   @param[out]     op_mult_transpose  libCEED `CeedOperator` for `MatMultTranspose()`, or NULL
1207 
1208   @return An error code: 0 - success, otherwise - failure
1209 **/
1210 PetscErrorCode MatCeedRestoreCeedOperators(Mat mat, CeedOperator *op_mult, CeedOperator *op_mult_transpose) {
1211   MatCeedContext ctx;
1212 
1213   PetscFunctionBeginUser;
1214   PetscCall(MatShellGetContext(mat, &ctx));
1215   if (op_mult) PetscCallCeed(ctx->ceed, CeedOperatorDestroy(op_mult));
1216   if (op_mult_transpose) PetscCallCeed(ctx->ceed, CeedOperatorDestroy(op_mult_transpose));
1217   PetscFunctionReturn(PETSC_SUCCESS);
1218 }
1219 
1220 /**
1221   @brief Set `PetscLogEvent` for `MATCEED` `MatMult()` and `MatMultTranspose()` operators.
1222 
1223   Not collective across MPI processes.
1224 
1225   @param[in,out]  mat                       MatCeed
  @param[in]      log_event_mult            `PetscLogEvent` for forward evaluation, or 0 to leave unchanged
  @param[in]      log_event_mult_transpose  `PetscLogEvent` for transpose evaluation, or 0 to leave unchanged
1228 
1229   @return An error code: 0 - success, otherwise - failure
1230 **/
1231 PetscErrorCode MatCeedSetLogEvents(Mat mat, PetscLogEvent log_event_mult, PetscLogEvent log_event_mult_transpose) {
1232   MatCeedContext ctx;
1233 
1234   PetscFunctionBeginUser;
1235   PetscCall(MatShellGetContext(mat, &ctx));
1236   if (log_event_mult) ctx->log_event_mult = log_event_mult;
1237   if (log_event_mult_transpose) ctx->log_event_mult_transpose = log_event_mult_transpose;
1238   PetscFunctionReturn(PETSC_SUCCESS);
1239 }
1240 
1241 /**
1242   @brief Get `PetscLogEvent` for `MATCEED` `MatMult()` and `MatMultTranspose()` operators.
1243 
1244   Not collective across MPI processes.
1245 
1246   @param[in,out]  mat                       MatCeed
1247   @param[out]     log_event_mult            `PetscLogEvent` for forward evaluation, or NULL
1248   @param[out]     log_event_mult_transpose  `PetscLogEvent` for transpose evaluation, or NULL
1249 
1250   @return An error code: 0 - success, otherwise - failure
1251 **/
1252 PetscErrorCode MatCeedGetLogEvents(Mat mat, PetscLogEvent *log_event_mult, PetscLogEvent *log_event_mult_transpose) {
1253   MatCeedContext ctx;
1254 
1255   PetscFunctionBeginUser;
1256   PetscCall(MatShellGetContext(mat, &ctx));
1257   if (log_event_mult) *log_event_mult = ctx->log_event_mult;
1258   if (log_event_mult_transpose) *log_event_mult_transpose = ctx->log_event_mult_transpose;
1259   PetscFunctionReturn(PETSC_SUCCESS);
1260 }
1261 
1262 /**
1263   @brief Set `CeedOperator` `PetscLogEvent` for `MATCEED` `MatMult()` and `MatMultTranspose()` operators.
1264 
1265   Not collective across MPI processes.
1266 
1267   @param[in,out]  mat                       MatCeed
  @param[in]      log_event_mult            `PetscLogEvent` for forward `CeedOperator` evaluation, or 0 to leave unchanged
  @param[in]      log_event_mult_transpose  `PetscLogEvent` for transpose `CeedOperator` evaluation, or 0 to leave unchanged
1270 
1271   @return An error code: 0 - success, otherwise - failure
1272 **/
1273 PetscErrorCode MatCeedSetCeedOperatorLogEvents(Mat mat, PetscLogEvent log_event_mult, PetscLogEvent log_event_mult_transpose) {
1274   MatCeedContext ctx;
1275 
1276   PetscFunctionBeginUser;
1277   PetscCall(MatShellGetContext(mat, &ctx));
1278   if (log_event_mult) ctx->log_event_ceed_mult = log_event_mult;
1279   if (log_event_mult_transpose) ctx->log_event_ceed_mult_transpose = log_event_mult_transpose;
1280   PetscFunctionReturn(PETSC_SUCCESS);
1281 }
1282 
1283 /**
1284   @brief Get `CeedOperator` `PetscLogEvent` for `MATCEED` `MatMult()` and `MatMultTranspose()` operators.
1285 
1286   Not collective across MPI processes.
1287 
1288   @param[in,out]  mat                       MatCeed
1289   @param[out]     log_event_mult            `PetscLogEvent` for forward `CeedOperator` evaluation, or NULL
1290   @param[out]     log_event_mult_transpose  `PetscLogEvent` for transpose `CeedOperator` evaluation, or NULL
1291 
1292   @return An error code: 0 - success, otherwise - failure
1293 **/
1294 PetscErrorCode MatCeedGetCeedOperatorLogEvents(Mat mat, PetscLogEvent *log_event_mult, PetscLogEvent *log_event_mult_transpose) {
1295   MatCeedContext ctx;
1296 
1297   PetscFunctionBeginUser;
1298   PetscCall(MatShellGetContext(mat, &ctx));
1299   if (log_event_mult) *log_event_mult = ctx->log_event_ceed_mult;
1300   if (log_event_mult_transpose) *log_event_mult_transpose = ctx->log_event_ceed_mult_transpose;
1301   PetscFunctionReturn(PETSC_SUCCESS);
1302 }
1303 
1304 // -----------------------------------------------------------------------------
1305 // Operator context data
1306 // -----------------------------------------------------------------------------
1307 
1308 /**
1309   @brief Setup context data for operator application.
1310 
1311   Collective across MPI processes.
1312 
1313   @param[in]   dm_x                           Input `DM`
1314   @param[in]   dm_y                           Output `DM`
1315   @param[in]   X_loc                          Input PETSc local vector, or NULL
1316   @param[in]   Y_loc_transpose                Input PETSc local vector for transpose operation, or NULL
1317   @param[in]   op_mult                        `CeedOperator` for forward evaluation
1318   @param[in]   op_mult_transpose              `CeedOperator` for transpose evaluation
1319   @param[in]   log_event_mult                 `PetscLogEvent` for forward evaluation
1320   @param[in]   log_event_mult_transpose       `PetscLogEvent` for transpose evaluation
1321   @param[in]   log_event_ceed_mult            `PetscLogEvent` for forward `CeedOperator` evaluation
1322   @param[in]   log_event_ceed_mult_transpose  `PetscLogEvent` for transpose `CeedOperator` evaluation
1323   @param[out]  ctx                            Context data for operator evaluation
1324 
1325   @return An error code: 0 - success, otherwise - failure
1326 **/
PetscErrorCode MatCeedContextCreate(DM dm_x, DM dm_y, Vec X_loc, Vec Y_loc_transpose, CeedOperator op_mult, CeedOperator op_mult_transpose,
                                    PetscLogEvent log_event_mult, PetscLogEvent log_event_mult_transpose, PetscLogEvent log_event_ceed_mult,
                                    PetscLogEvent log_event_ceed_mult_transpose, MatCeedContext *ctx) {
  CeedSize x_loc_len, y_loc_len;

  PetscFunctionBeginUser;

  // Allocate; the caller holds the initial reference
  PetscCall(PetscNew(ctx));
  (*ctx)->ref_count = 1;

  // Logging
  (*ctx)->log_event_mult                = log_event_mult;
  (*ctx)->log_event_mult_transpose      = log_event_mult_transpose;
  (*ctx)->log_event_ceed_mult           = log_event_ceed_mult;
  (*ctx)->log_event_ceed_mult_transpose = log_event_ceed_mult_transpose;

  // PETSc objects (reference counted; the optional local vectors may be NULL)
  PetscCall(DMReferenceCopy(dm_x, &(*ctx)->dm_x));
  PetscCall(DMReferenceCopy(dm_y, &(*ctx)->dm_y));
  if (X_loc) PetscCall(VecReferenceCopy(X_loc, &(*ctx)->X_loc));
  if (Y_loc_transpose) PetscCall(VecReferenceCopy(Y_loc_transpose, &(*ctx)->Y_loc_transpose));

  // Memtype: probe a local vector from dm_x once to record where PETSc vector data lives
  {
    const PetscScalar *x;
    Vec                X;

    PetscCall(DMGetLocalVector(dm_x, &X));
    PetscCall(VecGetArrayReadAndMemType(X, &x, &(*ctx)->mem_type));
    PetscCall(VecRestoreArrayReadAndMemType(X, &x));
    PetscCall(DMRestoreLocalVector(dm_x, &X));
  }

  // libCEED objects
  PetscCheck(CeedOperatorGetCeed(op_mult, &(*ctx)->ceed) == CEED_ERROR_SUCCESS, PETSC_COMM_SELF, PETSC_ERR_LIB,
             "retrieving Ceed context object failed");
  // Take our own reference to the Ceed object retrieved above
  PetscCallCeed((*ctx)->ceed, CeedReference((*ctx)->ceed));
  PetscCallCeed((*ctx)->ceed, CeedOperatorGetActiveVectorLengths(op_mult, &x_loc_len, &y_loc_len));
  PetscCallCeed((*ctx)->ceed, CeedOperatorReferenceCopy(op_mult, &(*ctx)->op_mult));
  if (op_mult_transpose) PetscCallCeed((*ctx)->ceed, CeedOperatorReferenceCopy(op_mult_transpose, &(*ctx)->op_mult_transpose));
  // Work vectors sized to the operator's active input/output lengths
  PetscCallCeed((*ctx)->ceed, CeedVectorCreate((*ctx)->ceed, x_loc_len, &(*ctx)->x_loc));
  PetscCallCeed((*ctx)->ceed, CeedVectorCreate((*ctx)->ceed, y_loc_len, &(*ctx)->y_loc));

  // Flop counting: cache estimates for PETSc performance logging
  {
    CeedSize ceed_flops_estimate = 0;

    PetscCallCeed((*ctx)->ceed, CeedOperatorGetFlopsEstimate(op_mult, &ceed_flops_estimate));
    (*ctx)->flops_mult = ceed_flops_estimate;
    if (op_mult_transpose) {
      PetscCallCeed((*ctx)->ceed, CeedOperatorGetFlopsEstimate(op_mult_transpose, &ceed_flops_estimate));
      (*ctx)->flops_mult_transpose = ceed_flops_estimate;
    }
  }

  // Check sizes: operator, DM, and user-provided local vector dimensions must all agree
  if (x_loc_len > 0 || y_loc_len > 0) {
    CeedSize ctx_x_loc_len, ctx_y_loc_len;
    PetscInt X_loc_len, dm_x_loc_len, Y_loc_len, dm_y_loc_len;
    Vec      dm_X_loc, dm_Y_loc;

    // -- Input: X_loc (if given), the operator, and the work vector must match dm_x
    PetscCall(DMGetLocalVector(dm_x, &dm_X_loc));
    PetscCall(VecGetLocalSize(dm_X_loc, &dm_x_loc_len));
    PetscCall(DMRestoreLocalVector(dm_x, &dm_X_loc));
    PetscCallCeed((*ctx)->ceed, CeedVectorGetLength((*ctx)->x_loc, &ctx_x_loc_len));
    if (X_loc) {
      PetscCall(VecGetLocalSize(X_loc, &X_loc_len));
      PetscCheck(X_loc_len == dm_x_loc_len, PETSC_COMM_SELF, PETSC_ERR_LIB,
                 "X_loc (%" PetscInt_FMT ") must match dm_x (%" PetscInt_FMT ") dimensions", X_loc_len, dm_x_loc_len);
    }
    PetscCheck(x_loc_len == dm_x_loc_len, PETSC_COMM_SELF, PETSC_ERR_LIB, "op (%" CeedSize_FMT ") must match dm_x (%" PetscInt_FMT ") dimensions",
               x_loc_len, dm_x_loc_len);
    PetscCheck(x_loc_len == ctx_x_loc_len, PETSC_COMM_SELF, PETSC_ERR_LIB, "x_loc (%" CeedSize_FMT ") must match op dimensions (%" CeedSize_FMT ")",
               x_loc_len, ctx_x_loc_len);

    // -- Output: the work vector must match dm_y
    PetscCall(DMGetLocalVector(dm_y, &dm_Y_loc));
    PetscCall(VecGetLocalSize(dm_Y_loc, &dm_y_loc_len));
    PetscCall(DMRestoreLocalVector(dm_y, &dm_Y_loc));
    PetscCallCeed((*ctx)->ceed, CeedVectorGetLength((*ctx)->y_loc, &ctx_y_loc_len));
    PetscCheck(ctx_y_loc_len == dm_y_loc_len, PETSC_COMM_SELF, PETSC_ERR_LIB, "op (%" CeedSize_FMT ") must match dm_y (%" PetscInt_FMT ") dimensions",
               ctx_y_loc_len, dm_y_loc_len);

    // -- Transpose: Y_loc_transpose (if given) must match dm_y
    if (Y_loc_transpose) {
      PetscCall(VecGetLocalSize(Y_loc_transpose, &Y_loc_len));
      PetscCheck(Y_loc_len == dm_y_loc_len, PETSC_COMM_SELF, PETSC_ERR_LIB,
                 "Y_loc_transpose (%" PetscInt_FMT ") must match dm_y (%" PetscInt_FMT ") dimensions", Y_loc_len, dm_y_loc_len);
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
1421 
1422 /**
1423   @brief Increment reference counter for `MATCEED` context.
1424 
1425   Not collective across MPI processes.
1426 
1427   @param[in,out]  ctx  Context data
1428 
1429   @return An error code: 0 - success, otherwise - failure
1430 **/
1431 PetscErrorCode MatCeedContextReference(MatCeedContext ctx) {
1432   PetscFunctionBeginUser;
1433   ctx->ref_count++;
1434   PetscFunctionReturn(PETSC_SUCCESS);
1435 }
1436 
1437 /**
1438   @brief Copy reference for `MATCEED`.
1439          Note: If `ctx_copy` is non-null, it is assumed to be a valid pointer to a `MatCeedContext`.
1440 
1441   Not collective across MPI processes.
1442 
1443   @param[in]   ctx       Context data
1444   @param[out]  ctx_copy  Copy of pointer to context data
1445 
1446   @return An error code: 0 - success, otherwise - failure
1447 **/
1448 PetscErrorCode MatCeedContextReferenceCopy(MatCeedContext ctx, MatCeedContext *ctx_copy) {
1449   PetscFunctionBeginUser;
1450   PetscCall(MatCeedContextReference(ctx));
1451   PetscCall(MatCeedContextDestroy(*ctx_copy));
1452   *ctx_copy = ctx;
1453   PetscFunctionReturn(PETSC_SUCCESS);
1454 }
1455 
1456 /**
1457   @brief Destroy context data for operator application.
1458 
1459   Collective across MPI processes.
1460 
1461   @param[in,out]  ctx  Context data for operator evaluation
1462 
1463   @return An error code: 0 - success, otherwise - failure
1464 **/
PetscErrorCode MatCeedContextDestroy(MatCeedContext ctx) {
  PetscFunctionBeginUser;
  // Reference counted: only the final reference performs actual destruction
  if (!ctx || --ctx->ref_count > 0) PetscFunctionReturn(PETSC_SUCCESS);

  // PETSc objects
  PetscCall(DMDestroy(&ctx->dm_x));
  PetscCall(DMDestroy(&ctx->dm_y));
  PetscCall(VecDestroy(&ctx->X_loc));
  PetscCall(VecDestroy(&ctx->Y_loc_transpose));
  PetscCall(MatDestroy(&ctx->mat_assembled_full_internal));
  PetscCall(MatDestroy(&ctx->mat_assembled_pbd_internal));
  PetscCall(PetscFree(ctx->coo_mat_type));
  PetscCall(PetscFree(ctx->mats_assembled_full));
  PetscCall(PetscFree(ctx->mats_assembled_pbd));

  // libCEED objects; the Ceed itself is destroyed last since the calls above use it
  PetscCallCeed(ctx->ceed, CeedVectorDestroy(&ctx->x_loc));
  PetscCallCeed(ctx->ceed, CeedVectorDestroy(&ctx->y_loc));
  PetscCallCeed(ctx->ceed, CeedVectorDestroy(&ctx->coo_values_full));
  PetscCallCeed(ctx->ceed, CeedVectorDestroy(&ctx->coo_values_pbd));
  PetscCallCeed(ctx->ceed, CeedOperatorDestroy(&ctx->op_mult));
  PetscCallCeed(ctx->ceed, CeedOperatorDestroy(&ctx->op_mult_transpose));
  PetscCheck(CeedDestroy(&ctx->ceed) == CEED_ERROR_SUCCESS, PETSC_COMM_SELF, PETSC_ERR_LIB, "destroying libCEED context object failed");

  // Deallocate
  // NOTE(review): this flag is stored in memory freed on the next line, so reading it through a
  // stale reference is use-after-free; confirm it is intended only as a debugging aid
  ctx->is_destroyed = PETSC_TRUE;  // Flag as destroyed in case someone has stale ref
  PetscCall(PetscFree(ctx));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1494 
1495 /**
1496   @brief Compute the diagonal of an operator via libCEED.
1497 
1498   Collective across MPI processes.
1499 
1500   @param[in]   A  `MATCEED`
1501   @param[out]  D  Vector holding operator diagonal
1502 
1503   @return An error code: 0 - success, otherwise - failure
1504 **/
1505 PetscErrorCode MatGetDiagonal_Ceed(Mat A, Vec D) {
1506   PetscMemType   mem_type;
1507   Vec            D_loc;
1508   MatCeedContext ctx;
1509 
1510   PetscFunctionBeginUser;
1511   PetscCall(MatShellGetContext(A, &ctx));
1512 
1513   // Place PETSc vector in libCEED vector
1514   PetscCall(PetscLogEventBegin(MATCEED_ASSEMBLE_DIAGONAL, A, D, NULL, NULL));
1515   PetscCall(DMGetLocalVector(ctx->dm_x, &D_loc));
1516   PetscCall(VecPetscToCeed(D_loc, &mem_type, ctx->x_loc));
1517 
1518   // Compute Diagonal
1519   PetscCall(PetscLogEventBegin(MATCEED_ASSEMBLE_DIAGONAL_CEEDOP, A, D, NULL, NULL));
1520   PetscCallCeed(ctx->ceed, CeedOperatorLinearAssembleDiagonal(ctx->op_mult, ctx->x_loc, CEED_REQUEST_IMMEDIATE));
1521   PetscCall(PetscLogEventEnd(MATCEED_ASSEMBLE_DIAGONAL_CEEDOP, A, D, NULL, NULL));
1522 
1523   // Restore PETSc vector
1524   PetscCall(VecCeedToPetsc(ctx->x_loc, mem_type, D_loc));
1525 
1526   // Local-to-Global
1527   PetscCall(VecZeroEntries(D));
1528   PetscCall(DMLocalToGlobal(ctx->dm_x, D_loc, ADD_VALUES, D));
1529   PetscCall(DMRestoreLocalVector(ctx->dm_x, &D_loc));
1530   PetscCall(PetscLogEventEnd(MATCEED_ASSEMBLE_DIAGONAL, A, D, NULL, NULL));
1531   PetscFunctionReturn(PETSC_SUCCESS);
1532 }
1533 
1534 /**
1535   @brief Compute `A X = Y` for a `MATCEED`.
1536 
1537   Collective across MPI processes.
1538 
1539   @param[in]   A  `MATCEED`
1540   @param[in]   X  Input PETSc vector
1541   @param[out]  Y  Output PETSc vector
1542 
1543   @return An error code: 0 - success, otherwise - failure
1544 **/
1545 PetscErrorCode MatMult_Ceed(Mat A, Vec X, Vec Y) {
1546   MatCeedContext ctx;
1547 
1548   PetscFunctionBeginUser;
1549   PetscCall(MatShellGetContext(A, &ctx));
1550   PetscCall(PetscLogEventBegin(ctx->log_event_mult, A, X, Y, NULL));
1551 
1552   {
1553     PetscMemType x_mem_type, y_mem_type;
1554     Vec          X_loc = ctx->X_loc, Y_loc;
1555 
1556     // Get local vectors
1557     if (!ctx->X_loc) PetscCall(DMGetLocalVector(ctx->dm_x, &X_loc));
1558     PetscCall(DMGetLocalVector(ctx->dm_y, &Y_loc));
1559 
1560     // Global-to-local
1561     PetscCall(DMGlobalToLocal(ctx->dm_x, X, INSERT_VALUES, X_loc));
1562 
1563     // Setup libCEED vectors
1564     PetscCall(VecReadPetscToCeed(X_loc, &x_mem_type, ctx->x_loc));
1565     PetscCall(VecZeroEntries(Y_loc));
1566     PetscCall(VecPetscToCeed(Y_loc, &y_mem_type, ctx->y_loc));
1567 
1568     // Apply libCEED operator
1569     PetscCall(PetscLogGpuTimeBegin());
1570     PetscCall(PetscLogEventBegin(ctx->log_event_ceed_mult, A, X, Y, NULL));
1571     PetscCallCeed(ctx->ceed, CeedOperatorApplyAdd(ctx->op_mult, ctx->x_loc, ctx->y_loc, CEED_REQUEST_IMMEDIATE));
1572     PetscCall(PetscLogEventEnd(ctx->log_event_ceed_mult, A, X, Y, NULL));
1573     PetscCall(PetscLogGpuTimeEnd());
1574 
1575     // Restore PETSc vectors
1576     PetscCall(VecReadCeedToPetsc(ctx->x_loc, x_mem_type, X_loc));
1577     PetscCall(VecCeedToPetsc(ctx->y_loc, y_mem_type, Y_loc));
1578 
1579     // Local-to-global
1580     PetscCall(VecZeroEntries(Y));
1581     PetscCall(DMLocalToGlobal(ctx->dm_y, Y_loc, ADD_VALUES, Y));
1582 
1583     // Restore local vectors, as needed
1584     if (!ctx->X_loc) PetscCall(DMRestoreLocalVector(ctx->dm_x, &X_loc));
1585     PetscCall(DMRestoreLocalVector(ctx->dm_y, &Y_loc));
1586   }
1587 
1588   // Log flops
1589   if (PetscMemTypeDevice(ctx->mem_type)) PetscCall(PetscLogGpuFlops(ctx->flops_mult));
1590   else PetscCall(PetscLogFlops(ctx->flops_mult));
1591   PetscCall(PetscLogEventEnd(ctx->log_event_mult, A, X, Y, NULL));
1592   PetscFunctionReturn(PETSC_SUCCESS);
1593 }
1594 
1595 /**
1596   @brief Compute `A^T Y = X` for a `MATCEED`.
1597 
1598   Collective across MPI processes.
1599 
1600   @param[in]   A  `MATCEED`
1601   @param[in]   Y  Input PETSc vector
1602   @param[out]  X  Output PETSc vector
1603 
1604   @return An error code: 0 - success, otherwise - failure
1605 **/
1606 PetscErrorCode MatMultTranspose_Ceed(Mat A, Vec Y, Vec X) {
1607   MatCeedContext ctx;
1608 
1609   PetscFunctionBeginUser;
1610   PetscCall(MatShellGetContext(A, &ctx));
1611   PetscCall(PetscLogEventBegin(ctx->log_event_mult_transpose, A, Y, X, NULL));
1612 
1613   {
1614     PetscMemType x_mem_type, y_mem_type;
1615     Vec          X_loc, Y_loc = ctx->Y_loc_transpose;
1616 
1617     // Get local vectors
1618     if (!ctx->Y_loc_transpose) PetscCall(DMGetLocalVector(ctx->dm_y, &Y_loc));
1619     PetscCall(DMGetLocalVector(ctx->dm_x, &X_loc));
1620 
1621     // Global-to-local
1622     PetscCall(DMGlobalToLocal(ctx->dm_y, Y, INSERT_VALUES, Y_loc));
1623 
1624     // Setup libCEED vectors
1625     PetscCall(VecReadPetscToCeed(Y_loc, &y_mem_type, ctx->y_loc));
1626     PetscCall(VecZeroEntries(X_loc));
1627     PetscCall(VecPetscToCeed(X_loc, &x_mem_type, ctx->x_loc));
1628 
1629     // Apply libCEED operator
1630     PetscCall(PetscLogGpuTimeBegin());
1631     PetscCall(PetscLogEventBegin(ctx->log_event_ceed_mult_transpose, A, Y, X, NULL));
1632     PetscCallCeed(ctx->ceed, CeedOperatorApplyAdd(ctx->op_mult_transpose, ctx->y_loc, ctx->x_loc, CEED_REQUEST_IMMEDIATE));
1633     PetscCall(PetscLogEventEnd(ctx->log_event_ceed_mult_transpose, A, Y, X, NULL));
1634     PetscCall(PetscLogGpuTimeEnd());
1635 
1636     // Restore PETSc vectors
1637     PetscCall(VecReadCeedToPetsc(ctx->y_loc, y_mem_type, Y_loc));
1638     PetscCall(VecCeedToPetsc(ctx->x_loc, x_mem_type, X_loc));
1639 
1640     // Local-to-global
1641     PetscCall(VecZeroEntries(X));
1642     PetscCall(DMLocalToGlobal(ctx->dm_x, X_loc, ADD_VALUES, X));
1643 
1644     // Restore local vectors, as needed
1645     if (!ctx->Y_loc_transpose) PetscCall(DMRestoreLocalVector(ctx->dm_y, &Y_loc));
1646     PetscCall(DMRestoreLocalVector(ctx->dm_x, &X_loc));
1647   }
1648 
1649   // Log flops
1650   if (PetscMemTypeDevice(ctx->mem_type)) PetscCall(PetscLogGpuFlops(ctx->flops_mult_transpose));
1651   else PetscCall(PetscLogFlops(ctx->flops_mult_transpose));
1652   PetscCall(PetscLogEventEnd(ctx->log_event_mult_transpose, A, Y, X, NULL));
1653   PetscFunctionReturn(PETSC_SUCCESS);
1654 }
1655