xref: /honee/src/mat-ceed.c (revision cbdfeaf49eecfe108b28f98e5e7c308a9796a444)
1 // SPDX-FileCopyrightText: Copyright (c) 2017-2024, HONEE contributors.
2 // SPDX-License-Identifier: Apache-2.0 OR BSD-2-Clause
3 /// @file
4 /// MatCEED implementation
5 
6 #include <ceed.h>
7 #include <ceed/backend.h>
8 #include <mat-ceed-impl.h>
9 #include <mat-ceed.h>
10 #include <petsc-ceed-utils.h>
11 #include <petsc-ceed.h>
12 #include <petscdm.h>
13 #include <petscmat.h>
14 #include <stdbool.h>
15 #include <stdio.h>
16 #include <stdlib.h>
17 #include <string.h>
18 
19 PetscClassId  MATCEED_CLASSID;
20 PetscLogEvent MATCEED_MULT, MATCEED_MULT_CEEDOP, MATCEED_MULT_TRANSPOSE, MATCEED_MULT_TRANSPOSE_CEEDOP, MATCEED_ASSEMBLE_DIAGONAL,
21     MATCEED_ASSEMBLE_DIAGONAL_CEEDOP, MATCEED_SETUP_PBDIAGONAL, MATCEED_SETUP_PBDIAGONAL_CEEDOP, MATCEED_ASSEMBLE_PBDIAGONAL,
22     MATCEED_ASSEMBLE_PBDIAGONAL_CEEDOP, MATCEED_SETUP_FULL, MATCEED_SETUP_FULL_CEEDOP, MATCEED_ASSEMBLE_FULL, MATCEED_ASSEMBLE_FULL_CEEDOP;
23 
24 /**
25   @brief Register MATCEED log events.
26 
27   Not collective across MPI processes.
28 
29   @return An error code: 0 - success, otherwise - failure
30 **/
31 static PetscErrorCode MatCeedRegisterLogEvents() {
32   static PetscBool registered = PETSC_FALSE;
33 
34   PetscFunctionBeginUser;
35   if (registered) PetscFunctionReturn(PETSC_SUCCESS);
36   PetscCall(PetscClassIdRegister("MatCEED", &MATCEED_CLASSID));
37   PetscCall(PetscLogEventRegister("MatCEEDMul", MATCEED_CLASSID, &MATCEED_MULT));
38   PetscCall(PetscLogEventRegister("MatCEEDMulCeed", MATCEED_CLASSID, &MATCEED_MULT_CEEDOP));
39   PetscCall(PetscLogEventRegister("MatCEEDMulT", MATCEED_CLASSID, &MATCEED_MULT_TRANSPOSE));
40   PetscCall(PetscLogEventRegister("MatCEEDMulTCeed", MATCEED_CLASSID, &MATCEED_MULT_TRANSPOSE_CEEDOP));
41   PetscCall(PetscLogEventRegister("MatCEEDAsmDiag", MATCEED_CLASSID, &MATCEED_ASSEMBLE_DIAGONAL));
42   PetscCall(PetscLogEventRegister("MatCEEDAsmPBDSU", MATCEED_CLASSID, &MATCEED_SETUP_PBDIAGONAL));
43   PetscCall(PetscLogEventRegister("MatCEEDAsmPBDSUCeed", MATCEED_CLASSID, &MATCEED_SETUP_PBDIAGONAL_CEEDOP));
44   PetscCall(PetscLogEventRegister("MatCEEDAsmPBD", MATCEED_CLASSID, &MATCEED_ASSEMBLE_PBDIAGONAL));
45   PetscCall(PetscLogEventRegister("MatCEEDAsmPBDCeed", MATCEED_CLASSID, &MATCEED_ASSEMBLE_PBDIAGONAL_CEEDOP));
46   PetscCall(PetscLogEventRegister("MatCEEDAsmSU", MATCEED_CLASSID, &MATCEED_SETUP_FULL));
47   PetscCall(PetscLogEventRegister("MatCEEDAsmSUCeed", MATCEED_CLASSID, &MATCEED_SETUP_FULL_CEEDOP));
48   PetscCall(PetscLogEventRegister("MatCEEDAsm", MATCEED_CLASSID, &MATCEED_ASSEMBLE_FULL));
49   PetscCall(PetscLogEventRegister("MatCEEDAsmCeed", MATCEED_CLASSID, &MATCEED_ASSEMBLE_FULL_CEEDOP));
50   registered = PETSC_TRUE;
51   PetscFunctionReturn(PETSC_SUCCESS);
52 }
53 
/**
  @brief Assemble the point block diagonal of a `MATCEED` into a `MATAIJ` or similar.
         The `mat_coo` preallocation is set to match the sparsity pattern of `mat_ceed`.
         The caller is responsible for assuring the global and local sizes are compatible, otherwise this function will fail.

  Collective across MPI processes.

  @param[in]      mat_ceed  `MATCEED` to assemble
  @param[in,out]  mat_coo   `MATAIJ` or similar to assemble into

  @return An error code: 0 - success, otherwise - failure
**/
static PetscErrorCode MatCeedAssemblePointBlockDiagonalCOO(Mat mat_ceed, Mat mat_coo) {
  MatCeedContext ctx;

  PetscFunctionBeginUser;
  PetscCall(MatShellGetContext(mat_ceed, &ctx));

  // Check if COO pattern set
  {
    PetscInt index = -1;

    // Linear scan of the mats previously preallocated against this context
    for (PetscInt i = 0; i < ctx->num_mats_assembled_pbd; i++) {
      if (ctx->mats_assembled_pbd[i] == mat_coo) index = i;
    }
    if (index == -1) {
      PetscInt     *rows_petsc = NULL, *cols_petsc = NULL;
      CeedInt      *rows_ceed, *cols_ceed;
      PetscCount    num_entries;
      PetscLogStage stage_amg_setup;

      // -- Assemble sparsity pattern if mat hasn't been assembled before
      PetscCall(PetscLogStageGetId("MatCEED Asm Setup", &stage_amg_setup));
      if (stage_amg_setup == -1) {
        PetscCall(PetscLogStageRegister("MatCEED Asm Setup", &stage_amg_setup));
      }
      PetscCall(PetscLogStagePush(stage_amg_setup));
      PetscCall(PetscLogEventBegin(MATCEED_SETUP_PBDIAGONAL, mat_ceed, mat_coo, NULL, NULL));
      PetscCall(PetscLogEventBegin(MATCEED_SETUP_PBDIAGONAL_CEEDOP, mat_ceed, mat_coo, NULL, NULL));
      // libCEED computes the (row, col) index pattern of the point block diagonal
      PetscCallCeed(ctx->ceed, CeedOperatorLinearAssemblePointBlockDiagonalSymbolic(ctx->op_mult, &num_entries, &rows_ceed, &cols_ceed));
      PetscCall(PetscLogEventEnd(MATCEED_SETUP_PBDIAGONAL_CEEDOP, mat_ceed, mat_coo, NULL, NULL));
      // Convert CeedInt index arrays to PetscInt; presumably this consumes/frees the CeedInt
      // arrays (passed by address) — confirm against IntArrayCeedToPetsc
      PetscCall(IntArrayCeedToPetsc(num_entries, &rows_ceed, &rows_petsc));
      PetscCall(IntArrayCeedToPetsc(num_entries, &cols_ceed, &cols_petsc));
      PetscCall(MatSetPreallocationCOOLocal(mat_coo, num_entries, rows_petsc, cols_petsc));
      // PetscInt arrays were malloc'd by the conversion helper, so plain free() here
      free(rows_petsc);
      free(cols_petsc);
      // Lazily create the reusable CEED values vector shared by all PBD assemblies
      if (!ctx->coo_values_pbd) PetscCallCeed(ctx->ceed, CeedVectorCreate(ctx->ceed, num_entries, &ctx->coo_values_pbd));
      // Record mat_coo so subsequent calls skip the symbolic setup
      PetscCall(PetscRealloc(++ctx->num_mats_assembled_pbd * sizeof(Mat), &ctx->mats_assembled_pbd));
      ctx->mats_assembled_pbd[ctx->num_mats_assembled_pbd - 1] = mat_coo;
      PetscCall(PetscLogEventEnd(MATCEED_SETUP_PBDIAGONAL, mat_ceed, mat_coo, NULL, NULL));
      PetscCall(PetscLogStagePop());
    }
  }

  // Assemble mat_ceed
  PetscCall(PetscLogEventBegin(MATCEED_ASSEMBLE_PBDIAGONAL, mat_ceed, mat_coo, NULL, NULL));
  PetscCall(MatAssemblyBegin(mat_coo, MAT_FINAL_ASSEMBLY));
  {
    const CeedScalar *values;
    MatType           mat_type;
    CeedMemType       mem_type = CEED_MEM_HOST;
    PetscBool         is_spd, is_spd_known;

    // Keep values on the device when the target mat type is GPU-backed
    PetscCall(MatGetType(mat_coo, &mat_type));
    if (strstr(mat_type, "cusparse")) mem_type = CEED_MEM_DEVICE;
    else if (strstr(mat_type, "kokkos")) mem_type = CEED_MEM_DEVICE;
    else mem_type = CEED_MEM_HOST;

    PetscCall(PetscLogEventBegin(MATCEED_ASSEMBLE_PBDIAGONAL_CEEDOP, mat_ceed, mat_coo, NULL, NULL));
    PetscCallCeed(ctx->ceed, CeedOperatorLinearAssemblePointBlockDiagonal(ctx->op_mult, ctx->coo_values_pbd, CEED_REQUEST_IMMEDIATE));
    PetscCall(PetscLogEventEnd(MATCEED_ASSEMBLE_PBDIAGONAL_CEEDOP, mat_ceed, mat_coo, NULL, NULL));
    PetscCallCeed(ctx->ceed, CeedVectorGetArrayRead(ctx->coo_values_pbd, mem_type, &values));
    PetscCall(MatSetValuesCOO(mat_coo, values, INSERT_VALUES));
    // Propagate SPD information from the shell mat to the assembled mat when known
    PetscCall(MatIsSPDKnown(mat_ceed, &is_spd_known, &is_spd));
    if (is_spd_known) PetscCall(MatSetOption(mat_coo, MAT_SPD, is_spd));
    PetscCallCeed(ctx->ceed, CeedVectorRestoreArrayRead(ctx->coo_values_pbd, &values));
  }
  PetscCall(MatAssemblyEnd(mat_coo, MAT_FINAL_ASSEMBLY));
  PetscCall(PetscLogEventEnd(MATCEED_ASSEMBLE_PBDIAGONAL, mat_ceed, mat_coo, NULL, NULL));
  PetscFunctionReturn(PETSC_SUCCESS);
}
135 
136 /**
137   @brief Assemble inner `Mat` for diagonal `PC` operations
138 
139   Collective across MPI processes.
140 
141   @param[in]   mat_ceed      `MATCEED` to invert
142   @param[in]   use_ceed_pbd  Boolean flag to use libCEED PBD assembly
143   @param[out]  mat_inner     Inner `Mat` for diagonal operations
144 
145   @return An error code: 0 - success, otherwise - failure
146 **/
147 static PetscErrorCode MatCeedAssembleInnerBlockDiagonalMat(Mat mat_ceed, PetscBool use_ceed_pbd, Mat *mat_inner) {
148   MatCeedContext ctx;
149 
150   PetscFunctionBeginUser;
151   PetscCall(MatShellGetContext(mat_ceed, &ctx));
152   if (use_ceed_pbd) {
153     // Check if COO pattern set
154     if (!ctx->mat_assembled_pbd_internal) PetscCall(MatCeedCreateMatCOO(mat_ceed, &ctx->mat_assembled_pbd_internal));
155 
156     // Assemble mat_assembled_full_internal
157     PetscCall(MatCeedAssemblePointBlockDiagonalCOO(mat_ceed, ctx->mat_assembled_pbd_internal));
158     if (mat_inner) *mat_inner = ctx->mat_assembled_pbd_internal;
159   } else {
160     // Check if COO pattern set
161     if (!ctx->mat_assembled_full_internal) PetscCall(MatCeedCreateMatCOO(mat_ceed, &ctx->mat_assembled_full_internal));
162 
163     // Assemble mat_assembled_full_internal
164     PetscCall(MatCeedAssembleCOO(mat_ceed, ctx->mat_assembled_full_internal));
165     if (mat_inner) *mat_inner = ctx->mat_assembled_full_internal;
166   }
167   PetscFunctionReturn(PETSC_SUCCESS);
168 }
169 
170 /**
171   @brief Get `MATCEED` diagonal block for Jacobi.
172 
173   Collective across MPI processes.
174 
175   @param[in]   mat_ceed   `MATCEED` to invert
176   @param[out]  mat_block  The diagonal block matrix
177 
178   @return An error code: 0 - success, otherwise - failure
179 **/
180 static PetscErrorCode MatGetDiagonalBlock_Ceed(Mat mat_ceed, Mat *mat_block) {
181   Mat            mat_inner = NULL;
182   MatCeedContext ctx;
183 
184   PetscFunctionBeginUser;
185   PetscCall(MatShellGetContext(mat_ceed, &ctx));
186 
187   // Assemble inner mat if needed
188   PetscCall(MatCeedAssembleInnerBlockDiagonalMat(mat_ceed, ctx->is_ceed_pbd_valid, &mat_inner));
189 
190   // Get block diagonal
191   PetscCall(MatGetDiagonalBlock(mat_inner, mat_block));
192   PetscFunctionReturn(PETSC_SUCCESS);
193 }
194 
195 /**
196   @brief Invert `MATCEED` diagonal block for Jacobi.
197 
198   Collective across MPI processes.
199 
200   @param[in]   mat_ceed  `MATCEED` to invert
201   @param[out]  values    The block inverses in column major order
202 
203   @return An error code: 0 - success, otherwise - failure
204 **/
205 static PetscErrorCode MatInvertBlockDiagonal_Ceed(Mat mat_ceed, const PetscScalar **values) {
206   Mat            mat_inner = NULL;
207   MatCeedContext ctx;
208 
209   PetscFunctionBeginUser;
210   PetscCall(MatShellGetContext(mat_ceed, &ctx));
211 
212   // Assemble inner mat if needed
213   PetscCall(MatCeedAssembleInnerBlockDiagonalMat(mat_ceed, ctx->is_ceed_pbd_valid, &mat_inner));
214 
215   // Invert PB diagonal
216   PetscCall(MatInvertBlockDiagonal(mat_inner, values));
217   PetscFunctionReturn(PETSC_SUCCESS);
218 }
219 
220 /**
221   @brief Invert `MATCEED` variable diagonal block for Jacobi.
222 
223   Collective across MPI processes.
224 
225   @param[in]   mat_ceed     `MATCEED` to invert
226   @param[in]   num_blocks   The number of blocks on the process
227   @param[in]   block_sizes  The size of each block on the process
228   @param[out]  values       The block inverses in column major order
229 
230   @return An error code: 0 - success, otherwise - failure
231 **/
232 static PetscErrorCode MatInvertVariableBlockDiagonal_Ceed(Mat mat_ceed, PetscInt num_blocks, const PetscInt *block_sizes, PetscScalar *values) {
233   Mat            mat_inner = NULL;
234   MatCeedContext ctx;
235 
236   PetscFunctionBeginUser;
237   PetscCall(MatShellGetContext(mat_ceed, &ctx));
238 
239   // Assemble inner mat if needed
240   PetscCall(MatCeedAssembleInnerBlockDiagonalMat(mat_ceed, ctx->is_ceed_vpbd_valid, &mat_inner));
241 
242   // Invert PB diagonal
243   PetscCall(MatInvertVariableBlockDiagonal(mat_inner, num_blocks, block_sizes, values));
244   PetscFunctionReturn(PETSC_SUCCESS);
245 }
246 
247 /**
248   @brief View `MATCEED`.
249 
250   Collective across MPI processes.
251 
252   @param[in]   mat_ceed  `MATCEED` to view
253   @param[in]   viewer    The visualization context
254 
255   @return An error code: 0 - success, otherwise - failure
256 **/
257 static PetscErrorCode MatView_Ceed(Mat mat_ceed, PetscViewer viewer) {
258   PetscBool         is_ascii;
259   PetscViewerFormat format;
260   PetscMPIInt       size, rank;
261   MatCeedContext    ctx;
262 
263   PetscFunctionBeginUser;
264   PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2);
265   PetscCall(MatShellGetContext(mat_ceed, &ctx));
266   if (!viewer) PetscCall(PetscViewerASCIIGetStdout(PetscObjectComm((PetscObject)mat_ceed), &viewer));
267 
268   PetscCall(PetscViewerGetFormat(viewer, &format));
269   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)mat_ceed), &size));
270   if (size == 1 && format == PETSC_VIEWER_LOAD_BALANCE) PetscFunctionReturn(PETSC_SUCCESS);
271 
272   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat_ceed), &rank));
273   if (rank != 0) PetscFunctionReturn(PETSC_SUCCESS);
274 
275   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &is_ascii));
276   {
277     PetscBool is_detailed     = format == PETSC_VIEWER_ASCII_INFO_DETAIL;
278     char      rank_string[16] = {'\0'};
279     FILE     *file;
280 
281     PetscCall(PetscViewerASCIIPrintf(viewer, "MatCEED:\n  Default COO MatType:%s\n", ctx->coo_mat_type));
282     PetscCall(PetscViewerASCIIGetPointer(viewer, &file));
283     PetscCall(PetscSNPrintf(rank_string, 16, "on Rank %d", rank));
284     PetscCall(PetscViewerASCIIPrintf(viewer, " CeedOperator Apply %s:\n", is_detailed ? rank_string : "Summary"));
285     if (is_detailed) PetscCallCeed(ctx->ceed, CeedOperatorView(ctx->op_mult, file));
286     else PetscCallCeed(ctx->ceed, CeedOperatorViewTerse(ctx->op_mult, file));
287     if (ctx->op_mult_transpose) {
288       PetscCall(PetscViewerASCIIPrintf(viewer, "  CeedOperator ApplyTranspose %s:\n", is_detailed ? rank_string : "Summary"));
289       if (is_detailed) PetscCallCeed(ctx->ceed, CeedOperatorView(ctx->op_mult_transpose, file));
290       else PetscCallCeed(ctx->ceed, CeedOperatorViewTerse(ctx->op_mult_transpose, file));
291     }
292   }
293   PetscFunctionReturn(PETSC_SUCCESS);
294 }
295 
296 // -----------------------------------------------------------------------------
297 // MatCeed
298 // -----------------------------------------------------------------------------
299 
/**
  @brief Create PETSc `Mat` from libCEED operators.

  Collective across MPI processes.

  @param[in]   dm_x               Input `DM`
  @param[in]   dm_y               Output `DM`; may be NULL, in which case `dm_x` is used
  @param[in]   op_mult            `CeedOperator` for forward evaluation
  @param[in]   op_mult_transpose  `CeedOperator` for transpose evaluation; may be NULL
  @param[out]  mat                New MatCeed

  @return An error code: 0 - success, otherwise - failure
**/
PetscErrorCode MatCreateCeed(DM dm_x, DM dm_y, CeedOperator op_mult, CeedOperator op_mult_transpose, Mat *mat) {
  PetscInt       X_l_size, X_g_size, Y_l_size, Y_g_size;
  VecType        vec_type;
  MatCeedContext ctx;

  PetscFunctionBeginUser;
  PetscCall(MatCeedRegisterLogEvents());

  // Collect context data
  PetscCall(DMGetVecType(dm_x, &vec_type));
  {
    // Global/local input sizes from a temporary global vector on dm_x
    Vec X;

    PetscCall(DMGetGlobalVector(dm_x, &X));
    PetscCall(VecGetSize(X, &X_g_size));
    PetscCall(VecGetLocalSize(X, &X_l_size));
    PetscCall(DMRestoreGlobalVector(dm_x, &X));
  }
  if (dm_y) {
    // Output sizes from dm_y when distinct
    Vec Y;

    PetscCall(DMGetGlobalVector(dm_y, &Y));
    PetscCall(VecGetSize(Y, &Y_g_size));
    PetscCall(VecGetLocalSize(Y, &Y_l_size));
    PetscCall(DMRestoreGlobalVector(dm_y, &Y));
  } else {
    // Square operator: reuse dm_x and its sizes
    dm_y     = dm_x;
    Y_g_size = X_g_size;
    Y_l_size = X_l_size;
  }

  // Create context
  {
    Vec X_loc, Y_loc_transpose = NULL;

    PetscCall(DMCreateLocalVector(dm_x, &X_loc));
    PetscCall(VecZeroEntries(X_loc));
    if (op_mult_transpose) {
      PetscCall(DMCreateLocalVector(dm_y, &Y_loc_transpose));
      PetscCall(VecZeroEntries(Y_loc_transpose));
    }
    // Context takes its own references; local temporaries are destroyed below
    PetscCall(MatCeedContextCreate(dm_x, dm_y, X_loc, Y_loc_transpose, op_mult, op_mult_transpose, MATCEED_MULT, MATCEED_MULT_TRANSPOSE,
                                   MATCEED_MULT_CEEDOP, MATCEED_MULT_TRANSPOSE_CEEDOP, &ctx));
    PetscCall(VecDestroy(&X_loc));
    PetscCall(VecDestroy(&Y_loc_transpose));
  }

  // Create mat
  PetscCall(MatCreateShell(PetscObjectComm((PetscObject)dm_x), Y_l_size, X_l_size, Y_g_size, X_g_size, ctx, mat));
  PetscCall(PetscObjectChangeTypeName((PetscObject)*mat, MATCEED));
  // -- Set block and variable block sizes
  if (dm_x == dm_y) {
    MatType dm_mat_type, dm_mat_type_copy;
    Mat     temp_mat;

    // Temporarily force MATAIJ so DMCreateMatrix yields a mat we can query for block sizes,
    // then restore the DM's original mat type
    PetscCall(DMGetMatType(dm_x, &dm_mat_type));
    PetscCall(PetscStrallocpy(dm_mat_type, (char **)&dm_mat_type_copy));
    PetscCall(DMSetMatType(dm_x, MATAIJ));
    PetscCall(DMCreateMatrix(dm_x, &temp_mat));
    PetscCall(DMSetMatType(dm_x, dm_mat_type_copy));
    PetscCall(PetscFree(dm_mat_type_copy));

    {
      PetscInt        block_size, num_blocks, max_vblock_size = PETSC_INT_MAX;
      const PetscInt *vblock_sizes;

      // -- Get block sizes
      PetscCall(MatGetBlockSize(temp_mat, &block_size));
      PetscCall(MatGetVariableBlockSizes(temp_mat, &num_blocks, &vblock_sizes));
      {
        // Global maximum variable block size across all ranks
        PetscInt local_min_max[2] = {0}, global_min_max[2] = {0, PETSC_INT_MAX};

        for (PetscInt i = 0; i < num_blocks; i++) local_min_max[1] = PetscMax(local_min_max[1], vblock_sizes[i]);
        PetscCall(PetscGlobalMinMaxInt(PetscObjectComm((PetscObject)dm_x), local_min_max, global_min_max));
        max_vblock_size = global_min_max[1];
      }

      // -- Copy block sizes
      if (block_size > 1) PetscCall(MatSetBlockSize(*mat, block_size));
      if (num_blocks) PetscCall(MatSetVariableBlockSizes(*mat, num_blocks, (PetscInt *)vblock_sizes));

      // -- Check libCEED compatibility
      // Point block diagonal assembly is only valid when every (sub)operator has a single
      // active basis whose component count matches the block size (or bounds the variable
      // block sizes); flags start optimistic and are cleared on any violation
      {
        bool is_composite;

        ctx->is_ceed_pbd_valid  = PETSC_TRUE;
        ctx->is_ceed_vpbd_valid = PETSC_TRUE;
        PetscCallCeed(ctx->ceed, CeedOperatorIsComposite(op_mult, &is_composite));
        if (is_composite) {
          CeedInt       num_sub_operators;
          CeedOperator *sub_operators;

          PetscCallCeed(ctx->ceed, CeedCompositeOperatorGetNumSub(op_mult, &num_sub_operators));
          PetscCallCeed(ctx->ceed, CeedCompositeOperatorGetSubList(op_mult, &sub_operators));
          for (CeedInt i = 0; i < num_sub_operators; i++) {
            CeedInt                  num_bases, num_comp;
            CeedBasis               *active_bases;
            CeedOperatorAssemblyData assembly_data;

            PetscCallCeed(ctx->ceed, CeedOperatorGetOperatorAssemblyData(sub_operators[i], &assembly_data));
            PetscCallCeed(ctx->ceed, CeedOperatorAssemblyDataGetBases(assembly_data, &num_bases, &active_bases, NULL, NULL, NULL, NULL));
            PetscCallCeed(ctx->ceed, CeedBasisGetNumComponents(active_bases[0], &num_comp));
            if (num_bases > 1) {
              ctx->is_ceed_pbd_valid  = PETSC_FALSE;
              ctx->is_ceed_vpbd_valid = PETSC_FALSE;
            }
            if (num_comp != block_size) ctx->is_ceed_pbd_valid = PETSC_FALSE;
            if (num_comp < max_vblock_size) ctx->is_ceed_vpbd_valid = PETSC_FALSE;
          }
        } else {
          // LCOV_EXCL_START
          // Same checks for a single (non-composite) operator
          CeedInt                  num_bases, num_comp;
          CeedBasis               *active_bases;
          CeedOperatorAssemblyData assembly_data;

          PetscCallCeed(ctx->ceed, CeedOperatorGetOperatorAssemblyData(op_mult, &assembly_data));
          PetscCallCeed(ctx->ceed, CeedOperatorAssemblyDataGetBases(assembly_data, &num_bases, &active_bases, NULL, NULL, NULL, NULL));
          PetscCallCeed(ctx->ceed, CeedBasisGetNumComponents(active_bases[0], &num_comp));
          if (num_bases > 1) {
            ctx->is_ceed_pbd_valid  = PETSC_FALSE;
            ctx->is_ceed_vpbd_valid = PETSC_FALSE;
          }
          if (num_comp != block_size) ctx->is_ceed_pbd_valid = PETSC_FALSE;
          if (num_comp < max_vblock_size) ctx->is_ceed_vpbd_valid = PETSC_FALSE;
          // LCOV_EXCL_STOP
        }
        {
          // Reduce validity flags globally: a flavor is valid only if valid on every rank
          PetscInt local_is_valid[2], global_is_valid[2];

          local_is_valid[0] = local_is_valid[1] = ctx->is_ceed_pbd_valid;
          PetscCall(PetscGlobalMinMaxInt(PetscObjectComm((PetscObject)dm_x), local_is_valid, global_is_valid));
          ctx->is_ceed_pbd_valid = global_is_valid[0];
          local_is_valid[0] = local_is_valid[1] = ctx->is_ceed_vpbd_valid;
          PetscCall(PetscGlobalMinMaxInt(PetscObjectComm((PetscObject)dm_x), local_is_valid, global_is_valid));
          ctx->is_ceed_vpbd_valid = global_is_valid[0];
        }
      }
    }
    PetscCall(MatDestroy(&temp_mat));
  }
  // -- Set internal mat type
  {
    // Pick the default COO assembly mat type to match the backend of the local vector
    VecType vec_type;
    MatType coo_mat_type;

    PetscCall(VecGetType(ctx->X_loc, &vec_type));
    if (strstr(vec_type, VECCUDA)) coo_mat_type = MATAIJCUSPARSE;
    else if (strstr(vec_type, VECKOKKOS)) coo_mat_type = MATAIJKOKKOS;
    else coo_mat_type = MATAIJ;
    PetscCall(PetscStrallocpy(coo_mat_type, &ctx->coo_mat_type));
  }
  // -- Set mat operations
  PetscCall(MatShellSetContextDestroy(*mat, (PetscErrorCode(*)(void *))MatCeedContextDestroy));
  PetscCall(MatShellSetOperation(*mat, MATOP_VIEW, (void (*)(void))MatView_Ceed));
  PetscCall(MatShellSetOperation(*mat, MATOP_MULT, (void (*)(void))MatMult_Ceed));
  if (op_mult_transpose) PetscCall(MatShellSetOperation(*mat, MATOP_MULT_TRANSPOSE, (void (*)(void))MatMultTranspose_Ceed));
  PetscCall(MatShellSetOperation(*mat, MATOP_GET_DIAGONAL, (void (*)(void))MatGetDiagonal_Ceed));
  PetscCall(MatShellSetOperation(*mat, MATOP_GET_DIAGONAL_BLOCK, (void (*)(void))MatGetDiagonalBlock_Ceed));
  PetscCall(MatShellSetOperation(*mat, MATOP_INVERT_BLOCK_DIAGONAL, (void (*)(void))MatInvertBlockDiagonal_Ceed));
  PetscCall(MatShellSetOperation(*mat, MATOP_INVERT_VBLOCK_DIAGONAL, (void (*)(void))MatInvertVariableBlockDiagonal_Ceed));
  PetscCall(MatShellSetVecType(*mat, vec_type));
  PetscFunctionReturn(PETSC_SUCCESS);
}
477 /**
478   @brief Copy `MATCEED` into a compatible `Mat` with type `MatShell` or `MATCEED`.
479 
480   Collective across MPI processes.
481 
482   @param[in]   mat_ceed   `MATCEED` to copy from
483   @param[out]  mat_other  `MatShell` or `MATCEED` to copy into
484 
485   @return An error code: 0 - success, otherwise - failure
486 **/
487 PetscErrorCode MatCeedCopy(Mat mat_ceed, Mat mat_other) {
488   PetscFunctionBeginUser;
489   PetscCall(MatCeedRegisterLogEvents());
490 
491   // Check type compatibility
492   {
493     PetscBool is_matceed = PETSC_FALSE, is_matshell = PETSC_FALSE;
494     MatType   mat_type_ceed, mat_type_other;
495 
496     PetscCall(MatGetType(mat_ceed, &mat_type_ceed));
497     PetscCall(PetscStrcmp(mat_type_ceed, MATCEED, &is_matceed));
498     PetscCheck(is_matceed, PETSC_COMM_SELF, PETSC_ERR_LIB, "mat_ceed must have type " MATCEED);
499     PetscCall(MatGetType(mat_other, &mat_type_other));
500     PetscCall(PetscStrcmp(mat_type_other, MATCEED, &is_matceed));
501     PetscCall(PetscStrcmp(mat_type_other, MATSHELL, &is_matceed));
502     PetscCheck(is_matceed || is_matshell, PETSC_COMM_SELF, PETSC_ERR_LIB, "mat_other must have type " MATCEED " or " MATSHELL);
503   }
504 
505   // Check dimension compatibility
506   {
507     PetscInt X_l_ceed_size, X_g_ceed_size, Y_l_ceed_size, Y_g_ceed_size, X_l_other_size, X_g_other_size, Y_l_other_size, Y_g_other_size;
508 
509     PetscCall(MatGetSize(mat_ceed, &Y_g_ceed_size, &X_g_ceed_size));
510     PetscCall(MatGetLocalSize(mat_ceed, &Y_l_ceed_size, &X_l_ceed_size));
511     PetscCall(MatGetSize(mat_ceed, &Y_g_other_size, &X_g_other_size));
512     PetscCall(MatGetLocalSize(mat_ceed, &Y_l_other_size, &X_l_other_size));
513     PetscCheck((Y_g_ceed_size == Y_g_other_size) && (X_g_ceed_size == X_g_other_size) && (Y_l_ceed_size == Y_l_other_size) &&
514                    (X_l_ceed_size == X_l_other_size),
515                PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ,
516                "mat_ceed and mat_other must have compatible sizes; found mat_ceed (Global: %" PetscInt_FMT ", %" PetscInt_FMT
517                "; Local: %" PetscInt_FMT ", %" PetscInt_FMT ") mat_other (Global: %" PetscInt_FMT ", %" PetscInt_FMT "; Local: %" PetscInt_FMT
518                ", %" PetscInt_FMT ")",
519                Y_g_ceed_size, X_g_ceed_size, Y_l_ceed_size, X_l_ceed_size, Y_g_other_size, X_g_other_size, Y_l_other_size, X_l_other_size);
520   }
521 
522   // Convert
523   {
524     VecType        vec_type;
525     MatCeedContext ctx;
526 
527     PetscCall(PetscObjectChangeTypeName((PetscObject)mat_other, MATCEED));
528     PetscCall(MatShellGetContext(mat_ceed, &ctx));
529     PetscCall(MatCeedContextReference(ctx));
530     PetscCall(MatShellSetContext(mat_other, ctx));
531     PetscCall(MatShellSetContextDestroy(mat_other, (PetscErrorCode(*)(void *))MatCeedContextDestroy));
532     PetscCall(MatShellSetOperation(mat_other, MATOP_VIEW, (void (*)(void))MatView_Ceed));
533     PetscCall(MatShellSetOperation(mat_other, MATOP_MULT, (void (*)(void))MatMult_Ceed));
534     if (ctx->op_mult_transpose) PetscCall(MatShellSetOperation(mat_other, MATOP_MULT_TRANSPOSE, (void (*)(void))MatMultTranspose_Ceed));
535     PetscCall(MatShellSetOperation(mat_other, MATOP_GET_DIAGONAL, (void (*)(void))MatGetDiagonal_Ceed));
536     PetscCall(MatShellSetOperation(mat_other, MATOP_GET_DIAGONAL_BLOCK, (void (*)(void))MatGetDiagonalBlock_Ceed));
537     PetscCall(MatShellSetOperation(mat_other, MATOP_INVERT_BLOCK_DIAGONAL, (void (*)(void))MatInvertBlockDiagonal_Ceed));
538     PetscCall(MatShellSetOperation(mat_other, MATOP_INVERT_VBLOCK_DIAGONAL, (void (*)(void))MatInvertVariableBlockDiagonal_Ceed));
539     {
540       PetscInt block_size;
541 
542       PetscCall(MatGetBlockSize(mat_ceed, &block_size));
543       if (block_size > 1) PetscCall(MatSetBlockSize(mat_other, block_size));
544     }
545     {
546       PetscInt        num_blocks;
547       const PetscInt *block_sizes;
548 
549       PetscCall(MatGetVariableBlockSizes(mat_ceed, &num_blocks, &block_sizes));
550       if (num_blocks) PetscCall(MatSetVariableBlockSizes(mat_other, num_blocks, (PetscInt *)block_sizes));
551     }
552     PetscCall(DMGetVecType(ctx->dm_x, &vec_type));
553     PetscCall(MatShellSetVecType(mat_other, vec_type));
554   }
555   PetscFunctionReturn(PETSC_SUCCESS);
556 }
557 
558 /**
559   @brief Mark `CeedQFunction` data as updated and the `CeedQFunction` as requiring re-assembly for a `MatCEED`.
560 
561   Collective across MPI processes.
562 
563   @param[in]   mat_ceed       `MATCEED`
564   @param[out]  update_needed  Boolean flag indicating `CeedQFunction` update needed
565 
566   @return An error code: 0 - success, otherwise - failure
567 **/
568 PetscErrorCode MatCeedSetAssemblyDataUpdateNeeded(Mat mat_ceed, PetscBool update_needed) {
569   MatCeedContext ctx;
570 
571   PetscFunctionBeginUser;
572   PetscCall(MatShellGetContext(mat_ceed, &ctx));
573   PetscCallCeed(ctx->ceed, CeedOperatorSetQFunctionAssemblyDataUpdateNeeded(ctx->op_mult, update_needed));
574   if (ctx->op_mult_transpose) {
575     PetscCallCeed(ctx->ceed, CeedOperatorSetQFunctionAssemblyDataUpdateNeeded(ctx->op_mult_transpose, update_needed));
576   }
577   if (update_needed) {
578     PetscCall(MatAssemblyBegin(mat_ceed, MAT_FINAL_ASSEMBLY));
579     PetscCall(MatAssemblyEnd(mat_ceed, MAT_FINAL_ASSEMBLY));
580   }
581   PetscFunctionReturn(PETSC_SUCCESS);
582 }
583 
/**
  @brief Setup a `Mat` with the same COO pattern as a `MatCEED`.

  Collective across MPI processes.

  @param[in]   mat_ceed  `MATCEED`
  @param[out]  mat_coo   Sparse `Mat` with same COO pattern

  @return An error code: 0 - success, otherwise - failure
**/
PetscErrorCode MatCeedCreateMatCOO(Mat mat_ceed, Mat *mat_coo) {
  MatCeedContext ctx;

  PetscFunctionBeginUser;
  PetscCall(MatShellGetContext(mat_ceed, &ctx));

  PetscCheck(ctx->dm_x == ctx->dm_y, PetscObjectComm((PetscObject)mat_ceed), PETSC_ERR_SUP, "COO assembly only supported for MATCEED on a single DM");

  // Check cl mat type
  {
    PetscBool is_coo_mat_type_cl = PETSC_FALSE;
    char      coo_mat_type_cl[64];

    // Check for specific CL coo mat type for this Mat
    {
      const char *mat_ceed_prefix = NULL;

      // Query -ceed_coo_mat_type under this Mat's options prefix; if given, it overrides
      // the context's default COO mat type for this and all future COO mats
      PetscCall(MatGetOptionsPrefix(mat_ceed, &mat_ceed_prefix));
      PetscOptionsBegin(PetscObjectComm((PetscObject)mat_ceed), mat_ceed_prefix, "", NULL);
      PetscCall(PetscOptionsFList("-ceed_coo_mat_type", "Default MATCEED COO assembly MatType", NULL, MatList, coo_mat_type_cl, coo_mat_type_cl,
                                  sizeof(coo_mat_type_cl), &is_coo_mat_type_cl));
      PetscOptionsEnd();
      if (is_coo_mat_type_cl) {
        // Replace the stored default with the command-line choice
        PetscCall(PetscFree(ctx->coo_mat_type));
        PetscCall(PetscStrallocpy(coo_mat_type_cl, &ctx->coo_mat_type));
      }
    }
  }

  // Create sparse matrix
  {
    MatType dm_mat_type, dm_mat_type_copy;

    // Temporarily switch the DM's mat type to the COO type, create the mat, then restore;
    // the original type string is copied since DMSetMatType may invalidate the pointer
    PetscCall(DMGetMatType(ctx->dm_x, &dm_mat_type));
    PetscCall(PetscStrallocpy(dm_mat_type, (char **)&dm_mat_type_copy));
    PetscCall(DMSetMatType(ctx->dm_x, ctx->coo_mat_type));
    PetscCall(DMCreateMatrix(ctx->dm_x, mat_coo));
    PetscCall(DMSetMatType(ctx->dm_x, dm_mat_type_copy));
    PetscCall(PetscFree(dm_mat_type_copy));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
636 
/**
  @brief Setup the COO preallocation `MATCEED` into a `MATAIJ` or similar.
         The caller is responsible for assuring the global and local sizes are compatible, otherwise this function will fail.

  Collective across MPI processes.

  @param[in]      mat_ceed  `MATCEED` to assemble
  @param[in,out]  mat_coo   `MATAIJ` or similar to assemble into

  @return An error code: 0 - success, otherwise - failure
**/
PetscErrorCode MatCeedSetPreallocationCOO(Mat mat_ceed, Mat mat_coo) {
  MatCeedContext ctx;

  PetscFunctionBeginUser;
  PetscCall(MatShellGetContext(mat_ceed, &ctx));

  {
    PetscInt     *rows_petsc = NULL, *cols_petsc = NULL;
    CeedInt      *rows_ceed, *cols_ceed;
    PetscCount    num_entries;
    PetscLogStage stage_amg_setup;

    // -- Assemble sparsity pattern if mat hasn't been assembled before
    PetscCall(PetscLogStageGetId("MatCEED Asm Setup", &stage_amg_setup));
    if (stage_amg_setup == -1) {
      PetscCall(PetscLogStageRegister("MatCEED Asm Setup", &stage_amg_setup));
    }
    PetscCall(PetscLogStagePush(stage_amg_setup));
    PetscCall(PetscLogEventBegin(MATCEED_SETUP_FULL, mat_ceed, mat_coo, NULL, NULL));
    PetscCall(PetscLogEventBegin(MATCEED_SETUP_FULL_CEEDOP, mat_ceed, mat_coo, NULL, NULL));
    // libCEED computes the full (row, col) sparsity pattern of the operator
    PetscCallCeed(ctx->ceed, CeedOperatorLinearAssembleSymbolic(ctx->op_mult, &num_entries, &rows_ceed, &cols_ceed));
    PetscCall(PetscLogEventEnd(MATCEED_SETUP_FULL_CEEDOP, mat_ceed, mat_coo, NULL, NULL));
    // Convert CeedInt index arrays to PetscInt; presumably this consumes/frees the CeedInt
    // arrays (passed by address) — confirm against IntArrayCeedToPetsc
    PetscCall(IntArrayCeedToPetsc(num_entries, &rows_ceed, &rows_petsc));
    PetscCall(IntArrayCeedToPetsc(num_entries, &cols_ceed, &cols_petsc));
    PetscCall(MatSetPreallocationCOOLocal(mat_coo, num_entries, rows_petsc, cols_petsc));
    // PetscInt arrays were malloc'd by the conversion helper, so plain free() here
    free(rows_petsc);
    free(cols_petsc);
    // Lazily create the reusable CEED values vector shared by all full assemblies
    if (!ctx->coo_values_full) PetscCallCeed(ctx->ceed, CeedVectorCreate(ctx->ceed, num_entries, &ctx->coo_values_full));
    // Record mat_coo so MatCeedAssembleCOO can skip this setup on subsequent calls
    PetscCall(PetscRealloc(++ctx->num_mats_assembled_full * sizeof(Mat), &ctx->mats_assembled_full));
    ctx->mats_assembled_full[ctx->num_mats_assembled_full - 1] = mat_coo;
    PetscCall(PetscLogEventEnd(MATCEED_SETUP_FULL, mat_ceed, mat_coo, NULL, NULL));
    PetscCall(PetscLogStagePop());
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
683 
684 /**
685   @brief Assemble a `MATCEED` into a `MATAIJ` or similar.
686          The `mat_coo` preallocation is set to match the sparsity pattern of `mat_ceed`.
687          The caller is responsible for assuring the global and local sizes are compatible, otherwise this function will fail.
688 
689   Collective across MPI processes.
690 
691   @param[in]      mat_ceed  `MATCEED` to assemble
692   @param[in,out]  mat_coo   `MATAIJ` or similar to assemble into
693 
694   @return An error code: 0 - success, otherwise - failure
695 **/
696 PetscErrorCode MatCeedAssembleCOO(Mat mat_ceed, Mat mat_coo) {
697   MatCeedContext ctx;
698 
699   PetscFunctionBeginUser;
700   PetscCall(MatShellGetContext(mat_ceed, &ctx));
701 
702   // Set COO pattern if needed
703   {
704     CeedInt index = -1;
705 
706     for (PetscInt i = 0; i < ctx->num_mats_assembled_full; i++) {
707       if (ctx->mats_assembled_full[i] == mat_coo) index = i;
708     }
709     if (index == -1) PetscCall(MatCeedSetPreallocationCOO(mat_ceed, mat_coo));
710   }
711 
712   // Assemble mat_ceed
713   PetscCall(PetscLogEventBegin(MATCEED_ASSEMBLE_FULL, mat_ceed, mat_coo, NULL, NULL));
714   PetscCall(MatAssemblyBegin(mat_coo, MAT_FINAL_ASSEMBLY));
715   {
716     const CeedScalar *values;
717     MatType           mat_type;
718     CeedMemType       mem_type = CEED_MEM_HOST;
719     PetscBool         is_spd, is_spd_known;
720 
721     PetscCall(MatGetType(mat_coo, &mat_type));
722     if (strstr(mat_type, "cusparse")) mem_type = CEED_MEM_DEVICE;
723     else if (strstr(mat_type, "kokkos")) mem_type = CEED_MEM_DEVICE;
724     else mem_type = CEED_MEM_HOST;
725 
726     PetscCall(PetscLogEventBegin(MATCEED_ASSEMBLE_FULL_CEEDOP, mat_ceed, mat_coo, NULL, NULL));
727     PetscCallCeed(ctx->ceed, CeedOperatorLinearAssemble(ctx->op_mult, ctx->coo_values_full));
728     PetscCall(PetscLogEventEnd(MATCEED_ASSEMBLE_FULL_CEEDOP, mat_ceed, mat_coo, NULL, NULL));
729     PetscCallCeed(ctx->ceed, CeedVectorGetArrayRead(ctx->coo_values_full, mem_type, &values));
730     PetscCall(MatSetValuesCOO(mat_coo, values, INSERT_VALUES));
731     PetscCall(MatIsSPDKnown(mat_ceed, &is_spd_known, &is_spd));
732     if (is_spd_known) PetscCall(MatSetOption(mat_coo, MAT_SPD, is_spd));
733     PetscCallCeed(ctx->ceed, CeedVectorRestoreArrayRead(ctx->coo_values_full, &values));
734   }
735   PetscCall(MatAssemblyEnd(mat_coo, MAT_FINAL_ASSEMBLY));
736   PetscCall(PetscLogEventEnd(MATCEED_ASSEMBLE_FULL, mat_ceed, mat_coo, NULL, NULL));
737   PetscFunctionReturn(PETSC_SUCCESS);
738 }
739 
740 /**
741   @brief Set the current value of a context field for a `MatCEED`.
742 
743   Not collective across MPI processes.
744 
745   @param[in,out]  mat    `MatCEED`
746   @param[in]      name   Name of the context field
747   @param[in]      value  New context field value
748 
749   @return An error code: 0 - success, otherwise - failure
750 **/
751 PetscErrorCode MatCeedSetContextDouble(Mat mat, const char *name, double value) {
752   PetscBool      was_updated = PETSC_FALSE;
753   MatCeedContext ctx;
754 
755   PetscFunctionBeginUser;
756   PetscCall(MatShellGetContext(mat, &ctx));
757   {
758     CeedContextFieldLabel label = NULL;
759 
760     PetscCallCeed(ctx->ceed, CeedOperatorGetContextFieldLabel(ctx->op_mult, name, &label));
761     if (label) {
762       double set_value = 2 * value + 1.0;
763 
764       PetscCall(MatCeedGetContextDouble(mat, name, &set_value));
765       if (set_value != value) {
766         PetscCallCeed(ctx->ceed, CeedOperatorSetContextDouble(ctx->op_mult, label, &value));
767         was_updated = PETSC_TRUE;
768       }
769     }
770     if (ctx->op_mult_transpose) {
771       label = NULL;
772       PetscCallCeed(ctx->ceed, CeedOperatorGetContextFieldLabel(ctx->op_mult_transpose, name, &label));
773       if (label) {
774         double set_value = 2 * value + 1.0;
775 
776         PetscCall(MatCeedGetContextDouble(mat, name, &set_value));
777         if (set_value != value) {
778           PetscCallCeed(ctx->ceed, CeedOperatorSetContextDouble(ctx->op_mult_transpose, label, &value));
779           was_updated = PETSC_TRUE;
780         }
781       }
782     }
783   }
784   if (was_updated) {
785     PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY));
786     PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY));
787   }
788   PetscFunctionReturn(PETSC_SUCCESS);
789 }
790 
791 /**
792   @brief Get the current value of a context field for a `MatCEED`.
793 
794   Not collective across MPI processes.
795 
796   @param[in]   mat    `MatCEED`
797   @param[in]   name   Name of the context field
798   @param[out]  value  Current context field value
799 
800   @return An error code: 0 - success, otherwise - failure
801 **/
802 PetscErrorCode MatCeedGetContextDouble(Mat mat, const char *name, double *value) {
803   MatCeedContext ctx;
804 
805   PetscFunctionBeginUser;
806   PetscCall(MatShellGetContext(mat, &ctx));
807   {
808     CeedContextFieldLabel label = NULL;
809     CeedOperator          op    = ctx->op_mult;
810 
811     PetscCallCeed(ctx->ceed, CeedOperatorGetContextFieldLabel(op, name, &label));
812     if (!label && ctx->op_mult_transpose) {
813       op = ctx->op_mult_transpose;
814       PetscCallCeed(ctx->ceed, CeedOperatorGetContextFieldLabel(op, name, &label));
815     }
816     if (label) {
817       PetscSizeT    num_values;
818       const double *values_ceed;
819 
820       PetscCallCeed(ctx->ceed, CeedOperatorGetContextDoubleRead(op, label, &num_values, &values_ceed));
821       *value = values_ceed[0];
822       PetscCallCeed(ctx->ceed, CeedOperatorRestoreContextDoubleRead(op, label, &values_ceed));
823     }
824   }
825   PetscFunctionReturn(PETSC_SUCCESS);
826 }
827 
828 /**
829   @brief Set the current `PetscReal` value of a context field for a `MatCEED`.
830 
831   Not collective across MPI processes.
832 
833   @param[in,out]  mat    `MatCEED`
834   @param[in]      name   Name of the context field
835   @param[in]      value  New context field value
836 
837   @return An error code: 0 - success, otherwise - failure
838 **/
839 PetscErrorCode MatCeedSetContextReal(Mat mat, const char *name, PetscReal value) {
840   double value_double = value;
841 
842   PetscFunctionBeginUser;
843   PetscCall(MatCeedSetContextDouble(mat, name, value_double));
844   PetscFunctionReturn(PETSC_SUCCESS);
845 }
846 
847 /**
848   @brief Get the current `PetscReal` value of a context field for a `MatCEED`.
849 
850   Not collective across MPI processes.
851 
852   @param[in]   mat    `MatCEED`
853   @param[in]   name   Name of the context field
854   @param[out]  value  Current context field value
855 
856   @return An error code: 0 - success, otherwise - failure
857 **/
858 PetscErrorCode MatCeedGetContextReal(Mat mat, const char *name, PetscReal *value) {
859   double value_double = 0.0;
860 
861   PetscFunctionBeginUser;
862   PetscCall(MatCeedGetContextDouble(mat, name, &value_double));
863   *value = value_double;
864   PetscFunctionReturn(PETSC_SUCCESS);
865 }
866 
867 /**
868   @brief Set the current time for a `MatCEED`.
869 
870   Not collective across MPI processes.
871 
872   @param[in,out]  mat   `MatCEED`
873   @param[in]      time  Current time
874 
875   @return An error code: 0 - success, otherwise - failure
876 **/
877 PetscErrorCode MatCeedSetTime(Mat mat, PetscReal time) {
878   PetscFunctionBeginUser;
879   {
880     double time_ceed = time;
881 
882     PetscCall(MatCeedSetContextDouble(mat, "time", time_ceed));
883   }
884   PetscFunctionReturn(PETSC_SUCCESS);
885 }
886 
887 /**
888   @brief Get the current time for a `MatCEED`.
889 
890   Not collective across MPI processes.
891 
892   @param[in]   mat   `MatCEED`
893   @param[out]  time  Current time, or -1.0 if the boundary evaluator has no time field
894 
895   @return An error code: 0 - success, otherwise - failure
896 **/
897 PetscErrorCode MatCeedGetTime(Mat mat, PetscReal *time) {
898   PetscFunctionBeginUser;
899   *time = -1.0;
900   {
901     double time_ceed = -1.0;
902 
903     PetscCall(MatCeedGetContextDouble(mat, "time", &time_ceed));
904     *time = time_ceed;
905   }
906   PetscFunctionReturn(PETSC_SUCCESS);
907 }
908 
909 /**
910   @brief Set the current time step for a `MatCEED`.
911 
912   Not collective across MPI processes.
913 
914   @param[in,out]  mat  `MatCEED`
915   @param[in]      dt   Current time step
916 
917   @return An error code: 0 - success, otherwise - failure
918 **/
919 PetscErrorCode MatCeedSetDt(Mat mat, PetscReal dt) {
920   PetscFunctionBeginUser;
921   {
922     double dt_ceed = dt;
923 
924     PetscCall(MatCeedSetContextDouble(mat, "dt", dt_ceed));
925   }
926   PetscFunctionReturn(PETSC_SUCCESS);
927 }
928 
929 /**
930   @brief Set the Jacobian shifts for a `MatCEED`.
931 
932   Not collective across MPI processes.
933 
934   @param[in,out]  mat      `MatCEED`
935   @param[in]      shift_v  Velocity shift
936   @param[in]      shift_a  Acceleration shift
937 
938   @return An error code: 0 - success, otherwise - failure
939 **/
940 PetscErrorCode MatCeedSetShifts(Mat mat, PetscReal shift_v, PetscReal shift_a) {
941   PetscFunctionBeginUser;
942   {
943     double shift_v_ceed = shift_v;
944 
945     PetscCall(MatCeedSetContextDouble(mat, "shift v", shift_v_ceed));
946   }
947   if (shift_a) {
948     double shift_a_ceed = shift_a;
949 
950     PetscCall(MatCeedSetContextDouble(mat, "shift a", shift_a_ceed));
951   }
952   PetscFunctionReturn(PETSC_SUCCESS);
953 }
954 
955 /**
956   @brief Set user context for a `MATCEED`.
957 
958   Collective across MPI processes.
959 
960   @param[in,out]  mat  `MATCEED`
961   @param[in]      f    The context destroy function, or NULL
962   @param[in]      ctx  User context, or NULL to unset
963 
964   @return An error code: 0 - success, otherwise - failure
965 **/
966 PetscErrorCode MatCeedSetContext(Mat mat, PetscErrorCode (*f)(void *), void *ctx) {
967   PetscContainer user_ctx = NULL;
968 
969   PetscFunctionBeginUser;
970   if (ctx) {
971     PetscCall(PetscContainerCreate(PetscObjectComm((PetscObject)mat), &user_ctx));
972     PetscCall(PetscContainerSetPointer(user_ctx, ctx));
973     PetscCall(PetscContainerSetUserDestroy(user_ctx, f));
974   }
975   PetscCall(PetscObjectCompose((PetscObject)mat, "MatCeed user context", (PetscObject)user_ctx));
976   PetscCall(PetscContainerDestroy(&user_ctx));
977   PetscFunctionReturn(PETSC_SUCCESS);
978 }
979 
980 /**
981   @brief Retrieve the user context for a `MATCEED`.
982 
983   Collective across MPI processes.
984 
985   @param[in,out]  mat  `MATCEED`
986   @param[in]      ctx  User context
987 
988   @return An error code: 0 - success, otherwise - failure
989 **/
990 PetscErrorCode MatCeedGetContext(Mat mat, void *ctx) {
991   PetscContainer user_ctx;
992 
993   PetscFunctionBeginUser;
994   PetscCall(PetscObjectQuery((PetscObject)mat, "MatCeed user context", (PetscObject *)&user_ctx));
995   if (user_ctx) PetscCall(PetscContainerGetPointer(user_ctx, (void **)ctx));
996   else *(void **)ctx = NULL;
997   PetscFunctionReturn(PETSC_SUCCESS);
998 }
999 /**
1000   @brief Set a user defined matrix operation for a `MATCEED` matrix.
1001 
1002   Within each user-defined routine, the user should call `MatCeedGetContext()` to obtain the user-defined context that was set by
1003 `MatCeedSetContext()`.
1004 
1005   Collective across MPI processes.
1006 
1007   @param[in,out]  mat  `MATCEED`
1008   @param[in]      op   Name of the `MatOperation`
1009   @param[in]      g    Function that provides the operation
1010 
1011   @return An error code: 0 - success, otherwise - failure
1012 **/
1013 PetscErrorCode MatCeedSetOperation(Mat mat, MatOperation op, void (*g)(void)) {
1014   PetscFunctionBeginUser;
1015   PetscCall(MatShellSetOperation(mat, op, g));
1016   PetscFunctionReturn(PETSC_SUCCESS);
1017 }
1018 
1019 /**
1020   @brief Sets the default COO matrix type as a string from the `MATCEED`.
1021 
1022   Collective across MPI processes.
1023 
1024   @param[in,out]  mat   `MATCEED`
1025   @param[in]      type  COO `MatType` to set
1026 
1027   @return An error code: 0 - success, otherwise - failure
1028 **/
1029 PetscErrorCode MatCeedSetCOOMatType(Mat mat, MatType type) {
1030   MatCeedContext ctx;
1031 
1032   PetscFunctionBeginUser;
1033   PetscCall(MatShellGetContext(mat, &ctx));
1034   // Check if same
1035   {
1036     size_t    len_old, len_new;
1037     PetscBool is_same = PETSC_FALSE;
1038 
1039     PetscCall(PetscStrlen(ctx->coo_mat_type, &len_old));
1040     PetscCall(PetscStrlen(type, &len_new));
1041     if (len_old == len_new) PetscCall(PetscStrncmp(ctx->coo_mat_type, type, len_old, &is_same));
1042     if (is_same) PetscFunctionReturn(PETSC_SUCCESS);
1043   }
1044   // Clean up old mats in different format
1045   // LCOV_EXCL_START
1046   if (ctx->mat_assembled_full_internal) {
1047     for (PetscInt i = 0; i < ctx->num_mats_assembled_full; i++) {
1048       if (ctx->mats_assembled_full[i] == ctx->mat_assembled_full_internal) {
1049         for (PetscInt j = i + 1; j < ctx->num_mats_assembled_full; j++) {
1050           ctx->mats_assembled_full[j - 1] = ctx->mats_assembled_full[j];
1051         }
1052         ctx->num_mats_assembled_full--;
1053         // Note: we'll realloc this array again, so no need to shrink the allocation
1054         PetscCall(MatDestroy(&ctx->mat_assembled_full_internal));
1055       }
1056     }
1057   }
1058   if (ctx->mat_assembled_pbd_internal) {
1059     for (PetscInt i = 0; i < ctx->num_mats_assembled_pbd; i++) {
1060       if (ctx->mats_assembled_pbd[i] == ctx->mat_assembled_pbd_internal) {
1061         for (PetscInt j = i + 1; j < ctx->num_mats_assembled_pbd; j++) {
1062           ctx->mats_assembled_pbd[j - 1] = ctx->mats_assembled_pbd[j];
1063         }
1064         // Note: we'll realloc this array again, so no need to shrink the allocation
1065         ctx->num_mats_assembled_pbd--;
1066         PetscCall(MatDestroy(&ctx->mat_assembled_pbd_internal));
1067       }
1068     }
1069   }
1070   PetscCall(PetscFree(ctx->coo_mat_type));
1071   PetscCall(PetscStrallocpy(type, &ctx->coo_mat_type));
1072   PetscFunctionReturn(PETSC_SUCCESS);
1073   // LCOV_EXCL_STOP
1074 }
1075 
1076 /**
1077   @brief Gets the default COO matrix type as a string from the `MATCEED`.
1078 
1079   Collective across MPI processes.
1080 
1081   @param[in,out]  mat   `MATCEED`
1082   @param[in]      type  COO `MatType`
1083 
1084   @return An error code: 0 - success, otherwise - failure
1085 **/
1086 PetscErrorCode MatCeedGetCOOMatType(Mat mat, MatType *type) {
1087   MatCeedContext ctx;
1088 
1089   PetscFunctionBeginUser;
1090   PetscCall(MatShellGetContext(mat, &ctx));
1091   *type = ctx->coo_mat_type;
1092   PetscFunctionReturn(PETSC_SUCCESS);
1093 }
1094 
1095 /**
1096   @brief Set input local vectors for `MATCEED` `MatMult()` and `MatMultTranspose()` operations.
1097 
1098   Not collective across MPI processes.
1099 
1100   @param[in,out]  mat              `MATCEED`
1101   @param[in]      X_loc            Input PETSc local vector, or NULL
1102   @param[in]      Y_loc_transpose  Input PETSc local vector for transpose operation, or NULL
1103 
1104   @return An error code: 0 - success, otherwise - failure
1105 **/
1106 PetscErrorCode MatCeedSetLocalVectors(Mat mat, Vec X_loc, Vec Y_loc_transpose) {
1107   MatCeedContext ctx;
1108 
1109   PetscFunctionBeginUser;
1110   PetscCall(MatShellGetContext(mat, &ctx));
1111   if (X_loc) {
1112     PetscInt len_old, len_new;
1113 
1114     PetscCall(VecGetSize(ctx->X_loc, &len_old));
1115     PetscCall(VecGetSize(X_loc, &len_new));
1116     PetscCheck(len_old == len_new, PETSC_COMM_SELF, PETSC_ERR_LIB, "new X_loc length %" PetscInt_FMT " should match old X_loc length %" PetscInt_FMT,
1117                len_new, len_old);
1118     PetscCall(VecReferenceCopy(X_loc, &ctx->X_loc));
1119   }
1120   if (Y_loc_transpose) {
1121     PetscInt len_old, len_new;
1122 
1123     PetscCall(VecGetSize(ctx->Y_loc_transpose, &len_old));
1124     PetscCall(VecGetSize(Y_loc_transpose, &len_new));
1125     PetscCheck(len_old == len_new, PETSC_COMM_SELF, PETSC_ERR_LIB,
1126                "new Y_loc_transpose length %" PetscInt_FMT " should match old Y_loc_transpose length %" PetscInt_FMT, len_new, len_old);
1127     PetscCall(VecReferenceCopy(Y_loc_transpose, &ctx->Y_loc_transpose));
1128   }
1129   PetscFunctionReturn(PETSC_SUCCESS);
1130 }
1131 
1132 /**
1133   @brief Get input local vectors for `MATCEED` `MatMult()` and `MatMultTranspose()` operations.
1134 
1135   Not collective across MPI processes.
1136 
1137   @param[in,out]  mat              `MATCEED`
1138   @param[out]     X_loc            Input PETSc local vector, or NULL
1139   @param[out]     Y_loc_transpose  Input PETSc local vector for transpose operation, or NULL
1140 
1141   @return An error code: 0 - success, otherwise - failure
1142 **/
1143 PetscErrorCode MatCeedGetLocalVectors(Mat mat, Vec *X_loc, Vec *Y_loc_transpose) {
1144   MatCeedContext ctx;
1145 
1146   PetscFunctionBeginUser;
1147   PetscCall(MatShellGetContext(mat, &ctx));
1148   if (X_loc) {
1149     *X_loc = NULL;
1150     PetscCall(VecReferenceCopy(ctx->X_loc, X_loc));
1151   }
1152   if (Y_loc_transpose) {
1153     *Y_loc_transpose = NULL;
1154     PetscCall(VecReferenceCopy(ctx->Y_loc_transpose, Y_loc_transpose));
1155   }
1156   PetscFunctionReturn(PETSC_SUCCESS);
1157 }
1158 
1159 /**
1160   @brief Restore input local vectors for `MATCEED` `MatMult()` and `MatMultTranspose()` operations.
1161 
1162   Not collective across MPI processes.
1163 
1164   @param[in,out]  mat              MatCeed
1165   @param[out]     X_loc            Input PETSc local vector, or NULL
1166   @param[out]     Y_loc_transpose  Input PETSc local vector for transpose operation, or NULL
1167 
1168   @return An error code: 0 - success, otherwise - failure
1169 **/
1170 PetscErrorCode MatCeedRestoreLocalVectors(Mat mat, Vec *X_loc, Vec *Y_loc_transpose) {
1171   PetscFunctionBeginUser;
1172   if (X_loc) PetscCall(VecDestroy(X_loc));
1173   if (Y_loc_transpose) PetscCall(VecDestroy(Y_loc_transpose));
1174   PetscFunctionReturn(PETSC_SUCCESS);
1175 }
1176 
1177 /**
1178   @brief Get libCEED `CeedOperator` for `MATCEED` `MatMult()` and `MatMultTranspose()` operations.
1179 
1180   Not collective across MPI processes.
1181 
1182   @param[in,out]  mat                MatCeed
1183   @param[out]     op_mult            libCEED `CeedOperator` for `MatMult()`, or NULL
1184   @param[out]     op_mult_transpose  libCEED `CeedOperator` for `MatMultTranspose()`, or NULL
1185 
1186   @return An error code: 0 - success, otherwise - failure
1187 **/
1188 PetscErrorCode MatCeedGetCeedOperators(Mat mat, CeedOperator *op_mult, CeedOperator *op_mult_transpose) {
1189   MatCeedContext ctx;
1190 
1191   PetscFunctionBeginUser;
1192   PetscCall(MatShellGetContext(mat, &ctx));
1193   if (op_mult) {
1194     *op_mult = NULL;
1195     PetscCallCeed(ctx->ceed, CeedOperatorReferenceCopy(ctx->op_mult, op_mult));
1196   }
1197   if (op_mult_transpose) {
1198     *op_mult_transpose = NULL;
1199     PetscCallCeed(ctx->ceed, CeedOperatorReferenceCopy(ctx->op_mult_transpose, op_mult_transpose));
1200   }
1201   PetscFunctionReturn(PETSC_SUCCESS);
1202 }
1203 
1204 /**
1205   @brief Restore libCEED `CeedOperator` for `MATCEED` `MatMult()` and `MatMultTranspose()` operations.
1206 
1207   Not collective across MPI processes.
1208 
1209   @param[in,out]  mat                MatCeed
1210   @param[out]     op_mult            libCEED `CeedOperator` for `MatMult()`, or NULL
1211   @param[out]     op_mult_transpose  libCEED `CeedOperator` for `MatMultTranspose()`, or NULL
1212 
1213   @return An error code: 0 - success, otherwise - failure
1214 **/
1215 PetscErrorCode MatCeedRestoreCeedOperators(Mat mat, CeedOperator *op_mult, CeedOperator *op_mult_transpose) {
1216   MatCeedContext ctx;
1217 
1218   PetscFunctionBeginUser;
1219   PetscCall(MatShellGetContext(mat, &ctx));
1220   if (op_mult) PetscCallCeed(ctx->ceed, CeedOperatorDestroy(op_mult));
1221   if (op_mult_transpose) PetscCallCeed(ctx->ceed, CeedOperatorDestroy(op_mult_transpose));
1222   PetscFunctionReturn(PETSC_SUCCESS);
1223 }
1224 
1225 /**
1226   @brief Set `PetscLogEvent` for `MATCEED` `MatMult()` and `MatMultTranspose()` operators.
1227 
1228   Not collective across MPI processes.
1229 
1230   @param[in,out]  mat                       MatCeed
1231   @param[out]     log_event_mult            `PetscLogEvent` for forward evaluation, or NULL
1232   @param[out]     log_event_mult_transpose  `PetscLogEvent` for transpose evaluation, or NULL
1233 
1234   @return An error code: 0 - success, otherwise - failure
1235 **/
1236 PetscErrorCode MatCeedSetLogEvents(Mat mat, PetscLogEvent log_event_mult, PetscLogEvent log_event_mult_transpose) {
1237   MatCeedContext ctx;
1238 
1239   PetscFunctionBeginUser;
1240   PetscCall(MatShellGetContext(mat, &ctx));
1241   if (log_event_mult) ctx->log_event_mult = log_event_mult;
1242   if (log_event_mult_transpose) ctx->log_event_mult_transpose = log_event_mult_transpose;
1243   PetscFunctionReturn(PETSC_SUCCESS);
1244 }
1245 
1246 /**
1247   @brief Get `PetscLogEvent` for `MATCEED` `MatMult()` and `MatMultTranspose()` operators.
1248 
1249   Not collective across MPI processes.
1250 
1251   @param[in,out]  mat                       MatCeed
1252   @param[out]     log_event_mult            `PetscLogEvent` for forward evaluation, or NULL
1253   @param[out]     log_event_mult_transpose  `PetscLogEvent` for transpose evaluation, or NULL
1254 
1255   @return An error code: 0 - success, otherwise - failure
1256 **/
1257 PetscErrorCode MatCeedGetLogEvents(Mat mat, PetscLogEvent *log_event_mult, PetscLogEvent *log_event_mult_transpose) {
1258   MatCeedContext ctx;
1259 
1260   PetscFunctionBeginUser;
1261   PetscCall(MatShellGetContext(mat, &ctx));
1262   if (log_event_mult) *log_event_mult = ctx->log_event_mult;
1263   if (log_event_mult_transpose) *log_event_mult_transpose = ctx->log_event_mult_transpose;
1264   PetscFunctionReturn(PETSC_SUCCESS);
1265 }
1266 
1267 /**
1268   @brief Set `CeedOperator` `PetscLogEvent` for `MATCEED` `MatMult()` and `MatMultTranspose()` operators.
1269 
1270   Not collective across MPI processes.
1271 
1272   @param[in,out]  mat                       MatCeed
1273   @param[out]     log_event_mult            `PetscLogEvent` for forward `CeedOperator` evaluation, or NULL
1274   @param[out]     log_event_mult_transpose  `PetscLogEvent` for transpose `CeedOperator` evaluation, or NULL
1275 
1276   @return An error code: 0 - success, otherwise - failure
1277 **/
1278 PetscErrorCode MatCeedSetCeedOperatorLogEvents(Mat mat, PetscLogEvent log_event_mult, PetscLogEvent log_event_mult_transpose) {
1279   MatCeedContext ctx;
1280 
1281   PetscFunctionBeginUser;
1282   PetscCall(MatShellGetContext(mat, &ctx));
1283   if (log_event_mult) ctx->log_event_ceed_mult = log_event_mult;
1284   if (log_event_mult_transpose) ctx->log_event_ceed_mult_transpose = log_event_mult_transpose;
1285   PetscFunctionReturn(PETSC_SUCCESS);
1286 }
1287 
1288 /**
1289   @brief Get `CeedOperator` `PetscLogEvent` for `MATCEED` `MatMult()` and `MatMultTranspose()` operators.
1290 
1291   Not collective across MPI processes.
1292 
1293   @param[in,out]  mat                       MatCeed
1294   @param[out]     log_event_mult            `PetscLogEvent` for forward `CeedOperator` evaluation, or NULL
1295   @param[out]     log_event_mult_transpose  `PetscLogEvent` for transpose `CeedOperator` evaluation, or NULL
1296 
1297   @return An error code: 0 - success, otherwise - failure
1298 **/
1299 PetscErrorCode MatCeedGetCeedOperatorLogEvents(Mat mat, PetscLogEvent *log_event_mult, PetscLogEvent *log_event_mult_transpose) {
1300   MatCeedContext ctx;
1301 
1302   PetscFunctionBeginUser;
1303   PetscCall(MatShellGetContext(mat, &ctx));
1304   if (log_event_mult) *log_event_mult = ctx->log_event_ceed_mult;
1305   if (log_event_mult_transpose) *log_event_mult_transpose = ctx->log_event_ceed_mult_transpose;
1306   PetscFunctionReturn(PETSC_SUCCESS);
1307 }
1308 
1309 // -----------------------------------------------------------------------------
1310 // Operator context data
1311 // -----------------------------------------------------------------------------
1312 
1313 /**
1314   @brief Setup context data for operator application.
1315 
1316   Collective across MPI processes.
1317 
1318   @param[in]   dm_x                           Input `DM`
1319   @param[in]   dm_y                           Output `DM`
1320   @param[in]   X_loc                          Input PETSc local vector, or NULL
1321   @param[in]   Y_loc_transpose                Input PETSc local vector for transpose operation, or NULL
1322   @param[in]   op_mult                        `CeedOperator` for forward evaluation
1323   @param[in]   op_mult_transpose              `CeedOperator` for transpose evaluation
1324   @param[in]   log_event_mult                 `PetscLogEvent` for forward evaluation
1325   @param[in]   log_event_mult_transpose       `PetscLogEvent` for transpose evaluation
1326   @param[in]   log_event_ceed_mult            `PetscLogEvent` for forward `CeedOperator` evaluation
1327   @param[in]   log_event_ceed_mult_transpose  `PetscLogEvent` for transpose `CeedOperator` evaluation
1328   @param[out]  ctx                            Context data for operator evaluation
1329 
1330   @return An error code: 0 - success, otherwise - failure
1331 **/
1332 PetscErrorCode MatCeedContextCreate(DM dm_x, DM dm_y, Vec X_loc, Vec Y_loc_transpose, CeedOperator op_mult, CeedOperator op_mult_transpose,
1333                                     PetscLogEvent log_event_mult, PetscLogEvent log_event_mult_transpose, PetscLogEvent log_event_ceed_mult,
1334                                     PetscLogEvent log_event_ceed_mult_transpose, MatCeedContext *ctx) {
1335   CeedSize x_loc_len, y_loc_len;
1336 
1337   PetscFunctionBeginUser;
1338 
1339   // Allocate
1340   PetscCall(PetscNew(ctx));
1341   (*ctx)->ref_count = 1;
1342 
1343   // Logging
1344   (*ctx)->log_event_mult                = log_event_mult;
1345   (*ctx)->log_event_mult_transpose      = log_event_mult_transpose;
1346   (*ctx)->log_event_ceed_mult           = log_event_ceed_mult;
1347   (*ctx)->log_event_ceed_mult_transpose = log_event_ceed_mult_transpose;
1348 
1349   // PETSc objects
1350   PetscCall(DMReferenceCopy(dm_x, &(*ctx)->dm_x));
1351   PetscCall(DMReferenceCopy(dm_y, &(*ctx)->dm_y));
1352   if (X_loc) PetscCall(VecReferenceCopy(X_loc, &(*ctx)->X_loc));
1353   if (Y_loc_transpose) PetscCall(VecReferenceCopy(Y_loc_transpose, &(*ctx)->Y_loc_transpose));
1354 
1355   // Memtype
1356   {
1357     const PetscScalar *x;
1358     Vec                X;
1359 
1360     PetscCall(DMGetLocalVector(dm_x, &X));
1361     PetscCall(VecGetArrayReadAndMemType(X, &x, &(*ctx)->mem_type));
1362     PetscCall(VecRestoreArrayReadAndMemType(X, &x));
1363     PetscCall(DMRestoreLocalVector(dm_x, &X));
1364   }
1365 
1366   // libCEED objects
1367   PetscCheck(CeedOperatorGetCeed(op_mult, &(*ctx)->ceed) == CEED_ERROR_SUCCESS, PETSC_COMM_SELF, PETSC_ERR_LIB,
1368              "retrieving Ceed context object failed");
1369   PetscCallCeed((*ctx)->ceed, CeedReference((*ctx)->ceed));
1370   PetscCallCeed((*ctx)->ceed, CeedOperatorGetActiveVectorLengths(op_mult, &x_loc_len, &y_loc_len));
1371   PetscCallCeed((*ctx)->ceed, CeedOperatorReferenceCopy(op_mult, &(*ctx)->op_mult));
1372   if (op_mult_transpose) PetscCallCeed((*ctx)->ceed, CeedOperatorReferenceCopy(op_mult_transpose, &(*ctx)->op_mult_transpose));
1373   PetscCallCeed((*ctx)->ceed, CeedVectorCreate((*ctx)->ceed, x_loc_len, &(*ctx)->x_loc));
1374   PetscCallCeed((*ctx)->ceed, CeedVectorCreate((*ctx)->ceed, y_loc_len, &(*ctx)->y_loc));
1375 
1376   // Flop counting
1377   {
1378     CeedSize ceed_flops_estimate = 0;
1379 
1380     PetscCallCeed((*ctx)->ceed, CeedOperatorGetFlopsEstimate(op_mult, &ceed_flops_estimate));
1381     (*ctx)->flops_mult = ceed_flops_estimate;
1382     if (op_mult_transpose) {
1383       PetscCallCeed((*ctx)->ceed, CeedOperatorGetFlopsEstimate(op_mult_transpose, &ceed_flops_estimate));
1384       (*ctx)->flops_mult_transpose = ceed_flops_estimate;
1385     }
1386   }
1387 
1388   // Check sizes
1389   if (x_loc_len > 0 || y_loc_len > 0) {
1390     CeedSize ctx_x_loc_len, ctx_y_loc_len;
1391     PetscInt X_loc_len, dm_x_loc_len, Y_loc_len, dm_y_loc_len;
1392     Vec      dm_X_loc, dm_Y_loc;
1393 
1394     // -- Input
1395     PetscCall(DMGetLocalVector(dm_x, &dm_X_loc));
1396     PetscCall(VecGetLocalSize(dm_X_loc, &dm_x_loc_len));
1397     PetscCall(DMRestoreLocalVector(dm_x, &dm_X_loc));
1398     PetscCallCeed((*ctx)->ceed, CeedVectorGetLength((*ctx)->x_loc, &ctx_x_loc_len));
1399     if (X_loc) {
1400       PetscCall(VecGetLocalSize(X_loc, &X_loc_len));
1401       PetscCheck(X_loc_len == dm_x_loc_len, PETSC_COMM_SELF, PETSC_ERR_LIB,
1402                  "X_loc (%" PetscInt_FMT ") must match dm_x (%" PetscInt_FMT ") dimensions", X_loc_len, dm_x_loc_len);
1403     }
1404     PetscCheck(x_loc_len == dm_x_loc_len, PETSC_COMM_SELF, PETSC_ERR_LIB, "op (%" CeedSize_FMT ") must match dm_x (%" PetscInt_FMT ") dimensions",
1405                x_loc_len, dm_x_loc_len);
1406     PetscCheck(x_loc_len == ctx_x_loc_len, PETSC_COMM_SELF, PETSC_ERR_LIB, "x_loc (%" CeedSize_FMT ") must match op dimensions (%" CeedSize_FMT ")",
1407                x_loc_len, ctx_x_loc_len);
1408 
1409     // -- Output
1410     PetscCall(DMGetLocalVector(dm_y, &dm_Y_loc));
1411     PetscCall(VecGetLocalSize(dm_Y_loc, &dm_y_loc_len));
1412     PetscCall(DMRestoreLocalVector(dm_y, &dm_Y_loc));
1413     PetscCallCeed((*ctx)->ceed, CeedVectorGetLength((*ctx)->y_loc, &ctx_y_loc_len));
1414     PetscCheck(ctx_y_loc_len == dm_y_loc_len, PETSC_COMM_SELF, PETSC_ERR_LIB, "op (%" CeedSize_FMT ") must match dm_y (%" PetscInt_FMT ") dimensions",
1415                ctx_y_loc_len, dm_y_loc_len);
1416 
1417     // -- Transpose
1418     if (Y_loc_transpose) {
1419       PetscCall(VecGetLocalSize(Y_loc_transpose, &Y_loc_len));
1420       PetscCheck(Y_loc_len == dm_y_loc_len, PETSC_COMM_SELF, PETSC_ERR_LIB,
1421                  "Y_loc_transpose (%" PetscInt_FMT ") must match dm_y (%" PetscInt_FMT ") dimensions", Y_loc_len, dm_y_loc_len);
1422     }
1423   }
1424   PetscFunctionReturn(PETSC_SUCCESS);
1425 }
1426 
1427 /**
1428   @brief Increment reference counter for `MATCEED` context.
1429 
1430   Not collective across MPI processes.
1431 
1432   @param[in,out]  ctx  Context data
1433 
1434   @return An error code: 0 - success, otherwise - failure
1435 **/
1436 PetscErrorCode MatCeedContextReference(MatCeedContext ctx) {
1437   PetscFunctionBeginUser;
1438   ctx->ref_count++;
1439   PetscFunctionReturn(PETSC_SUCCESS);
1440 }
1441 
1442 /**
1443   @brief Copy reference for `MATCEED`.
1444          Note: If `ctx_copy` is non-null, it is assumed to be a valid pointer to a `MatCeedContext`.
1445 
1446   Not collective across MPI processes.
1447 
1448   @param[in]   ctx       Context data
1449   @param[out]  ctx_copy  Copy of pointer to context data
1450 
1451   @return An error code: 0 - success, otherwise - failure
1452 **/
1453 PetscErrorCode MatCeedContextReferenceCopy(MatCeedContext ctx, MatCeedContext *ctx_copy) {
1454   PetscFunctionBeginUser;
1455   PetscCall(MatCeedContextReference(ctx));
1456   PetscCall(MatCeedContextDestroy(*ctx_copy));
1457   *ctx_copy = ctx;
1458   PetscFunctionReturn(PETSC_SUCCESS);
1459 }
1460 
1461 /**
1462   @brief Destroy context data for operator application.
1463 
1464   Collective across MPI processes.
1465 
1466   @param[in,out]  ctx  Context data for operator evaluation
1467 
1468   @return An error code: 0 - success, otherwise - failure
1469 **/
1470 PetscErrorCode MatCeedContextDestroy(MatCeedContext ctx) {
1471   PetscFunctionBeginUser;
1472   if (!ctx || --ctx->ref_count > 0) PetscFunctionReturn(PETSC_SUCCESS);
1473 
1474   // PETSc objects
1475   PetscCall(DMDestroy(&ctx->dm_x));
1476   PetscCall(DMDestroy(&ctx->dm_y));
1477   PetscCall(VecDestroy(&ctx->X_loc));
1478   PetscCall(VecDestroy(&ctx->Y_loc_transpose));
1479   PetscCall(MatDestroy(&ctx->mat_assembled_full_internal));
1480   PetscCall(MatDestroy(&ctx->mat_assembled_pbd_internal));
1481   PetscCall(PetscFree(ctx->coo_mat_type));
1482   PetscCall(PetscFree(ctx->mats_assembled_full));
1483   PetscCall(PetscFree(ctx->mats_assembled_pbd));
1484 
1485   // libCEED objects
1486   PetscCallCeed(ctx->ceed, CeedVectorDestroy(&ctx->x_loc));
1487   PetscCallCeed(ctx->ceed, CeedVectorDestroy(&ctx->y_loc));
1488   PetscCallCeed(ctx->ceed, CeedVectorDestroy(&ctx->coo_values_full));
1489   PetscCallCeed(ctx->ceed, CeedVectorDestroy(&ctx->coo_values_pbd));
1490   PetscCallCeed(ctx->ceed, CeedOperatorDestroy(&ctx->op_mult));
1491   PetscCallCeed(ctx->ceed, CeedOperatorDestroy(&ctx->op_mult_transpose));
1492   PetscCheck(CeedDestroy(&ctx->ceed) == CEED_ERROR_SUCCESS, PETSC_COMM_SELF, PETSC_ERR_LIB, "destroying libCEED context object failed");
1493 
1494   // Deallocate
1495   ctx->is_destroyed = PETSC_TRUE;  // Flag as destroyed in case someone has stale ref
1496   PetscCall(PetscFree(ctx));
1497   PetscFunctionReturn(PETSC_SUCCESS);
1498 }
1499 
1500 /**
1501   @brief Compute the diagonal of an operator via libCEED.
1502 
1503   Collective across MPI processes.
1504 
1505   @param[in]   A  `MATCEED`
1506   @param[out]  D  Vector holding operator diagonal
1507 
1508   @return An error code: 0 - success, otherwise - failure
1509 **/
1510 PetscErrorCode MatGetDiagonal_Ceed(Mat A, Vec D) {
1511   PetscMemType   mem_type;
1512   Vec            D_loc;
1513   MatCeedContext ctx;
1514 
1515   PetscFunctionBeginUser;
1516   PetscCall(MatShellGetContext(A, &ctx));
1517 
1518   // Place PETSc vector in libCEED vector
1519   PetscCall(PetscLogEventBegin(MATCEED_ASSEMBLE_DIAGONAL, A, D, NULL, NULL));
1520   PetscCall(DMGetLocalVector(ctx->dm_x, &D_loc));
1521   PetscCall(VecPetscToCeed(D_loc, &mem_type, ctx->x_loc));
1522 
1523   // Compute Diagonal
1524   PetscCall(PetscLogEventBegin(MATCEED_ASSEMBLE_DIAGONAL_CEEDOP, A, D, NULL, NULL));
1525   PetscCallCeed(ctx->ceed, CeedOperatorLinearAssembleDiagonal(ctx->op_mult, ctx->x_loc, CEED_REQUEST_IMMEDIATE));
1526   PetscCall(PetscLogEventEnd(MATCEED_ASSEMBLE_DIAGONAL_CEEDOP, A, D, NULL, NULL));
1527 
1528   // Restore PETSc vector
1529   PetscCall(VecCeedToPetsc(ctx->x_loc, mem_type, D_loc));
1530 
1531   // Local-to-Global
1532   PetscCall(VecZeroEntries(D));
1533   PetscCall(DMLocalToGlobal(ctx->dm_x, D_loc, ADD_VALUES, D));
1534   PetscCall(DMRestoreLocalVector(ctx->dm_x, &D_loc));
1535   PetscCall(PetscLogEventEnd(MATCEED_ASSEMBLE_DIAGONAL, A, D, NULL, NULL));
1536   PetscFunctionReturn(PETSC_SUCCESS);
1537 }
1538 
1539 /**
1540   @brief Compute `A X = Y` for a `MATCEED`.
1541 
1542   Collective across MPI processes.
1543 
1544   @param[in]   A  `MATCEED`
1545   @param[in]   X  Input PETSc vector
1546   @param[out]  Y  Output PETSc vector
1547 
1548   @return An error code: 0 - success, otherwise - failure
1549 **/
1550 PetscErrorCode MatMult_Ceed(Mat A, Vec X, Vec Y) {
1551   MatCeedContext ctx;
1552 
1553   PetscFunctionBeginUser;
1554   PetscCall(MatShellGetContext(A, &ctx));
1555   PetscCall(PetscLogEventBegin(ctx->log_event_mult, A, X, Y, NULL));
1556 
1557   {
1558     PetscMemType x_mem_type, y_mem_type;
1559     Vec          X_loc = ctx->X_loc, Y_loc;
1560 
1561     // Get local vectors
1562     if (!ctx->X_loc) PetscCall(DMGetLocalVector(ctx->dm_x, &X_loc));
1563     PetscCall(DMGetLocalVector(ctx->dm_y, &Y_loc));
1564 
1565     // Global-to-local
1566     PetscCall(DMGlobalToLocal(ctx->dm_x, X, INSERT_VALUES, X_loc));
1567 
1568     // Setup libCEED vectors
1569     PetscCall(VecReadPetscToCeed(X_loc, &x_mem_type, ctx->x_loc));
1570     PetscCall(VecZeroEntries(Y_loc));
1571     PetscCall(VecPetscToCeed(Y_loc, &y_mem_type, ctx->y_loc));
1572 
1573     // Apply libCEED operator
1574     PetscCall(PetscLogGpuTimeBegin());
1575     PetscCall(PetscLogEventBegin(ctx->log_event_ceed_mult, A, X, Y, NULL));
1576     PetscCallCeed(ctx->ceed, CeedOperatorApplyAdd(ctx->op_mult, ctx->x_loc, ctx->y_loc, CEED_REQUEST_IMMEDIATE));
1577     PetscCall(PetscLogEventEnd(ctx->log_event_ceed_mult, A, X, Y, NULL));
1578     PetscCall(PetscLogGpuTimeEnd());
1579 
1580     // Restore PETSc vectors
1581     PetscCall(VecReadCeedToPetsc(ctx->x_loc, x_mem_type, X_loc));
1582     PetscCall(VecCeedToPetsc(ctx->y_loc, y_mem_type, Y_loc));
1583 
1584     // Local-to-global
1585     PetscCall(VecZeroEntries(Y));
1586     PetscCall(DMLocalToGlobal(ctx->dm_y, Y_loc, ADD_VALUES, Y));
1587 
1588     // Restore local vectors, as needed
1589     if (!ctx->X_loc) PetscCall(DMRestoreLocalVector(ctx->dm_x, &X_loc));
1590     PetscCall(DMRestoreLocalVector(ctx->dm_y, &Y_loc));
1591   }
1592 
1593   // Log flops
1594   if (PetscMemTypeDevice(ctx->mem_type)) PetscCall(PetscLogGpuFlops(ctx->flops_mult));
1595   else PetscCall(PetscLogFlops(ctx->flops_mult));
1596   PetscCall(PetscLogEventEnd(ctx->log_event_mult, A, X, Y, NULL));
1597   PetscFunctionReturn(PETSC_SUCCESS);
1598 }
1599 
1600 /**
1601   @brief Compute `A^T Y = X` for a `MATCEED`.
1602 
1603   Collective across MPI processes.
1604 
1605   @param[in]   A  `MATCEED`
1606   @param[in]   Y  Input PETSc vector
1607   @param[out]  X  Output PETSc vector
1608 
1609   @return An error code: 0 - success, otherwise - failure
1610 **/
1611 PetscErrorCode MatMultTranspose_Ceed(Mat A, Vec Y, Vec X) {
1612   MatCeedContext ctx;
1613 
1614   PetscFunctionBeginUser;
1615   PetscCall(MatShellGetContext(A, &ctx));
1616   PetscCall(PetscLogEventBegin(ctx->log_event_mult_transpose, A, Y, X, NULL));
1617 
1618   {
1619     PetscMemType x_mem_type, y_mem_type;
1620     Vec          X_loc, Y_loc = ctx->Y_loc_transpose;
1621 
1622     // Get local vectors
1623     if (!ctx->Y_loc_transpose) PetscCall(DMGetLocalVector(ctx->dm_y, &Y_loc));
1624     PetscCall(DMGetLocalVector(ctx->dm_x, &X_loc));
1625 
1626     // Global-to-local
1627     PetscCall(DMGlobalToLocal(ctx->dm_y, Y, INSERT_VALUES, Y_loc));
1628 
1629     // Setup libCEED vectors
1630     PetscCall(VecReadPetscToCeed(Y_loc, &y_mem_type, ctx->y_loc));
1631     PetscCall(VecZeroEntries(X_loc));
1632     PetscCall(VecPetscToCeed(X_loc, &x_mem_type, ctx->x_loc));
1633 
1634     // Apply libCEED operator
1635     PetscCall(PetscLogGpuTimeBegin());
1636     PetscCall(PetscLogEventBegin(ctx->log_event_ceed_mult_transpose, A, Y, X, NULL));
1637     PetscCallCeed(ctx->ceed, CeedOperatorApplyAdd(ctx->op_mult_transpose, ctx->y_loc, ctx->x_loc, CEED_REQUEST_IMMEDIATE));
1638     PetscCall(PetscLogEventEnd(ctx->log_event_ceed_mult_transpose, A, Y, X, NULL));
1639     PetscCall(PetscLogGpuTimeEnd());
1640 
1641     // Restore PETSc vectors
1642     PetscCall(VecReadCeedToPetsc(ctx->y_loc, y_mem_type, Y_loc));
1643     PetscCall(VecCeedToPetsc(ctx->x_loc, x_mem_type, X_loc));
1644 
1645     // Local-to-global
1646     PetscCall(VecZeroEntries(X));
1647     PetscCall(DMLocalToGlobal(ctx->dm_x, X_loc, ADD_VALUES, X));
1648 
1649     // Restore local vectors, as needed
1650     if (!ctx->Y_loc_transpose) PetscCall(DMRestoreLocalVector(ctx->dm_y, &Y_loc));
1651     PetscCall(DMRestoreLocalVector(ctx->dm_x, &X_loc));
1652   }
1653 
1654   // Log flops
1655   if (PetscMemTypeDevice(ctx->mem_type)) PetscCall(PetscLogGpuFlops(ctx->flops_mult_transpose));
1656   else PetscCall(PetscLogFlops(ctx->flops_mult_transpose));
1657   PetscCall(PetscLogEventEnd(ctx->log_event_mult_transpose, A, Y, X, NULL));
1658   PetscFunctionReturn(PETSC_SUCCESS);
1659 }
1660