xref: /honee/src/mat-ceed.c (revision a39f38e0d7729dd76b7e4da3e5c9f22d3148d2b4)
1 /// @file
2 /// MatCEED implementation
3 
4 #include <ceed.h>
5 #include <ceed/backend.h>
6 #include <mat-ceed-impl.h>
7 #include <mat-ceed.h>
8 #include <petsc-ceed-utils.h>
9 #include <petsc-ceed.h>
10 #include <petscdm.h>
11 #include <petscmat.h>
12 #include <stdbool.h>
13 #include <stdio.h>
14 #include <stdlib.h>
15 #include <string.h>
16 
17 PetscClassId  MATCEED_CLASSID;
18 PetscLogEvent MATCEED_MULT, MATCEED_MULT_CEEDOP, MATCEED_MULT_TRANSPOSE, MATCEED_MULT_TRANSPOSE_CEEDOP, MATCEED_ASSEMBLE_DIAGONAL,
19     MATCEED_ASSEMBLE_DIAGONAL_CEEDOP, MATCEED_SETUP_PBDIAGONAL, MATCEED_SETUP_PBDIAGONAL_CEEDOP, MATCEED_ASSEMBLE_PBDIAGONAL,
20     MATCEED_ASSEMBLE_PBDIAGONAL_CEEDOP, MATCEED_SETUP_FULL, MATCEED_SETUP_FULL_CEEDOP, MATCEED_ASSEMBLE_FULL, MATCEED_ASSEMBLE_FULL_CEEDOP;
21 
22 /**
23   @brief Register MATCEED log events.
24 
25   Not collective across MPI processes.
26 
27   @return An error code: 0 - success, otherwise - failure
28 **/
29 static PetscErrorCode MatCeedRegisterLogEvents() {
30   static PetscBool registered = PETSC_FALSE;
31 
32   PetscFunctionBeginUser;
33   if (registered) PetscFunctionReturn(PETSC_SUCCESS);
34   PetscCall(PetscClassIdRegister("MatCEED", &MATCEED_CLASSID));
35   PetscCall(PetscLogEventRegister("MatCEEDMul", MATCEED_CLASSID, &MATCEED_MULT));
36   PetscCall(PetscLogEventRegister("MatCEEDMulCeed", MATCEED_CLASSID, &MATCEED_MULT_CEEDOP));
37   PetscCall(PetscLogEventRegister("MatCEEDMulT", MATCEED_CLASSID, &MATCEED_MULT_TRANSPOSE));
38   PetscCall(PetscLogEventRegister("MatCEEDMulTCeed", MATCEED_CLASSID, &MATCEED_MULT_TRANSPOSE_CEEDOP));
39   PetscCall(PetscLogEventRegister("MatCEEDAsmDiag", MATCEED_CLASSID, &MATCEED_ASSEMBLE_DIAGONAL));
40   PetscCall(PetscLogEventRegister("MatCEEDAsmPBDSU", MATCEED_CLASSID, &MATCEED_SETUP_PBDIAGONAL));
41   PetscCall(PetscLogEventRegister("MatCEEDAsmPBDSUCeed", MATCEED_CLASSID, &MATCEED_SETUP_PBDIAGONAL_CEEDOP));
42   PetscCall(PetscLogEventRegister("MatCEEDAsmPBD", MATCEED_CLASSID, &MATCEED_ASSEMBLE_PBDIAGONAL));
43   PetscCall(PetscLogEventRegister("MatCEEDAsmPBDCeed", MATCEED_CLASSID, &MATCEED_ASSEMBLE_PBDIAGONAL_CEEDOP));
44   PetscCall(PetscLogEventRegister("MatCEEDAsmSU", MATCEED_CLASSID, &MATCEED_SETUP_FULL));
45   PetscCall(PetscLogEventRegister("MatCEEDAsmSUCeed", MATCEED_CLASSID, &MATCEED_SETUP_FULL_CEEDOP));
46   PetscCall(PetscLogEventRegister("MatCEEDAsm", MATCEED_CLASSID, &MATCEED_ASSEMBLE_FULL));
47   PetscCall(PetscLogEventRegister("MatCEEDAsmCeed", MATCEED_CLASSID, &MATCEED_ASSEMBLE_FULL_CEEDOP));
48   registered = PETSC_TRUE;
49   PetscFunctionReturn(PETSC_SUCCESS);
50 }
51 
52 /**
53   @brief Assemble the point block diagonal of a `MATCEED` into a `MATAIJ` or similar.
54          The `mat_coo` preallocation is set to match the sparsity pattern of `mat_ceed`.
55          The caller is responsible for assuring the global and local sizes are compatible, otherwise this function will fail.
56 
57   Collective across MPI processes.
58 
59   @param[in]      mat_ceed  `MATCEED` to assemble
60   @param[in,out]  mat_coo   `MATAIJ` or similar to assemble into
61 
62   @return An error code: 0 - success, otherwise - failure
63 **/
static PetscErrorCode MatCeedAssemblePointBlockDiagonalCOO(Mat mat_ceed, Mat mat_coo) {
  MatCeedContext ctx;

  PetscFunctionBeginUser;
  PetscCall(MatShellGetContext(mat_ceed, &ctx));

  // Check if COO pattern set
  {
    PetscInt index = -1;

    // Linear scan; the list of previously pbd-assembled mats is expected to stay short
    for (PetscInt i = 0; i < ctx->num_mats_assembled_pbd; i++) {
      if (ctx->mats_assembled_pbd[i] == mat_coo) index = i;
    }
    if (index == -1) {
      PetscInt     *rows_petsc = NULL, *cols_petsc = NULL;
      CeedInt      *rows_ceed, *cols_ceed;
      PetscCount    num_entries;
      PetscLogStage stage_amg_setup;

      // -- Assemble sparsity pattern if mat hasn't been assembled before
      PetscCall(PetscLogStageGetId("MatCEED Asm Setup", &stage_amg_setup));
      if (stage_amg_setup == -1) {
        // Stage is registered lazily on first use
        PetscCall(PetscLogStageRegister("MatCEED Asm Setup", &stage_amg_setup));
      }
      PetscCall(PetscLogStagePush(stage_amg_setup));
      PetscCall(PetscLogEventBegin(MATCEED_SETUP_PBDIAGONAL, mat_ceed, mat_coo, NULL, NULL));
      PetscCall(PetscLogEventBegin(MATCEED_SETUP_PBDIAGONAL_CEEDOP, mat_ceed, mat_coo, NULL, NULL));
      PetscCallCeed(ctx->ceed, CeedOperatorLinearAssemblePointBlockDiagonalSymbolic(ctx->op_mult, &num_entries, &rows_ceed, &cols_ceed));
      PetscCall(PetscLogEventEnd(MATCEED_SETUP_PBDIAGONAL_CEEDOP, mat_ceed, mat_coo, NULL, NULL));
      // Convert CeedInt index arrays to PetscInt; the results are released with free()
      // below, which presumes malloc-based allocation in IntArrayCeedToPetsc — TODO confirm helper contract
      PetscCall(IntArrayCeedToPetsc(num_entries, &rows_ceed, &rows_petsc));
      PetscCall(IntArrayCeedToPetsc(num_entries, &cols_ceed, &cols_petsc));
      PetscCall(MatSetPreallocationCOOLocal(mat_coo, num_entries, rows_petsc, cols_petsc));
      free(rows_petsc);
      free(cols_petsc);
      // Cache a single CeedVector of COO values, shared by all pbd-assembled mats
      if (!ctx->coo_values_pbd) PetscCallCeed(ctx->ceed, CeedVectorCreate(ctx->ceed, num_entries, &ctx->coo_values_pbd));
      // Grow the assembled-mat list and record mat_coo so the pattern is set only once per Mat
      PetscCall(PetscRealloc(++ctx->num_mats_assembled_pbd * sizeof(Mat), &ctx->mats_assembled_pbd));
      ctx->mats_assembled_pbd[ctx->num_mats_assembled_pbd - 1] = mat_coo;
      PetscCall(PetscLogEventEnd(MATCEED_SETUP_PBDIAGONAL, mat_ceed, mat_coo, NULL, NULL));
      PetscCall(PetscLogStagePop());
    }
  }

  // Assemble mat_ceed
  PetscCall(PetscLogEventBegin(MATCEED_ASSEMBLE_PBDIAGONAL, mat_ceed, mat_coo, NULL, NULL));
  PetscCall(MatAssemblyBegin(mat_coo, MAT_FINAL_ASSEMBLY));
  {
    const CeedScalar *values;
    MatType           mat_type;
    CeedMemType       mem_type = CEED_MEM_HOST;
    PetscBool         is_spd, is_spd_known;

    // Keep the assembled values on the device when the target Mat type is device-resident
    PetscCall(MatGetType(mat_coo, &mat_type));
    if (strstr(mat_type, "cusparse")) mem_type = CEED_MEM_DEVICE;
    else if (strstr(mat_type, "kokkos")) mem_type = CEED_MEM_DEVICE;
    else mem_type = CEED_MEM_HOST;

    PetscCall(PetscLogEventBegin(MATCEED_ASSEMBLE_PBDIAGONAL_CEEDOP, mat_ceed, mat_coo, NULL, NULL));
    PetscCallCeed(ctx->ceed, CeedOperatorLinearAssemblePointBlockDiagonal(ctx->op_mult, ctx->coo_values_pbd, CEED_REQUEST_IMMEDIATE));
    PetscCall(PetscLogEventEnd(MATCEED_ASSEMBLE_PBDIAGONAL_CEEDOP, mat_ceed, mat_coo, NULL, NULL));
    PetscCallCeed(ctx->ceed, CeedVectorGetArrayRead(ctx->coo_values_pbd, mem_type, &values));
    PetscCall(MatSetValuesCOO(mat_coo, values, INSERT_VALUES));
    // Propagate SPD information to the assembled Mat when it is known on the shell
    PetscCall(MatIsSPDKnown(mat_ceed, &is_spd_known, &is_spd));
    if (is_spd_known) PetscCall(MatSetOption(mat_coo, MAT_SPD, is_spd));
    PetscCallCeed(ctx->ceed, CeedVectorRestoreArrayRead(ctx->coo_values_pbd, &values));
  }
  PetscCall(MatAssemblyEnd(mat_coo, MAT_FINAL_ASSEMBLY));
  PetscCall(PetscLogEventEnd(MATCEED_ASSEMBLE_PBDIAGONAL, mat_ceed, mat_coo, NULL, NULL));
  PetscFunctionReturn(PETSC_SUCCESS);
}
133 
134 /**
135   @brief Assemble inner `Mat` for diagonal `PC` operations
136 
137   Collective across MPI processes.
138 
139   @param[in]   mat_ceed      `MATCEED` to invert
140   @param[in]   use_ceed_pbd  Boolean flag to use libCEED PBD assembly
141   @param[out]  mat_inner     Inner `Mat` for diagonal operations
142 
143   @return An error code: 0 - success, otherwise - failure
144 **/
145 static PetscErrorCode MatCeedAssembleInnerBlockDiagonalMat(Mat mat_ceed, PetscBool use_ceed_pbd, Mat *mat_inner) {
146   MatCeedContext ctx;
147 
148   PetscFunctionBeginUser;
149   PetscCall(MatShellGetContext(mat_ceed, &ctx));
150   if (use_ceed_pbd) {
151     // Check if COO pattern set
152     if (!ctx->mat_assembled_pbd_internal) PetscCall(MatCeedCreateMatCOO(mat_ceed, &ctx->mat_assembled_pbd_internal));
153 
154     // Assemble mat_assembled_full_internal
155     PetscCall(MatCeedAssemblePointBlockDiagonalCOO(mat_ceed, ctx->mat_assembled_pbd_internal));
156     if (mat_inner) *mat_inner = ctx->mat_assembled_pbd_internal;
157   } else {
158     // Check if COO pattern set
159     if (!ctx->mat_assembled_full_internal) PetscCall(MatCeedCreateMatCOO(mat_ceed, &ctx->mat_assembled_full_internal));
160 
161     // Assemble mat_assembled_full_internal
162     PetscCall(MatCeedAssembleCOO(mat_ceed, ctx->mat_assembled_full_internal));
163     if (mat_inner) *mat_inner = ctx->mat_assembled_full_internal;
164   }
165   PetscFunctionReturn(PETSC_SUCCESS);
166 }
167 
168 /**
169   @brief Get `MATCEED` diagonal block for Jacobi.
170 
171   Collective across MPI processes.
172 
173   @param[in]   mat_ceed   `MATCEED` to invert
174   @param[out]  mat_block  The diagonal block matrix
175 
176   @return An error code: 0 - success, otherwise - failure
177 **/
178 static PetscErrorCode MatGetDiagonalBlock_Ceed(Mat mat_ceed, Mat *mat_block) {
179   Mat            mat_inner = NULL;
180   MatCeedContext ctx;
181 
182   PetscFunctionBeginUser;
183   PetscCall(MatShellGetContext(mat_ceed, &ctx));
184 
185   // Assemble inner mat if needed
186   PetscCall(MatCeedAssembleInnerBlockDiagonalMat(mat_ceed, ctx->is_ceed_pbd_valid, &mat_inner));
187 
188   // Get block diagonal
189   PetscCall(MatGetDiagonalBlock(mat_inner, mat_block));
190   PetscFunctionReturn(PETSC_SUCCESS);
191 }
192 
193 /**
194   @brief Invert `MATCEED` diagonal block for Jacobi.
195 
196   Collective across MPI processes.
197 
198   @param[in]   mat_ceed  `MATCEED` to invert
199   @param[out]  values    The block inverses in column major order
200 
201   @return An error code: 0 - success, otherwise - failure
202 **/
203 static PetscErrorCode MatInvertBlockDiagonal_Ceed(Mat mat_ceed, const PetscScalar **values) {
204   Mat            mat_inner = NULL;
205   MatCeedContext ctx;
206 
207   PetscFunctionBeginUser;
208   PetscCall(MatShellGetContext(mat_ceed, &ctx));
209 
210   // Assemble inner mat if needed
211   PetscCall(MatCeedAssembleInnerBlockDiagonalMat(mat_ceed, ctx->is_ceed_pbd_valid, &mat_inner));
212 
213   // Invert PB diagonal
214   PetscCall(MatInvertBlockDiagonal(mat_inner, values));
215   PetscFunctionReturn(PETSC_SUCCESS);
216 }
217 
218 /**
219   @brief Invert `MATCEED` variable diagonal block for Jacobi.
220 
221   Collective across MPI processes.
222 
223   @param[in]   mat_ceed     `MATCEED` to invert
224   @param[in]   num_blocks   The number of blocks on the process
225   @param[in]   block_sizes  The size of each block on the process
226   @param[out]  values       The block inverses in column major order
227 
228   @return An error code: 0 - success, otherwise - failure
229 **/
230 static PetscErrorCode MatInvertVariableBlockDiagonal_Ceed(Mat mat_ceed, PetscInt num_blocks, const PetscInt *block_sizes, PetscScalar *values) {
231   Mat            mat_inner = NULL;
232   MatCeedContext ctx;
233 
234   PetscFunctionBeginUser;
235   PetscCall(MatShellGetContext(mat_ceed, &ctx));
236 
237   // Assemble inner mat if needed
238   PetscCall(MatCeedAssembleInnerBlockDiagonalMat(mat_ceed, ctx->is_ceed_vpbd_valid, &mat_inner));
239 
240   // Invert PB diagonal
241   PetscCall(MatInvertVariableBlockDiagonal(mat_inner, num_blocks, block_sizes, values));
242   PetscFunctionReturn(PETSC_SUCCESS);
243 }
244 
245 /**
246   @brief View `MATCEED`.
247 
248   Collective across MPI processes.
249 
250   @param[in]   mat_ceed  `MATCEED` to view
251   @param[in]   viewer    The visualization context
252 
253   @return An error code: 0 - success, otherwise - failure
254 **/
255 static PetscErrorCode MatView_Ceed(Mat mat_ceed, PetscViewer viewer) {
256   PetscBool         is_ascii;
257   PetscViewerFormat format;
258   PetscMPIInt       size, rank;
259   MatCeedContext    ctx;
260 
261   PetscFunctionBeginUser;
262   PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2);
263   PetscCall(MatShellGetContext(mat_ceed, &ctx));
264   if (!viewer) PetscCall(PetscViewerASCIIGetStdout(PetscObjectComm((PetscObject)mat_ceed), &viewer));
265 
266   PetscCall(PetscViewerGetFormat(viewer, &format));
267   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)mat_ceed), &size));
268   if (size == 1 && format == PETSC_VIEWER_LOAD_BALANCE) PetscFunctionReturn(PETSC_SUCCESS);
269 
270   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat_ceed), &rank));
271   if (rank != 0) PetscFunctionReturn(PETSC_SUCCESS);
272 
273   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &is_ascii));
274   {
275     PetscBool is_detailed     = format == PETSC_VIEWER_ASCII_INFO_DETAIL;
276     char      rank_string[16] = {'\0'};
277     FILE     *file;
278 
279     PetscCall(PetscViewerASCIIPrintf(viewer, "MatCEED:\n  Default COO MatType:%s\n", ctx->coo_mat_type));
280     PetscCall(PetscViewerASCIIGetPointer(viewer, &file));
281     PetscCall(PetscSNPrintf(rank_string, 16, "on Rank %d", rank));
282     PetscCall(PetscViewerASCIIPrintf(viewer, " CeedOperator Apply %s:\n", is_detailed ? rank_string : "Summary"));
283     if (is_detailed) PetscCallCeed(ctx->ceed, CeedOperatorView(ctx->op_mult, file));
284     else PetscCallCeed(ctx->ceed, CeedOperatorViewTerse(ctx->op_mult, file));
285     if (ctx->op_mult_transpose) {
286       PetscCall(PetscViewerASCIIPrintf(viewer, "  CeedOperator ApplyTranspose %s:\n", is_detailed ? rank_string : "Summary"));
287       if (is_detailed) PetscCallCeed(ctx->ceed, CeedOperatorView(ctx->op_mult_transpose, file));
288       else PetscCallCeed(ctx->ceed, CeedOperatorViewTerse(ctx->op_mult_transpose, file));
289     }
290   }
291   PetscFunctionReturn(PETSC_SUCCESS);
292 }
293 
294 // -----------------------------------------------------------------------------
295 // MatCeed
296 // -----------------------------------------------------------------------------
297 
298 /**
299   @brief Create PETSc `Mat` from libCEED operators.
300 
301   Collective across MPI processes.
302 
303   @param[in]   dm_x                      Input `DM`
304   @param[in]   dm_y                      Output `DM`
305   @param[in]   op_mult                   `CeedOperator` for forward evaluation
306   @param[in]   op_mult_transpose         `CeedOperator` for transpose evaluation
307   @param[out]  mat                        New MatCeed
308 
309   @return An error code: 0 - success, otherwise - failure
310 **/
311 PetscErrorCode MatCreateCeed(DM dm_x, DM dm_y, CeedOperator op_mult, CeedOperator op_mult_transpose, Mat *mat) {
312   PetscInt       X_l_size, X_g_size, Y_l_size, Y_g_size;
313   VecType        vec_type;
314   MatCeedContext ctx;
315 
316   PetscFunctionBeginUser;
317   PetscCall(MatCeedRegisterLogEvents());
318 
319   // Collect context data
320   PetscCall(DMGetVecType(dm_x, &vec_type));
321   {
322     Vec X;
323 
324     PetscCall(DMGetGlobalVector(dm_x, &X));
325     PetscCall(VecGetSize(X, &X_g_size));
326     PetscCall(VecGetLocalSize(X, &X_l_size));
327     PetscCall(DMRestoreGlobalVector(dm_x, &X));
328   }
329   if (dm_y) {
330     Vec Y;
331 
332     PetscCall(DMGetGlobalVector(dm_y, &Y));
333     PetscCall(VecGetSize(Y, &Y_g_size));
334     PetscCall(VecGetLocalSize(Y, &Y_l_size));
335     PetscCall(DMRestoreGlobalVector(dm_y, &Y));
336   } else {
337     dm_y     = dm_x;
338     Y_g_size = X_g_size;
339     Y_l_size = X_l_size;
340   }
341 
342   // Create context
343   {
344     Vec X_loc, Y_loc_transpose = NULL;
345 
346     PetscCall(DMCreateLocalVector(dm_x, &X_loc));
347     PetscCall(VecZeroEntries(X_loc));
348     if (op_mult_transpose) {
349       PetscCall(DMCreateLocalVector(dm_y, &Y_loc_transpose));
350       PetscCall(VecZeroEntries(Y_loc_transpose));
351     }
352     PetscCall(MatCeedContextCreate(dm_x, dm_y, X_loc, Y_loc_transpose, op_mult, op_mult_transpose, MATCEED_MULT, MATCEED_MULT_TRANSPOSE,
353                                    MATCEED_MULT_CEEDOP, MATCEED_MULT_TRANSPOSE_CEEDOP, &ctx));
354     PetscCall(VecDestroy(&X_loc));
355     PetscCall(VecDestroy(&Y_loc_transpose));
356   }
357 
358   // Create mat
359   PetscCall(MatCreateShell(PetscObjectComm((PetscObject)dm_x), Y_l_size, X_l_size, Y_g_size, X_g_size, ctx, mat));
360   PetscCall(PetscObjectChangeTypeName((PetscObject)*mat, MATCEED));
361   // -- Set block and variable block sizes
362   if (dm_x == dm_y) {
363     MatType dm_mat_type, dm_mat_type_copy;
364     Mat     temp_mat;
365 
366     PetscCall(DMGetMatType(dm_x, &dm_mat_type));
367     PetscCall(PetscStrallocpy(dm_mat_type, (char **)&dm_mat_type_copy));
368     PetscCall(DMSetMatType(dm_x, MATAIJ));
369     PetscCall(DMCreateMatrix(dm_x, &temp_mat));
370     PetscCall(DMSetMatType(dm_x, dm_mat_type_copy));
371     PetscCall(PetscFree(dm_mat_type_copy));
372 
373     {
374       PetscInt        block_size, num_blocks, max_vblock_size = PETSC_INT_MAX;
375       const PetscInt *vblock_sizes;
376 
377       // -- Get block sizes
378       PetscCall(MatGetBlockSize(temp_mat, &block_size));
379       PetscCall(MatGetVariableBlockSizes(temp_mat, &num_blocks, &vblock_sizes));
380       {
381         PetscInt local_min_max[2] = {0}, global_min_max[2] = {0, PETSC_INT_MAX};
382 
383         for (PetscInt i = 0; i < num_blocks; i++) local_min_max[1] = PetscMax(local_min_max[1], vblock_sizes[i]);
384         PetscCall(PetscGlobalMinMaxInt(PetscObjectComm((PetscObject)dm_x), local_min_max, global_min_max));
385         max_vblock_size = global_min_max[1];
386       }
387 
388       // -- Copy block sizes
389       if (block_size > 1) PetscCall(MatSetBlockSize(*mat, block_size));
390       if (num_blocks) PetscCall(MatSetVariableBlockSizes(*mat, num_blocks, (PetscInt *)vblock_sizes));
391 
392       // -- Check libCEED compatibility
393       {
394         bool is_composite;
395 
396         ctx->is_ceed_pbd_valid  = PETSC_TRUE;
397         ctx->is_ceed_vpbd_valid = PETSC_TRUE;
398         PetscCallCeed(ctx->ceed, CeedOperatorIsComposite(op_mult, &is_composite));
399         if (is_composite) {
400           CeedInt       num_sub_operators;
401           CeedOperator *sub_operators;
402 
403           PetscCallCeed(ctx->ceed, CeedCompositeOperatorGetNumSub(op_mult, &num_sub_operators));
404           PetscCallCeed(ctx->ceed, CeedCompositeOperatorGetSubList(op_mult, &sub_operators));
405           for (CeedInt i = 0; i < num_sub_operators; i++) {
406             CeedInt                  num_bases, num_comp;
407             CeedBasis               *active_bases;
408             CeedOperatorAssemblyData assembly_data;
409 
410             PetscCallCeed(ctx->ceed, CeedOperatorGetOperatorAssemblyData(sub_operators[i], &assembly_data));
411             PetscCallCeed(ctx->ceed, CeedOperatorAssemblyDataGetBases(assembly_data, &num_bases, &active_bases, NULL, NULL, NULL, NULL));
412             PetscCallCeed(ctx->ceed, CeedBasisGetNumComponents(active_bases[0], &num_comp));
413             if (num_bases > 1) {
414               ctx->is_ceed_pbd_valid  = PETSC_FALSE;
415               ctx->is_ceed_vpbd_valid = PETSC_FALSE;
416             }
417             if (num_comp != block_size) ctx->is_ceed_pbd_valid = PETSC_FALSE;
418             if (num_comp < max_vblock_size) ctx->is_ceed_vpbd_valid = PETSC_FALSE;
419           }
420         } else {
421           // LCOV_EXCL_START
422           CeedInt                  num_bases, num_comp;
423           CeedBasis               *active_bases;
424           CeedOperatorAssemblyData assembly_data;
425 
426           PetscCallCeed(ctx->ceed, CeedOperatorGetOperatorAssemblyData(op_mult, &assembly_data));
427           PetscCallCeed(ctx->ceed, CeedOperatorAssemblyDataGetBases(assembly_data, &num_bases, &active_bases, NULL, NULL, NULL, NULL));
428           PetscCallCeed(ctx->ceed, CeedBasisGetNumComponents(active_bases[0], &num_comp));
429           if (num_bases > 1) {
430             ctx->is_ceed_pbd_valid  = PETSC_FALSE;
431             ctx->is_ceed_vpbd_valid = PETSC_FALSE;
432           }
433           if (num_comp != block_size) ctx->is_ceed_pbd_valid = PETSC_FALSE;
434           if (num_comp < max_vblock_size) ctx->is_ceed_vpbd_valid = PETSC_FALSE;
435           // LCOV_EXCL_STOP
436         }
437         {
438           PetscInt local_is_valid[2], global_is_valid[2];
439 
440           local_is_valid[0] = local_is_valid[1] = ctx->is_ceed_pbd_valid;
441           PetscCall(PetscGlobalMinMaxInt(PetscObjectComm((PetscObject)dm_x), local_is_valid, global_is_valid));
442           ctx->is_ceed_pbd_valid = global_is_valid[0];
443           local_is_valid[0] = local_is_valid[1] = ctx->is_ceed_vpbd_valid;
444           PetscCall(PetscGlobalMinMaxInt(PetscObjectComm((PetscObject)dm_x), local_is_valid, global_is_valid));
445           ctx->is_ceed_vpbd_valid = global_is_valid[0];
446         }
447       }
448     }
449     PetscCall(MatDestroy(&temp_mat));
450   }
451   // -- Set internal mat type
452   {
453     VecType vec_type;
454     MatType coo_mat_type;
455 
456     PetscCall(VecGetType(ctx->X_loc, &vec_type));
457     if (strstr(vec_type, VECCUDA)) coo_mat_type = MATAIJCUSPARSE;
458     else if (strstr(vec_type, VECKOKKOS)) coo_mat_type = MATAIJKOKKOS;
459     else coo_mat_type = MATAIJ;
460     PetscCall(PetscStrallocpy(coo_mat_type, &ctx->coo_mat_type));
461   }
462   // -- Set mat operations
463   PetscCall(MatShellSetContextDestroy(*mat, (PetscErrorCode(*)(void *))MatCeedContextDestroy));
464   PetscCall(MatShellSetOperation(*mat, MATOP_VIEW, (void (*)(void))MatView_Ceed));
465   PetscCall(MatShellSetOperation(*mat, MATOP_MULT, (void (*)(void))MatMult_Ceed));
466   if (op_mult_transpose) PetscCall(MatShellSetOperation(*mat, MATOP_MULT_TRANSPOSE, (void (*)(void))MatMultTranspose_Ceed));
467   PetscCall(MatShellSetOperation(*mat, MATOP_GET_DIAGONAL, (void (*)(void))MatGetDiagonal_Ceed));
468   PetscCall(MatShellSetOperation(*mat, MATOP_GET_DIAGONAL_BLOCK, (void (*)(void))MatGetDiagonalBlock_Ceed));
469   PetscCall(MatShellSetOperation(*mat, MATOP_INVERT_BLOCK_DIAGONAL, (void (*)(void))MatInvertBlockDiagonal_Ceed));
470   PetscCall(MatShellSetOperation(*mat, MATOP_INVERT_VBLOCK_DIAGONAL, (void (*)(void))MatInvertVariableBlockDiagonal_Ceed));
471   PetscCall(MatShellSetVecType(*mat, vec_type));
472   PetscFunctionReturn(PETSC_SUCCESS);
473 }
474 
475 /**
476   @brief Copy `MATCEED` into a compatible `Mat` with type `MatShell` or `MATCEED`.
477 
478   Collective across MPI processes.
479 
480   @param[in]   mat_ceed   `MATCEED` to copy from
481   @param[out]  mat_other  `MatShell` or `MATCEED` to copy into
482 
483   @return An error code: 0 - success, otherwise - failure
484 **/
485 PetscErrorCode MatCeedCopy(Mat mat_ceed, Mat mat_other) {
486   PetscFunctionBeginUser;
487   PetscCall(MatCeedRegisterLogEvents());
488 
489   // Check type compatibility
490   {
491     PetscBool is_matceed = PETSC_FALSE, is_matshell = PETSC_FALSE;
492     MatType   mat_type_ceed, mat_type_other;
493 
494     PetscCall(MatGetType(mat_ceed, &mat_type_ceed));
495     PetscCall(PetscStrcmp(mat_type_ceed, MATCEED, &is_matceed));
496     PetscCheck(is_matceed, PETSC_COMM_SELF, PETSC_ERR_LIB, "mat_ceed must have type " MATCEED);
497     PetscCall(MatGetType(mat_other, &mat_type_other));
498     PetscCall(PetscStrcmp(mat_type_other, MATCEED, &is_matceed));
499     PetscCall(PetscStrcmp(mat_type_other, MATSHELL, &is_matceed));
500     PetscCheck(is_matceed || is_matshell, PETSC_COMM_SELF, PETSC_ERR_LIB, "mat_other must have type " MATCEED " or " MATSHELL);
501   }
502 
503   // Check dimension compatibility
504   {
505     PetscInt X_l_ceed_size, X_g_ceed_size, Y_l_ceed_size, Y_g_ceed_size, X_l_other_size, X_g_other_size, Y_l_other_size, Y_g_other_size;
506 
507     PetscCall(MatGetSize(mat_ceed, &Y_g_ceed_size, &X_g_ceed_size));
508     PetscCall(MatGetLocalSize(mat_ceed, &Y_l_ceed_size, &X_l_ceed_size));
509     PetscCall(MatGetSize(mat_ceed, &Y_g_other_size, &X_g_other_size));
510     PetscCall(MatGetLocalSize(mat_ceed, &Y_l_other_size, &X_l_other_size));
511     PetscCheck((Y_g_ceed_size == Y_g_other_size) && (X_g_ceed_size == X_g_other_size) && (Y_l_ceed_size == Y_l_other_size) &&
512                    (X_l_ceed_size == X_l_other_size),
513                PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ,
514                "mat_ceed and mat_other must have compatible sizes; found mat_ceed (Global: %" PetscInt_FMT ", %" PetscInt_FMT
515                "; Local: %" PetscInt_FMT ", %" PetscInt_FMT ") mat_other (Global: %" PetscInt_FMT ", %" PetscInt_FMT "; Local: %" PetscInt_FMT
516                ", %" PetscInt_FMT ")",
517                Y_g_ceed_size, X_g_ceed_size, Y_l_ceed_size, X_l_ceed_size, Y_g_other_size, X_g_other_size, Y_l_other_size, X_l_other_size);
518   }
519 
520   // Convert
521   {
522     VecType        vec_type;
523     MatCeedContext ctx;
524 
525     PetscCall(PetscObjectChangeTypeName((PetscObject)mat_other, MATCEED));
526     PetscCall(MatShellGetContext(mat_ceed, &ctx));
527     PetscCall(MatCeedContextReference(ctx));
528     PetscCall(MatShellSetContext(mat_other, ctx));
529     PetscCall(MatShellSetContextDestroy(mat_other, (PetscErrorCode(*)(void *))MatCeedContextDestroy));
530     PetscCall(MatShellSetOperation(mat_other, MATOP_VIEW, (void (*)(void))MatView_Ceed));
531     PetscCall(MatShellSetOperation(mat_other, MATOP_MULT, (void (*)(void))MatMult_Ceed));
532     if (ctx->op_mult_transpose) PetscCall(MatShellSetOperation(mat_other, MATOP_MULT_TRANSPOSE, (void (*)(void))MatMultTranspose_Ceed));
533     PetscCall(MatShellSetOperation(mat_other, MATOP_GET_DIAGONAL, (void (*)(void))MatGetDiagonal_Ceed));
534     PetscCall(MatShellSetOperation(mat_other, MATOP_GET_DIAGONAL_BLOCK, (void (*)(void))MatGetDiagonalBlock_Ceed));
535     PetscCall(MatShellSetOperation(mat_other, MATOP_INVERT_BLOCK_DIAGONAL, (void (*)(void))MatInvertBlockDiagonal_Ceed));
536     PetscCall(MatShellSetOperation(mat_other, MATOP_INVERT_VBLOCK_DIAGONAL, (void (*)(void))MatInvertVariableBlockDiagonal_Ceed));
537     {
538       PetscInt block_size;
539 
540       PetscCall(MatGetBlockSize(mat_ceed, &block_size));
541       if (block_size > 1) PetscCall(MatSetBlockSize(mat_other, block_size));
542     }
543     {
544       PetscInt        num_blocks;
545       const PetscInt *block_sizes;
546 
547       PetscCall(MatGetVariableBlockSizes(mat_ceed, &num_blocks, &block_sizes));
548       if (num_blocks) PetscCall(MatSetVariableBlockSizes(mat_other, num_blocks, (PetscInt *)block_sizes));
549     }
550     PetscCall(DMGetVecType(ctx->dm_x, &vec_type));
551     PetscCall(MatShellSetVecType(mat_other, vec_type));
552   }
553   PetscFunctionReturn(PETSC_SUCCESS);
554 }
555 
556 /**
  @brief Mark `CeedQFunction` data as updated and the `CeedQFunction` as requiring re-assembly for a `MatCEED`.
558 
559   Collective across MPI processes.
560 
561   @param[in]   mat_ceed       `MATCEED`
  @param[in]   update_needed  Boolean flag indicating `CeedQFunction` update needed
563 
564   @return An error code: 0 - success, otherwise - failure
565 **/
566 PetscErrorCode MatCeedSetAssemblyDataUpdateNeeded(Mat mat_ceed, PetscBool update_needed) {
567   MatCeedContext ctx;
568 
569   PetscFunctionBeginUser;
570   PetscCall(MatShellGetContext(mat_ceed, &ctx));
571   PetscCallCeed(ctx->ceed, CeedOperatorSetQFunctionAssemblyDataUpdateNeeded(ctx->op_mult, update_needed));
572   if (ctx->op_mult_transpose) {
573     PetscCallCeed(ctx->ceed, CeedOperatorSetQFunctionAssemblyDataUpdateNeeded(ctx->op_mult_transpose, update_needed));
574   }
575   PetscFunctionReturn(PETSC_SUCCESS);
576 }
577 
578 /**
579   @brief Setup a `Mat` with the same COO pattern as a `MatCEED`.
580 
581   Collective across MPI processes.
582 
583   @param[in]   mat_ceed  `MATCEED`
584   @param[out]  mat_coo   Sparse `Mat` with same COO pattern
585 
586   @return An error code: 0 - success, otherwise - failure
587 **/
588 PetscErrorCode MatCeedCreateMatCOO(Mat mat_ceed, Mat *mat_coo) {
589   MatCeedContext ctx;
590 
591   PetscFunctionBeginUser;
592   PetscCall(MatShellGetContext(mat_ceed, &ctx));
593 
594   PetscCheck(ctx->dm_x == ctx->dm_y, PetscObjectComm((PetscObject)mat_ceed), PETSC_ERR_SUP, "COO assembly only supported for MATCEED on a single DM");
595 
596   // Check cl mat type
597   {
598     PetscBool is_coo_mat_type_cl = PETSC_FALSE;
599     char      coo_mat_type_cl[64];
600 
601     // Check for specific CL coo mat type for this Mat
602     {
603       const char *mat_ceed_prefix = NULL;
604 
605       PetscCall(MatGetOptionsPrefix(mat_ceed, &mat_ceed_prefix));
606       PetscOptionsBegin(PetscObjectComm((PetscObject)mat_ceed), mat_ceed_prefix, "", NULL);
607       PetscCall(PetscOptionsFList("-ceed_coo_mat_type", "Default MATCEED COO assembly MatType", NULL, MatList, coo_mat_type_cl, coo_mat_type_cl,
608                                   sizeof(coo_mat_type_cl), &is_coo_mat_type_cl));
609       PetscOptionsEnd();
610       if (is_coo_mat_type_cl) {
611         PetscCall(PetscFree(ctx->coo_mat_type));
612         PetscCall(PetscStrallocpy(coo_mat_type_cl, &ctx->coo_mat_type));
613       }
614     }
615   }
616 
617   // Create sparse matrix
618   {
619     MatType dm_mat_type, dm_mat_type_copy;
620 
621     PetscCall(DMGetMatType(ctx->dm_x, &dm_mat_type));
622     PetscCall(PetscStrallocpy(dm_mat_type, (char **)&dm_mat_type_copy));
623     PetscCall(DMSetMatType(ctx->dm_x, ctx->coo_mat_type));
624     PetscCall(DMCreateMatrix(ctx->dm_x, mat_coo));
625     PetscCall(DMSetMatType(ctx->dm_x, dm_mat_type_copy));
626     PetscCall(PetscFree(dm_mat_type_copy));
627   }
628   PetscFunctionReturn(PETSC_SUCCESS);
629 }
630 
631 /**
632   @brief Setup the COO preallocation `MATCEED` into a `MATAIJ` or similar.
633          The caller is responsible for assuring the global and local sizes are compatible, otherwise this function will fail.
634 
635   Collective across MPI processes.
636 
637   @param[in]      mat_ceed  `MATCEED` to assemble
638   @param[in,out]  mat_coo   `MATAIJ` or similar to assemble into
639 
640   @return An error code: 0 - success, otherwise - failure
641 **/
642 PetscErrorCode MatCeedSetPreallocationCOO(Mat mat_ceed, Mat mat_coo) {
643   MatCeedContext ctx;
644 
645   PetscFunctionBeginUser;
646   PetscCall(MatShellGetContext(mat_ceed, &ctx));
647 
648   {
649     PetscInt     *rows_petsc = NULL, *cols_petsc = NULL;
650     CeedInt      *rows_ceed, *cols_ceed;
651     PetscCount    num_entries;
652     PetscLogStage stage_amg_setup;
653 
654     // -- Assemble sparsity pattern if mat hasn't been assembled before
655     PetscCall(PetscLogStageGetId("MatCEED Asm Setup", &stage_amg_setup));
656     if (stage_amg_setup == -1) {
657       PetscCall(PetscLogStageRegister("MatCEED Asm Setup", &stage_amg_setup));
658     }
659     PetscCall(PetscLogStagePush(stage_amg_setup));
660     PetscCall(PetscLogEventBegin(MATCEED_SETUP_FULL, mat_ceed, mat_coo, NULL, NULL));
661     PetscCall(PetscLogEventBegin(MATCEED_SETUP_FULL_CEEDOP, mat_ceed, mat_coo, NULL, NULL));
662     PetscCallCeed(ctx->ceed, CeedOperatorLinearAssembleSymbolic(ctx->op_mult, &num_entries, &rows_ceed, &cols_ceed));
663     PetscCall(PetscLogEventEnd(MATCEED_SETUP_FULL_CEEDOP, mat_ceed, mat_coo, NULL, NULL));
664     PetscCall(IntArrayCeedToPetsc(num_entries, &rows_ceed, &rows_petsc));
665     PetscCall(IntArrayCeedToPetsc(num_entries, &cols_ceed, &cols_petsc));
666     PetscCall(MatSetPreallocationCOOLocal(mat_coo, num_entries, rows_petsc, cols_petsc));
667     free(rows_petsc);
668     free(cols_petsc);
669     if (!ctx->coo_values_full) PetscCallCeed(ctx->ceed, CeedVectorCreate(ctx->ceed, num_entries, &ctx->coo_values_full));
670     PetscCall(PetscRealloc(++ctx->num_mats_assembled_full * sizeof(Mat), &ctx->mats_assembled_full));
671     ctx->mats_assembled_full[ctx->num_mats_assembled_full - 1] = mat_coo;
672     PetscCall(PetscLogEventEnd(MATCEED_SETUP_FULL, mat_ceed, mat_coo, NULL, NULL));
673     PetscCall(PetscLogStagePop());
674   }
675   PetscFunctionReturn(PETSC_SUCCESS);
676 }
677 
678 /**
679   @brief Assemble a `MATCEED` into a `MATAIJ` or similar.
680          The `mat_coo` preallocation is set to match the sparsity pattern of `mat_ceed`.
681          The caller is responsible for assuring the global and local sizes are compatible, otherwise this function will fail.
682 
683   Collective across MPI processes.
684 
685   @param[in]      mat_ceed  `MATCEED` to assemble
686   @param[in,out]  mat_coo   `MATAIJ` or similar to assemble into
687 
688   @return An error code: 0 - success, otherwise - failure
689 **/
690 PetscErrorCode MatCeedAssembleCOO(Mat mat_ceed, Mat mat_coo) {
691   MatCeedContext ctx;
692 
693   PetscFunctionBeginUser;
694   PetscCall(MatShellGetContext(mat_ceed, &ctx));
695 
696   // Set COO pattern if needed
697   {
698     CeedInt index = -1;
699 
700     for (PetscInt i = 0; i < ctx->num_mats_assembled_full; i++) {
701       if (ctx->mats_assembled_full[i] == mat_coo) index = i;
702     }
703     if (index == -1) PetscCall(MatCeedSetPreallocationCOO(mat_ceed, mat_coo));
704   }
705 
706   // Assemble mat_ceed
707   PetscCall(PetscLogEventBegin(MATCEED_ASSEMBLE_FULL, mat_ceed, mat_coo, NULL, NULL));
708   PetscCall(MatAssemblyBegin(mat_coo, MAT_FINAL_ASSEMBLY));
709   {
710     const CeedScalar *values;
711     MatType           mat_type;
712     CeedMemType       mem_type = CEED_MEM_HOST;
713     PetscBool         is_spd, is_spd_known;
714 
715     PetscCall(MatGetType(mat_coo, &mat_type));
716     if (strstr(mat_type, "cusparse")) mem_type = CEED_MEM_DEVICE;
717     else if (strstr(mat_type, "kokkos")) mem_type = CEED_MEM_DEVICE;
718     else mem_type = CEED_MEM_HOST;
719 
720     PetscCall(PetscLogEventBegin(MATCEED_ASSEMBLE_FULL_CEEDOP, mat_ceed, mat_coo, NULL, NULL));
721     PetscCallCeed(ctx->ceed, CeedOperatorLinearAssemble(ctx->op_mult, ctx->coo_values_full));
722     PetscCall(PetscLogEventEnd(MATCEED_ASSEMBLE_FULL_CEEDOP, mat_ceed, mat_coo, NULL, NULL));
723     PetscCallCeed(ctx->ceed, CeedVectorGetArrayRead(ctx->coo_values_full, mem_type, &values));
724     PetscCall(MatSetValuesCOO(mat_coo, values, INSERT_VALUES));
725     PetscCall(MatIsSPDKnown(mat_ceed, &is_spd_known, &is_spd));
726     if (is_spd_known) PetscCall(MatSetOption(mat_coo, MAT_SPD, is_spd));
727     PetscCallCeed(ctx->ceed, CeedVectorRestoreArrayRead(ctx->coo_values_full, &values));
728   }
729   PetscCall(MatAssemblyEnd(mat_coo, MAT_FINAL_ASSEMBLY));
730   PetscCall(PetscLogEventEnd(MATCEED_ASSEMBLE_FULL, mat_ceed, mat_coo, NULL, NULL));
731   PetscFunctionReturn(PETSC_SUCCESS);
732 }
733 
734 /**
735   @brief Set the current value of a context field for a `MatCEED`.
736 
737   Not collective across MPI processes.
738 
739   @param[in,out]  mat    `MatCEED`
740   @param[in]      name   Name of the context field
741   @param[in]      value  New context field value
742 
743   @return An error code: 0 - success, otherwise - failure
744 **/
745 PetscErrorCode MatCeedSetContextDouble(Mat mat, const char *name, double value) {
746   PetscBool      was_updated = PETSC_FALSE;
747   MatCeedContext ctx;
748 
749   PetscFunctionBeginUser;
750   PetscCall(MatShellGetContext(mat, &ctx));
751   {
752     CeedContextFieldLabel label = NULL;
753 
754     PetscCallCeed(ctx->ceed, CeedOperatorGetContextFieldLabel(ctx->op_mult, name, &label));
755     if (label) {
756       double set_value = 2 * value + 1.0;
757 
758       PetscCall(MatCeedGetContextDouble(mat, name, &set_value));
759       if (set_value != value) {
760         PetscCallCeed(ctx->ceed, CeedOperatorSetContextDouble(ctx->op_mult, label, &value));
761         was_updated = PETSC_TRUE;
762       }
763     }
764     if (ctx->op_mult_transpose) {
765       label = NULL;
766       PetscCallCeed(ctx->ceed, CeedOperatorGetContextFieldLabel(ctx->op_mult_transpose, name, &label));
767       if (label) {
768         double set_value = 2 * value + 1.0;
769 
770         PetscCall(MatCeedGetContextDouble(mat, name, &set_value));
771         if (set_value != value) {
772           PetscCallCeed(ctx->ceed, CeedOperatorSetContextDouble(ctx->op_mult_transpose, label, &value));
773           was_updated = PETSC_TRUE;
774         }
775       }
776     }
777   }
778   if (was_updated) {
779     PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY));
780     PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY));
781   }
782   PetscFunctionReturn(PETSC_SUCCESS);
783 }
784 
785 /**
786   @brief Get the current value of a context field for a `MatCEED`.
787 
788   Not collective across MPI processes.
789 
790   @param[in]   mat    `MatCEED`
791   @param[in]   name   Name of the context field
792   @param[out]  value  Current context field value
793 
794   @return An error code: 0 - success, otherwise - failure
795 **/
796 PetscErrorCode MatCeedGetContextDouble(Mat mat, const char *name, double *value) {
797   MatCeedContext ctx;
798 
799   PetscFunctionBeginUser;
800   PetscCall(MatShellGetContext(mat, &ctx));
801   {
802     CeedContextFieldLabel label = NULL;
803     CeedOperator          op    = ctx->op_mult;
804 
805     PetscCallCeed(ctx->ceed, CeedOperatorGetContextFieldLabel(op, name, &label));
806     if (!label && ctx->op_mult_transpose) {
807       op = ctx->op_mult_transpose;
808       PetscCallCeed(ctx->ceed, CeedOperatorGetContextFieldLabel(op, name, &label));
809     }
810     if (label) {
811       PetscSizeT    num_values;
812       const double *values_ceed;
813 
814       PetscCallCeed(ctx->ceed, CeedOperatorGetContextDoubleRead(op, label, &num_values, &values_ceed));
815       *value = values_ceed[0];
816       PetscCallCeed(ctx->ceed, CeedOperatorRestoreContextDoubleRead(op, label, &values_ceed));
817     }
818   }
819   PetscFunctionReturn(PETSC_SUCCESS);
820 }
821 
822 /**
823   @brief Set the current `PetscReal` value of a context field for a `MatCEED`.
824 
825   Not collective across MPI processes.
826 
827   @param[in,out]  mat    `MatCEED`
828   @param[in]      name   Name of the context field
829   @param[in]      value  New context field value
830 
831   @return An error code: 0 - success, otherwise - failure
832 **/
833 PetscErrorCode MatCeedSetContextReal(Mat mat, const char *name, PetscReal value) {
834   double value_double = value;
835 
836   PetscFunctionBeginUser;
837   PetscCall(MatCeedSetContextDouble(mat, name, value_double));
838   PetscFunctionReturn(PETSC_SUCCESS);
839 }
840 
841 /**
842   @brief Get the current `PetscReal` value of a context field for a `MatCEED`.
843 
844   Not collective across MPI processes.
845 
846   @param[in]   mat    `MatCEED`
847   @param[in]   name   Name of the context field
848   @param[out]  value  Current context field value
849 
850   @return An error code: 0 - success, otherwise - failure
851 **/
852 PetscErrorCode MatCeedGetContextReal(Mat mat, const char *name, PetscReal *value) {
853   double value_double = 0.0;
854 
855   PetscFunctionBeginUser;
856   PetscCall(MatCeedGetContextDouble(mat, name, &value_double));
857   *value = value_double;
858   PetscFunctionReturn(PETSC_SUCCESS);
859 }
860 
861 /**
862   @brief Set the current time for a `MatCEED`.
863 
864   Not collective across MPI processes.
865 
866   @param[in,out]  mat   `MatCEED`
867   @param[in]      time  Current time
868 
869   @return An error code: 0 - success, otherwise - failure
870 **/
871 PetscErrorCode MatCeedSetTime(Mat mat, PetscReal time) {
872   PetscFunctionBeginUser;
873   {
874     double time_ceed = time;
875 
876     PetscCall(MatCeedSetContextDouble(mat, "time", time_ceed));
877   }
878   PetscFunctionReturn(PETSC_SUCCESS);
879 }
880 
881 /**
882   @brief Get the current time for a `MatCEED`.
883 
884   Not collective across MPI processes.
885 
886   @param[in]   mat   `MatCEED`
887   @param[out]  time  Current time, or -1.0 if the boundary evaluator has no time field
888 
889   @return An error code: 0 - success, otherwise - failure
890 **/
891 PetscErrorCode MatCeedGetTime(Mat mat, PetscReal *time) {
892   PetscFunctionBeginUser;
893   *time = -1.0;
894   {
895     double time_ceed = -1.0;
896 
897     PetscCall(MatCeedGetContextDouble(mat, "time", &time_ceed));
898     *time = time_ceed;
899   }
900   PetscFunctionReturn(PETSC_SUCCESS);
901 }
902 
903 /**
904   @brief Set the current time step for a `MatCEED`.
905 
906   Not collective across MPI processes.
907 
908   @param[in,out]  mat  `MatCEED`
909   @param[in]      dt   Current time step
910 
911   @return An error code: 0 - success, otherwise - failure
912 **/
913 PetscErrorCode MatCeedSetDt(Mat mat, PetscReal dt) {
914   PetscFunctionBeginUser;
915   {
916     double dt_ceed = dt;
917 
918     PetscCall(MatCeedSetContextDouble(mat, "dt", dt_ceed));
919   }
920   PetscFunctionReturn(PETSC_SUCCESS);
921 }
922 
/**
  @brief Set the Jacobian shifts for a `MatCEED`.
         Sets the "shift v" context field unconditionally and the "shift a" context field only when `shift_a` is nonzero.

  Not collective across MPI processes.

  @param[in,out]  mat      `MatCEED`
  @param[in]      shift_v  Velocity shift
  @param[in]      shift_a  Acceleration shift

  @return An error code: 0 - success, otherwise - failure
**/
PetscErrorCode MatCeedSetShifts(Mat mat, PetscReal shift_v, PetscReal shift_a) {
  PetscFunctionBeginUser;
  {
    double shift_v_ceed = shift_v;

    PetscCall(MatCeedSetContextDouble(mat, "shift v", shift_v_ceed));
  }
  // NOTE(review): a zero acceleration shift is silently skipped -- presumably first-order
  // systems carry no "shift a" context field, so zero means "not applicable"; confirm an
  // exact 0.0 shift never needs to overwrite a previously set nonzero value
  if (shift_a) {
    double shift_a_ceed = shift_a;

    PetscCall(MatCeedSetContextDouble(mat, "shift a", shift_a_ceed));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
948 
949 /**
950   @brief Set user context for a `MATCEED`.
951 
952   Collective across MPI processes.
953 
954   @param[in,out]  mat  `MATCEED`
955   @param[in]      f    The context destroy function, or NULL
956   @param[in]      ctx  User context, or NULL to unset
957 
958   @return An error code: 0 - success, otherwise - failure
959 **/
960 PetscErrorCode MatCeedSetContext(Mat mat, PetscErrorCode (*f)(void *), void *ctx) {
961   PetscContainer user_ctx = NULL;
962 
963   PetscFunctionBeginUser;
964   if (ctx) {
965     PetscCall(PetscContainerCreate(PetscObjectComm((PetscObject)mat), &user_ctx));
966     PetscCall(PetscContainerSetPointer(user_ctx, ctx));
967     PetscCall(PetscContainerSetUserDestroy(user_ctx, f));
968   }
969   PetscCall(PetscObjectCompose((PetscObject)mat, "MatCeed user context", (PetscObject)user_ctx));
970   PetscCall(PetscContainerDestroy(&user_ctx));
971   PetscFunctionReturn(PETSC_SUCCESS);
972 }
973 
974 /**
975   @brief Retrieve the user context for a `MATCEED`.
976 
977   Collective across MPI processes.
978 
979   @param[in,out]  mat  `MATCEED`
980   @param[in]      ctx  User context
981 
982   @return An error code: 0 - success, otherwise - failure
983 **/
984 PetscErrorCode MatCeedGetContext(Mat mat, void *ctx) {
985   PetscContainer user_ctx;
986 
987   PetscFunctionBeginUser;
988   PetscCall(PetscObjectQuery((PetscObject)mat, "MatCeed user context", (PetscObject *)&user_ctx));
989   if (user_ctx) PetscCall(PetscContainerGetPointer(user_ctx, (void **)ctx));
990   else *(void **)ctx = NULL;
991   PetscFunctionReturn(PETSC_SUCCESS);
992 }
993 /**
994   @brief Set a user defined matrix operation for a `MATCEED` matrix.
995 
996   Within each user-defined routine, the user should call `MatCeedGetContext()` to obtain the user-defined context that was set by
997 `MatCeedSetContext()`.
998 
999   Collective across MPI processes.
1000 
1001   @param[in,out]  mat  `MATCEED`
1002   @param[in]      op   Name of the `MatOperation`
1003   @param[in]      g    Function that provides the operation
1004 
1005   @return An error code: 0 - success, otherwise - failure
1006 **/
1007 PetscErrorCode MatCeedSetOperation(Mat mat, MatOperation op, void (*g)(void)) {
1008   PetscFunctionBeginUser;
1009   PetscCall(MatShellSetOperation(mat, op, g));
1010   PetscFunctionReturn(PETSC_SUCCESS);
1011 }
1012 
/**
  @brief Sets the default COO matrix type as a string from the `MATCEED`.
         If the type changes, internally assembled matrices in the old format are destroyed and removed from the assembled-matrix lists.

  Collective across MPI processes.

  @param[in,out]  mat   `MATCEED`
  @param[in]      type  COO `MatType` to set

  @return An error code: 0 - success, otherwise - failure
**/
PetscErrorCode MatCeedSetCOOMatType(Mat mat, MatType type) {
  MatCeedContext ctx;

  PetscFunctionBeginUser;
  PetscCall(MatShellGetContext(mat, &ctx));
  // Check if same; early return avoids discarding already-assembled matrices
  {
    size_t    len_old, len_new;
    PetscBool is_same = PETSC_FALSE;

    PetscCall(PetscStrlen(ctx->coo_mat_type, &len_old));
    PetscCall(PetscStrlen(type, &len_new));
    if (len_old == len_new) PetscCall(PetscStrncmp(ctx->coo_mat_type, type, len_old, &is_same));
    if (is_same) PetscFunctionReturn(PETSC_SUCCESS);
  }
  // Clean up old mats in different format
  // LCOV_EXCL_START
  if (ctx->mat_assembled_full_internal) {
    for (PetscInt i = 0; i < ctx->num_mats_assembled_full; i++) {
      if (ctx->mats_assembled_full[i] == ctx->mat_assembled_full_internal) {
        // Compact the list in place by shifting the tail left over the removed entry
        for (PetscInt j = i + 1; j < ctx->num_mats_assembled_full; j++) {
          ctx->mats_assembled_full[j - 1] = ctx->mats_assembled_full[j];
        }
        ctx->num_mats_assembled_full--;
        // Note: we'll realloc this array again, so no need to shrink the allocation
        // MatDestroy() also NULLs mat_assembled_full_internal, so later iterations cannot match again
        PetscCall(MatDestroy(&ctx->mat_assembled_full_internal));
      }
    }
  }
  if (ctx->mat_assembled_pbd_internal) {
    for (PetscInt i = 0; i < ctx->num_mats_assembled_pbd; i++) {
      if (ctx->mats_assembled_pbd[i] == ctx->mat_assembled_pbd_internal) {
        // Same compaction scheme as for the full-assembly list above
        for (PetscInt j = i + 1; j < ctx->num_mats_assembled_pbd; j++) {
          ctx->mats_assembled_pbd[j - 1] = ctx->mats_assembled_pbd[j];
        }
        // Note: we'll realloc this array again, so no need to shrink the allocation
        ctx->num_mats_assembled_pbd--;
        PetscCall(MatDestroy(&ctx->mat_assembled_pbd_internal));
      }
    }
  }
  // Replace the stored type string with a fresh copy of the new type
  PetscCall(PetscFree(ctx->coo_mat_type));
  PetscCall(PetscStrallocpy(type, &ctx->coo_mat_type));
  PetscFunctionReturn(PETSC_SUCCESS);
  // LCOV_EXCL_STOP
}
1069 
1070 /**
1071   @brief Gets the default COO matrix type as a string from the `MATCEED`.
1072 
1073   Collective across MPI processes.
1074 
1075   @param[in,out]  mat   `MATCEED`
1076   @param[in]      type  COO `MatType`
1077 
1078   @return An error code: 0 - success, otherwise - failure
1079 **/
1080 PetscErrorCode MatCeedGetCOOMatType(Mat mat, MatType *type) {
1081   MatCeedContext ctx;
1082 
1083   PetscFunctionBeginUser;
1084   PetscCall(MatShellGetContext(mat, &ctx));
1085   *type = ctx->coo_mat_type;
1086   PetscFunctionReturn(PETSC_SUCCESS);
1087 }
1088 
1089 /**
1090   @brief Set input local vectors for `MATCEED` `MatMult()` and `MatMultTranspose()` operations.
1091 
1092   Not collective across MPI processes.
1093 
1094   @param[in,out]  mat              `MATCEED`
1095   @param[in]      X_loc            Input PETSc local vector, or NULL
1096   @param[in]      Y_loc_transpose  Input PETSc local vector for transpose operation, or NULL
1097 
1098   @return An error code: 0 - success, otherwise - failure
1099 **/
1100 PetscErrorCode MatCeedSetLocalVectors(Mat mat, Vec X_loc, Vec Y_loc_transpose) {
1101   MatCeedContext ctx;
1102 
1103   PetscFunctionBeginUser;
1104   PetscCall(MatShellGetContext(mat, &ctx));
1105   if (X_loc) {
1106     PetscInt len_old, len_new;
1107 
1108     PetscCall(VecGetSize(ctx->X_loc, &len_old));
1109     PetscCall(VecGetSize(X_loc, &len_new));
1110     PetscCheck(len_old == len_new, PETSC_COMM_SELF, PETSC_ERR_LIB, "new X_loc length %" PetscInt_FMT " should match old X_loc length %" PetscInt_FMT,
1111                len_new, len_old);
1112     PetscCall(VecReferenceCopy(X_loc, &ctx->X_loc));
1113   }
1114   if (Y_loc_transpose) {
1115     PetscInt len_old, len_new;
1116 
1117     PetscCall(VecGetSize(ctx->Y_loc_transpose, &len_old));
1118     PetscCall(VecGetSize(Y_loc_transpose, &len_new));
1119     PetscCheck(len_old == len_new, PETSC_COMM_SELF, PETSC_ERR_LIB,
1120                "new Y_loc_transpose length %" PetscInt_FMT " should match old Y_loc_transpose length %" PetscInt_FMT, len_new, len_old);
1121     PetscCall(VecReferenceCopy(Y_loc_transpose, &ctx->Y_loc_transpose));
1122   }
1123   PetscFunctionReturn(PETSC_SUCCESS);
1124 }
1125 
1126 /**
1127   @brief Get input local vectors for `MATCEED` `MatMult()` and `MatMultTranspose()` operations.
1128 
1129   Not collective across MPI processes.
1130 
1131   @param[in,out]  mat              `MATCEED`
1132   @param[out]     X_loc            Input PETSc local vector, or NULL
1133   @param[out]     Y_loc_transpose  Input PETSc local vector for transpose operation, or NULL
1134 
1135   @return An error code: 0 - success, otherwise - failure
1136 **/
1137 PetscErrorCode MatCeedGetLocalVectors(Mat mat, Vec *X_loc, Vec *Y_loc_transpose) {
1138   MatCeedContext ctx;
1139 
1140   PetscFunctionBeginUser;
1141   PetscCall(MatShellGetContext(mat, &ctx));
1142   if (X_loc) {
1143     *X_loc = NULL;
1144     PetscCall(VecReferenceCopy(ctx->X_loc, X_loc));
1145   }
1146   if (Y_loc_transpose) {
1147     *Y_loc_transpose = NULL;
1148     PetscCall(VecReferenceCopy(ctx->Y_loc_transpose, Y_loc_transpose));
1149   }
1150   PetscFunctionReturn(PETSC_SUCCESS);
1151 }
1152 
1153 /**
1154   @brief Restore input local vectors for `MATCEED` `MatMult()` and `MatMultTranspose()` operations.
1155 
1156   Not collective across MPI processes.
1157 
1158   @param[in,out]  mat              MatCeed
1159   @param[out]     X_loc            Input PETSc local vector, or NULL
1160   @param[out]     Y_loc_transpose  Input PETSc local vector for transpose operation, or NULL
1161 
1162   @return An error code: 0 - success, otherwise - failure
1163 **/
1164 PetscErrorCode MatCeedRestoreLocalVectors(Mat mat, Vec *X_loc, Vec *Y_loc_transpose) {
1165   PetscFunctionBeginUser;
1166   if (X_loc) PetscCall(VecDestroy(X_loc));
1167   if (Y_loc_transpose) PetscCall(VecDestroy(Y_loc_transpose));
1168   PetscFunctionReturn(PETSC_SUCCESS);
1169 }
1170 
1171 /**
1172   @brief Get libCEED `CeedOperator` for `MATCEED` `MatMult()` and `MatMultTranspose()` operations.
1173 
1174   Not collective across MPI processes.
1175 
1176   @param[in,out]  mat                MatCeed
1177   @param[out]     op_mult            libCEED `CeedOperator` for `MatMult()`, or NULL
1178   @param[out]     op_mult_transpose  libCEED `CeedOperator` for `MatMultTranspose()`, or NULL
1179 
1180   @return An error code: 0 - success, otherwise - failure
1181 **/
1182 PetscErrorCode MatCeedGetCeedOperators(Mat mat, CeedOperator *op_mult, CeedOperator *op_mult_transpose) {
1183   MatCeedContext ctx;
1184 
1185   PetscFunctionBeginUser;
1186   PetscCall(MatShellGetContext(mat, &ctx));
1187   if (op_mult) {
1188     *op_mult = NULL;
1189     PetscCallCeed(ctx->ceed, CeedOperatorReferenceCopy(ctx->op_mult, op_mult));
1190   }
1191   if (op_mult_transpose) {
1192     *op_mult_transpose = NULL;
1193     PetscCallCeed(ctx->ceed, CeedOperatorReferenceCopy(ctx->op_mult_transpose, op_mult_transpose));
1194   }
1195   PetscFunctionReturn(PETSC_SUCCESS);
1196 }
1197 
1198 /**
1199   @brief Restore libCEED `CeedOperator` for `MATCEED` `MatMult()` and `MatMultTranspose()` operations.
1200 
1201   Not collective across MPI processes.
1202 
1203   @param[in,out]  mat                MatCeed
1204   @param[out]     op_mult            libCEED `CeedOperator` for `MatMult()`, or NULL
1205   @param[out]     op_mult_transpose  libCEED `CeedOperator` for `MatMultTranspose()`, or NULL
1206 
1207   @return An error code: 0 - success, otherwise - failure
1208 **/
1209 PetscErrorCode MatCeedRestoreCeedOperators(Mat mat, CeedOperator *op_mult, CeedOperator *op_mult_transpose) {
1210   MatCeedContext ctx;
1211 
1212   PetscFunctionBeginUser;
1213   PetscCall(MatShellGetContext(mat, &ctx));
1214   if (op_mult) PetscCallCeed(ctx->ceed, CeedOperatorDestroy(op_mult));
1215   if (op_mult_transpose) PetscCallCeed(ctx->ceed, CeedOperatorDestroy(op_mult_transpose));
1216   PetscFunctionReturn(PETSC_SUCCESS);
1217 }
1218 
1219 /**
1220   @brief Set `PetscLogEvent` for `MATCEED` `MatMult()` and `MatMultTranspose()` operators.
1221 
1222   Not collective across MPI processes.
1223 
1224   @param[in,out]  mat                       MatCeed
1225   @param[out]     log_event_mult            `PetscLogEvent` for forward evaluation, or NULL
1226   @param[out]     log_event_mult_transpose  `PetscLogEvent` for transpose evaluation, or NULL
1227 
1228   @return An error code: 0 - success, otherwise - failure
1229 **/
1230 PetscErrorCode MatCeedSetLogEvents(Mat mat, PetscLogEvent log_event_mult, PetscLogEvent log_event_mult_transpose) {
1231   MatCeedContext ctx;
1232 
1233   PetscFunctionBeginUser;
1234   PetscCall(MatShellGetContext(mat, &ctx));
1235   if (log_event_mult) ctx->log_event_mult = log_event_mult;
1236   if (log_event_mult_transpose) ctx->log_event_mult_transpose = log_event_mult_transpose;
1237   PetscFunctionReturn(PETSC_SUCCESS);
1238 }
1239 
1240 /**
1241   @brief Get `PetscLogEvent` for `MATCEED` `MatMult()` and `MatMultTranspose()` operators.
1242 
1243   Not collective across MPI processes.
1244 
1245   @param[in,out]  mat                       MatCeed
1246   @param[out]     log_event_mult            `PetscLogEvent` for forward evaluation, or NULL
1247   @param[out]     log_event_mult_transpose  `PetscLogEvent` for transpose evaluation, or NULL
1248 
1249   @return An error code: 0 - success, otherwise - failure
1250 **/
1251 PetscErrorCode MatCeedGetLogEvents(Mat mat, PetscLogEvent *log_event_mult, PetscLogEvent *log_event_mult_transpose) {
1252   MatCeedContext ctx;
1253 
1254   PetscFunctionBeginUser;
1255   PetscCall(MatShellGetContext(mat, &ctx));
1256   if (log_event_mult) *log_event_mult = ctx->log_event_mult;
1257   if (log_event_mult_transpose) *log_event_mult_transpose = ctx->log_event_mult_transpose;
1258   PetscFunctionReturn(PETSC_SUCCESS);
1259 }
1260 
1261 /**
1262   @brief Set `CeedOperator` `PetscLogEvent` for `MATCEED` `MatMult()` and `MatMultTranspose()` operators.
1263 
1264   Not collective across MPI processes.
1265 
1266   @param[in,out]  mat                       MatCeed
1267   @param[out]     log_event_mult            `PetscLogEvent` for forward `CeedOperator` evaluation, or NULL
1268   @param[out]     log_event_mult_transpose  `PetscLogEvent` for transpose `CeedOperator` evaluation, or NULL
1269 
1270   @return An error code: 0 - success, otherwise - failure
1271 **/
1272 PetscErrorCode MatCeedSetCeedOperatorLogEvents(Mat mat, PetscLogEvent log_event_mult, PetscLogEvent log_event_mult_transpose) {
1273   MatCeedContext ctx;
1274 
1275   PetscFunctionBeginUser;
1276   PetscCall(MatShellGetContext(mat, &ctx));
1277   if (log_event_mult) ctx->log_event_ceed_mult = log_event_mult;
1278   if (log_event_mult_transpose) ctx->log_event_ceed_mult_transpose = log_event_mult_transpose;
1279   PetscFunctionReturn(PETSC_SUCCESS);
1280 }
1281 
1282 /**
1283   @brief Get `CeedOperator` `PetscLogEvent` for `MATCEED` `MatMult()` and `MatMultTranspose()` operators.
1284 
1285   Not collective across MPI processes.
1286 
1287   @param[in,out]  mat                       MatCeed
1288   @param[out]     log_event_mult            `PetscLogEvent` for forward `CeedOperator` evaluation, or NULL
1289   @param[out]     log_event_mult_transpose  `PetscLogEvent` for transpose `CeedOperator` evaluation, or NULL
1290 
1291   @return An error code: 0 - success, otherwise - failure
1292 **/
1293 PetscErrorCode MatCeedGetCeedOperatorLogEvents(Mat mat, PetscLogEvent *log_event_mult, PetscLogEvent *log_event_mult_transpose) {
1294   MatCeedContext ctx;
1295 
1296   PetscFunctionBeginUser;
1297   PetscCall(MatShellGetContext(mat, &ctx));
1298   if (log_event_mult) *log_event_mult = ctx->log_event_ceed_mult;
1299   if (log_event_mult_transpose) *log_event_mult_transpose = ctx->log_event_ceed_mult_transpose;
1300   PetscFunctionReturn(PETSC_SUCCESS);
1301 }
1302 
1303 // -----------------------------------------------------------------------------
1304 // Operator context data
1305 // -----------------------------------------------------------------------------
1306 
/**
  @brief Setup context data for operator application.
         The context starts with a reference count of 1; release with `MatCeedContextDestroy()`.

  Collective across MPI processes.

  @param[in]   dm_x                           Input `DM`
  @param[in]   dm_y                           Output `DM`
  @param[in]   X_loc                          Input PETSc local vector, or NULL
  @param[in]   Y_loc_transpose                Input PETSc local vector for transpose operation, or NULL
  @param[in]   op_mult                        `CeedOperator` for forward evaluation
  @param[in]   op_mult_transpose              `CeedOperator` for transpose evaluation
  @param[in]   log_event_mult                 `PetscLogEvent` for forward evaluation
  @param[in]   log_event_mult_transpose       `PetscLogEvent` for transpose evaluation
  @param[in]   log_event_ceed_mult            `PetscLogEvent` for forward `CeedOperator` evaluation
  @param[in]   log_event_ceed_mult_transpose  `PetscLogEvent` for transpose `CeedOperator` evaluation
  @param[out]  ctx                            Context data for operator evaluation

  @return An error code: 0 - success, otherwise - failure
**/
PetscErrorCode MatCeedContextCreate(DM dm_x, DM dm_y, Vec X_loc, Vec Y_loc_transpose, CeedOperator op_mult, CeedOperator op_mult_transpose,
                                    PetscLogEvent log_event_mult, PetscLogEvent log_event_mult_transpose, PetscLogEvent log_event_ceed_mult,
                                    PetscLogEvent log_event_ceed_mult_transpose, MatCeedContext *ctx) {
  CeedSize x_loc_len, y_loc_len;

  PetscFunctionBeginUser;

  // Allocate
  PetscCall(PetscNew(ctx));
  (*ctx)->ref_count = 1;

  // Logging
  (*ctx)->log_event_mult                = log_event_mult;
  (*ctx)->log_event_mult_transpose      = log_event_mult_transpose;
  (*ctx)->log_event_ceed_mult           = log_event_ceed_mult;
  (*ctx)->log_event_ceed_mult_transpose = log_event_ceed_mult_transpose;

  // PETSc objects; the context takes its own references
  PetscCall(DMReferenceCopy(dm_x, &(*ctx)->dm_x));
  PetscCall(DMReferenceCopy(dm_y, &(*ctx)->dm_y));
  if (X_loc) PetscCall(VecReferenceCopy(X_loc, &(*ctx)->X_loc));
  if (Y_loc_transpose) PetscCall(VecReferenceCopy(Y_loc_transpose, &(*ctx)->Y_loc_transpose));

  // Memtype: borrow a local vector from dm_x once to record the memory type of its array data
  {
    const PetscScalar *x;
    Vec                X;

    PetscCall(DMGetLocalVector(dm_x, &X));
    PetscCall(VecGetArrayReadAndMemType(X, &x, &(*ctx)->mem_type));
    PetscCall(VecRestoreArrayReadAndMemType(X, &x));
    PetscCall(DMRestoreLocalVector(dm_x, &X));
  }

  // libCEED objects; op_mult_transpose is optional, everything else is referenced unconditionally
  PetscCheck(CeedOperatorGetCeed(op_mult, &(*ctx)->ceed) == CEED_ERROR_SUCCESS, PETSC_COMM_SELF, PETSC_ERR_LIB,
             "retrieving Ceed context object failed");
  PetscCallCeed((*ctx)->ceed, CeedReference((*ctx)->ceed));
  PetscCallCeed((*ctx)->ceed, CeedOperatorGetActiveVectorLengths(op_mult, &x_loc_len, &y_loc_len));
  PetscCallCeed((*ctx)->ceed, CeedOperatorReferenceCopy(op_mult, &(*ctx)->op_mult));
  if (op_mult_transpose) PetscCallCeed((*ctx)->ceed, CeedOperatorReferenceCopy(op_mult_transpose, &(*ctx)->op_mult_transpose));
  PetscCallCeed((*ctx)->ceed, CeedVectorCreate((*ctx)->ceed, x_loc_len, &(*ctx)->x_loc));
  PetscCallCeed((*ctx)->ceed, CeedVectorCreate((*ctx)->ceed, y_loc_len, &(*ctx)->y_loc));

  // Flop counting: cache the estimated flops per operator application for PETSc logging
  {
    CeedSize ceed_flops_estimate = 0;

    PetscCallCeed((*ctx)->ceed, CeedOperatorGetFlopsEstimate(op_mult, &ceed_flops_estimate));
    (*ctx)->flops_mult = ceed_flops_estimate;
    if (op_mult_transpose) {
      PetscCallCeed((*ctx)->ceed, CeedOperatorGetFlopsEstimate(op_mult_transpose, &ceed_flops_estimate));
      (*ctx)->flops_mult_transpose = ceed_flops_estimate;
    }
  }

  // Check sizes: verify the operator's active vector lengths agree with the DMs and any user-provided local vectors
  if (x_loc_len > 0 || y_loc_len > 0) {
    CeedSize ctx_x_loc_len, ctx_y_loc_len;
    PetscInt X_loc_len, dm_x_loc_len, Y_loc_len, dm_y_loc_len;
    Vec      dm_X_loc, dm_Y_loc;

    // -- Input
    PetscCall(DMGetLocalVector(dm_x, &dm_X_loc));
    PetscCall(VecGetLocalSize(dm_X_loc, &dm_x_loc_len));
    PetscCall(DMRestoreLocalVector(dm_x, &dm_X_loc));
    PetscCallCeed((*ctx)->ceed, CeedVectorGetLength((*ctx)->x_loc, &ctx_x_loc_len));
    if (X_loc) {
      PetscCall(VecGetLocalSize(X_loc, &X_loc_len));
      PetscCheck(X_loc_len == dm_x_loc_len, PETSC_COMM_SELF, PETSC_ERR_LIB,
                 "X_loc (%" PetscInt_FMT ") must match dm_x (%" PetscInt_FMT ") dimensions", X_loc_len, dm_x_loc_len);
    }
    PetscCheck(x_loc_len == dm_x_loc_len, PETSC_COMM_SELF, PETSC_ERR_LIB, "op (%" CeedSize_FMT ") must match dm_x (%" PetscInt_FMT ") dimensions",
               x_loc_len, dm_x_loc_len);
    PetscCheck(x_loc_len == ctx_x_loc_len, PETSC_COMM_SELF, PETSC_ERR_LIB, "x_loc (%" CeedSize_FMT ") must match op dimensions (%" CeedSize_FMT ")",
               x_loc_len, ctx_x_loc_len);

    // -- Output
    PetscCall(DMGetLocalVector(dm_y, &dm_Y_loc));
    PetscCall(VecGetLocalSize(dm_Y_loc, &dm_y_loc_len));
    PetscCall(DMRestoreLocalVector(dm_y, &dm_Y_loc));
    PetscCallCeed((*ctx)->ceed, CeedVectorGetLength((*ctx)->y_loc, &ctx_y_loc_len));
    PetscCheck(ctx_y_loc_len == dm_y_loc_len, PETSC_COMM_SELF, PETSC_ERR_LIB, "op (%" CeedSize_FMT ") must match dm_y (%" PetscInt_FMT ") dimensions",
               ctx_y_loc_len, dm_y_loc_len);

    // -- Transpose
    if (Y_loc_transpose) {
      PetscCall(VecGetLocalSize(Y_loc_transpose, &Y_loc_len));
      PetscCheck(Y_loc_len == dm_y_loc_len, PETSC_COMM_SELF, PETSC_ERR_LIB,
                 "Y_loc_transpose (%" PetscInt_FMT ") must match dm_y (%" PetscInt_FMT ") dimensions", Y_loc_len, dm_y_loc_len);
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
1420 
1421 /**
1422   @brief Increment reference counter for `MATCEED` context.
1423 
1424   Not collective across MPI processes.
1425 
1426   @param[in,out]  ctx  Context data
1427 
1428   @return An error code: 0 - success, otherwise - failure
1429 **/
1430 PetscErrorCode MatCeedContextReference(MatCeedContext ctx) {
1431   PetscFunctionBeginUser;
1432   ctx->ref_count++;
1433   PetscFunctionReturn(PETSC_SUCCESS);
1434 }
1435 
/**
  @brief Copy reference for `MATCEED`.
         Note: If `ctx_copy` is non-null, it is assumed to be a valid pointer to a `MatCeedContext`.

  Not collective across MPI processes.

  @param[in]   ctx       Context data
  @param[out]  ctx_copy  Copy of pointer to context data

  @return An error code: 0 - success, otherwise - failure
**/
PetscErrorCode MatCeedContextReferenceCopy(MatCeedContext ctx, MatCeedContext *ctx_copy) {
  PetscFunctionBeginUser;
  // Reference first so a self-copy (*ctx_copy == ctx) is not freed by the destroy below
  PetscCall(MatCeedContextReference(ctx));
  // Release the reference previously held in ctx_copy (no-op when *ctx_copy is NULL)
  PetscCall(MatCeedContextDestroy(*ctx_copy));
  *ctx_copy = ctx;
  PetscFunctionReturn(PETSC_SUCCESS);
}
1454 
/**
  @brief Destroy context data for operator application.
         Decrements the reference count and frees all held PETSc and libCEED objects once it reaches zero.

  Collective across MPI processes.

  @param[in,out]  ctx  Context data for operator evaluation

  @return An error code: 0 - success, otherwise - failure
**/
PetscErrorCode MatCeedContextDestroy(MatCeedContext ctx) {
  PetscFunctionBeginUser;
  // NULL contexts and outstanding references make this a no-op
  if (!ctx || --ctx->ref_count > 0) PetscFunctionReturn(PETSC_SUCCESS);

  // PETSc objects
  PetscCall(DMDestroy(&ctx->dm_x));
  PetscCall(DMDestroy(&ctx->dm_y));
  PetscCall(VecDestroy(&ctx->X_loc));
  PetscCall(VecDestroy(&ctx->Y_loc_transpose));
  PetscCall(MatDestroy(&ctx->mat_assembled_full_internal));
  PetscCall(MatDestroy(&ctx->mat_assembled_pbd_internal));
  PetscCall(PetscFree(ctx->coo_mat_type));
  PetscCall(PetscFree(ctx->mats_assembled_full));
  PetscCall(PetscFree(ctx->mats_assembled_pbd));

  // libCEED objects; the Ceed itself is released last since the error macros use it
  PetscCallCeed(ctx->ceed, CeedVectorDestroy(&ctx->x_loc));
  PetscCallCeed(ctx->ceed, CeedVectorDestroy(&ctx->y_loc));
  PetscCallCeed(ctx->ceed, CeedVectorDestroy(&ctx->coo_values_full));
  PetscCallCeed(ctx->ceed, CeedVectorDestroy(&ctx->coo_values_pbd));
  PetscCallCeed(ctx->ceed, CeedOperatorDestroy(&ctx->op_mult));
  PetscCallCeed(ctx->ceed, CeedOperatorDestroy(&ctx->op_mult_transpose));
  PetscCheck(CeedDestroy(&ctx->ceed) == CEED_ERROR_SUCCESS, PETSC_COMM_SELF, PETSC_ERR_LIB, "destroying libCEED context object failed");

  // Deallocate
  ctx->is_destroyed = PETSC_TRUE;  // Flag as destroyed in case someone has stale ref
  PetscCall(PetscFree(ctx));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1493 
1494 /**
1495   @brief Compute the diagonal of an operator via libCEED.
1496 
1497   Collective across MPI processes.
1498 
1499   @param[in]   A  `MATCEED`
1500   @param[out]  D  Vector holding operator diagonal
1501 
1502   @return An error code: 0 - success, otherwise - failure
1503 **/
1504 PetscErrorCode MatGetDiagonal_Ceed(Mat A, Vec D) {
1505   PetscMemType   mem_type;
1506   Vec            D_loc;
1507   MatCeedContext ctx;
1508 
1509   PetscFunctionBeginUser;
1510   PetscCall(MatShellGetContext(A, &ctx));
1511 
1512   // Place PETSc vector in libCEED vector
1513   PetscCall(PetscLogEventBegin(MATCEED_ASSEMBLE_DIAGONAL, A, D, NULL, NULL));
1514   PetscCall(DMGetLocalVector(ctx->dm_x, &D_loc));
1515   PetscCall(VecPetscToCeed(D_loc, &mem_type, ctx->x_loc));
1516 
1517   // Compute Diagonal
1518   PetscCall(PetscLogEventBegin(MATCEED_ASSEMBLE_DIAGONAL_CEEDOP, A, D, NULL, NULL));
1519   PetscCallCeed(ctx->ceed, CeedOperatorLinearAssembleDiagonal(ctx->op_mult, ctx->x_loc, CEED_REQUEST_IMMEDIATE));
1520   PetscCall(PetscLogEventEnd(MATCEED_ASSEMBLE_DIAGONAL_CEEDOP, A, D, NULL, NULL));
1521 
1522   // Restore PETSc vector
1523   PetscCall(VecCeedToPetsc(ctx->x_loc, mem_type, D_loc));
1524 
1525   // Local-to-Global
1526   PetscCall(VecZeroEntries(D));
1527   PetscCall(DMLocalToGlobal(ctx->dm_x, D_loc, ADD_VALUES, D));
1528   PetscCall(DMRestoreLocalVector(ctx->dm_x, &D_loc));
1529   PetscCall(PetscLogEventEnd(MATCEED_ASSEMBLE_DIAGONAL, A, D, NULL, NULL));
1530   PetscFunctionReturn(PETSC_SUCCESS);
1531 }
1532 
1533 /**
1534   @brief Compute `A X = Y` for a `MATCEED`.
1535 
1536   Collective across MPI processes.
1537 
1538   @param[in]   A  `MATCEED`
1539   @param[in]   X  Input PETSc vector
1540   @param[out]  Y  Output PETSc vector
1541 
1542   @return An error code: 0 - success, otherwise - failure
1543 **/
1544 PetscErrorCode MatMult_Ceed(Mat A, Vec X, Vec Y) {
1545   MatCeedContext ctx;
1546 
1547   PetscFunctionBeginUser;
1548   PetscCall(MatShellGetContext(A, &ctx));
1549   PetscCall(PetscLogEventBegin(ctx->log_event_mult, A, X, Y, NULL));
1550 
1551   {
1552     PetscMemType x_mem_type, y_mem_type;
1553     Vec          X_loc = ctx->X_loc, Y_loc;
1554 
1555     // Get local vectors
1556     if (!ctx->X_loc) PetscCall(DMGetLocalVector(ctx->dm_x, &X_loc));
1557     PetscCall(DMGetLocalVector(ctx->dm_y, &Y_loc));
1558 
1559     // Global-to-local
1560     PetscCall(DMGlobalToLocal(ctx->dm_x, X, INSERT_VALUES, X_loc));
1561 
1562     // Setup libCEED vectors
1563     PetscCall(VecReadPetscToCeed(X_loc, &x_mem_type, ctx->x_loc));
1564     PetscCall(VecZeroEntries(Y_loc));
1565     PetscCall(VecPetscToCeed(Y_loc, &y_mem_type, ctx->y_loc));
1566 
1567     // Apply libCEED operator
1568     PetscCall(PetscLogGpuTimeBegin());
1569     PetscCall(PetscLogEventBegin(ctx->log_event_ceed_mult, A, X, Y, NULL));
1570     PetscCallCeed(ctx->ceed, CeedOperatorApplyAdd(ctx->op_mult, ctx->x_loc, ctx->y_loc, CEED_REQUEST_IMMEDIATE));
1571     PetscCall(PetscLogEventEnd(ctx->log_event_ceed_mult, A, X, Y, NULL));
1572     PetscCall(PetscLogGpuTimeEnd());
1573 
1574     // Restore PETSc vectors
1575     PetscCall(VecReadCeedToPetsc(ctx->x_loc, x_mem_type, X_loc));
1576     PetscCall(VecCeedToPetsc(ctx->y_loc, y_mem_type, Y_loc));
1577 
1578     // Local-to-global
1579     PetscCall(VecZeroEntries(Y));
1580     PetscCall(DMLocalToGlobal(ctx->dm_y, Y_loc, ADD_VALUES, Y));
1581 
1582     // Restore local vectors, as needed
1583     if (!ctx->X_loc) PetscCall(DMRestoreLocalVector(ctx->dm_x, &X_loc));
1584     PetscCall(DMRestoreLocalVector(ctx->dm_y, &Y_loc));
1585   }
1586 
1587   // Log flops
1588   if (PetscMemTypeDevice(ctx->mem_type)) PetscCall(PetscLogGpuFlops(ctx->flops_mult));
1589   else PetscCall(PetscLogFlops(ctx->flops_mult));
1590   PetscCall(PetscLogEventEnd(ctx->log_event_mult, A, X, Y, NULL));
1591   PetscFunctionReturn(PETSC_SUCCESS);
1592 }
1593 
1594 /**
1595   @brief Compute `A^T Y = X` for a `MATCEED`.
1596 
1597   Collective across MPI processes.
1598 
1599   @param[in]   A  `MATCEED`
1600   @param[in]   Y  Input PETSc vector
1601   @param[out]  X  Output PETSc vector
1602 
1603   @return An error code: 0 - success, otherwise - failure
1604 **/
1605 PetscErrorCode MatMultTranspose_Ceed(Mat A, Vec Y, Vec X) {
1606   MatCeedContext ctx;
1607 
1608   PetscFunctionBeginUser;
1609   PetscCall(MatShellGetContext(A, &ctx));
1610   PetscCall(PetscLogEventBegin(ctx->log_event_mult_transpose, A, Y, X, NULL));
1611 
1612   {
1613     PetscMemType x_mem_type, y_mem_type;
1614     Vec          X_loc, Y_loc = ctx->Y_loc_transpose;
1615 
1616     // Get local vectors
1617     if (!ctx->Y_loc_transpose) PetscCall(DMGetLocalVector(ctx->dm_y, &Y_loc));
1618     PetscCall(DMGetLocalVector(ctx->dm_x, &X_loc));
1619 
1620     // Global-to-local
1621     PetscCall(DMGlobalToLocal(ctx->dm_y, Y, INSERT_VALUES, Y_loc));
1622 
1623     // Setup libCEED vectors
1624     PetscCall(VecReadPetscToCeed(Y_loc, &y_mem_type, ctx->y_loc));
1625     PetscCall(VecZeroEntries(X_loc));
1626     PetscCall(VecPetscToCeed(X_loc, &x_mem_type, ctx->x_loc));
1627 
1628     // Apply libCEED operator
1629     PetscCall(PetscLogGpuTimeBegin());
1630     PetscCall(PetscLogEventBegin(ctx->log_event_ceed_mult_transpose, A, Y, X, NULL));
1631     PetscCallCeed(ctx->ceed, CeedOperatorApplyAdd(ctx->op_mult_transpose, ctx->y_loc, ctx->x_loc, CEED_REQUEST_IMMEDIATE));
1632     PetscCall(PetscLogEventEnd(ctx->log_event_ceed_mult_transpose, A, Y, X, NULL));
1633     PetscCall(PetscLogGpuTimeEnd());
1634 
1635     // Restore PETSc vectors
1636     PetscCall(VecReadCeedToPetsc(ctx->y_loc, y_mem_type, Y_loc));
1637     PetscCall(VecCeedToPetsc(ctx->x_loc, x_mem_type, X_loc));
1638 
1639     // Local-to-global
1640     PetscCall(VecZeroEntries(X));
1641     PetscCall(DMLocalToGlobal(ctx->dm_x, X_loc, ADD_VALUES, X));
1642 
1643     // Restore local vectors, as needed
1644     if (!ctx->Y_loc_transpose) PetscCall(DMRestoreLocalVector(ctx->dm_y, &Y_loc));
1645     PetscCall(DMRestoreLocalVector(ctx->dm_x, &X_loc));
1646   }
1647 
1648   // Log flops
1649   if (PetscMemTypeDevice(ctx->mem_type)) PetscCall(PetscLogGpuFlops(ctx->flops_mult_transpose));
1650   else PetscCall(PetscLogFlops(ctx->flops_mult_transpose));
1651   PetscCall(PetscLogEventEnd(ctx->log_event_mult_transpose, A, Y, X, NULL));
1652   PetscFunctionReturn(PETSC_SUCCESS);
1653 }
1654