xref: /libCEED/examples/fluids/src/mat-ceed.c (revision 097cc79570fc1ccd17774d2bf9cd5e51d3925370)
1 /// @file
2 /// MatCEED implementation
3 
4 #include <ceed.h>
5 #include <ceed/backend.h>
6 #include <mat-ceed-impl.h>
7 #include <mat-ceed.h>
8 #include <petsc-ceed-utils.h>
9 #include <petsc-ceed.h>
10 #include <petscdmplex.h>
11 #include <stdlib.h>
12 #include <string.h>
13 
14 PetscClassId  MATCEED_CLASSID;
15 PetscLogEvent MATCEED_MULT, MATCEED_MULT_CEEDOP, MATCEED_MULT_TRANSPOSE, MATCEED_MULT_TRANSPOSE_CEEDOP, MATCEED_ASSEMBLE_DIAGONAL,
16     MATCEED_ASSEMBLE_DIAGONAL_CEEDOP, MATCEED_SETUP_PBDIAGONAL, MATCEED_SETUP_PBDIAGONAL_CEEDOP, MATCEED_ASSEMBLE_PBDIAGONAL,
17     MATCEED_ASSEMBLE_PBDIAGONAL_CEEDOP, MATCEED_SETUP_FULL, MATCEED_SETUP_FULL_CEEDOP, MATCEED_ASSEMBLE_FULL, MATCEED_ASSEMBLE_FULL_CEEDOP;
18 
19 /**
20   @brief Register MATCEED log events.
21 
22   Not collective across MPI processes.
23 
24   @return An error code: 0 - success, otherwise - failure
25 **/
26 static PetscErrorCode MatCeedRegisterLogEvents() {
27   static PetscBool registered = PETSC_FALSE;
28 
29   PetscFunctionBeginUser;
30   if (registered) PetscFunctionReturn(PETSC_SUCCESS);
31   PetscCall(PetscClassIdRegister("MatCEED", &MATCEED_CLASSID));
32   PetscCall(PetscLogEventRegister("MatCEEDMul", MATCEED_CLASSID, &MATCEED_MULT));
33   PetscCall(PetscLogEventRegister("MatCEEDMulCeed", MATCEED_CLASSID, &MATCEED_MULT_CEEDOP));
34   PetscCall(PetscLogEventRegister("MatCEEDMulT", MATCEED_CLASSID, &MATCEED_MULT_TRANSPOSE));
35   PetscCall(PetscLogEventRegister("MatCEEDMulTCeed", MATCEED_CLASSID, &MATCEED_MULT_TRANSPOSE_CEEDOP));
36   PetscCall(PetscLogEventRegister("MatCEEDAsmDiag", MATCEED_CLASSID, &MATCEED_ASSEMBLE_DIAGONAL));
37   PetscCall(PetscLogEventRegister("MatCEEDAsmPBDSU", MATCEED_CLASSID, &MATCEED_SETUP_PBDIAGONAL));
38   PetscCall(PetscLogEventRegister("MatCEEDAsmPBDSUCeed", MATCEED_CLASSID, &MATCEED_SETUP_PBDIAGONAL_CEEDOP));
39   PetscCall(PetscLogEventRegister("MatCEEDAsmPBD", MATCEED_CLASSID, &MATCEED_ASSEMBLE_PBDIAGONAL));
40   PetscCall(PetscLogEventRegister("MatCEEDAsmPBDCeed", MATCEED_CLASSID, &MATCEED_ASSEMBLE_PBDIAGONAL_CEEDOP));
41   PetscCall(PetscLogEventRegister("MatCEEDAsmSU", MATCEED_CLASSID, &MATCEED_SETUP_FULL));
42   PetscCall(PetscLogEventRegister("MatCEEDAsmSUCeed", MATCEED_CLASSID, &MATCEED_SETUP_FULL_CEEDOP));
43   PetscCall(PetscLogEventRegister("MatCEEDAsm", MATCEED_CLASSID, &MATCEED_ASSEMBLE_FULL));
44   PetscCall(PetscLogEventRegister("MatCEEDAsmCeed", MATCEED_CLASSID, &MATCEED_ASSEMBLE_FULL_CEEDOP));
45   registered = PETSC_TRUE;
46   PetscFunctionReturn(PETSC_SUCCESS);
47 }
48 
49 /**
50   @brief Assemble the point block diagonal of a `MATCEED` into a `MATAIJ` or similar.
51          The `mat_coo` preallocation is set to match the sparsity pattern of `mat_ceed`.
52          The caller is responsible for assuring the global and local sizes are compatible, otherwise this function will fail.
53 
54   Collective across MPI processes.
55 
56   @param[in]      mat_ceed  `MATCEED` to assemble
57   @param[in,out]  mat_coo   `MATAIJ` or similar to assemble into
58 
59   @return An error code: 0 - success, otherwise - failure
60 **/
static PetscErrorCode MatCeedAssemblePointBlockDiagonalCOO(Mat mat_ceed, Mat mat_coo) {
  MatCeedContext ctx;

  PetscFunctionBeginUser;
  PetscCall(MatShellGetContext(mat_ceed, &ctx));

  // Check if COO pattern set
  {
    PetscInt index = -1;

    // Linear search over matrices whose COO pattern was already set from this context
    for (PetscInt i = 0; i < ctx->num_mats_assembled_pbd; i++) {
      if (ctx->mats_assembled_pbd[i] == mat_coo) index = i;
    }
    if (index == -1) {
      PetscInt     *rows_petsc = NULL, *cols_petsc = NULL;
      CeedInt      *rows_ceed, *cols_ceed;
      PetscCount    num_entries;
      PetscLogStage stage_amg_setup;

      // -- Assemble sparsity pattern if mat hasn't been assembled before
      PetscCall(PetscLogStageGetId("MatCEED Asm Setup", &stage_amg_setup));
      if (stage_amg_setup == -1) {
        // Register the log stage on first use
        PetscCall(PetscLogStageRegister("MatCEED Asm Setup", &stage_amg_setup));
      }
      PetscCall(PetscLogStagePush(stage_amg_setup));
      PetscCall(PetscLogEventBegin(MATCEED_SETUP_PBDIAGONAL, mat_ceed, mat_coo, NULL, NULL));
      PetscCall(PetscLogEventBegin(MATCEED_SETUP_PBDIAGONAL_CEEDOP, mat_ceed, mat_coo, NULL, NULL));
      // libCEED computes the (row, col) index pairs of the point block diagonal sparsity pattern
      PetscCallCeed(ctx->ceed, CeedOperatorLinearAssemblePointBlockDiagonalSymbolic(ctx->op_mult, &num_entries, &rows_ceed, &cols_ceed));
      PetscCall(PetscLogEventEnd(MATCEED_SETUP_PBDIAGONAL_CEEDOP, mat_ceed, mat_coo, NULL, NULL));
      // Convert CeedInt index arrays to PetscInt (the two integer types may differ in width)
      PetscCall(IntArrayCeedToPetsc(num_entries, &rows_ceed, &rows_petsc));
      PetscCall(IntArrayCeedToPetsc(num_entries, &cols_ceed, &cols_petsc));
      PetscCall(MatSetPreallocationCOOLocal(mat_coo, num_entries, rows_petsc, cols_petsc));
      // NOTE(review): released with free(), which assumes IntArrayCeedToPetsc allocates with malloc — confirm
      free(rows_petsc);
      free(cols_petsc);
      // Lazily create the CeedVector that will receive assembled COO values
      if (!ctx->coo_values_pbd) PetscCallCeed(ctx->ceed, CeedVectorCreate(ctx->ceed, num_entries, &ctx->coo_values_pbd));
      // Record mat_coo so the symbolic setup is skipped on subsequent calls
      PetscCall(PetscRealloc(++ctx->num_mats_assembled_pbd * sizeof(Mat), &ctx->mats_assembled_pbd));
      ctx->mats_assembled_pbd[ctx->num_mats_assembled_pbd - 1] = mat_coo;
      PetscCall(PetscLogEventEnd(MATCEED_SETUP_PBDIAGONAL, mat_ceed, mat_coo, NULL, NULL));
      PetscCall(PetscLogStagePop());
    }
  }

  // Assemble mat_ceed
  PetscCall(PetscLogEventBegin(MATCEED_ASSEMBLE_PBDIAGONAL, mat_ceed, mat_coo, NULL, NULL));
  PetscCall(MatAssemblyBegin(mat_coo, MAT_FINAL_ASSEMBLY));
  {
    const CeedScalar *values;
    MatType           mat_type;
    CeedMemType       mem_type = CEED_MEM_HOST;
    PetscBool         is_spd, is_spd_known;

    // Read assembled values from device memory for device-backed Mat types, host memory otherwise
    PetscCall(MatGetType(mat_coo, &mat_type));
    if (strstr(mat_type, "cusparse")) mem_type = CEED_MEM_DEVICE;
    else if (strstr(mat_type, "kokkos")) mem_type = CEED_MEM_DEVICE;
    else mem_type = CEED_MEM_HOST;

    PetscCall(PetscLogEventBegin(MATCEED_ASSEMBLE_PBDIAGONAL_CEEDOP, mat_ceed, mat_coo, NULL, NULL));
    PetscCallCeed(ctx->ceed, CeedOperatorLinearAssemblePointBlockDiagonal(ctx->op_mult, ctx->coo_values_pbd, CEED_REQUEST_IMMEDIATE));
    PetscCall(PetscLogEventEnd(MATCEED_ASSEMBLE_PBDIAGONAL_CEEDOP, mat_ceed, mat_coo, NULL, NULL));
    PetscCallCeed(ctx->ceed, CeedVectorGetArrayRead(ctx->coo_values_pbd, mem_type, &values));
    PetscCall(MatSetValuesCOO(mat_coo, values, INSERT_VALUES));
    // Propagate the SPD flag from the shell matrix when it is known
    PetscCall(MatIsSPDKnown(mat_ceed, &is_spd_known, &is_spd));
    if (is_spd_known) PetscCall(MatSetOption(mat_coo, MAT_SPD, is_spd));
    PetscCallCeed(ctx->ceed, CeedVectorRestoreArrayRead(ctx->coo_values_pbd, &values));
  }
  PetscCall(MatAssemblyEnd(mat_coo, MAT_FINAL_ASSEMBLY));
  PetscCall(PetscLogEventEnd(MATCEED_ASSEMBLE_PBDIAGONAL, mat_ceed, mat_coo, NULL, NULL));
  PetscFunctionReturn(PETSC_SUCCESS);
}
130 
131 /**
132   @brief Assemble inner `Mat` for diagonal `PC` operations
133 
134   Collective across MPI processes.
135 
136   @param[in]   mat_ceed      `MATCEED` to invert
137   @param[in]   use_ceed_pbd  Boolean flag to use libCEED PBD assembly
138   @param[out]  mat_inner     Inner `Mat` for diagonal operations
139 
140   @return An error code: 0 - success, otherwise - failure
141 **/
142 static PetscErrorCode MatCeedAssembleInnerBlockDiagonalMat(Mat mat_ceed, PetscBool use_ceed_pbd, Mat *mat_inner) {
143   MatCeedContext ctx;
144 
145   PetscFunctionBeginUser;
146   PetscCall(MatShellGetContext(mat_ceed, &ctx));
147   if (use_ceed_pbd) {
148     // Check if COO pattern set
149     if (!ctx->mat_assembled_pbd_internal) PetscCall(MatCeedCreateMatCOO(mat_ceed, &ctx->mat_assembled_pbd_internal));
150 
151     // Assemble mat_assembled_full_internal
152     PetscCall(MatCeedAssemblePointBlockDiagonalCOO(mat_ceed, ctx->mat_assembled_pbd_internal));
153     if (mat_inner) *mat_inner = ctx->mat_assembled_pbd_internal;
154   } else {
155     // Check if COO pattern set
156     if (!ctx->mat_assembled_full_internal) PetscCall(MatCeedCreateMatCOO(mat_ceed, &ctx->mat_assembled_full_internal));
157 
158     // Assemble mat_assembled_full_internal
159     PetscCall(MatCeedAssembleCOO(mat_ceed, ctx->mat_assembled_full_internal));
160     if (mat_inner) *mat_inner = ctx->mat_assembled_full_internal;
161   }
162   PetscFunctionReturn(PETSC_SUCCESS);
163 }
164 
165 /**
166   @brief Get `MATCEED` diagonal block for Jacobi.
167 
168   Collective across MPI processes.
169 
170   @param[in]   mat_ceed   `MATCEED` to invert
171   @param[out]  mat_block  The diagonal block matrix
172 
173   @return An error code: 0 - success, otherwise - failure
174 **/
175 static PetscErrorCode MatGetDiagonalBlock_Ceed(Mat mat_ceed, Mat *mat_block) {
176   Mat            mat_inner = NULL;
177   MatCeedContext ctx;
178 
179   PetscFunctionBeginUser;
180   PetscCall(MatShellGetContext(mat_ceed, &ctx));
181 
182   // Assemble inner mat if needed
183   PetscCall(MatCeedAssembleInnerBlockDiagonalMat(mat_ceed, ctx->is_ceed_pbd_valid, &mat_inner));
184 
185   // Get block diagonal
186   PetscCall(MatGetDiagonalBlock(mat_inner, mat_block));
187   PetscFunctionReturn(PETSC_SUCCESS);
188 }
189 
190 /**
191   @brief Invert `MATCEED` diagonal block for Jacobi.
192 
193   Collective across MPI processes.
194 
195   @param[in]   mat_ceed  `MATCEED` to invert
196   @param[out]  values    The block inverses in column major order
197 
198   @return An error code: 0 - success, otherwise - failure
199 **/
200 static PetscErrorCode MatInvertBlockDiagonal_Ceed(Mat mat_ceed, const PetscScalar **values) {
201   Mat            mat_inner = NULL;
202   MatCeedContext ctx;
203 
204   PetscFunctionBeginUser;
205   PetscCall(MatShellGetContext(mat_ceed, &ctx));
206 
207   // Assemble inner mat if needed
208   PetscCall(MatCeedAssembleInnerBlockDiagonalMat(mat_ceed, ctx->is_ceed_pbd_valid, &mat_inner));
209 
210   // Invert PB diagonal
211   PetscCall(MatInvertBlockDiagonal(mat_inner, values));
212   PetscFunctionReturn(PETSC_SUCCESS);
213 }
214 
215 /**
216   @brief Invert `MATCEED` variable diagonal block for Jacobi.
217 
218   Collective across MPI processes.
219 
220   @param[in]   mat_ceed     `MATCEED` to invert
221   @param[in]   num_blocks   The number of blocks on the process
222   @param[in]   block_sizes  The size of each block on the process
223   @param[out]  values       The block inverses in column major order
224 
225   @return An error code: 0 - success, otherwise - failure
226 **/
227 static PetscErrorCode MatInvertVariableBlockDiagonal_Ceed(Mat mat_ceed, PetscInt num_blocks, const PetscInt *block_sizes, PetscScalar *values) {
228   Mat            mat_inner = NULL;
229   MatCeedContext ctx;
230 
231   PetscFunctionBeginUser;
232   PetscCall(MatShellGetContext(mat_ceed, &ctx));
233 
234   // Assemble inner mat if needed
235   PetscCall(MatCeedAssembleInnerBlockDiagonalMat(mat_ceed, ctx->is_ceed_vpbd_valid, &mat_inner));
236 
237   // Invert PB diagonal
238   PetscCall(MatInvertVariableBlockDiagonal(mat_inner, num_blocks, block_sizes, values));
239   PetscFunctionReturn(PETSC_SUCCESS);
240 }
241 
242 /**
243   @brief View `MATCEED`.
244 
245   Collective across MPI processes.
246 
247   @param[in]   mat_ceed  `MATCEED` to view
248   @param[in]   viewer    The visualization context
249 
250   @return An error code: 0 - success, otherwise - failure
251 **/
252 static PetscErrorCode MatView_Ceed(Mat mat_ceed, PetscViewer viewer) {
253   PetscBool         is_ascii;
254   PetscViewerFormat format;
255   PetscMPIInt       size;
256   MatCeedContext    ctx;
257 
258   PetscFunctionBeginUser;
259   PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2);
260   PetscCall(MatShellGetContext(mat_ceed, &ctx));
261   if (!viewer) PetscCall(PetscViewerASCIIGetStdout(PetscObjectComm((PetscObject)mat_ceed), &viewer));
262 
263   PetscCall(PetscViewerGetFormat(viewer, &format));
264   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)mat_ceed), &size));
265   if (size == 1 && format == PETSC_VIEWER_LOAD_BALANCE) PetscFunctionReturn(PETSC_SUCCESS);
266 
267   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &is_ascii));
268   {
269     FILE *file;
270 
271     PetscCall(PetscViewerASCIIPrintf(viewer, "MatCEED:\n  Default COO MatType:%s\n", ctx->coo_mat_type));
272     PetscCall(PetscViewerASCIIGetPointer(viewer, &file));
273     PetscCall(PetscViewerASCIIPrintf(viewer, " libCEED Operator:\n"));
274     PetscCallCeed(ctx->ceed, CeedOperatorView(ctx->op_mult, file));
275     if (ctx->op_mult_transpose) {
276       PetscCall(PetscViewerASCIIPrintf(viewer, "  libCEED Transpose Operator:\n"));
277       PetscCallCeed(ctx->ceed, CeedOperatorView(ctx->op_mult_transpose, file));
278     }
279   }
280   PetscFunctionReturn(PETSC_SUCCESS);
281 }
282 
283 // -----------------------------------------------------------------------------
284 // MatCeed
285 // -----------------------------------------------------------------------------
286 
287 /**
288   @brief Create PETSc `Mat` from libCEED operators.
289 
290   Collective across MPI processes.
291 
292   @param[in]   dm_x                      Input `DM`
293   @param[in]   dm_y                      Output `DM`
294   @param[in]   op_mult                   `CeedOperator` for forward evaluation
295   @param[in]   op_mult_transpose         `CeedOperator` for transpose evaluation
296   @param[out]  mat                        New MatCeed
297 
298   @return An error code: 0 - success, otherwise - failure
299 **/
300 PetscErrorCode MatCeedCreate(DM dm_x, DM dm_y, CeedOperator op_mult, CeedOperator op_mult_transpose, Mat *mat) {
301   PetscInt       X_l_size, X_g_size, Y_l_size, Y_g_size;
302   VecType        vec_type;
303   MatCeedContext ctx;
304 
305   PetscFunctionBeginUser;
306   PetscCall(MatCeedRegisterLogEvents());
307 
308   // Collect context data
309   PetscCall(DMGetVecType(dm_x, &vec_type));
310   {
311     Vec X;
312 
313     PetscCall(DMGetGlobalVector(dm_x, &X));
314     PetscCall(VecGetSize(X, &X_g_size));
315     PetscCall(VecGetLocalSize(X, &X_l_size));
316     PetscCall(DMRestoreGlobalVector(dm_x, &X));
317   }
318   if (dm_y) {
319     Vec Y;
320 
321     PetscCall(DMGetGlobalVector(dm_y, &Y));
322     PetscCall(VecGetSize(Y, &Y_g_size));
323     PetscCall(VecGetLocalSize(Y, &Y_l_size));
324     PetscCall(DMRestoreGlobalVector(dm_y, &Y));
325   } else {
326     dm_y     = dm_x;
327     Y_g_size = X_g_size;
328     Y_l_size = X_l_size;
329   }
330 
331   // Create context
332   {
333     Vec X_loc, Y_loc_transpose = NULL;
334 
335     PetscCall(DMCreateLocalVector(dm_x, &X_loc));
336     PetscCall(VecZeroEntries(X_loc));
337     if (op_mult_transpose) {
338       PetscCall(DMCreateLocalVector(dm_y, &Y_loc_transpose));
339       PetscCall(VecZeroEntries(Y_loc_transpose));
340     }
341     PetscCall(MatCeedContextCreate(dm_x, dm_y, X_loc, Y_loc_transpose, op_mult, op_mult_transpose, MATCEED_MULT, MATCEED_MULT_TRANSPOSE,
342                                    MATCEED_MULT_CEEDOP, MATCEED_MULT_TRANSPOSE_CEEDOP, &ctx));
343     PetscCall(VecDestroy(&X_loc));
344     PetscCall(VecDestroy(&Y_loc_transpose));
345   }
346 
347   // Create mat
348   PetscCall(MatCreateShell(PetscObjectComm((PetscObject)dm_x), Y_l_size, X_l_size, Y_g_size, X_g_size, ctx, mat));
349   PetscCall(PetscObjectChangeTypeName((PetscObject)*mat, MATCEED));
350   // -- Set block and variable block sizes
351   if (dm_x == dm_y) {
352     MatType dm_mat_type, dm_mat_type_copy;
353     Mat     temp_mat;
354 
355     PetscCall(DMGetMatType(dm_x, &dm_mat_type));
356     PetscCall(PetscStrallocpy(dm_mat_type, (char **)&dm_mat_type_copy));
357     PetscCall(DMSetMatType(dm_x, MATAIJ));
358     PetscCall(DMCreateMatrix(dm_x, &temp_mat));
359     PetscCall(DMSetMatType(dm_x, dm_mat_type_copy));
360     PetscCall(PetscFree(dm_mat_type_copy));
361 
362     {
363       PetscInt        block_size, num_blocks, max_vblock_size = PETSC_INT_MAX;
364       const PetscInt *vblock_sizes;
365 
366       // -- Get block sizes
367       PetscCall(MatGetBlockSize(temp_mat, &block_size));
368       PetscCall(MatGetVariableBlockSizes(temp_mat, &num_blocks, &vblock_sizes));
369       {
370         PetscInt local_min_max[2] = {0}, global_min_max[2] = {0, PETSC_INT_MAX};
371 
372         for (PetscInt i = 0; i < num_blocks; i++) local_min_max[1] = PetscMax(local_min_max[1], vblock_sizes[i]);
373         PetscCall(PetscGlobalMinMaxInt(PetscObjectComm((PetscObject)dm_x), local_min_max, global_min_max));
374         max_vblock_size = global_min_max[1];
375       }
376 
377       // -- Copy block sizes
378       if (block_size > 1) PetscCall(MatSetBlockSize(*mat, block_size));
379       if (num_blocks) PetscCall(MatSetVariableBlockSizes(*mat, num_blocks, (PetscInt *)vblock_sizes));
380 
381       // -- Check libCEED compatibility
382       {
383         bool is_composite;
384 
385         ctx->is_ceed_pbd_valid  = PETSC_TRUE;
386         ctx->is_ceed_vpbd_valid = PETSC_TRUE;
387         PetscCallCeed(ctx->ceed, CeedOperatorIsComposite(op_mult, &is_composite));
388         if (is_composite) {
389           CeedInt       num_sub_operators;
390           CeedOperator *sub_operators;
391 
392           PetscCallCeed(ctx->ceed, CeedCompositeOperatorGetNumSub(op_mult, &num_sub_operators));
393           PetscCallCeed(ctx->ceed, CeedCompositeOperatorGetSubList(op_mult, &sub_operators));
394           for (CeedInt i = 0; i < num_sub_operators; i++) {
395             CeedInt                  num_bases, num_comp;
396             CeedBasis               *active_bases;
397             CeedOperatorAssemblyData assembly_data;
398 
399             PetscCallCeed(ctx->ceed, CeedOperatorGetOperatorAssemblyData(sub_operators[i], &assembly_data));
400             PetscCallCeed(ctx->ceed, CeedOperatorAssemblyDataGetBases(assembly_data, &num_bases, &active_bases, NULL, NULL, NULL, NULL));
401             PetscCallCeed(ctx->ceed, CeedBasisGetNumComponents(active_bases[0], &num_comp));
402             if (num_bases > 1) {
403               ctx->is_ceed_pbd_valid  = PETSC_FALSE;
404               ctx->is_ceed_vpbd_valid = PETSC_FALSE;
405             }
406             if (num_comp != block_size) ctx->is_ceed_pbd_valid = PETSC_FALSE;
407             if (num_comp < max_vblock_size) ctx->is_ceed_vpbd_valid = PETSC_FALSE;
408           }
409         } else {
410           // LCOV_EXCL_START
411           CeedInt                  num_bases, num_comp;
412           CeedBasis               *active_bases;
413           CeedOperatorAssemblyData assembly_data;
414 
415           PetscCallCeed(ctx->ceed, CeedOperatorGetOperatorAssemblyData(op_mult, &assembly_data));
416           PetscCallCeed(ctx->ceed, CeedOperatorAssemblyDataGetBases(assembly_data, &num_bases, &active_bases, NULL, NULL, NULL, NULL));
417           PetscCallCeed(ctx->ceed, CeedBasisGetNumComponents(active_bases[0], &num_comp));
418           if (num_bases > 1) {
419             ctx->is_ceed_pbd_valid  = PETSC_FALSE;
420             ctx->is_ceed_vpbd_valid = PETSC_FALSE;
421           }
422           if (num_comp != block_size) ctx->is_ceed_pbd_valid = PETSC_FALSE;
423           if (num_comp < max_vblock_size) ctx->is_ceed_vpbd_valid = PETSC_FALSE;
424           // LCOV_EXCL_STOP
425         }
426         {
427           PetscInt local_is_valid[2], global_is_valid[2];
428 
429           local_is_valid[0] = local_is_valid[1] = ctx->is_ceed_pbd_valid;
430           PetscCall(PetscGlobalMinMaxInt(PetscObjectComm((PetscObject)dm_x), local_is_valid, global_is_valid));
431           ctx->is_ceed_pbd_valid = global_is_valid[0];
432           local_is_valid[0] = local_is_valid[1] = ctx->is_ceed_vpbd_valid;
433           PetscCall(PetscGlobalMinMaxInt(PetscObjectComm((PetscObject)dm_x), local_is_valid, global_is_valid));
434           ctx->is_ceed_vpbd_valid = global_is_valid[0];
435         }
436       }
437     }
438     PetscCall(MatDestroy(&temp_mat));
439   }
440   // -- Set internal mat type
441   {
442     VecType vec_type;
443     MatType coo_mat_type;
444 
445     PetscCall(VecGetType(ctx->X_loc, &vec_type));
446     if (strstr(vec_type, VECCUDA)) coo_mat_type = MATAIJCUSPARSE;
447     else if (strstr(vec_type, VECKOKKOS)) coo_mat_type = MATAIJKOKKOS;
448     else coo_mat_type = MATAIJ;
449     PetscCall(PetscStrallocpy(coo_mat_type, &ctx->coo_mat_type));
450   }
451   // -- Set mat operations
452   PetscCall(MatShellSetContextDestroy(*mat, (PetscErrorCode(*)(void *))MatCeedContextDestroy));
453   PetscCall(MatShellSetOperation(*mat, MATOP_VIEW, (void (*)(void))MatView_Ceed));
454   PetscCall(MatShellSetOperation(*mat, MATOP_MULT, (void (*)(void))MatMult_Ceed));
455   if (op_mult_transpose) PetscCall(MatShellSetOperation(*mat, MATOP_MULT_TRANSPOSE, (void (*)(void))MatMultTranspose_Ceed));
456   PetscCall(MatShellSetOperation(*mat, MATOP_GET_DIAGONAL, (void (*)(void))MatGetDiagonal_Ceed));
457   PetscCall(MatShellSetOperation(*mat, MATOP_GET_DIAGONAL_BLOCK, (void (*)(void))MatGetDiagonalBlock_Ceed));
458   PetscCall(MatShellSetOperation(*mat, MATOP_INVERT_BLOCK_DIAGONAL, (void (*)(void))MatInvertBlockDiagonal_Ceed));
459   PetscCall(MatShellSetOperation(*mat, MATOP_INVERT_VBLOCK_DIAGONAL, (void (*)(void))MatInvertVariableBlockDiagonal_Ceed));
460   PetscCall(MatShellSetVecType(*mat, vec_type));
461   PetscFunctionReturn(PETSC_SUCCESS);
462 }
463 
464 /**
465   @brief Copy `MATCEED` into a compatible `Mat` with type `MatShell` or `MATCEED`.
466 
467   Collective across MPI processes.
468 
469   @param[in]   mat_ceed   `MATCEED` to copy from
470   @param[out]  mat_other  `MatShell` or `MATCEED` to copy into
471 
472   @return An error code: 0 - success, otherwise - failure
473 **/
474 PetscErrorCode MatCeedCopy(Mat mat_ceed, Mat mat_other) {
475   PetscFunctionBeginUser;
476   PetscCall(MatCeedRegisterLogEvents());
477 
478   // Check type compatibility
479   {
480     PetscBool is_matceed = PETSC_FALSE, is_matshell = PETSC_FALSE;
481     MatType   mat_type_ceed, mat_type_other;
482 
483     PetscCall(MatGetType(mat_ceed, &mat_type_ceed));
484     PetscCall(PetscStrcmp(mat_type_ceed, MATCEED, &is_matceed));
485     PetscCheck(is_matceed, PETSC_COMM_SELF, PETSC_ERR_LIB, "mat_ceed must have type " MATCEED);
486     PetscCall(MatGetType(mat_other, &mat_type_other));
487     PetscCall(PetscStrcmp(mat_type_other, MATCEED, &is_matceed));
488     PetscCall(PetscStrcmp(mat_type_other, MATSHELL, &is_matceed));
489     PetscCheck(is_matceed || is_matshell, PETSC_COMM_SELF, PETSC_ERR_LIB, "mat_other must have type " MATCEED " or " MATSHELL);
490   }
491 
492   // Check dimension compatibility
493   {
494     PetscInt X_l_ceed_size, X_g_ceed_size, Y_l_ceed_size, Y_g_ceed_size, X_l_other_size, X_g_other_size, Y_l_other_size, Y_g_other_size;
495 
496     PetscCall(MatGetSize(mat_ceed, &Y_g_ceed_size, &X_g_ceed_size));
497     PetscCall(MatGetLocalSize(mat_ceed, &Y_l_ceed_size, &X_l_ceed_size));
498     PetscCall(MatGetSize(mat_ceed, &Y_g_other_size, &X_g_other_size));
499     PetscCall(MatGetLocalSize(mat_ceed, &Y_l_other_size, &X_l_other_size));
500     PetscCheck((Y_g_ceed_size == Y_g_other_size) && (X_g_ceed_size == X_g_other_size) && (Y_l_ceed_size == Y_l_other_size) &&
501                    (X_l_ceed_size == X_l_other_size),
502                PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ,
503                "mat_ceed and mat_other must have compatible sizes; found mat_ceed (Global: %" PetscInt_FMT ", %" PetscInt_FMT
504                "; Local: %" PetscInt_FMT ", %" PetscInt_FMT ") mat_other (Global: %" PetscInt_FMT ", %" PetscInt_FMT "; Local: %" PetscInt_FMT
505                ", %" PetscInt_FMT ")",
506                Y_g_ceed_size, X_g_ceed_size, Y_l_ceed_size, X_l_ceed_size, Y_g_other_size, X_g_other_size, Y_l_other_size, X_l_other_size);
507   }
508 
509   // Convert
510   {
511     VecType        vec_type;
512     MatCeedContext ctx;
513 
514     PetscCall(PetscObjectChangeTypeName((PetscObject)mat_other, MATCEED));
515     PetscCall(MatShellGetContext(mat_ceed, &ctx));
516     PetscCall(MatCeedContextReference(ctx));
517     PetscCall(MatShellSetContext(mat_other, ctx));
518     PetscCall(MatShellSetContextDestroy(mat_other, (PetscErrorCode(*)(void *))MatCeedContextDestroy));
519     PetscCall(MatShellSetOperation(mat_other, MATOP_VIEW, (void (*)(void))MatView_Ceed));
520     PetscCall(MatShellSetOperation(mat_other, MATOP_MULT, (void (*)(void))MatMult_Ceed));
521     if (ctx->op_mult_transpose) PetscCall(MatShellSetOperation(mat_other, MATOP_MULT_TRANSPOSE, (void (*)(void))MatMultTranspose_Ceed));
522     PetscCall(MatShellSetOperation(mat_other, MATOP_GET_DIAGONAL, (void (*)(void))MatGetDiagonal_Ceed));
523     PetscCall(MatShellSetOperation(mat_other, MATOP_GET_DIAGONAL_BLOCK, (void (*)(void))MatGetDiagonalBlock_Ceed));
524     PetscCall(MatShellSetOperation(mat_other, MATOP_INVERT_BLOCK_DIAGONAL, (void (*)(void))MatInvertBlockDiagonal_Ceed));
525     PetscCall(MatShellSetOperation(mat_other, MATOP_INVERT_VBLOCK_DIAGONAL, (void (*)(void))MatInvertVariableBlockDiagonal_Ceed));
526     {
527       PetscInt block_size;
528 
529       PetscCall(MatGetBlockSize(mat_ceed, &block_size));
530       if (block_size > 1) PetscCall(MatSetBlockSize(mat_other, block_size));
531     }
532     {
533       PetscInt        num_blocks;
534       const PetscInt *block_sizes;
535 
536       PetscCall(MatGetVariableBlockSizes(mat_ceed, &num_blocks, &block_sizes));
537       if (num_blocks) PetscCall(MatSetVariableBlockSizes(mat_other, num_blocks, (PetscInt *)block_sizes));
538     }
539     PetscCall(DMGetVecType(ctx->dm_x, &vec_type));
540     PetscCall(MatShellSetVecType(mat_other, vec_type));
541   }
542   PetscFunctionReturn(PETSC_SUCCESS);
543 }
544 
545 /**
546   @brief Setup a `Mat` with the same COO pattern as a `MatCEED`.
547 
548   Collective across MPI processes.
549 
550   @param[in]   mat_ceed  `MATCEED`
551   @param[out]  mat_coo   Sparse `Mat` with same COO pattern
552 
553   @return An error code: 0 - success, otherwise - failure
554 **/
PetscErrorCode MatCeedCreateMatCOO(Mat mat_ceed, Mat *mat_coo) {
  MatCeedContext ctx;

  PetscFunctionBeginUser;
  PetscCall(MatShellGetContext(mat_ceed, &ctx));

  // COO assembly requires input and output on the same DM (square operator)
  PetscCheck(ctx->dm_x == ctx->dm_y, PetscObjectComm((PetscObject)mat_ceed), PETSC_ERR_SUP, "COO assembly only supported for MATCEED on a single DM");

  // Check cl mat type
  {
    PetscBool is_coo_mat_type_cl = PETSC_FALSE;
    char      coo_mat_type_cl[64];

    // Check for specific CL coo mat type for this Mat
    {
      const char *mat_ceed_prefix = NULL;

      // Command line option -ceed_coo_mat_type (with this Mat's options prefix) overrides the stored default
      PetscCall(MatGetOptionsPrefix(mat_ceed, &mat_ceed_prefix));
      PetscOptionsBegin(PetscObjectComm((PetscObject)mat_ceed), mat_ceed_prefix, "", NULL);
      PetscCall(PetscOptionsFList("-ceed_coo_mat_type", "Default MATCEED COO assembly MatType", NULL, MatList, coo_mat_type_cl, coo_mat_type_cl,
                                  sizeof(coo_mat_type_cl), &is_coo_mat_type_cl));
      PetscOptionsEnd();
      if (is_coo_mat_type_cl) {
        // Replace the context's default COO MatType with the command line value
        PetscCall(PetscFree(ctx->coo_mat_type));
        PetscCall(PetscStrallocpy(coo_mat_type_cl, &ctx->coo_mat_type));
      }
    }
  }

  // Create sparse matrix
  {
    MatType dm_mat_type, dm_mat_type_copy;

    // Temporarily switch the DM MatType so DMCreateMatrix produces the COO MatType, then restore it
    PetscCall(DMGetMatType(ctx->dm_x, &dm_mat_type));
    PetscCall(PetscStrallocpy(dm_mat_type, (char **)&dm_mat_type_copy));
    PetscCall(DMSetMatType(ctx->dm_x, ctx->coo_mat_type));
    PetscCall(DMCreateMatrix(ctx->dm_x, mat_coo));
    PetscCall(DMSetMatType(ctx->dm_x, dm_mat_type_copy));
    PetscCall(PetscFree(dm_mat_type_copy));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
597 
598 /**
599   @brief Setup the COO preallocation `MATCEED` into a `MATAIJ` or similar.
600          The caller is responsible for assuring the global and local sizes are compatible, otherwise this function will fail.
601 
602   Collective across MPI processes.
603 
604   @param[in]      mat_ceed  `MATCEED` to assemble
605   @param[in,out]  mat_coo   `MATAIJ` or similar to assemble into
606 
607   @return An error code: 0 - success, otherwise - failure
608 **/
PetscErrorCode MatCeedSetPreallocationCOO(Mat mat_ceed, Mat mat_coo) {
  MatCeedContext ctx;

  PetscFunctionBeginUser;
  PetscCall(MatShellGetContext(mat_ceed, &ctx));

  {
    PetscInt     *rows_petsc = NULL, *cols_petsc = NULL;
    CeedInt      *rows_ceed, *cols_ceed;
    PetscCount    num_entries;
    PetscLogStage stage_amg_setup;

    // -- Assemble sparsity pattern if mat hasn't been assembled before
    PetscCall(PetscLogStageGetId("MatCEED Asm Setup", &stage_amg_setup));
    if (stage_amg_setup == -1) {
      // Register the log stage on first use
      PetscCall(PetscLogStageRegister("MatCEED Asm Setup", &stage_amg_setup));
    }
    PetscCall(PetscLogStagePush(stage_amg_setup));
    PetscCall(PetscLogEventBegin(MATCEED_SETUP_FULL, mat_ceed, mat_coo, NULL, NULL));
    PetscCall(PetscLogEventBegin(MATCEED_SETUP_FULL_CEEDOP, mat_ceed, mat_coo, NULL, NULL));
    // libCEED computes the (row, col) index pairs of the full operator sparsity pattern
    PetscCallCeed(ctx->ceed, CeedOperatorLinearAssembleSymbolic(ctx->op_mult, &num_entries, &rows_ceed, &cols_ceed));
    PetscCall(PetscLogEventEnd(MATCEED_SETUP_FULL_CEEDOP, mat_ceed, mat_coo, NULL, NULL));
    // Convert CeedInt index arrays to PetscInt (the two integer types may differ in width)
    PetscCall(IntArrayCeedToPetsc(num_entries, &rows_ceed, &rows_petsc));
    PetscCall(IntArrayCeedToPetsc(num_entries, &cols_ceed, &cols_petsc));
    PetscCall(MatSetPreallocationCOOLocal(mat_coo, num_entries, rows_petsc, cols_petsc));
    // NOTE(review): released with free(), which assumes IntArrayCeedToPetsc allocates with malloc — confirm
    free(rows_petsc);
    free(cols_petsc);
    // Lazily create the CeedVector that will receive assembled COO values
    if (!ctx->coo_values_full) PetscCallCeed(ctx->ceed, CeedVectorCreate(ctx->ceed, num_entries, &ctx->coo_values_full));
    // Record mat_coo so callers (e.g. MatCeedAssembleCOO) can skip this setup on subsequent calls
    PetscCall(PetscRealloc(++ctx->num_mats_assembled_full * sizeof(Mat), &ctx->mats_assembled_full));
    ctx->mats_assembled_full[ctx->num_mats_assembled_full - 1] = mat_coo;
    PetscCall(PetscLogEventEnd(MATCEED_SETUP_FULL, mat_ceed, mat_coo, NULL, NULL));
    PetscCall(PetscLogStagePop());
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
644 
645 /**
646   @brief Assemble a `MATCEED` into a `MATAIJ` or similar.
647          The `mat_coo` preallocation is set to match the sparsity pattern of `mat_ceed`.
648          The caller is responsible for assuring the global and local sizes are compatible, otherwise this function will fail.
649 
650   Collective across MPI processes.
651 
652   @param[in]      mat_ceed  `MATCEED` to assemble
653   @param[in,out]  mat_coo   `MATAIJ` or similar to assemble into
654 
655   @return An error code: 0 - success, otherwise - failure
656 **/
PetscErrorCode MatCeedAssembleCOO(Mat mat_ceed, Mat mat_coo) {
  MatCeedContext ctx;

  PetscFunctionBeginUser;
  PetscCall(MatShellGetContext(mat_ceed, &ctx));

  // Set COO pattern if needed
  {
    CeedInt index = -1;

    // Linear search over matrices whose COO pattern was already set from this context
    for (PetscInt i = 0; i < ctx->num_mats_assembled_full; i++) {
      if (ctx->mats_assembled_full[i] == mat_coo) index = i;
    }
    if (index == -1) PetscCall(MatCeedSetPreallocationCOO(mat_ceed, mat_coo));
  }

  // Assemble mat_ceed
  PetscCall(PetscLogEventBegin(MATCEED_ASSEMBLE_FULL, mat_ceed, mat_coo, NULL, NULL));
  PetscCall(MatAssemblyBegin(mat_coo, MAT_FINAL_ASSEMBLY));
  {
    const CeedScalar *values;
    MatType           mat_type;
    CeedMemType       mem_type = CEED_MEM_HOST;
    PetscBool         is_spd, is_spd_known;

    // Read assembled values from device memory for device-backed Mat types, host memory otherwise
    PetscCall(MatGetType(mat_coo, &mat_type));
    if (strstr(mat_type, "cusparse")) mem_type = CEED_MEM_DEVICE;
    else if (strstr(mat_type, "kokkos")) mem_type = CEED_MEM_DEVICE;
    else mem_type = CEED_MEM_HOST;

    PetscCall(PetscLogEventBegin(MATCEED_ASSEMBLE_FULL_CEEDOP, mat_ceed, mat_coo, NULL, NULL));
    PetscCallCeed(ctx->ceed, CeedOperatorLinearAssemble(ctx->op_mult, ctx->coo_values_full));
    PetscCall(PetscLogEventEnd(MATCEED_ASSEMBLE_FULL_CEEDOP, mat_ceed, mat_coo, NULL, NULL));
    PetscCallCeed(ctx->ceed, CeedVectorGetArrayRead(ctx->coo_values_full, mem_type, &values));
    PetscCall(MatSetValuesCOO(mat_coo, values, INSERT_VALUES));
    // Propagate the SPD flag from the shell matrix when it is known
    PetscCall(MatIsSPDKnown(mat_ceed, &is_spd_known, &is_spd));
    if (is_spd_known) PetscCall(MatSetOption(mat_coo, MAT_SPD, is_spd));
    PetscCallCeed(ctx->ceed, CeedVectorRestoreArrayRead(ctx->coo_values_full, &values));
  }
  PetscCall(MatAssemblyEnd(mat_coo, MAT_FINAL_ASSEMBLY));
  PetscCall(PetscLogEventEnd(MATCEED_ASSEMBLE_FULL, mat_ceed, mat_coo, NULL, NULL));
  PetscFunctionReturn(PETSC_SUCCESS);
}
700 
701 /**
702   @brief Set the current value of a context field for a `MatCEED`.
703 
704   Not collective across MPI processes.
705 
706   @param[in,out]  mat    `MatCEED`
707   @param[in]      name   Name of the context field
708   @param[in]      value  New context field value
709 
710   @return An error code: 0 - success, otherwise - failure
711 **/
712 PetscErrorCode MatCeedSetContextDouble(Mat mat, const char *name, double value) {
713   PetscBool      was_updated = PETSC_FALSE;
714   MatCeedContext ctx;
715 
716   PetscFunctionBeginUser;
717   PetscCall(MatShellGetContext(mat, &ctx));
718   {
719     CeedContextFieldLabel label = NULL;
720 
721     PetscCallCeed(ctx->ceed, CeedOperatorGetContextFieldLabel(ctx->op_mult, name, &label));
722     if (label) {
723       PetscCallCeed(ctx->ceed, CeedOperatorSetContextDouble(ctx->op_mult, label, &value));
724       was_updated = PETSC_TRUE;
725     }
726     if (ctx->op_mult_transpose) {
727       label = NULL;
728       PetscCallCeed(ctx->ceed, CeedOperatorGetContextFieldLabel(ctx->op_mult_transpose, name, &label));
729       if (label) {
730         PetscCallCeed(ctx->ceed, CeedOperatorSetContextDouble(ctx->op_mult_transpose, label, &value));
731         was_updated = PETSC_TRUE;
732       }
733     }
734   }
735   if (was_updated) {
736     PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY));
737     PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY));
738   }
739   PetscFunctionReturn(PETSC_SUCCESS);
740 }
741 
742 /**
743   @brief Set the current `PetscReal` value of a context field for a `MatCEED`.
744 
745   Not collective across MPI processes.
746 
747   @param[in,out]  mat    `MatCEED`
748   @param[in]      name   Name of the context field
749   @param[in]      value  New context field value
750 
751   @return An error code: 0 - success, otherwise - failure
752 **/
753 PetscErrorCode MatCeedSetContextReal(Mat mat, const char *name, PetscReal value) {
754   double value_double = value;
755 
756   PetscFunctionBeginUser;
757   PetscCall(MatCeedSetContextDouble(mat, name, value_double));
758   PetscFunctionReturn(PETSC_SUCCESS);
759 }
760 
761 /**
762   @brief Get the current value of a context field for a `MatCEED`.
763 
764   Not collective across MPI processes.
765 
766   @param[in]   mat    `MatCEED`
767   @param[in]   name   Name of the context field
768   @param[out]  value  Current context field value
769 
770   @return An error code: 0 - success, otherwise - failure
771 **/
772 PetscErrorCode MatCeedGetContextDouble(Mat mat, const char *name, double *value) {
773   MatCeedContext ctx;
774 
775   PetscFunctionBeginUser;
776   PetscCall(MatShellGetContext(mat, &ctx));
777   {
778     CeedContextFieldLabel label = NULL;
779     CeedOperator          op    = ctx->op_mult;
780 
781     PetscCallCeed(ctx->ceed, CeedOperatorGetContextFieldLabel(op, name, &label));
782     if (!label && ctx->op_mult_transpose) {
783       op = ctx->op_mult_transpose;
784       PetscCallCeed(ctx->ceed, CeedOperatorGetContextFieldLabel(op, name, &label));
785     }
786     if (label) {
787       PetscSizeT    num_values;
788       const double *values_ceed;
789 
790       PetscCallCeed(ctx->ceed, CeedOperatorGetContextDoubleRead(op, label, &num_values, &values_ceed));
791       *value = values_ceed[0];
792       PetscCallCeed(ctx->ceed, CeedOperatorRestoreContextDoubleRead(op, label, &values_ceed));
793     }
794   }
795   PetscFunctionReturn(PETSC_SUCCESS);
796 }
797 
798 /**
799   @brief Get the current `PetscReal` value of a context field for a `MatCEED`.
800 
801   Not collective across MPI processes.
802 
803   @param[in]   mat    `MatCEED`
804   @param[in]   name   Name of the context field
805   @param[out]  value  Current context field value
806 
807   @return An error code: 0 - success, otherwise - failure
808 **/
809 PetscErrorCode MatCeedGetContextReal(Mat mat, const char *name, PetscReal *value) {
810   double value_double;
811 
812   PetscFunctionBeginUser;
813   PetscCall(MatCeedGetContextDouble(mat, name, &value_double));
814   *value = value_double;
815   PetscFunctionReturn(PETSC_SUCCESS);
816 }
817 
818 /**
819   @brief Set user context for a `MATCEED`.
820 
821   Collective across MPI processes.
822 
823   @param[in,out]  mat  `MATCEED`
824   @param[in]      f    The context destroy function, or NULL
825   @param[in]      ctx  User context, or NULL to unset
826 
827   @return An error code: 0 - success, otherwise - failure
828 **/
829 PetscErrorCode MatCeedSetContext(Mat mat, PetscErrorCode (*f)(void *), void *ctx) {
830   PetscContainer user_ctx = NULL;
831 
832   PetscFunctionBeginUser;
833   if (ctx) {
834     PetscCall(PetscContainerCreate(PetscObjectComm((PetscObject)mat), &user_ctx));
835     PetscCall(PetscContainerSetPointer(user_ctx, ctx));
836     PetscCall(PetscContainerSetUserDestroy(user_ctx, f));
837   }
838   PetscCall(PetscObjectCompose((PetscObject)mat, "MatCeed user context", (PetscObject)user_ctx));
839   PetscCall(PetscContainerDestroy(&user_ctx));
840   PetscFunctionReturn(PETSC_SUCCESS);
841 }
842 
843 /**
844   @brief Retrieve the user context for a `MATCEED`.
845 
846   Collective across MPI processes.
847 
848   @param[in,out]  mat  `MATCEED`
  @param[out]     ctx  User context, set to NULL if no context was set
850 
851   @return An error code: 0 - success, otherwise - failure
852 **/
853 PetscErrorCode MatCeedGetContext(Mat mat, void *ctx) {
854   PetscContainer user_ctx;
855 
856   PetscFunctionBeginUser;
857   PetscCall(PetscObjectQuery((PetscObject)mat, "MatCeed user context", (PetscObject *)&user_ctx));
858   if (user_ctx) PetscCall(PetscContainerGetPointer(user_ctx, (void **)ctx));
859   else *(void **)ctx = NULL;
860   PetscFunctionReturn(PETSC_SUCCESS);
861 }
862 /**
863   @brief Set a user defined matrix operation for a `MATCEED` matrix.
864 
865   Within each user-defined routine, the user should call `MatCeedGetContext()` to obtain the user-defined context that was set by
866 `MatCeedSetContext()`.
867 
868   Collective across MPI processes.
869 
870   @param[in,out]  mat  `MATCEED`
871   @param[in]      op   Name of the `MatOperation`
872   @param[in]      g    Function that provides the operation
873 
874   @return An error code: 0 - success, otherwise - failure
875 **/
876 PetscErrorCode MatCeedSetOperation(Mat mat, MatOperation op, void (*g)(void)) {
877   PetscFunctionBeginUser;
878   PetscCall(MatShellSetOperation(mat, op, g));
879   PetscFunctionReturn(PETSC_SUCCESS);
880 }
881 
882 /**
  @brief Set the default COO matrix type for the `MATCEED`.
884 
885   Collective across MPI processes.
886 
887   @param[in,out]  mat   `MATCEED`
888   @param[in]      type  COO `MatType` to set
889 
890   @return An error code: 0 - success, otherwise - failure
891 **/
892 PetscErrorCode MatCeedSetCOOMatType(Mat mat, MatType type) {
893   MatCeedContext ctx;
894 
895   PetscFunctionBeginUser;
896   PetscCall(MatShellGetContext(mat, &ctx));
897   // Check if same
898   {
899     size_t    len_old, len_new;
900     PetscBool is_same = PETSC_FALSE;
901 
902     PetscCall(PetscStrlen(ctx->coo_mat_type, &len_old));
903     PetscCall(PetscStrlen(type, &len_new));
904     if (len_old == len_new) PetscCall(PetscStrncmp(ctx->coo_mat_type, type, len_old, &is_same));
905     if (is_same) PetscFunctionReturn(PETSC_SUCCESS);
906   }
907   // Clean up old mats in different format
908   // LCOV_EXCL_START
909   if (ctx->mat_assembled_full_internal) {
910     for (PetscInt i = 0; i < ctx->num_mats_assembled_full; i++) {
911       if (ctx->mats_assembled_full[i] == ctx->mat_assembled_full_internal) {
912         for (PetscInt j = i + 1; j < ctx->num_mats_assembled_full; j++) {
913           ctx->mats_assembled_full[j - 1] = ctx->mats_assembled_full[j];
914         }
915         ctx->num_mats_assembled_full--;
916         // Note: we'll realloc this array again, so no need to shrink the allocation
917         PetscCall(MatDestroy(&ctx->mat_assembled_full_internal));
918       }
919     }
920   }
921   if (ctx->mat_assembled_pbd_internal) {
922     for (PetscInt i = 0; i < ctx->num_mats_assembled_pbd; i++) {
923       if (ctx->mats_assembled_pbd[i] == ctx->mat_assembled_pbd_internal) {
924         for (PetscInt j = i + 1; j < ctx->num_mats_assembled_pbd; j++) {
925           ctx->mats_assembled_pbd[j - 1] = ctx->mats_assembled_pbd[j];
926         }
927         // Note: we'll realloc this array again, so no need to shrink the allocation
928         ctx->num_mats_assembled_pbd--;
929         PetscCall(MatDestroy(&ctx->mat_assembled_pbd_internal));
930       }
931     }
932   }
933   PetscCall(PetscFree(ctx->coo_mat_type));
934   PetscCall(PetscStrallocpy(type, &ctx->coo_mat_type));
935   PetscFunctionReturn(PETSC_SUCCESS);
936   // LCOV_EXCL_STOP
937 }
938 
939 /**
  @brief Get the default COO matrix type for the `MATCEED`.

  Collective across MPI processes.

  @param[in,out]  mat   `MATCEED`
  @param[out]     type  COO `MatType`
946 
947   @return An error code: 0 - success, otherwise - failure
948 **/
949 PetscErrorCode MatCeedGetCOOMatType(Mat mat, MatType *type) {
950   MatCeedContext ctx;
951 
952   PetscFunctionBeginUser;
953   PetscCall(MatShellGetContext(mat, &ctx));
954   *type = ctx->coo_mat_type;
955   PetscFunctionReturn(PETSC_SUCCESS);
956 }
957 
958 /**
959   @brief Set input local vectors for `MATCEED` `MatMult()` and `MatMultTranspose()` operations.
960 
961   Not collective across MPI processes.
962 
963   @param[in,out]  mat              `MATCEED`
964   @param[in]      X_loc            Input PETSc local vector, or NULL
965   @param[in]      Y_loc_transpose  Input PETSc local vector for transpose operation, or NULL
966 
967   @return An error code: 0 - success, otherwise - failure
968 **/
969 PetscErrorCode MatCeedSetLocalVectors(Mat mat, Vec X_loc, Vec Y_loc_transpose) {
970   MatCeedContext ctx;
971 
972   PetscFunctionBeginUser;
973   PetscCall(MatShellGetContext(mat, &ctx));
974   if (X_loc) {
975     PetscInt len_old, len_new;
976 
977     PetscCall(VecGetSize(ctx->X_loc, &len_old));
978     PetscCall(VecGetSize(X_loc, &len_new));
979     PetscCheck(len_old == len_new, PETSC_COMM_SELF, PETSC_ERR_LIB, "new X_loc length %" PetscInt_FMT " should match old X_loc length %" PetscInt_FMT,
980                len_new, len_old);
981     PetscCall(VecReferenceCopy(X_loc, &ctx->X_loc));
982   }
983   if (Y_loc_transpose) {
984     PetscInt len_old, len_new;
985 
986     PetscCall(VecGetSize(ctx->Y_loc_transpose, &len_old));
987     PetscCall(VecGetSize(Y_loc_transpose, &len_new));
988     PetscCheck(len_old == len_new, PETSC_COMM_SELF, PETSC_ERR_LIB,
989                "new Y_loc_transpose length %" PetscInt_FMT " should match old Y_loc_transpose length %" PetscInt_FMT, len_new, len_old);
990     PetscCall(VecReferenceCopy(Y_loc_transpose, &ctx->Y_loc_transpose));
991   }
992   PetscFunctionReturn(PETSC_SUCCESS);
993 }
994 
995 /**
996   @brief Get input local vectors for `MATCEED` `MatMult()` and `MatMultTranspose()` operations.
997 
998   Not collective across MPI processes.
999 
1000   @param[in,out]  mat              `MATCEED`
1001   @param[out]     X_loc            Input PETSc local vector, or NULL
1002   @param[out]     Y_loc_transpose  Input PETSc local vector for transpose operation, or NULL
1003 
1004   @return An error code: 0 - success, otherwise - failure
1005 **/
1006 PetscErrorCode MatCeedGetLocalVectors(Mat mat, Vec *X_loc, Vec *Y_loc_transpose) {
1007   MatCeedContext ctx;
1008 
1009   PetscFunctionBeginUser;
1010   PetscCall(MatShellGetContext(mat, &ctx));
1011   if (X_loc) {
1012     *X_loc = NULL;
1013     PetscCall(VecReferenceCopy(ctx->X_loc, X_loc));
1014   }
1015   if (Y_loc_transpose) {
1016     *Y_loc_transpose = NULL;
1017     PetscCall(VecReferenceCopy(ctx->Y_loc_transpose, Y_loc_transpose));
1018   }
1019   PetscFunctionReturn(PETSC_SUCCESS);
1020 }
1021 
1022 /**
1023   @brief Restore input local vectors for `MATCEED` `MatMult()` and `MatMultTranspose()` operations.
1024 
1025   Not collective across MPI processes.
1026 
1027   @param[in,out]  mat              MatCeed
1028   @param[out]     X_loc            Input PETSc local vector, or NULL
1029   @param[out]     Y_loc_transpose  Input PETSc local vector for transpose operation, or NULL
1030 
1031   @return An error code: 0 - success, otherwise - failure
1032 **/
1033 PetscErrorCode MatCeedRestoreLocalVectors(Mat mat, Vec *X_loc, Vec *Y_loc_transpose) {
1034   PetscFunctionBeginUser;
1035   if (X_loc) PetscCall(VecDestroy(X_loc));
1036   if (Y_loc_transpose) PetscCall(VecDestroy(Y_loc_transpose));
1037   PetscFunctionReturn(PETSC_SUCCESS);
1038 }
1039 
1040 /**
1041   @brief Get libCEED `CeedOperator` for `MATCEED` `MatMult()` and `MatMultTranspose()` operations.
1042 
1043   Not collective across MPI processes.
1044 
1045   @param[in,out]  mat                MatCeed
1046   @param[out]     op_mult            libCEED `CeedOperator` for `MatMult()`, or NULL
1047   @param[out]     op_mult_transpose  libCEED `CeedOperator` for `MatMultTranspose()`, or NULL
1048 
1049   @return An error code: 0 - success, otherwise - failure
1050 **/
1051 PetscErrorCode MatCeedGetCeedOperators(Mat mat, CeedOperator *op_mult, CeedOperator *op_mult_transpose) {
1052   MatCeedContext ctx;
1053 
1054   PetscFunctionBeginUser;
1055   PetscCall(MatShellGetContext(mat, &ctx));
1056   if (op_mult) {
1057     *op_mult = NULL;
1058     PetscCallCeed(ctx->ceed, CeedOperatorReferenceCopy(ctx->op_mult, op_mult));
1059   }
1060   if (op_mult_transpose) {
1061     *op_mult_transpose = NULL;
1062     PetscCallCeed(ctx->ceed, CeedOperatorReferenceCopy(ctx->op_mult_transpose, op_mult_transpose));
1063   }
1064   PetscFunctionReturn(PETSC_SUCCESS);
1065 }
1066 
1067 /**
1068   @brief Restore libCEED `CeedOperator` for `MATCEED` `MatMult()` and `MatMultTranspose()` operations.
1069 
1070   Not collective across MPI processes.
1071 
1072   @param[in,out]  mat                MatCeed
1073   @param[out]     op_mult            libCEED `CeedOperator` for `MatMult()`, or NULL
1074   @param[out]     op_mult_transpose  libCEED `CeedOperator` for `MatMultTranspose()`, or NULL
1075 
1076   @return An error code: 0 - success, otherwise - failure
1077 **/
1078 PetscErrorCode MatCeedRestoreCeedOperators(Mat mat, CeedOperator *op_mult, CeedOperator *op_mult_transpose) {
1079   MatCeedContext ctx;
1080 
1081   PetscFunctionBeginUser;
1082   PetscCall(MatShellGetContext(mat, &ctx));
1083   if (op_mult) PetscCallCeed(ctx->ceed, CeedOperatorDestroy(op_mult));
1084   if (op_mult_transpose) PetscCallCeed(ctx->ceed, CeedOperatorDestroy(op_mult_transpose));
1085   PetscFunctionReturn(PETSC_SUCCESS);
1086 }
1087 
1088 /**
1089   @brief Set `PetscLogEvent` for `MATCEED` `MatMult()` and `MatMultTranspose()` operators.
1090 
1091   Not collective across MPI processes.
1092 
1093   @param[in,out]  mat                       MatCeed
  @param[in]      log_event_mult            `PetscLogEvent` for forward evaluation, or 0 to leave unchanged
  @param[in]      log_event_mult_transpose  `PetscLogEvent` for transpose evaluation, or 0 to leave unchanged
1096 
1097   @return An error code: 0 - success, otherwise - failure
1098 **/
1099 PetscErrorCode MatCeedSetLogEvents(Mat mat, PetscLogEvent log_event_mult, PetscLogEvent log_event_mult_transpose) {
1100   MatCeedContext ctx;
1101 
1102   PetscFunctionBeginUser;
1103   PetscCall(MatShellGetContext(mat, &ctx));
1104   if (log_event_mult) ctx->log_event_mult = log_event_mult;
1105   if (log_event_mult_transpose) ctx->log_event_mult_transpose = log_event_mult_transpose;
1106   PetscFunctionReturn(PETSC_SUCCESS);
1107 }
1108 
1109 /**
1110   @brief Get `PetscLogEvent` for `MATCEED` `MatMult()` and `MatMultTranspose()` operators.
1111 
1112   Not collective across MPI processes.
1113 
1114   @param[in,out]  mat                       MatCeed
1115   @param[out]     log_event_mult            `PetscLogEvent` for forward evaluation, or NULL
1116   @param[out]     log_event_mult_transpose  `PetscLogEvent` for transpose evaluation, or NULL
1117 
1118   @return An error code: 0 - success, otherwise - failure
1119 **/
1120 PetscErrorCode MatCeedGetLogEvents(Mat mat, PetscLogEvent *log_event_mult, PetscLogEvent *log_event_mult_transpose) {
1121   MatCeedContext ctx;
1122 
1123   PetscFunctionBeginUser;
1124   PetscCall(MatShellGetContext(mat, &ctx));
1125   if (log_event_mult) *log_event_mult = ctx->log_event_mult;
1126   if (log_event_mult_transpose) *log_event_mult_transpose = ctx->log_event_mult_transpose;
1127   PetscFunctionReturn(PETSC_SUCCESS);
1128 }
1129 
1130 /**
1131   @brief Set `CeedOperator` `PetscLogEvent` for `MATCEED` `MatMult()` and `MatMultTranspose()` operators.
1132 
1133   Not collective across MPI processes.
1134 
1135   @param[in,out]  mat                       MatCeed
  @param[in]      log_event_mult            `PetscLogEvent` for forward `CeedOperator` evaluation, or 0 to leave unchanged
  @param[in]      log_event_mult_transpose  `PetscLogEvent` for transpose `CeedOperator` evaluation, or 0 to leave unchanged
1138 
1139   @return An error code: 0 - success, otherwise - failure
1140 **/
1141 PetscErrorCode MatCeedSetCeedOperatorLogEvents(Mat mat, PetscLogEvent log_event_mult, PetscLogEvent log_event_mult_transpose) {
1142   MatCeedContext ctx;
1143 
1144   PetscFunctionBeginUser;
1145   PetscCall(MatShellGetContext(mat, &ctx));
1146   if (log_event_mult) ctx->log_event_ceed_mult = log_event_mult;
1147   if (log_event_mult_transpose) ctx->log_event_ceed_mult_transpose = log_event_mult_transpose;
1148   PetscFunctionReturn(PETSC_SUCCESS);
1149 }
1150 
1151 /**
1152   @brief Get `CeedOperator` `PetscLogEvent` for `MATCEED` `MatMult()` and `MatMultTranspose()` operators.
1153 
1154   Not collective across MPI processes.
1155 
1156   @param[in,out]  mat                       MatCeed
1157   @param[out]     log_event_mult            `PetscLogEvent` for forward `CeedOperator` evaluation, or NULL
1158   @param[out]     log_event_mult_transpose  `PetscLogEvent` for transpose `CeedOperator` evaluation, or NULL
1159 
1160   @return An error code: 0 - success, otherwise - failure
1161 **/
1162 PetscErrorCode MatCeedGetCeedOperatorLogEvents(Mat mat, PetscLogEvent *log_event_mult, PetscLogEvent *log_event_mult_transpose) {
1163   MatCeedContext ctx;
1164 
1165   PetscFunctionBeginUser;
1166   PetscCall(MatShellGetContext(mat, &ctx));
1167   if (log_event_mult) *log_event_mult = ctx->log_event_ceed_mult;
1168   if (log_event_mult_transpose) *log_event_mult_transpose = ctx->log_event_ceed_mult_transpose;
1169   PetscFunctionReturn(PETSC_SUCCESS);
1170 }
1171 
1172 // -----------------------------------------------------------------------------
1173 // Operator context data
1174 // -----------------------------------------------------------------------------
1175 
1176 /**
1177   @brief Setup context data for operator application.
1178 
1179   Collective across MPI processes.
1180 
1181   @param[in]   dm_x                           Input `DM`
1182   @param[in]   dm_y                           Output `DM`
1183   @param[in]   X_loc                          Input PETSc local vector, or NULL
1184   @param[in]   Y_loc_transpose                Input PETSc local vector for transpose operation, or NULL
1185   @param[in]   op_mult                        `CeedOperator` for forward evaluation
1186   @param[in]   op_mult_transpose              `CeedOperator` for transpose evaluation
1187   @param[in]   log_event_mult                 `PetscLogEvent` for forward evaluation
1188   @param[in]   log_event_mult_transpose       `PetscLogEvent` for transpose evaluation
1189   @param[in]   log_event_ceed_mult            `PetscLogEvent` for forward `CeedOperator` evaluation
1190   @param[in]   log_event_ceed_mult_transpose  `PetscLogEvent` for transpose `CeedOperator` evaluation
1191   @param[out]  ctx                            Context data for operator evaluation
1192 
1193   @return An error code: 0 - success, otherwise - failure
1194 **/
PetscErrorCode MatCeedContextCreate(DM dm_x, DM dm_y, Vec X_loc, Vec Y_loc_transpose, CeedOperator op_mult, CeedOperator op_mult_transpose,
                                    PetscLogEvent log_event_mult, PetscLogEvent log_event_mult_transpose, PetscLogEvent log_event_ceed_mult,
                                    PetscLogEvent log_event_ceed_mult_transpose, MatCeedContext *ctx) {
  CeedSize x_loc_len, y_loc_len;

  PetscFunctionBeginUser;

  // Allocate
  // The new context starts with a single reference, owned by the caller
  PetscCall(PetscNew(ctx));
  (*ctx)->ref_count = 1;

  // Logging
  (*ctx)->log_event_mult                = log_event_mult;
  (*ctx)->log_event_mult_transpose      = log_event_mult_transpose;
  (*ctx)->log_event_ceed_mult           = log_event_ceed_mult;
  (*ctx)->log_event_ceed_mult_transpose = log_event_ceed_mult_transpose;

  // PETSc objects
  // The context holds its own references to the DMs and the optional local vectors
  PetscCall(DMReferenceCopy(dm_x, &(*ctx)->dm_x));
  PetscCall(DMReferenceCopy(dm_y, &(*ctx)->dm_y));
  if (X_loc) PetscCall(VecReferenceCopy(X_loc, &(*ctx)->X_loc));
  if (Y_loc_transpose) PetscCall(VecReferenceCopy(Y_loc_transpose, &(*ctx)->Y_loc_transpose));

  // Memtype
  // Probe a throwaway local vector to record where dm_x vector data lives (host or device)
  {
    const PetscScalar *x;
    Vec                X;

    PetscCall(DMGetLocalVector(dm_x, &X));
    PetscCall(VecGetArrayReadAndMemType(X, &x, &(*ctx)->mem_type));
    PetscCall(VecRestoreArrayReadAndMemType(X, &x));
    PetscCall(DMRestoreLocalVector(dm_x, &X));
  }

  // libCEED objects
  // The context takes its own reference to the Ceed handle and both operators
  PetscCheck(CeedOperatorGetCeed(op_mult, &(*ctx)->ceed) == CEED_ERROR_SUCCESS, PETSC_COMM_SELF, PETSC_ERR_LIB,
             "retrieving Ceed context object failed");
  PetscCallCeed((*ctx)->ceed, CeedReference((*ctx)->ceed));
  PetscCallCeed((*ctx)->ceed, CeedOperatorGetActiveVectorLengths(op_mult, &x_loc_len, &y_loc_len));
  PetscCallCeed((*ctx)->ceed, CeedOperatorReferenceCopy(op_mult, &(*ctx)->op_mult));
  if (op_mult_transpose) PetscCallCeed((*ctx)->ceed, CeedOperatorReferenceCopy(op_mult_transpose, &(*ctx)->op_mult_transpose));
  PetscCallCeed((*ctx)->ceed, CeedVectorCreate((*ctx)->ceed, x_loc_len, &(*ctx)->x_loc));
  PetscCallCeed((*ctx)->ceed, CeedVectorCreate((*ctx)->ceed, y_loc_len, &(*ctx)->y_loc));

  // Flop counting
  // Cache flop estimates so MatMult/MatMultTranspose can log them cheaply
  {
    CeedSize ceed_flops_estimate = 0;

    PetscCallCeed((*ctx)->ceed, CeedOperatorGetFlopsEstimate(op_mult, &ceed_flops_estimate));
    (*ctx)->flops_mult = ceed_flops_estimate;
    if (op_mult_transpose) {
      PetscCallCeed((*ctx)->ceed, CeedOperatorGetFlopsEstimate(op_mult_transpose, &ceed_flops_estimate));
      (*ctx)->flops_mult_transpose = ceed_flops_estimate;
    }
  }

  // Check sizes
  // Verify the operator's active vector lengths are consistent with the DMs and any user-supplied local vectors
  if (x_loc_len > 0 || y_loc_len > 0) {
    CeedSize ctx_x_loc_len, ctx_y_loc_len;
    PetscInt X_loc_len, dm_x_loc_len, Y_loc_len, dm_y_loc_len;
    Vec      dm_X_loc, dm_Y_loc;

    // -- Input
    PetscCall(DMGetLocalVector(dm_x, &dm_X_loc));
    PetscCall(VecGetLocalSize(dm_X_loc, &dm_x_loc_len));
    PetscCall(DMRestoreLocalVector(dm_x, &dm_X_loc));
    PetscCallCeed((*ctx)->ceed, CeedVectorGetLength((*ctx)->x_loc, &ctx_x_loc_len));
    if (X_loc) {
      PetscCall(VecGetLocalSize(X_loc, &X_loc_len));
      PetscCheck(X_loc_len == dm_x_loc_len, PETSC_COMM_SELF, PETSC_ERR_LIB,
                 "X_loc (%" PetscInt_FMT ") must match dm_x (%" PetscInt_FMT ") dimensions", X_loc_len, dm_x_loc_len);
    }
    PetscCheck(x_loc_len == dm_x_loc_len, PETSC_COMM_SELF, PETSC_ERR_LIB, "op (%" CeedSize_FMT ") must match dm_x (%" PetscInt_FMT ") dimensions",
               x_loc_len, dm_x_loc_len);
    PetscCheck(x_loc_len == ctx_x_loc_len, PETSC_COMM_SELF, PETSC_ERR_LIB, "x_loc (%" CeedSize_FMT ") must match op dimensions (%" CeedSize_FMT ")",
               x_loc_len, ctx_x_loc_len);

    // -- Output
    PetscCall(DMGetLocalVector(dm_y, &dm_Y_loc));
    PetscCall(VecGetLocalSize(dm_Y_loc, &dm_y_loc_len));
    PetscCall(DMRestoreLocalVector(dm_y, &dm_Y_loc));
    PetscCallCeed((*ctx)->ceed, CeedVectorGetLength((*ctx)->y_loc, &ctx_y_loc_len));
    PetscCheck(ctx_y_loc_len == dm_y_loc_len, PETSC_COMM_SELF, PETSC_ERR_LIB, "op (%" CeedSize_FMT ") must match dm_y (%" PetscInt_FMT ") dimensions",
               ctx_y_loc_len, dm_y_loc_len);

    // -- Transpose
    if (Y_loc_transpose) {
      PetscCall(VecGetLocalSize(Y_loc_transpose, &Y_loc_len));
      PetscCheck(Y_loc_len == dm_y_loc_len, PETSC_COMM_SELF, PETSC_ERR_LIB,
                 "Y_loc_transpose (%" PetscInt_FMT ") must match dm_y (%" PetscInt_FMT ") dimensions", Y_loc_len, dm_y_loc_len);
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
1289 
1290 /**
1291   @brief Increment reference counter for `MATCEED` context.
1292 
1293   Not collective across MPI processes.
1294 
1295   @param[in,out]  ctx  Context data
1296 
1297   @return An error code: 0 - success, otherwise - failure
1298 **/
1299 PetscErrorCode MatCeedContextReference(MatCeedContext ctx) {
1300   PetscFunctionBeginUser;
1301   ctx->ref_count++;
1302   PetscFunctionReturn(PETSC_SUCCESS);
1303 }
1304 
1305 /**
1306   @brief Copy reference for `MATCEED`.
1307          Note: If `ctx_copy` is non-null, it is assumed to be a valid pointer to a `MatCeedContext`.
1308 
1309   Not collective across MPI processes.
1310 
1311   @param[in]   ctx       Context data
1312   @param[out]  ctx_copy  Copy of pointer to context data
1313 
1314   @return An error code: 0 - success, otherwise - failure
1315 **/
1316 PetscErrorCode MatCeedContextReferenceCopy(MatCeedContext ctx, MatCeedContext *ctx_copy) {
1317   PetscFunctionBeginUser;
1318   PetscCall(MatCeedContextReference(ctx));
1319   PetscCall(MatCeedContextDestroy(*ctx_copy));
1320   *ctx_copy = ctx;
1321   PetscFunctionReturn(PETSC_SUCCESS);
1322 }
1323 
1324 /**
1325   @brief Destroy context data for operator application.
1326 
1327   Collective across MPI processes.
1328 
1329   @param[in,out]  ctx  Context data for operator evaluation
1330 
1331   @return An error code: 0 - success, otherwise - failure
1332 **/
PetscErrorCode MatCeedContextDestroy(MatCeedContext ctx) {
  PetscFunctionBeginUser;
  // Reference counted: only the final reference performs the actual destruction
  if (!ctx || --ctx->ref_count > 0) PetscFunctionReturn(PETSC_SUCCESS);

  // PETSc objects
  PetscCall(DMDestroy(&ctx->dm_x));
  PetscCall(DMDestroy(&ctx->dm_y));
  PetscCall(VecDestroy(&ctx->X_loc));
  PetscCall(VecDestroy(&ctx->Y_loc_transpose));
  PetscCall(MatDestroy(&ctx->mat_assembled_full_internal));
  PetscCall(MatDestroy(&ctx->mat_assembled_pbd_internal));
  PetscCall(PetscFree(ctx->coo_mat_type));
  PetscCall(PetscFree(ctx->mats_assembled_full));
  PetscCall(PetscFree(ctx->mats_assembled_pbd));

  // libCEED objects
  // The Ceed handle is destroyed last because the calls above use it for error reporting
  PetscCallCeed(ctx->ceed, CeedVectorDestroy(&ctx->x_loc));
  PetscCallCeed(ctx->ceed, CeedVectorDestroy(&ctx->y_loc));
  PetscCallCeed(ctx->ceed, CeedVectorDestroy(&ctx->coo_values_full));
  PetscCallCeed(ctx->ceed, CeedVectorDestroy(&ctx->coo_values_pbd));
  PetscCallCeed(ctx->ceed, CeedOperatorDestroy(&ctx->op_mult));
  PetscCallCeed(ctx->ceed, CeedOperatorDestroy(&ctx->op_mult_transpose));
  PetscCheck(CeedDestroy(&ctx->ceed) == CEED_ERROR_SUCCESS, PETSC_COMM_SELF, PETSC_ERR_LIB, "destroying libCEED context object failed");

  // Deallocate
  // NOTE(review): this flag is written to memory that is freed on the very next line, so a stale reference would still read freed memory — confirm intent
  ctx->is_destroyed = PETSC_TRUE;  // Flag as destroyed in case someone has stale ref
  PetscCall(PetscFree(ctx));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1362 
1363 /**
1364   @brief Compute the diagonal of an operator via libCEED.
1365 
1366   Collective across MPI processes.
1367 
1368   @param[in]   A  `MATCEED`
1369   @param[out]  D  Vector holding operator diagonal
1370 
1371   @return An error code: 0 - success, otherwise - failure
1372 **/
1373 PetscErrorCode MatGetDiagonal_Ceed(Mat A, Vec D) {
1374   PetscMemType   mem_type;
1375   Vec            D_loc;
1376   MatCeedContext ctx;
1377 
1378   PetscFunctionBeginUser;
1379   PetscCall(MatShellGetContext(A, &ctx));
1380 
1381   // Place PETSc vector in libCEED vector
1382   PetscCall(PetscLogEventBegin(MATCEED_ASSEMBLE_DIAGONAL, A, D, NULL, NULL));
1383   PetscCall(DMGetLocalVector(ctx->dm_x, &D_loc));
1384   PetscCall(VecPetscToCeed(D_loc, &mem_type, ctx->x_loc));
1385 
1386   // Compute Diagonal
1387   PetscCall(PetscLogEventBegin(MATCEED_ASSEMBLE_DIAGONAL_CEEDOP, A, D, NULL, NULL));
1388   PetscCallCeed(ctx->ceed, CeedOperatorLinearAssembleDiagonal(ctx->op_mult, ctx->x_loc, CEED_REQUEST_IMMEDIATE));
1389   PetscCall(PetscLogEventEnd(MATCEED_ASSEMBLE_DIAGONAL_CEEDOP, A, D, NULL, NULL));
1390 
1391   // Restore PETSc vector
1392   PetscCall(VecCeedToPetsc(ctx->x_loc, mem_type, D_loc));
1393 
1394   // Local-to-Global
1395   PetscCall(VecZeroEntries(D));
1396   PetscCall(DMLocalToGlobal(ctx->dm_x, D_loc, ADD_VALUES, D));
1397   PetscCall(DMRestoreLocalVector(ctx->dm_x, &D_loc));
1398   PetscCall(PetscLogEventEnd(MATCEED_ASSEMBLE_DIAGONAL, A, D, NULL, NULL));
1399   PetscFunctionReturn(PETSC_SUCCESS);
1400 }
1401 
1402 /**
1403   @brief Compute `A X = Y` for a `MATCEED`.
1404 
1405   Collective across MPI processes.
1406 
1407   @param[in]   A  `MATCEED`
1408   @param[in]   X  Input PETSc vector
1409   @param[out]  Y  Output PETSc vector
1410 
1411   @return An error code: 0 - success, otherwise - failure
1412 **/
1413 PetscErrorCode MatMult_Ceed(Mat A, Vec X, Vec Y) {
1414   MatCeedContext ctx;
1415 
1416   PetscFunctionBeginUser;
1417   PetscCall(MatShellGetContext(A, &ctx));
1418   PetscCall(PetscLogEventBegin(ctx->log_event_mult, A, X, Y, NULL));
1419 
1420   {
1421     PetscMemType x_mem_type, y_mem_type;
1422     Vec          X_loc = ctx->X_loc, Y_loc;
1423 
1424     // Get local vectors
1425     if (!ctx->X_loc) PetscCall(DMGetLocalVector(ctx->dm_x, &X_loc));
1426     PetscCall(DMGetLocalVector(ctx->dm_y, &Y_loc));
1427 
1428     // Global-to-local
1429     PetscCall(DMGlobalToLocal(ctx->dm_x, X, INSERT_VALUES, X_loc));
1430 
1431     // Setup libCEED vectors
1432     PetscCall(VecReadPetscToCeed(X_loc, &x_mem_type, ctx->x_loc));
1433     PetscCall(VecZeroEntries(Y_loc));
1434     PetscCall(VecPetscToCeed(Y_loc, &y_mem_type, ctx->y_loc));
1435 
1436     // Apply libCEED operator
1437     PetscCall(PetscLogGpuTimeBegin());
1438     PetscCall(PetscLogEventBegin(ctx->log_event_ceed_mult, A, X, Y, NULL));
1439     PetscCallCeed(ctx->ceed, CeedOperatorApplyAdd(ctx->op_mult, ctx->x_loc, ctx->y_loc, CEED_REQUEST_IMMEDIATE));
1440     PetscCall(PetscLogEventEnd(ctx->log_event_ceed_mult, A, X, Y, NULL));
1441     PetscCall(PetscLogGpuTimeEnd());
1442 
1443     // Restore PETSc vectors
1444     PetscCall(VecReadCeedToPetsc(ctx->x_loc, x_mem_type, X_loc));
1445     PetscCall(VecCeedToPetsc(ctx->y_loc, y_mem_type, Y_loc));
1446 
1447     // Local-to-global
1448     PetscCall(VecZeroEntries(Y));
1449     PetscCall(DMLocalToGlobal(ctx->dm_y, Y_loc, ADD_VALUES, Y));
1450 
1451     // Restore local vectors, as needed
1452     if (!ctx->X_loc) PetscCall(DMRestoreLocalVector(ctx->dm_x, &X_loc));
1453     PetscCall(DMRestoreLocalVector(ctx->dm_y, &Y_loc));
1454   }
1455 
1456   // Log flops
1457   if (PetscMemTypeDevice(ctx->mem_type)) PetscCall(PetscLogGpuFlops(ctx->flops_mult));
1458   else PetscCall(PetscLogFlops(ctx->flops_mult));
1459   PetscCall(PetscLogEventEnd(ctx->log_event_mult, A, X, Y, NULL));
1460   PetscFunctionReturn(PETSC_SUCCESS);
1461 }
1462 
1463 /**
1464   @brief Compute `A^T Y = X` for a `MATCEED`.
1465 
1466   Collective across MPI processes.
1467 
1468   @param[in]   A  `MATCEED`
1469   @param[in]   Y  Input PETSc vector
1470   @param[out]  X  Output PETSc vector
1471 
1472   @return An error code: 0 - success, otherwise - failure
1473 **/
1474 PetscErrorCode MatMultTranspose_Ceed(Mat A, Vec Y, Vec X) {
1475   MatCeedContext ctx;
1476 
1477   PetscFunctionBeginUser;
1478   PetscCall(MatShellGetContext(A, &ctx));
1479   PetscCall(PetscLogEventBegin(ctx->log_event_mult_transpose, A, Y, X, NULL));
1480 
1481   {
1482     PetscMemType x_mem_type, y_mem_type;
1483     Vec          X_loc, Y_loc = ctx->Y_loc_transpose;
1484 
1485     // Get local vectors
1486     if (!ctx->Y_loc_transpose) PetscCall(DMGetLocalVector(ctx->dm_y, &Y_loc));
1487     PetscCall(DMGetLocalVector(ctx->dm_x, &X_loc));
1488 
1489     // Global-to-local
1490     PetscCall(DMGlobalToLocal(ctx->dm_y, Y, INSERT_VALUES, Y_loc));
1491 
1492     // Setup libCEED vectors
1493     PetscCall(VecReadPetscToCeed(Y_loc, &y_mem_type, ctx->y_loc));
1494     PetscCall(VecZeroEntries(X_loc));
1495     PetscCall(VecPetscToCeed(X_loc, &x_mem_type, ctx->x_loc));
1496 
1497     // Apply libCEED operator
1498     PetscCall(PetscLogGpuTimeBegin());
1499     PetscCall(PetscLogEventBegin(ctx->log_event_ceed_mult_transpose, A, Y, X, NULL));
1500     PetscCallCeed(ctx->ceed, CeedOperatorApplyAdd(ctx->op_mult_transpose, ctx->y_loc, ctx->x_loc, CEED_REQUEST_IMMEDIATE));
1501     PetscCall(PetscLogEventEnd(ctx->log_event_ceed_mult_transpose, A, Y, X, NULL));
1502     PetscCall(PetscLogGpuTimeEnd());
1503 
1504     // Restore PETSc vectors
1505     PetscCall(VecReadCeedToPetsc(ctx->y_loc, y_mem_type, Y_loc));
1506     PetscCall(VecCeedToPetsc(ctx->x_loc, x_mem_type, X_loc));
1507 
1508     // Local-to-global
1509     PetscCall(VecZeroEntries(X));
1510     PetscCall(DMLocalToGlobal(ctx->dm_x, X_loc, ADD_VALUES, X));
1511 
1512     // Restore local vectors, as needed
1513     if (!ctx->Y_loc_transpose) PetscCall(DMRestoreLocalVector(ctx->dm_y, &Y_loc));
1514     PetscCall(DMRestoreLocalVector(ctx->dm_x, &X_loc));
1515   }
1516 
1517   // Log flops
1518   if (PetscMemTypeDevice(ctx->mem_type)) PetscCall(PetscLogGpuFlops(ctx->flops_mult_transpose));
1519   else PetscCall(PetscLogFlops(ctx->flops_mult_transpose));
1520   PetscCall(PetscLogEventEnd(ctx->log_event_mult_transpose, A, Y, X, NULL));
1521   PetscFunctionReturn(PETSC_SUCCESS);
1522 }
1523