xref: /libCEED/examples/fluids/src/mat-ceed.c (revision 24a65d3da2f623912f26b42c0b9ba6f37de25307)
/// @file
/// MatCeed and its related operators
3 
4 #include <ceed-utils.h>
5 #include <ceed.h>
6 #include <ceed/backend.h>
7 #include <mat-ceed-impl.h>
8 #include <mat-ceed.h>
9 #include <petscdmplex.h>
10 #include <stdlib.h>
11 #include <string.h>
12 
13 PetscClassId  MATCEED_CLASSID;
14 PetscLogEvent MATCEED_MULT, MATCEED_MULT_TRANSPOSE;
15 
16 /**
17   @brief Register MATCEED log events.
18 
19   Not collective across MPI processes.
20 
21   @return An error code: 0 - success, otherwise - failure
22 **/
23 static PetscErrorCode MatCeedRegisterLogEvents() {
24   static bool registered = false;
25 
26   PetscFunctionBeginUser;
27   if (registered) PetscFunctionReturn(PETSC_SUCCESS);
28   PetscCall(PetscClassIdRegister("MATCEED", &MATCEED_CLASSID));
29   PetscCall(PetscLogEventRegister("MATCEED Mult", MATCEED_CLASSID, &MATCEED_MULT));
30   PetscCall(PetscLogEventRegister("MATCEED Mult Transpose", MATCEED_CLASSID, &MATCEED_MULT_TRANSPOSE));
31   registered = true;
32   PetscFunctionReturn(PETSC_SUCCESS);
33 }
34 
35 /**
36   @brief Setup inner `Mat` for `PC` operations not directly supported by libCEED.
37 
38   Collective across MPI processes.
39 
40   @param[in]   mat_ceed   `MATCEED` to setup
41   @param[out]  mat_inner  Inner `Mat`
42 
43   @return An error code: 0 - success, otherwise - failure
44 **/
static PetscErrorCode MatCeedSetupInnerMat(Mat mat_ceed, Mat *mat_inner) {
  MatCeedContext ctx;

  PetscFunctionBeginUser;
  PetscCall(MatShellGetContext(mat_ceed, &ctx));

  // Inner assembled mats are only supported when input and output spaces share a single DM
  PetscCheck(ctx->dm_x == ctx->dm_y, PetscObjectComm((PetscObject)mat_ceed), PETSC_ERR_SUP, "PC only supported for MATCEED on a single DM");

  // Check cl mat type
  {
    PetscBool is_internal_mat_type_cl = PETSC_FALSE;
    char      internal_mat_type_cl[64];

    // Check for specific CL inner mat type for this Mat, honoring this Mat's options prefix
    {
      const char *mat_ceed_prefix = NULL;

      PetscCall(MatGetOptionsPrefix(mat_ceed, &mat_ceed_prefix));
      PetscOptionsBegin(PetscObjectComm((PetscObject)mat_ceed), mat_ceed_prefix, "", NULL);
      PetscCall(PetscOptionsFList("-ceed_inner_mat_type", "MATCEED inner assembled MatType for PC support", NULL, MatList, internal_mat_type_cl,
                                  internal_mat_type_cl, sizeof(internal_mat_type_cl), &is_internal_mat_type_cl));
      PetscOptionsEnd();
      // A command line option overrides the stored internal mat type; free the old copy before replacing it
      if (is_internal_mat_type_cl) {
        PetscCall(PetscFree(ctx->internal_mat_type));
        PetscCall(PetscStrallocpy(internal_mat_type_cl, &ctx->internal_mat_type));
      }
    }
  }

  // Create sparse matrix
  {
    MatType dm_mat_type, dm_mat_type_copy;

    // DMGetMatType returns a borrowed string, so copy it before DMSetMatType below can invalidate it
    PetscCall(DMGetMatType(ctx->dm_x, &dm_mat_type));
    PetscCall(PetscStrallocpy(dm_mat_type, (char **)&dm_mat_type_copy));
    // Temporarily switch the DM's mat type so DMCreateMatrix builds the requested inner type, then restore the original
    PetscCall(DMSetMatType(ctx->dm_x, ctx->internal_mat_type));
    PetscCall(DMCreateMatrix(ctx->dm_x, mat_inner));
    PetscCall(DMSetMatType(ctx->dm_x, dm_mat_type_copy));
    PetscCall(PetscFree(dm_mat_type_copy));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
87 
88 /**
89   @brief Assemble the point block diagonal of a `MATCEED` into a `MATAIJ` or similar.
90          The `mat_coo` preallocation is set to match the sparsity pattern of `mat_ceed`.
91          The caller is responsible for assuring the global and local sizes are compatible, otherwise this function will fail.
92 
93   Collective across MPI processes.
94 
95   @param[in]      mat_ceed  `MATCEED` to assemble
96   @param[in,out]  mat_coo   `MATAIJ` or similar to assemble into
97 
98   @return An error code: 0 - success, otherwise - failure
99 **/
static PetscErrorCode MatCeedAssemblePointBlockDiagonalCOO(Mat mat_ceed, Mat mat_coo) {
  MatCeedContext ctx;

  PetscFunctionBeginUser;
  PetscCall(MatShellGetContext(mat_ceed, &ctx));

  // Check if COO pattern set
  {
    PetscInt index = -1;

    // Linear search; mats_assembled_pbd only ever holds mats previously preallocated here
    for (PetscInt i = 0; i < ctx->num_mats_assembled_pbd; i++) {
      if (ctx->mats_assembled_pbd[i] == mat_coo) index = i;
    }
    if (index == -1) {
      PetscInt     *rows_petsc = NULL, *cols_petsc = NULL;
      CeedInt      *rows_ceed, *cols_ceed;
      PetscCount    num_entries;
      PetscLogStage stage_amg_setup;

      // -- Assemble sparsity pattern if mat hasn't been assembled before
      PetscCall(PetscLogStageGetId("MATCEED Assembly Setup", &stage_amg_setup));
      if (stage_amg_setup == -1) {
        PetscCall(PetscLogStageRegister("MATCEED Assembly Setup", &stage_amg_setup));
      }
      PetscCall(PetscLogStagePush(stage_amg_setup));
      PetscCallCeed(ctx->ceed, CeedOperatorLinearAssemblePointBlockDiagonalSymbolic(ctx->op_mult, &num_entries, &rows_ceed, &cols_ceed));
      // Convert CeedInt index arrays to PetscInt; presumably the conversion takes ownership of the Ceed arrays — TODO confirm against IntArrayCeedToPetsc
      PetscCall(IntArrayCeedToPetsc(num_entries, &rows_ceed, &rows_petsc));
      PetscCall(IntArrayCeedToPetsc(num_entries, &cols_ceed, &cols_petsc));
      PetscCall(MatSetPreallocationCOOLocal(mat_coo, num_entries, rows_petsc, cols_petsc));
      // Freed with free(), not PetscFree() — these arrays were not allocated through PETSc
      free(rows_petsc);
      free(cols_petsc);
      // Lazily create the CeedVector that will receive assembled values on every subsequent call
      if (!ctx->coo_values_pbd) PetscCallCeed(ctx->ceed, CeedVectorCreate(ctx->ceed, num_entries, &ctx->coo_values_pbd));
      // Record mat_coo so the symbolic assembly is skipped next time
      PetscCall(PetscRealloc(++ctx->num_mats_assembled_pbd * sizeof(Mat), &ctx->mats_assembled_pbd));
      ctx->mats_assembled_pbd[ctx->num_mats_assembled_pbd - 1] = mat_coo;
      PetscCall(PetscLogStagePop());
    }
  }

  // Assemble mat_ceed
  PetscCall(MatAssemblyBegin(mat_coo, MAT_FINAL_ASSEMBLY));
  {
    const CeedScalar *values;
    MatType           mat_type;
    CeedMemType       mem_type = CEED_MEM_HOST;
    PetscBool         is_spd, is_spd_known;

    // Read assembled values on the device when the target mat type is device-resident
    PetscCall(MatGetType(mat_coo, &mat_type));
    if (strstr(mat_type, "cusparse")) mem_type = CEED_MEM_DEVICE;
    else if (strstr(mat_type, "kokkos")) mem_type = CEED_MEM_DEVICE;
    else mem_type = CEED_MEM_HOST;

    PetscCallCeed(ctx->ceed, CeedOperatorLinearAssemblePointBlockDiagonal(ctx->op_mult, ctx->coo_values_pbd, CEED_REQUEST_IMMEDIATE));
    PetscCallCeed(ctx->ceed, CeedVectorGetArrayRead(ctx->coo_values_pbd, mem_type, &values));
    PetscCall(MatSetValuesCOO(mat_coo, values, INSERT_VALUES));
    // Propagate SPD knowledge from the MATCEED to the assembled mat, if known
    PetscCall(MatIsSPDKnown(mat_ceed, &is_spd_known, &is_spd));
    if (is_spd_known) PetscCall(MatSetOption(mat_coo, MAT_SPD, is_spd));
    PetscCallCeed(ctx->ceed, CeedVectorRestoreArrayRead(ctx->coo_values_pbd, &values));
  }
  PetscCall(MatAssemblyEnd(mat_coo, MAT_FINAL_ASSEMBLY));
  PetscFunctionReturn(PETSC_SUCCESS);
}
161 
162 /**
163   @brief Assemble inner `Mat` for diagonal `PC` operations
164 
165   Collective across MPI processes.
166 
167   @param[in]   mat_ceed      `MATCEED` to invert
168   @param[in]   use_ceed_pbd  Boolean flag to use libCEED PBD assembly
169   @param[out]  mat_inner     Inner `Mat` for diagonal operations
170 
171   @return An error code: 0 - success, otherwise - failure
172 **/
static PetscErrorCode MatCeedAssembleInnerBlockDiagonalMat(Mat mat_ceed, PetscBool use_ceed_pbd, Mat *mat_inner) {
  MatCeedContext ctx;

  PetscFunctionBeginUser;
  PetscCall(MatShellGetContext(mat_ceed, &ctx));
  if (use_ceed_pbd) {
    // Lazily create the inner mat; its COO pattern is set on first assembly below
    if (!ctx->mat_assembled_pbd_internal) PetscCall(MatCeedSetupInnerMat(mat_ceed, &ctx->mat_assembled_pbd_internal));

    // Assemble mat_assembled_pbd_internal via libCEED point block diagonal assembly
    PetscCall(MatCeedAssemblePointBlockDiagonalCOO(mat_ceed, ctx->mat_assembled_pbd_internal));
    if (mat_inner) *mat_inner = ctx->mat_assembled_pbd_internal;
  } else {
    // Lazily create the inner mat; its COO pattern is set on first assembly below
    if (!ctx->mat_assembled_full_internal) PetscCall(MatCeedSetupInnerMat(mat_ceed, &ctx->mat_assembled_full_internal));

    // Assemble mat_assembled_full_internal via full libCEED operator assembly
    PetscCall(MatCeedAssembleCOO(mat_ceed, ctx->mat_assembled_full_internal));
    if (mat_inner) *mat_inner = ctx->mat_assembled_full_internal;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
195 
196 /**
197   @brief Get `MATCEED` diagonal block for Jacobi.
198 
199   Collective across MPI processes.
200 
201   @param[in]   mat_ceed   `MATCEED` to invert
202   @param[out]  mat_block  The diagonal block matrix
203 
204   @return An error code: 0 - success, otherwise - failure
205 **/
206 static PetscErrorCode MatGetDiagonalBlock_Ceed(Mat mat_ceed, Mat *mat_block) {
207   Mat            mat_inner = NULL;
208   MatCeedContext ctx;
209 
210   PetscFunctionBeginUser;
211   PetscCall(MatShellGetContext(mat_ceed, &ctx));
212 
213   // Assemble inner mat if needed
214   PetscCall(MatCeedAssembleInnerBlockDiagonalMat(mat_ceed, ctx->is_ceed_pbd_valid, &mat_inner));
215 
216   // Get block diagonal
217   PetscCall(MatGetDiagonalBlock(mat_inner, mat_block));
218   PetscFunctionReturn(PETSC_SUCCESS);
219 }
220 
221 /**
222   @brief Invert `MATCEED` diagonal block for Jacobi.
223 
224   Collective across MPI processes.
225 
226   @param[in]   mat_ceed  `MATCEED` to invert
227   @param[out]  values    The block inverses in column major order
228 
229   @return An error code: 0 - success, otherwise - failure
230 **/
231 static PetscErrorCode MatInvertBlockDiagonal_Ceed(Mat mat_ceed, const PetscScalar **values) {
232   Mat            mat_inner = NULL;
233   MatCeedContext ctx;
234 
235   PetscFunctionBeginUser;
236   PetscCall(MatShellGetContext(mat_ceed, &ctx));
237 
238   // Assemble inner mat if needed
239   PetscCall(MatCeedAssembleInnerBlockDiagonalMat(mat_ceed, ctx->is_ceed_pbd_valid, &mat_inner));
240 
241   // Invert PB diagonal
242   PetscCall(MatInvertBlockDiagonal(mat_inner, values));
243   PetscFunctionReturn(PETSC_SUCCESS);
244 }
245 
246 /**
247   @brief Invert `MATCEED` variable diagonal block for Jacobi.
248 
249   Collective across MPI processes.
250 
251   @param[in]   mat_ceed     `MATCEED` to invert
252   @param[in]   num_blocks   The number of blocks on the process
253   @param[in]   block_sizes  The size of each block on the process
254   @param[out]  values       The block inverses in column major order
255 
256   @return An error code: 0 - success, otherwise - failure
257 **/
258 static PetscErrorCode MatInvertVariableBlockDiagonal_Ceed(Mat mat_ceed, PetscInt num_blocks, const PetscInt *block_sizes, PetscScalar *values) {
259   Mat            mat_inner = NULL;
260   MatCeedContext ctx;
261 
262   PetscFunctionBeginUser;
263   PetscCall(MatShellGetContext(mat_ceed, &ctx));
264 
265   // Assemble inner mat if needed
266   PetscCall(MatCeedAssembleInnerBlockDiagonalMat(mat_ceed, ctx->is_ceed_vpbd_valid, &mat_inner));
267 
268   // Invert PB diagonal
269   PetscCall(MatInvertVariableBlockDiagonal(mat_inner, num_blocks, block_sizes, values));
270   PetscFunctionReturn(PETSC_SUCCESS);
271 }
272 
273 // -----------------------------------------------------------------------------
274 // MatCeed
275 // -----------------------------------------------------------------------------
276 
277 /**
278   @brief Create PETSc `Mat` from libCEED operators.
279 
280   Collective across MPI processes.
281 
282   @param[in]   dm_x                      Input `DM`
283   @param[in]   dm_y                      Output `DM`
284   @param[in]   op_mult                   `CeedOperator` for forward evaluation
285   @param[in]   op_mult_transpose         `CeedOperator` for transpose evaluation
286   @param[out]  mat                        New MatCeed
287 
288   @return An error code: 0 - success, otherwise - failure
289 **/
PetscErrorCode MatCeedCreate(DM dm_x, DM dm_y, CeedOperator op_mult, CeedOperator op_mult_transpose, Mat *mat) {
  PetscInt       X_l_size, X_g_size, Y_l_size, Y_g_size;
  VecType        vec_type;
  MatCeedContext ctx;

  PetscFunctionBeginUser;
  PetscCall(MatCeedRegisterLogEvents());

  // Collect context data
  PetscCall(DMGetVecType(dm_x, &vec_type));
  // Input sizes from a borrowed global vector of dm_x
  {
    Vec X;

    PetscCall(DMGetGlobalVector(dm_x, &X));
    PetscCall(VecGetSize(X, &X_g_size));
    PetscCall(VecGetLocalSize(X, &X_l_size));
    PetscCall(DMRestoreGlobalVector(dm_x, &X));
  }
  if (dm_y) {
    Vec Y;

    PetscCall(DMGetGlobalVector(dm_y, &Y));
    PetscCall(VecGetSize(Y, &Y_g_size));
    PetscCall(VecGetLocalSize(Y, &Y_l_size));
    PetscCall(DMRestoreGlobalVector(dm_y, &Y));
  } else {
    // NULL dm_y means a square operator on dm_x
    dm_y     = dm_x;
    Y_g_size = X_g_size;
    Y_l_size = X_l_size;
  }
  // Create context
  {
    Vec X_loc, Y_loc_transpose = NULL;

    // Work vectors are zeroed so unset ghost values start deterministic;
    // the transpose work vector is only needed when a transpose operator is given
    PetscCall(DMCreateLocalVector(dm_x, &X_loc));
    PetscCall(VecZeroEntries(X_loc));
    if (op_mult_transpose) {
      PetscCall(DMCreateLocalVector(dm_y, &Y_loc_transpose));
      PetscCall(VecZeroEntries(Y_loc_transpose));
    }
    PetscCall(MatCeedContextCreate(dm_x, dm_y, X_loc, Y_loc_transpose, op_mult, op_mult_transpose, MATCEED_MULT, MATCEED_MULT_TRANSPOSE, &ctx));
    // The context holds its own references; drop ours
    PetscCall(VecDestroy(&X_loc));
    PetscCall(VecDestroy(&Y_loc_transpose));
  }

  // Create mat
  PetscCall(MatCreateShell(PetscObjectComm((PetscObject)dm_x), Y_l_size, X_l_size, Y_g_size, X_g_size, ctx, mat));
  PetscCall(PetscObjectChangeTypeName((PetscObject)*mat, MATCEED));
  // -- Set block and variable block sizes
  if (dm_x == dm_y) {
    MatType dm_mat_type, dm_mat_type_copy;
    Mat     temp_mat;

    // Build a throwaway AIJ matrix purely to read the DM's (variable) block sizes;
    // the DM's mat type is saved (copied, since the string is borrowed) and restored
    PetscCall(DMGetMatType(dm_x, &dm_mat_type));
    PetscCall(PetscStrallocpy(dm_mat_type, (char **)&dm_mat_type_copy));
    PetscCall(DMSetMatType(dm_x, MATAIJ));
    PetscCall(DMCreateMatrix(dm_x, &temp_mat));
    PetscCall(DMSetMatType(dm_x, dm_mat_type_copy));
    PetscCall(PetscFree(dm_mat_type_copy));

    {
      PetscInt        block_size, num_blocks, max_vblock_size = PETSC_INT_MAX;
      const PetscInt *vblock_sizes;

      // -- Get block sizes
      PetscCall(MatGetBlockSize(temp_mat, &block_size));
      PetscCall(MatGetVariableBlockSizes(temp_mat, &num_blocks, &vblock_sizes));
      // Reduce the largest variable block size across all ranks
      {
        PetscInt local_min_max[2] = {0}, global_min_max[2] = {0, PETSC_INT_MAX};

        for (PetscInt i = 0; i < num_blocks; i++) local_min_max[1] = PetscMax(local_min_max[1], vblock_sizes[i]);
        PetscCall(PetscGlobalMinMaxInt(PetscObjectComm((PetscObject)dm_x), local_min_max, global_min_max));
        max_vblock_size = global_min_max[1];
      }

      // -- Copy block sizes
      if (block_size > 1) PetscCall(MatSetBlockSize(*mat, block_size));
      if (num_blocks) PetscCall(MatSetVariableBlockSizes(*mat, num_blocks, (PetscInt *)vblock_sizes));

      // -- Check libCEED compatibility
      // libCEED point-block-diagonal assembly is only valid when every (sub)operator has a
      // single active basis whose component count matches the block structure
      {
        bool is_composite;

        ctx->is_ceed_pbd_valid  = PETSC_TRUE;
        ctx->is_ceed_vpbd_valid = PETSC_TRUE;
        PetscCallCeed(ctx->ceed, CeedOperatorIsComposite(op_mult, &is_composite));
        if (is_composite) {
          CeedInt       num_sub_operators;
          CeedOperator *sub_operators;

          PetscCallCeed(ctx->ceed, CeedCompositeOperatorGetNumSub(op_mult, &num_sub_operators));
          PetscCallCeed(ctx->ceed, CeedCompositeOperatorGetSubList(op_mult, &sub_operators));
          for (CeedInt i = 0; i < num_sub_operators; i++) {
            CeedInt                  num_bases, num_comp;
            CeedBasis               *active_bases;
            CeedOperatorAssemblyData assembly_data;

            PetscCallCeed(ctx->ceed, CeedOperatorGetOperatorAssemblyData(sub_operators[i], &assembly_data));
            PetscCallCeed(ctx->ceed, CeedOperatorAssemblyDataGetBases(assembly_data, &num_bases, &active_bases, NULL, NULL, NULL, NULL));
            PetscCallCeed(ctx->ceed, CeedBasisGetNumComponents(active_bases[0], &num_comp));
            if (num_bases > 1) {
              ctx->is_ceed_pbd_valid  = PETSC_FALSE;
              ctx->is_ceed_vpbd_valid = PETSC_FALSE;
            }
            if (num_comp != block_size) ctx->is_ceed_pbd_valid = PETSC_FALSE;
            if (num_comp < max_vblock_size) ctx->is_ceed_vpbd_valid = PETSC_FALSE;
          }
        } else {
          // Single (non-composite) operator: same checks as above
          // LCOV_EXCL_START
          CeedInt                  num_bases, num_comp;
          CeedBasis               *active_bases;
          CeedOperatorAssemblyData assembly_data;

          PetscCallCeed(ctx->ceed, CeedOperatorGetOperatorAssemblyData(op_mult, &assembly_data));
          PetscCallCeed(ctx->ceed, CeedOperatorAssemblyDataGetBases(assembly_data, &num_bases, &active_bases, NULL, NULL, NULL, NULL));
          PetscCallCeed(ctx->ceed, CeedBasisGetNumComponents(active_bases[0], &num_comp));
          if (num_bases > 1) {
            ctx->is_ceed_pbd_valid  = PETSC_FALSE;
            ctx->is_ceed_vpbd_valid = PETSC_FALSE;
          }
          if (num_comp != block_size) ctx->is_ceed_pbd_valid = PETSC_FALSE;
          if (num_comp < max_vblock_size) ctx->is_ceed_vpbd_valid = PETSC_FALSE;
          // LCOV_EXCL_STOP
        }
        // Validity flags must agree globally: take the min so one invalid rank invalidates all
        {
          PetscInt local_is_valid[2], global_is_valid[2];

          local_is_valid[0] = local_is_valid[1] = ctx->is_ceed_pbd_valid;
          PetscCall(PetscGlobalMinMaxInt(PetscObjectComm((PetscObject)dm_x), local_is_valid, global_is_valid));
          ctx->is_ceed_pbd_valid = global_is_valid[0];
          local_is_valid[0] = local_is_valid[1] = ctx->is_ceed_vpbd_valid;
          PetscCall(PetscGlobalMinMaxInt(PetscObjectComm((PetscObject)dm_x), local_is_valid, global_is_valid));
          ctx->is_ceed_vpbd_valid = global_is_valid[0];
        }
      }
    }
    PetscCall(MatDestroy(&temp_mat));
  }
  // -- Set internal mat type
  {
    // NOTE(review): this vec_type shadows the outer vec_type used for MatShellSetVecType below
    VecType vec_type;
    MatType internal_mat_type = MATAIJ;

    // Pick a device-resident assembled format to match the work vector's backing memory
    PetscCall(VecGetType(ctx->X_loc, &vec_type));
    if (strstr(vec_type, VECCUDA)) internal_mat_type = MATAIJCUSPARSE;
    else if (strstr(vec_type, VECKOKKOS)) internal_mat_type = MATAIJKOKKOS;
    else internal_mat_type = MATAIJ;
    PetscCall(PetscStrallocpy(internal_mat_type, &ctx->internal_mat_type));
  }
  // -- Set mat operations
  PetscCall(MatShellSetContextDestroy(*mat, (PetscErrorCode(*)(void *))MatCeedContextDestroy));
  PetscCall(MatShellSetOperation(*mat, MATOP_MULT, (void (*)(void))MatMult_Ceed));
  if (op_mult_transpose) PetscCall(MatShellSetOperation(*mat, MATOP_MULT_TRANSPOSE, (void (*)(void))MatMultTranspose_Ceed));
  PetscCall(MatShellSetOperation(*mat, MATOP_GET_DIAGONAL, (void (*)(void))MatGetDiagonal_Ceed));
  PetscCall(MatShellSetOperation(*mat, MATOP_GET_DIAGONAL_BLOCK, (void (*)(void))MatGetDiagonalBlock_Ceed));
  PetscCall(MatShellSetOperation(*mat, MATOP_INVERT_BLOCK_DIAGONAL, (void (*)(void))MatInvertBlockDiagonal_Ceed));
  PetscCall(MatShellSetOperation(*mat, MATOP_INVERT_VBLOCK_DIAGONAL, (void (*)(void))MatInvertVariableBlockDiagonal_Ceed));
  PetscCall(MatShellSetVecType(*mat, vec_type));
  PetscFunctionReturn(PETSC_SUCCESS);
}
450 
451 /**
452   @brief Copy `MATCEED` into a compatible `Mat` with type `MatShell` or `MATCEED`.
453 
454   Collective across MPI processes.
455 
456   @param[in]   mat_ceed   `MATCEED` to copy from
457   @param[out]  mat_other  `MatShell` or `MATCEED` to copy into
458 
459   @return An error code: 0 - success, otherwise - failure
460 **/
461 PetscErrorCode MatCeedCopy(Mat mat_ceed, Mat mat_other) {
462   PetscFunctionBeginUser;
463   PetscCall(MatCeedRegisterLogEvents());
464 
465   // Check type compatibility
466   {
467     MatType mat_type_ceed, mat_type_other;
468 
469     PetscCall(MatGetType(mat_ceed, &mat_type_ceed));
470     PetscCheck(!strcmp(mat_type_ceed, MATCEED), PETSC_COMM_SELF, PETSC_ERR_LIB, "mat_ceed must have type " MATCEED);
471     PetscCall(MatGetType(mat_ceed, &mat_type_other));
472     PetscCheck(!strcmp(mat_type_other, MATCEED) || !strcmp(mat_type_other, MATSHELL), PETSC_COMM_SELF, PETSC_ERR_LIB,
473                "mat_other must have type " MATCEED " or " MATSHELL);
474   }
475 
476   // Check dimension compatibility
477   {
478     PetscInt X_l_ceed_size, X_g_ceed_size, Y_l_ceed_size, Y_g_ceed_size, X_l_other_size, X_g_other_size, Y_l_other_size, Y_g_other_size;
479 
480     PetscCall(MatGetSize(mat_ceed, &Y_g_ceed_size, &X_g_ceed_size));
481     PetscCall(MatGetLocalSize(mat_ceed, &Y_l_ceed_size, &X_l_ceed_size));
482     PetscCall(MatGetSize(mat_ceed, &Y_g_other_size, &X_g_other_size));
483     PetscCall(MatGetLocalSize(mat_ceed, &Y_l_other_size, &X_l_other_size));
484     PetscCheck((Y_g_ceed_size == Y_g_other_size) && (X_g_ceed_size == X_g_other_size) && (Y_l_ceed_size == Y_l_other_size) &&
485                    (X_l_ceed_size == X_l_other_size),
486                PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ,
487                "mat_ceed and mat_other must have compatible sizes; found mat_ceed (Global: %" PetscInt_FMT ", %" PetscInt_FMT
488                "; Local: %" PetscInt_FMT ", %" PetscInt_FMT ") mat_other (Global: %" PetscInt_FMT ", %" PetscInt_FMT "; Local: %" PetscInt_FMT
489                ", %" PetscInt_FMT ")",
490                Y_g_ceed_size, X_g_ceed_size, Y_l_ceed_size, X_l_ceed_size, Y_g_other_size, X_g_other_size, Y_l_other_size, X_l_other_size);
491   }
492 
493   // Convert
494   {
495     VecType        vec_type;
496     MatCeedContext ctx;
497 
498     PetscCall(PetscObjectChangeTypeName((PetscObject)mat_other, MATCEED));
499     PetscCall(MatShellGetContext(mat_ceed, &ctx));
500     PetscCall(MatCeedContextReference(ctx));
501     PetscCall(MatShellSetContext(mat_other, ctx));
502     PetscCall(MatShellSetContextDestroy(mat_other, (PetscErrorCode(*)(void *))MatCeedContextDestroy));
503     PetscCall(MatShellSetOperation(mat_other, MATOP_MULT, (void (*)(void))MatMult_Ceed));
504     if (ctx->op_mult_transpose) PetscCall(MatShellSetOperation(mat_other, MATOP_MULT_TRANSPOSE, (void (*)(void))MatMultTranspose_Ceed));
505     PetscCall(MatShellSetOperation(mat_other, MATOP_GET_DIAGONAL, (void (*)(void))MatGetDiagonal_Ceed));
506     PetscCall(MatShellSetOperation(mat_other, MATOP_GET_DIAGONAL_BLOCK, (void (*)(void))MatGetDiagonalBlock_Ceed));
507     PetscCall(MatShellSetOperation(mat_other, MATOP_INVERT_BLOCK_DIAGONAL, (void (*)(void))MatInvertBlockDiagonal_Ceed));
508     PetscCall(MatShellSetOperation(mat_other, MATOP_INVERT_VBLOCK_DIAGONAL, (void (*)(void))MatInvertVariableBlockDiagonal_Ceed));
509     {
510       PetscInt block_size;
511 
512       PetscCall(MatGetBlockSize(mat_ceed, &block_size));
513       if (block_size > 1) PetscCall(MatSetBlockSize(mat_other, block_size));
514     }
515     {
516       PetscInt        num_blocks;
517       const PetscInt *block_sizes;
518 
519       PetscCall(MatGetVariableBlockSizes(mat_ceed, &num_blocks, &block_sizes));
520       if (num_blocks) PetscCall(MatSetVariableBlockSizes(mat_other, num_blocks, (PetscInt *)block_sizes));
521     }
522     PetscCall(DMGetVecType(ctx->dm_x, &vec_type));
523     PetscCall(MatShellSetVecType(mat_other, vec_type));
524   }
525   PetscFunctionReturn(PETSC_SUCCESS);
526 }
527 
528 /**
529   @brief Assemble a `MATCEED` into a `MATAIJ` or similar.
530          The `mat_coo` preallocation is set to match the sparsity pattern of `mat_ceed`.
531          The caller is responsible for assuring the global and local sizes are compatible, otherwise this function will fail.
532 
533   Collective across MPI processes.
534 
535   @param[in]      mat_ceed  `MATCEED` to assemble
536   @param[in,out]  mat_coo   `MATAIJ` or similar to assemble into
537 
538   @return An error code: 0 - success, otherwise - failure
539 **/
PetscErrorCode MatCeedAssembleCOO(Mat mat_ceed, Mat mat_coo) {
  MatCeedContext ctx;

  PetscFunctionBeginUser;
  PetscCall(MatShellGetContext(mat_ceed, &ctx));

  // Check if COO pattern set
  {
    PetscInt index = -1;

    // Linear search; mats_assembled_full only ever holds mats previously preallocated here
    for (PetscInt i = 0; i < ctx->num_mats_assembled_full; i++) {
      if (ctx->mats_assembled_full[i] == mat_coo) index = i;
    }
    if (index == -1) {
      PetscInt     *rows_petsc = NULL, *cols_petsc = NULL;
      CeedInt      *rows_ceed, *cols_ceed;
      PetscCount    num_entries;
      PetscLogStage stage_amg_setup;

      // -- Assemble sparsity pattern if mat hasn't been assembled before
      PetscCall(PetscLogStageGetId("MATCEED Assembly Setup", &stage_amg_setup));
      if (stage_amg_setup == -1) {
        PetscCall(PetscLogStageRegister("MATCEED Assembly Setup", &stage_amg_setup));
      }
      PetscCall(PetscLogStagePush(stage_amg_setup));
      PetscCallCeed(ctx->ceed, CeedOperatorLinearAssembleSymbolic(ctx->op_mult, &num_entries, &rows_ceed, &cols_ceed));
      // Convert CeedInt index arrays to PetscInt; presumably the conversion takes ownership of the Ceed arrays — TODO confirm against IntArrayCeedToPetsc
      PetscCall(IntArrayCeedToPetsc(num_entries, &rows_ceed, &rows_petsc));
      PetscCall(IntArrayCeedToPetsc(num_entries, &cols_ceed, &cols_petsc));
      PetscCall(MatSetPreallocationCOOLocal(mat_coo, num_entries, rows_petsc, cols_petsc));
      // Freed with free(), not PetscFree() — these arrays were not allocated through PETSc
      free(rows_petsc);
      free(cols_petsc);
      // Lazily create the CeedVector that will receive assembled values on every subsequent call
      if (!ctx->coo_values_full) PetscCallCeed(ctx->ceed, CeedVectorCreate(ctx->ceed, num_entries, &ctx->coo_values_full));
      // Record mat_coo so the symbolic assembly is skipped next time
      PetscCall(PetscRealloc(++ctx->num_mats_assembled_full * sizeof(Mat), &ctx->mats_assembled_full));
      ctx->mats_assembled_full[ctx->num_mats_assembled_full - 1] = mat_coo;
      PetscCall(PetscLogStagePop());
    }
  }

  // Assemble mat_ceed
  PetscCall(MatAssemblyBegin(mat_coo, MAT_FINAL_ASSEMBLY));
  {
    const CeedScalar *values;
    MatType           mat_type;
    CeedMemType       mem_type = CEED_MEM_HOST;
    PetscBool         is_spd, is_spd_known;

    // Read assembled values on the device when the target mat type is device-resident
    PetscCall(MatGetType(mat_coo, &mat_type));
    if (strstr(mat_type, "cusparse")) mem_type = CEED_MEM_DEVICE;
    else if (strstr(mat_type, "kokkos")) mem_type = CEED_MEM_DEVICE;
    else mem_type = CEED_MEM_HOST;

    PetscCallCeed(ctx->ceed, CeedOperatorLinearAssemble(ctx->op_mult, ctx->coo_values_full));
    PetscCallCeed(ctx->ceed, CeedVectorGetArrayRead(ctx->coo_values_full, mem_type, &values));
    PetscCall(MatSetValuesCOO(mat_coo, values, INSERT_VALUES));
    // Propagate SPD knowledge from the MATCEED to the assembled mat, if known
    PetscCall(MatIsSPDKnown(mat_ceed, &is_spd_known, &is_spd));
    if (is_spd_known) PetscCall(MatSetOption(mat_coo, MAT_SPD, is_spd));
    PetscCallCeed(ctx->ceed, CeedVectorRestoreArrayRead(ctx->coo_values_full, &values));
  }
  PetscCall(MatAssemblyEnd(mat_coo, MAT_FINAL_ASSEMBLY));
  PetscFunctionReturn(PETSC_SUCCESS);
}
601 
602 /**
603   @brief Set user context for a `MATCEED`.
604 
605   Collective across MPI processes.
606 
607   @param[in,out]  mat  `MATCEED`
608   @param[in]      f    The context destroy function, or NULL
609   @param[in]      ctx  User context, or NULL to unset
610 
611   @return An error code: 0 - success, otherwise - failure
612 **/
613 PetscErrorCode MatCeedSetContext(Mat mat, PetscErrorCode (*f)(void *), void *ctx) {
614   PetscContainer user_ctx = NULL;
615 
616   PetscFunctionBeginUser;
617   if (ctx) {
618     PetscCall(PetscContainerCreate(PetscObjectComm((PetscObject)mat), &user_ctx));
619     PetscCall(PetscContainerSetPointer(user_ctx, ctx));
620     PetscCall(PetscContainerSetUserDestroy(user_ctx, f));
621   }
622   PetscCall(PetscObjectCompose((PetscObject)mat, "MatCeed user context", (PetscObject)user_ctx));
623   PetscCall(PetscContainerDestroy(&user_ctx));
624   PetscFunctionReturn(PETSC_SUCCESS);
625 }
626 
627 /**
628   @brief Retrieve the user context for a `MATCEED`.
629 
630   Collective across MPI processes.
631 
632   @param[in,out]  mat  `MATCEED`
633   @param[in]      ctx  User context
634 
635   @return An error code: 0 - success, otherwise - failure
636 **/
637 PetscErrorCode MatCeedGetContext(Mat mat, void *ctx) {
638   PetscContainer user_ctx;
639 
640   PetscFunctionBeginUser;
641   PetscCall(PetscObjectQuery((PetscObject)mat, "MatCeed user context", (PetscObject *)&user_ctx));
642   if (user_ctx) PetscCall(PetscContainerGetPointer(user_ctx, (void **)ctx));
643   else *(void **)ctx = NULL;
644   PetscFunctionReturn(PETSC_SUCCESS);
645 }
646 
647 /**
648   @brief Sets the inner matrix type as a string from the `MATCEED`.
649 
650   Collective across MPI processes.
651 
652   @param[in,out]  mat   `MATCEED`
653   @param[in]      type  Inner `MatType` to set
654 
655   @return An error code: 0 - success, otherwise - failure
656 **/
PetscErrorCode MatCeedSetInnerMatType(Mat mat, MatType type) {
  MatCeedContext ctx;

  PetscFunctionBeginUser;
  PetscCall(MatShellGetContext(mat, &ctx));
  // Check if same; early return avoids destroying still-valid internal mats
  {
    size_t    len_old, len_new;
    PetscBool is_same = PETSC_FALSE;

    PetscCall(PetscStrlen(ctx->internal_mat_type, &len_old));
    PetscCall(PetscStrlen(type, &len_new));
    if (len_old == len_new) PetscCall(PetscStrncmp(ctx->internal_mat_type, type, len_old, &is_same));
    if (is_same) PetscFunctionReturn(PETSC_SUCCESS);
  }
  // Clean up old mats in different format
  // Each internal mat appears at most once in its assembled list (MatCeedAssembleCOO only
  // appends mats not already present), so this remove-and-shift runs at most once per list
  // LCOV_EXCL_START
  if (ctx->mat_assembled_full_internal) {
    for (PetscInt i = 0; i < ctx->num_mats_assembled_full; i++) {
      if (ctx->mats_assembled_full[i] == ctx->mat_assembled_full_internal) {
        // Shift the tail left to overwrite the removed entry
        for (PetscInt j = i + 1; j < ctx->num_mats_assembled_full; j++) {
          ctx->mats_assembled_full[j - 1] = ctx->mats_assembled_full[j];
        }
        ctx->num_mats_assembled_full--;
        // Note: we'll realloc this array again, so no need to shrink the allocation
        PetscCall(MatDestroy(&ctx->mat_assembled_full_internal));
      }
    }
  }
  if (ctx->mat_assembled_pbd_internal) {
    for (PetscInt i = 0; i < ctx->num_mats_assembled_pbd; i++) {
      if (ctx->mats_assembled_pbd[i] == ctx->mat_assembled_pbd_internal) {
        // Shift the tail left to overwrite the removed entry
        for (PetscInt j = i + 1; j < ctx->num_mats_assembled_pbd; j++) {
          ctx->mats_assembled_pbd[j - 1] = ctx->mats_assembled_pbd[j];
        }
        // Note: we'll realloc this array again, so no need to shrink the allocation
        ctx->num_mats_assembled_pbd--;
        PetscCall(MatDestroy(&ctx->mat_assembled_pbd_internal));
      }
    }
  }
  // Store a private copy of the new type string
  PetscCall(PetscFree(ctx->internal_mat_type));
  PetscCall(PetscStrallocpy(type, &ctx->internal_mat_type));
  PetscFunctionReturn(PETSC_SUCCESS);
  // LCOV_EXCL_STOP
}
703 
704 /**
705   @brief Gets the inner matrix type as a string from the `MATCEED`.
706 
707   Collective across MPI processes.
708 
709   @param[in,out]  mat   `MATCEED`
710   @param[in]      type  Inner `MatType`
711 
712   @return An error code: 0 - success, otherwise - failure
713 **/
PetscErrorCode MatCeedGetInnerMatType(Mat mat, MatType *type) {
  MatCeedContext ctx;

  PetscFunctionBeginUser;
  PetscCall(MatShellGetContext(mat, &ctx));
  // Returns the context's stored string directly — borrowed pointer, caller must not free it
  *type = ctx->internal_mat_type;
  PetscFunctionReturn(PETSC_SUCCESS);
}
722 
723 /**
724   @brief Set a user defined matrix operation for a `MATCEED` matrix.
725 
726   Within each user-defined routine, the user should call `MatCeedGetContext()` to obtain the user-defined context that was set by
727 `MatCeedSetContext()`.
728 
729   Collective across MPI processes.
730 
731   @param[in,out]  mat  `MATCEED`
732   @param[in]      op   Name of the `MatOperation`
733   @param[in]      g    Function that provides the operation
734 
735   @return An error code: 0 - success, otherwise - failure
736 **/
737 PetscErrorCode MatCeedSetOperation(Mat mat, MatOperation op, void (*g)(void)) {
738   PetscFunctionBeginUser;
739   PetscCall(MatShellSetOperation(mat, op, g));
740   PetscFunctionReturn(PETSC_SUCCESS);
741 }
742 
743 /**
744   @brief Set input local vectors for `MATCEED` `MatMult()` and `MatMultTranspose()` operations.
745 
746   Not collective across MPI processes.
747 
748   @param[in,out]  mat              `MATCEED`
749   @param[in]      X_loc            Input PETSc local vector, or NULL
750   @param[in]      Y_loc_transpose  Input PETSc local vector for transpose operation, or NULL
751 
752   @return An error code: 0 - success, otherwise - failure
753 **/
754 PetscErrorCode MatCeedSetLocalVectors(Mat mat, Vec X_loc, Vec Y_loc_transpose) {
755   MatCeedContext ctx;
756 
757   PetscFunctionBeginUser;
758   PetscCall(MatShellGetContext(mat, &ctx));
759   if (X_loc) {
760     PetscInt len_old, len_new;
761 
762     PetscCall(VecGetSize(ctx->X_loc, &len_old));
763     PetscCall(VecGetSize(X_loc, &len_new));
764     PetscCheck(len_old == len_new, PETSC_COMM_SELF, PETSC_ERR_LIB, "new X_loc length %" PetscInt_FMT " should match old X_loc length %" PetscInt_FMT,
765                len_new, len_old);
766     PetscCall(VecDestroy(&ctx->X_loc));
767     ctx->X_loc = X_loc;
768     PetscCall(PetscObjectReference((PetscObject)X_loc));
769   }
770   if (Y_loc_transpose) {
771     PetscInt len_old, len_new;
772 
773     PetscCall(VecGetSize(ctx->Y_loc_transpose, &len_old));
774     PetscCall(VecGetSize(Y_loc_transpose, &len_new));
775     PetscCheck(len_old == len_new, PETSC_COMM_SELF, PETSC_ERR_LIB,
776                "new Y_loc_transpose length %" PetscInt_FMT " should match old Y_loc_transpose length %" PetscInt_FMT, len_new, len_old);
777     PetscCall(VecDestroy(&ctx->Y_loc_transpose));
778     ctx->Y_loc_transpose = Y_loc_transpose;
779     PetscCall(PetscObjectReference((PetscObject)Y_loc_transpose));
780   }
781   PetscFunctionReturn(PETSC_SUCCESS);
782 }
783 
784 /**
785   @brief Get input local vectors for `MATCEED` `MatMult()` and `MatMultTranspose()` operations.
786 
787   Not collective across MPI processes.
788 
789   @param[in,out]  mat              `MATCEED`
790   @param[out]     X_loc            Input PETSc local vector, or NULL
791   @param[out]     Y_loc_transpose  Input PETSc local vector for transpose operation, or NULL
792 
793   @return An error code: 0 - success, otherwise - failure
794 **/
795 PetscErrorCode MatCeedGetLocalVectors(Mat mat, Vec *X_loc, Vec *Y_loc_transpose) {
796   MatCeedContext ctx;
797 
798   PetscFunctionBeginUser;
799   PetscCall(MatShellGetContext(mat, &ctx));
800   if (X_loc) {
801     *X_loc = ctx->X_loc;
802     PetscCall(PetscObjectReference((PetscObject)*X_loc));
803   }
804   if (Y_loc_transpose) {
805     *Y_loc_transpose = ctx->Y_loc_transpose;
806     PetscCall(PetscObjectReference((PetscObject)*Y_loc_transpose));
807   }
808   PetscFunctionReturn(PETSC_SUCCESS);
809 }
810 
811 /**
812   @brief Restore input local vectors for `MATCEED` `MatMult()` and `MatMultTranspose()` operations.
813 
814   Not collective across MPI processes.
815 
816   @param[in,out]  mat              MatCeed
817   @param[out]     X_loc            Input PETSc local vector, or NULL
818   @param[out]     Y_loc_transpose  Input PETSc local vector for transpose operation, or NULL
819 
820   @return An error code: 0 - success, otherwise - failure
821 **/
822 PetscErrorCode MatCeedRestoreLocalVectors(Mat mat, Vec *X_loc, Vec *Y_loc_transpose) {
823   PetscFunctionBeginUser;
824   if (X_loc) PetscCall(VecDestroy(X_loc));
825   if (Y_loc_transpose) PetscCall(VecDestroy(Y_loc_transpose));
826   PetscFunctionReturn(PETSC_SUCCESS);
827 }
828 
829 /**
830   @brief Get libCEED `CeedOperator` for `MATCEED` `MatMult()` and `MatMultTranspose()` operations.
831 
832   Not collective across MPI processes.
833 
834   @param[in,out]  mat                MatCeed
835   @param[out]     op_mult            libCEED `CeedOperator` for `MatMult()`, or NULL
836   @param[out]     op_mult_transpose  libCEED `CeedOperator` for `MatMultTranspose()`, or NULL
837 
838   @return An error code: 0 - success, otherwise - failure
839 **/
840 PetscErrorCode MatCeedGetCeedOperators(Mat mat, CeedOperator *op_mult, CeedOperator *op_mult_transpose) {
841   MatCeedContext ctx;
842 
843   PetscFunctionBeginUser;
844   PetscCall(MatShellGetContext(mat, &ctx));
845   if (op_mult) {
846     *op_mult = NULL;
847     PetscCallCeed(ctx->ceed, CeedOperatorReferenceCopy(ctx->op_mult, op_mult));
848   }
849   if (op_mult_transpose) {
850     *op_mult_transpose = NULL;
851     PetscCallCeed(ctx->ceed, CeedOperatorReferenceCopy(ctx->op_mult_transpose, op_mult_transpose));
852   }
853   PetscFunctionReturn(PETSC_SUCCESS);
854 }
855 
856 /**
857   @brief Restore libCEED `CeedOperator` for `MATCEED` `MatMult()` and `MatMultTranspose()` operations.
858 
859   Not collective across MPI processes.
860 
861   @param[in,out]  mat                MatCeed
862   @param[out]     op_mult            libCEED `CeedOperator` for `MatMult()`, or NULL
863   @param[out]     op_mult_transpose  libCEED `CeedOperator` for `MatMultTranspose()`, or NULL
864 
865   @return An error code: 0 - success, otherwise - failure
866 **/
867 PetscErrorCode MatCeedRestoreCeedOperators(Mat mat, CeedOperator *op_mult, CeedOperator *op_mult_transpose) {
868   MatCeedContext ctx;
869 
870   PetscFunctionBeginUser;
871   PetscCall(MatShellGetContext(mat, &ctx));
872   if (op_mult) PetscCallCeed(ctx->ceed, CeedOperatorDestroy(op_mult));
873   if (op_mult_transpose) PetscCallCeed(ctx->ceed, CeedOperatorDestroy(op_mult_transpose));
874   PetscFunctionReturn(PETSC_SUCCESS);
875 }
876 
877 /**
878   @brief Set `PetscLogEvent` for `MATCEED` `MatMult()` and `MatMultTranspose()` operators.
879 
880   Not collective across MPI processes.
881 
882   @param[in,out]  mat                       MatCeed
883   @param[out]     log_event_mult            `PetscLogEvent` for forward evaluation, or NULL
884   @param[out]     log_event_mult_transpose  `PetscLogEvent` for transpose evaluation, or NULL
885 
886   @return An error code: 0 - success, otherwise - failure
887 **/
888 PetscErrorCode MatCeedSetLogEvents(Mat mat, PetscLogEvent log_event_mult, PetscLogEvent log_event_mult_transpose) {
889   MatCeedContext ctx;
890 
891   PetscFunctionBeginUser;
892   PetscCall(MatShellGetContext(mat, &ctx));
893   if (log_event_mult) ctx->log_event_mult = log_event_mult;
894   if (log_event_mult_transpose) ctx->log_event_mult_transpose = log_event_mult_transpose;
895   PetscFunctionReturn(PETSC_SUCCESS);
896 }
897 
898 /**
899   @brief Get `PetscLogEvent` for `MATCEED` `MatMult()` and `MatMultTranspose()` operators.
900 
901   Not collective across MPI processes.
902 
903   @param[in,out]  mat                       MatCeed
904   @param[out]     log_event_mult            `PetscLogEvent` for forward evaluation, or NULL
905   @param[out]     log_event_mult_transpose  `PetscLogEvent` for transpose evaluation, or NULL
906 
907   @return An error code: 0 - success, otherwise - failure
908 **/
909 PetscErrorCode MatCeedGetLogEvents(Mat mat, PetscLogEvent *log_event_mult, PetscLogEvent *log_event_mult_transpose) {
910   MatCeedContext ctx;
911 
912   PetscFunctionBeginUser;
913   PetscCall(MatShellGetContext(mat, &ctx));
914   if (log_event_mult) *log_event_mult = ctx->log_event_mult;
915   if (log_event_mult_transpose) *log_event_mult_transpose = ctx->log_event_mult_transpose;
916   PetscFunctionReturn(PETSC_SUCCESS);
917 }
918 
919 // -----------------------------------------------------------------------------
920 // Operator context data
921 // -----------------------------------------------------------------------------
922 
/**
  @brief Setup context data for operator application.

  Takes new references on all PETSc and libCEED objects it stores; the context owns one reference to each
  until `MatCeedContextDestroy()`.

  Collective across MPI processes.

  @param[in]   dm_x                      Input `DM`
  @param[in]   dm_y                      Output `DM`
  @param[in]   X_loc                     Input PETSc local vector, or NULL
  @param[in]   Y_loc_transpose           Input PETSc local vector for transpose operation, or NULL
  @param[in]   op_mult                   `CeedOperator` for forward evaluation
  @param[in]   op_mult_transpose         `CeedOperator` for transpose evaluation, or NULL
  @param[in]   log_event_mult            `PetscLogEvent` for forward evaluation
  @param[in]   log_event_mult_transpose  `PetscLogEvent` for transpose evaluation
  @param[out]  ctx                       Context data for operator evaluation

  @return An error code: 0 - success, otherwise - failure
**/
PetscErrorCode MatCeedContextCreate(DM dm_x, DM dm_y, Vec X_loc, Vec Y_loc_transpose, CeedOperator op_mult, CeedOperator op_mult_transpose,
                                    PetscLogEvent log_event_mult, PetscLogEvent log_event_mult_transpose, MatCeedContext *ctx) {
  CeedSize x_loc_len, y_loc_len;

  PetscFunctionBeginUser;

  // Allocate (PetscNew zero-initializes, so optional fields default to NULL/0)
  PetscCall(PetscNew(ctx));
  (*ctx)->ref_count = 1;

  // Logging
  (*ctx)->log_event_mult           = log_event_mult;
  (*ctx)->log_event_mult_transpose = log_event_mult_transpose;

  // PETSc objects: take a reference on everything stored
  PetscCall(PetscObjectReference((PetscObject)dm_x));
  (*ctx)->dm_x = dm_x;
  PetscCall(PetscObjectReference((PetscObject)dm_y));
  (*ctx)->dm_y = dm_y;
  if (X_loc) PetscCall(PetscObjectReference((PetscObject)X_loc));
  (*ctx)->X_loc = X_loc;
  if (Y_loc_transpose) PetscCall(PetscObjectReference((PetscObject)Y_loc_transpose));
  (*ctx)->Y_loc_transpose = Y_loc_transpose;

  // Memtype: probe a temporary local vector from dm_x to learn where its data lives (host vs device)
  {
    const PetscScalar *x;
    Vec                X;

    PetscCall(DMGetLocalVector(dm_x, &X));
    PetscCall(VecGetArrayReadAndMemType(X, &x, &(*ctx)->mem_type));
    PetscCall(VecRestoreArrayReadAndMemType(X, &x));
    PetscCall(DMRestoreLocalVector(dm_x, &X));
  }

  // libCEED objects: hold a reference to the Ceed and the operators, and create work vectors
  // sized to the operators' active input/output lengths
  PetscCheck(CeedOperatorGetCeed(op_mult, &(*ctx)->ceed) == CEED_ERROR_SUCCESS, PETSC_COMM_SELF, PETSC_ERR_LIB,
             "retrieving Ceed context object failed");
  PetscCallCeed((*ctx)->ceed, CeedReference((*ctx)->ceed));
  PetscCallCeed((*ctx)->ceed, CeedOperatorGetActiveVectorLengths(op_mult, &x_loc_len, &y_loc_len));
  PetscCallCeed((*ctx)->ceed, CeedOperatorReferenceCopy(op_mult, &(*ctx)->op_mult));
  if (op_mult_transpose) PetscCallCeed((*ctx)->ceed, CeedOperatorReferenceCopy(op_mult_transpose, &(*ctx)->op_mult_transpose));
  PetscCallCeed((*ctx)->ceed, CeedVectorCreate((*ctx)->ceed, x_loc_len, &(*ctx)->x_loc));
  PetscCallCeed((*ctx)->ceed, CeedVectorCreate((*ctx)->ceed, y_loc_len, &(*ctx)->y_loc));

  // Flop counting: cache per-apply flop estimates for PetscLogFlops/PetscLogGpuFlops in MatMult*_Ceed
  {
    CeedSize ceed_flops_estimate = 0;

    PetscCallCeed((*ctx)->ceed, CeedOperatorGetFlopsEstimate(op_mult, &ceed_flops_estimate));
    (*ctx)->flops_mult = ceed_flops_estimate;
    if (op_mult_transpose) {
      PetscCallCeed((*ctx)->ceed, CeedOperatorGetFlopsEstimate(op_mult_transpose, &ceed_flops_estimate));
      (*ctx)->flops_mult_transpose = ceed_flops_estimate;
    }
  }

  // Check sizes: ensure the operator, the DMs, and any user-supplied local vectors agree
  if (x_loc_len > 0 || y_loc_len > 0) {
    CeedSize ctx_x_loc_len, ctx_y_loc_len;
    PetscInt X_loc_len, dm_x_loc_len, Y_loc_len, dm_y_loc_len;
    Vec      dm_X_loc, dm_Y_loc;

    // -- Input
    PetscCall(DMGetLocalVector(dm_x, &dm_X_loc));
    PetscCall(VecGetLocalSize(dm_X_loc, &dm_x_loc_len));
    PetscCall(DMRestoreLocalVector(dm_x, &dm_X_loc));
    PetscCallCeed((*ctx)->ceed, CeedVectorGetLength((*ctx)->x_loc, &ctx_x_loc_len));
    if (X_loc) {
      PetscCall(VecGetLocalSize(X_loc, &X_loc_len));
      PetscCheck(X_loc_len == dm_x_loc_len, PETSC_COMM_SELF, PETSC_ERR_LIB,
                 "X_loc (%" PetscInt_FMT ") must match dm_x (%" PetscInt_FMT ") dimensions", X_loc_len, dm_x_loc_len);
    }
    PetscCheck(x_loc_len == dm_x_loc_len, PETSC_COMM_SELF, PETSC_ERR_LIB, "op (%" CeedSize_FMT ") must match dm_x (%" PetscInt_FMT ") dimensions",
               x_loc_len, dm_x_loc_len);
    PetscCheck(x_loc_len == ctx_x_loc_len, PETSC_COMM_SELF, PETSC_ERR_LIB, "x_loc (%" CeedSize_FMT ") must match op dimensions (%" CeedSize_FMT ")",
               x_loc_len, ctx_x_loc_len);

    // -- Output
    PetscCall(DMGetLocalVector(dm_y, &dm_Y_loc));
    PetscCall(VecGetLocalSize(dm_Y_loc, &dm_y_loc_len));
    PetscCall(DMRestoreLocalVector(dm_y, &dm_Y_loc));
    PetscCallCeed((*ctx)->ceed, CeedVectorGetLength((*ctx)->y_loc, &ctx_y_loc_len));
    PetscCheck(ctx_y_loc_len == dm_y_loc_len, PETSC_COMM_SELF, PETSC_ERR_LIB, "op (%" CeedSize_FMT ") must match dm_y (%" PetscInt_FMT ") dimensions",
               ctx_y_loc_len, dm_y_loc_len);

    // -- Transpose
    if (Y_loc_transpose) {
      PetscCall(VecGetLocalSize(Y_loc_transpose, &Y_loc_len));
      PetscCheck(Y_loc_len == dm_y_loc_len, PETSC_COMM_SELF, PETSC_ERR_LIB,
                 "Y_loc_transpose (%" PetscInt_FMT ") must match dm_y (%" PetscInt_FMT ") dimensions", Y_loc_len, dm_y_loc_len);
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
1035 
1036 /**
1037   @brief Increment reference counter for `MATCEED` context.
1038 
1039   Not collective across MPI processes.
1040 
1041   @param[in,out]  ctx  Context data
1042 
1043   @return An error code: 0 - success, otherwise - failure
1044 **/
1045 PetscErrorCode MatCeedContextReference(MatCeedContext ctx) {
1046   PetscFunctionBeginUser;
1047   ctx->ref_count++;
1048   PetscFunctionReturn(PETSC_SUCCESS);
1049 }
1050 
1051 /**
1052   @brief Copy reference for `MATCEED`.
1053          Note: If `ctx_copy` is non-null, it is assumed to be a valid pointer to a `MatCeedContext`.
1054 
1055   Not collective across MPI processes.
1056 
1057   @param[in]   ctx       Context data
1058   @param[out]  ctx_copy  Copy of pointer to context data
1059 
1060   @return An error code: 0 - success, otherwise - failure
1061 **/
1062 PetscErrorCode MatCeedContextReferenceCopy(MatCeedContext ctx, MatCeedContext *ctx_copy) {
1063   PetscFunctionBeginUser;
1064   PetscCall(MatCeedContextReference(ctx));
1065   PetscCall(MatCeedContextDestroy(*ctx_copy));
1066   *ctx_copy = ctx;
1067   PetscFunctionReturn(PETSC_SUCCESS);
1068 }
1069 
/**
  @brief Destroy context data for operator application.

  Decrements the reference count and deallocates only when the last reference is released.

  Collective across MPI processes.

  @param[in,out]  ctx  Context data for operator evaluation, or NULL (no-op)

  @return An error code: 0 - success, otherwise - failure
**/
PetscErrorCode MatCeedContextDestroy(MatCeedContext ctx) {
  PetscFunctionBeginUser;
  // Only tear down when the final reference is released
  if (!ctx || --ctx->ref_count > 0) PetscFunctionReturn(PETSC_SUCCESS);

  // PETSc objects
  PetscCall(DMDestroy(&ctx->dm_x));
  PetscCall(DMDestroy(&ctx->dm_y));
  PetscCall(VecDestroy(&ctx->X_loc));
  PetscCall(VecDestroy(&ctx->Y_loc_transpose));
  PetscCall(MatDestroy(&ctx->mat_assembled_full_internal));
  PetscCall(MatDestroy(&ctx->mat_assembled_pbd_internal));
  PetscCall(PetscFree(ctx->internal_mat_type));
  PetscCall(PetscFree(ctx->mats_assembled_full));
  PetscCall(PetscFree(ctx->mats_assembled_pbd));

  // libCEED objects; the Ceed itself is released last since PetscCallCeed uses it for error reporting
  PetscCallCeed(ctx->ceed, CeedVectorDestroy(&ctx->x_loc));
  PetscCallCeed(ctx->ceed, CeedVectorDestroy(&ctx->y_loc));
  PetscCallCeed(ctx->ceed, CeedVectorDestroy(&ctx->coo_values_full));
  PetscCallCeed(ctx->ceed, CeedVectorDestroy(&ctx->coo_values_pbd));
  PetscCallCeed(ctx->ceed, CeedOperatorDestroy(&ctx->op_mult));
  PetscCallCeed(ctx->ceed, CeedOperatorDestroy(&ctx->op_mult_transpose));
  PetscCheck(CeedDestroy(&ctx->ceed) == CEED_ERROR_SUCCESS, PETSC_COMM_SELF, PETSC_ERR_LIB, "destroying libCEED context object failed");

  // Deallocate
  // NOTE(review): this flag is written immediately before the memory is freed, so stale readers still race — confirm intent
  ctx->is_destroyed = PETSC_TRUE;  // Flag as destroyed in case someone has stale ref
  PetscCall(PetscFree(ctx));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1108 
/**
  @brief Compute the diagonal of an operator via libCEED.

  Collective across MPI processes.

  @param[in]   A  `MATCEED`
  @param[out]  D  Vector holding operator diagonal

  @return An error code: 0 - success, otherwise - failure
**/
PetscErrorCode MatGetDiagonal_Ceed(Mat A, Vec D) {
  PetscMemType   mem_type;
  Vec            D_loc;
  MatCeedContext ctx;

  PetscFunctionBeginUser;
  PetscCall(MatShellGetContext(A, &ctx));

  // Place PETSc vector in libCEED vector (borrows D_loc's array; no copy)
  PetscCall(DMGetLocalVector(ctx->dm_x, &D_loc));
  PetscCall(VecPetscToCeed(D_loc, &mem_type, ctx->x_loc));

  // Compute Diagonal of the forward operator into the borrowed array
  PetscCallCeed(ctx->ceed, CeedOperatorLinearAssembleDiagonal(ctx->op_mult, ctx->x_loc, CEED_REQUEST_IMMEDIATE));

  // Restore PETSc vector (returns ownership of the array to D_loc)
  PetscCall(VecCeedToPetsc(ctx->x_loc, mem_type, D_loc));

  // Local-to-Global: ADD_VALUES accumulates shared (ghost) entries across ranks
  PetscCall(VecZeroEntries(D));
  PetscCall(DMLocalToGlobal(ctx->dm_x, D_loc, ADD_VALUES, D));
  PetscCall(DMRestoreLocalVector(ctx->dm_x, &D_loc));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1143 
/**
  @brief Compute `Y = A X` for a `MATCEED`.

  Collective across MPI processes.

  @param[in]   A  `MATCEED`
  @param[in]   X  Input PETSc vector
  @param[out]  Y  Output PETSc vector

  @return An error code: 0 - success, otherwise - failure
**/
PetscErrorCode MatMult_Ceed(Mat A, Vec X, Vec Y) {
  MatCeedContext ctx;

  PetscFunctionBeginUser;
  PetscCall(MatShellGetContext(A, &ctx));
  PetscCall(PetscLogEventBegin(ctx->log_event_mult, A, X, Y, 0));

  {
    PetscMemType x_mem_type, y_mem_type;
    Vec          X_loc = ctx->X_loc, Y_loc;

    // Get local vectors; a user-provided X_loc (via MatCeedSetLocalVectors) is reused, otherwise borrow from the DM
    if (!ctx->X_loc) PetscCall(DMGetLocalVector(ctx->dm_x, &X_loc));
    PetscCall(DMGetLocalVector(ctx->dm_y, &Y_loc));

    // Global-to-local
    PetscCall(DMGlobalToLocal(ctx->dm_x, X, INSERT_VALUES, X_loc));

    // Setup libCEED vectors: borrow the PETSc arrays (read-only for input); Y_loc is zeroed since the operator adds into it
    PetscCall(VecReadPetscToCeed(X_loc, &x_mem_type, ctx->x_loc));
    PetscCall(VecZeroEntries(Y_loc));
    PetscCall(VecPetscToCeed(Y_loc, &y_mem_type, ctx->y_loc));

    // Apply libCEED operator
    PetscCall(PetscLogGpuTimeBegin());
    PetscCallCeed(ctx->ceed, CeedOperatorApplyAdd(ctx->op_mult, ctx->x_loc, ctx->y_loc, CEED_REQUEST_IMMEDIATE));
    PetscCall(PetscLogGpuTimeEnd());

    // Restore PETSc vectors (return array ownership before any PETSc access)
    PetscCall(VecReadCeedToPetsc(ctx->x_loc, x_mem_type, X_loc));
    PetscCall(VecCeedToPetsc(ctx->y_loc, y_mem_type, Y_loc));

    // Local-to-global: ADD_VALUES accumulates shared (ghost) entries across ranks
    PetscCall(VecZeroEntries(Y));
    PetscCall(DMLocalToGlobal(ctx->dm_y, Y_loc, ADD_VALUES, Y));

    // Restore local vectors, as needed
    if (!ctx->X_loc) PetscCall(DMRestoreLocalVector(ctx->dm_x, &X_loc));
    PetscCall(DMRestoreLocalVector(ctx->dm_y, &Y_loc));
  }

  // Log flops on the device or host depending on where the data lives
  if (PetscMemTypeDevice(ctx->mem_type)) PetscCall(PetscLogGpuFlops(ctx->flops_mult));
  else PetscCall(PetscLogFlops(ctx->flops_mult));

  PetscCall(PetscLogEventEnd(ctx->log_event_mult, A, X, Y, 0));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1203 
1204 /**
1205   @brief Compute `A^T Y = X` for a `MATCEED`.
1206 
1207   Collective across MPI processes.
1208 
1209   @param[in]   A  `MATCEED`
1210   @param[in]   Y  Input PETSc vector
1211   @param[out]  X  Output PETSc vector
1212 
1213   @return An error code: 0 - success, otherwise - failure
1214 **/
1215 PetscErrorCode MatMultTranspose_Ceed(Mat A, Vec Y, Vec X) {
1216   MatCeedContext ctx;
1217 
1218   PetscFunctionBeginUser;
1219   PetscCall(MatShellGetContext(A, &ctx));
1220   PetscCall(PetscLogEventBegin(ctx->log_event_mult_transpose, A, Y, X, 0));
1221 
1222   {
1223     PetscMemType x_mem_type, y_mem_type;
1224     Vec          X_loc, Y_loc = ctx->Y_loc_transpose;
1225 
1226     // Get local vectors
1227     if (!ctx->Y_loc_transpose) PetscCall(DMGetLocalVector(ctx->dm_y, &Y_loc));
1228     PetscCall(DMGetLocalVector(ctx->dm_x, &X_loc));
1229 
1230     // Global-to-local
1231     PetscCall(DMGlobalToLocal(ctx->dm_y, Y, INSERT_VALUES, Y_loc));
1232 
1233     // Setup libCEED vectors
1234     PetscCall(VecReadPetscToCeed(Y_loc, &y_mem_type, ctx->y_loc));
1235     PetscCall(VecZeroEntries(X_loc));
1236     PetscCall(VecPetscToCeed(X_loc, &x_mem_type, ctx->x_loc));
1237 
1238     // Apply libCEED operator
1239     PetscCall(PetscLogGpuTimeBegin());
1240     PetscCallCeed(ctx->ceed, CeedOperatorApplyAdd(ctx->op_mult_transpose, ctx->y_loc, ctx->x_loc, CEED_REQUEST_IMMEDIATE));
1241     PetscCall(PetscLogGpuTimeEnd());
1242 
1243     // Restore PETSc vectors
1244     PetscCall(VecReadCeedToPetsc(ctx->y_loc, y_mem_type, Y_loc));
1245     PetscCall(VecCeedToPetsc(ctx->x_loc, x_mem_type, X_loc));
1246 
1247     // Local-to-global
1248     PetscCall(VecZeroEntries(X));
1249     PetscCall(DMLocalToGlobal(ctx->dm_x, X_loc, ADD_VALUES, X));
1250 
1251     // Restore local vectors, as needed
1252     if (!ctx->Y_loc_transpose) PetscCall(DMRestoreLocalVector(ctx->dm_y, &Y_loc));
1253     PetscCall(DMRestoreLocalVector(ctx->dm_x, &X_loc));
1254   }
1255 
1256   // Log flops
1257   if (PetscMemTypeDevice(ctx->mem_type)) PetscCall(PetscLogGpuFlops(ctx->flops_mult_transpose));
1258   else PetscCall(PetscLogFlops(ctx->flops_mult_transpose));
1259 
1260   PetscCall(PetscLogEventEnd(ctx->log_event_mult_transpose, A, Y, X, 0));
1261   PetscFunctionReturn(PETSC_SUCCESS);
1262 }
1263