xref: /honee/src/mat-ceed.c (revision dfdd2b92c47737c13e412ceefeabe0da7d3e1056)
1 // SPDX-FileCopyrightText: Copyright (c) 2017-2024, HONEE contributors.
2 // SPDX-License-Identifier: Apache-2.0 OR BSD-2-Clause
3 /// @file
4 /// MatCEED implementation
5 
6 #include <ceed.h>
7 #include <ceed/backend.h>
8 #include <mat-ceed-impl.h>
9 #include <mat-ceed.h>
10 #include <petsc-ceed-utils.h>
11 #include <petsc-ceed.h>
12 #include <petscdm.h>
13 #include <petscmat.h>
14 #include <stdbool.h>
15 #include <stdio.h>
16 #include <stdlib.h>
17 #include <string.h>
18 
19 PetscClassId  MATCEED_CLASSID;
20 PetscLogEvent MATCEED_MULT, MATCEED_MULT_CEEDOP, MATCEED_MULT_TRANSPOSE, MATCEED_MULT_TRANSPOSE_CEEDOP, MATCEED_ASSEMBLE_DIAGONAL,
21     MATCEED_ASSEMBLE_DIAGONAL_CEEDOP, MATCEED_SETUP_PBDIAGONAL, MATCEED_SETUP_PBDIAGONAL_CEEDOP, MATCEED_ASSEMBLE_PBDIAGONAL,
22     MATCEED_ASSEMBLE_PBDIAGONAL_CEEDOP, MATCEED_SETUP_FULL, MATCEED_SETUP_FULL_CEEDOP, MATCEED_ASSEMBLE_FULL, MATCEED_ASSEMBLE_FULL_CEEDOP;
23 
24 /**
25   @brief Register MATCEED log events.
26 
27   Not collective across MPI processes.
28 
29   @return An error code: 0 - success, otherwise - failure
30 **/
31 static PetscErrorCode MatCeedRegisterLogEvents() {
32   static PetscBool registered = PETSC_FALSE;
33 
34   PetscFunctionBeginUser;
35   if (registered) PetscFunctionReturn(PETSC_SUCCESS);
36   PetscCall(PetscClassIdRegister("MatCEED", &MATCEED_CLASSID));
37   PetscCall(PetscLogEventRegister("MatCEEDMul", MATCEED_CLASSID, &MATCEED_MULT));
38   PetscCall(PetscLogEventRegister("MatCEEDMulCeed", MATCEED_CLASSID, &MATCEED_MULT_CEEDOP));
39   PetscCall(PetscLogEventRegister("MatCEEDMulT", MATCEED_CLASSID, &MATCEED_MULT_TRANSPOSE));
40   PetscCall(PetscLogEventRegister("MatCEEDMulTCeed", MATCEED_CLASSID, &MATCEED_MULT_TRANSPOSE_CEEDOP));
41   PetscCall(PetscLogEventRegister("MatCEEDAsmDiag", MATCEED_CLASSID, &MATCEED_ASSEMBLE_DIAGONAL));
42   PetscCall(PetscLogEventRegister("MatCEEDAsmPBDSU", MATCEED_CLASSID, &MATCEED_SETUP_PBDIAGONAL));
43   PetscCall(PetscLogEventRegister("MatCEEDAsmPBDSUCeed", MATCEED_CLASSID, &MATCEED_SETUP_PBDIAGONAL_CEEDOP));
44   PetscCall(PetscLogEventRegister("MatCEEDAsmPBD", MATCEED_CLASSID, &MATCEED_ASSEMBLE_PBDIAGONAL));
45   PetscCall(PetscLogEventRegister("MatCEEDAsmPBDCeed", MATCEED_CLASSID, &MATCEED_ASSEMBLE_PBDIAGONAL_CEEDOP));
46   PetscCall(PetscLogEventRegister("MatCEEDAsmSU", MATCEED_CLASSID, &MATCEED_SETUP_FULL));
47   PetscCall(PetscLogEventRegister("MatCEEDAsmSUCeed", MATCEED_CLASSID, &MATCEED_SETUP_FULL_CEEDOP));
48   PetscCall(PetscLogEventRegister("MatCEEDAsm", MATCEED_CLASSID, &MATCEED_ASSEMBLE_FULL));
49   PetscCall(PetscLogEventRegister("MatCEEDAsmCeed", MATCEED_CLASSID, &MATCEED_ASSEMBLE_FULL_CEEDOP));
50   registered = PETSC_TRUE;
51   PetscFunctionReturn(PETSC_SUCCESS);
52 }
53 
54 /**
55   @brief Assemble the point block diagonal of a `MATCEED` into a `MATAIJ` or similar.
56          The `mat_coo` preallocation is set to match the sparsity pattern of `mat_ceed`.
57          The caller is responsible for assuring the global and local sizes are compatible, otherwise this function will fail.
58 
59   Collective across MPI processes.
60 
61   @param[in]      mat_ceed  `MATCEED` to assemble
62   @param[in,out]  mat_coo   `MATAIJ` or similar to assemble into
63 
64   @return An error code: 0 - success, otherwise - failure
65 **/
66 static PetscErrorCode MatCeedAssemblePointBlockDiagonalCOO(Mat mat_ceed, Mat mat_coo) {
67   MatCeedContext ctx;
68 
69   PetscFunctionBeginUser;
70   PetscCall(MatShellGetContext(mat_ceed, &ctx));
71 
72   // Check if COO pattern set
73   {
74     PetscInt index = -1;
75 
76     for (PetscInt i = 0; i < ctx->num_mats_assembled_pbd; i++) {
77       if (ctx->mats_assembled_pbd[i] == mat_coo) index = i;
78     }
79     if (index == -1) {
80       PetscInt     *rows_petsc = NULL, *cols_petsc = NULL;
81       CeedInt      *rows_ceed, *cols_ceed;
82       PetscCount    num_entries;
83       PetscLogStage stage_amg_setup;
84 
85       // -- Assemble sparsity pattern if mat hasn't been assembled before
86       PetscCall(PetscLogStageGetId("MatCEED Asm Setup", &stage_amg_setup));
87       if (stage_amg_setup == -1) {
88         PetscCall(PetscLogStageRegister("MatCEED Asm Setup", &stage_amg_setup));
89       }
90       PetscCall(PetscLogStagePush(stage_amg_setup));
91       PetscCall(PetscLogEventBegin(MATCEED_SETUP_PBDIAGONAL, mat_ceed, mat_coo, NULL, NULL));
92       PetscCall(PetscLogEventBegin(MATCEED_SETUP_PBDIAGONAL_CEEDOP, mat_ceed, mat_coo, NULL, NULL));
93       PetscCallCeed(ctx->ceed, CeedOperatorLinearAssemblePointBlockDiagonalSymbolic(ctx->op_mult, &num_entries, &rows_ceed, &cols_ceed));
94       PetscCall(PetscLogEventEnd(MATCEED_SETUP_PBDIAGONAL_CEEDOP, mat_ceed, mat_coo, NULL, NULL));
95       PetscCall(IntArrayCeedToPetsc(num_entries, &rows_ceed, &rows_petsc));
96       PetscCall(IntArrayCeedToPetsc(num_entries, &cols_ceed, &cols_petsc));
97       PetscCall(MatSetPreallocationCOOLocal(mat_coo, num_entries, rows_petsc, cols_petsc));
98       free(rows_petsc);
99       free(cols_petsc);
100       if (!ctx->coo_values_pbd) PetscCallCeed(ctx->ceed, CeedVectorCreate(ctx->ceed, num_entries, &ctx->coo_values_pbd));
101       PetscCall(PetscRealloc(++ctx->num_mats_assembled_pbd * sizeof(Mat), &ctx->mats_assembled_pbd));
102       ctx->mats_assembled_pbd[ctx->num_mats_assembled_pbd - 1] = mat_coo;
103       PetscCall(PetscLogEventEnd(MATCEED_SETUP_PBDIAGONAL, mat_ceed, mat_coo, NULL, NULL));
104       PetscCall(PetscLogStagePop());
105     }
106   }
107 
108   // Assemble mat_ceed
109   PetscCall(PetscLogEventBegin(MATCEED_ASSEMBLE_PBDIAGONAL, mat_ceed, mat_coo, NULL, NULL));
110   PetscCall(MatAssemblyBegin(mat_coo, MAT_FINAL_ASSEMBLY));
111   {
112     const CeedScalar *values;
113     MatType           mat_type;
114     CeedMemType       mem_type = CEED_MEM_HOST;
115     PetscBool         is_spd, is_spd_known;
116 
117     PetscCall(MatGetType(mat_coo, &mat_type));
118     if (strstr(mat_type, "cusparse")) mem_type = CEED_MEM_DEVICE;
119     else if (strstr(mat_type, "kokkos")) mem_type = CEED_MEM_DEVICE;
120     else mem_type = CEED_MEM_HOST;
121 
122     PetscCall(PetscLogEventBegin(MATCEED_ASSEMBLE_PBDIAGONAL_CEEDOP, mat_ceed, mat_coo, NULL, NULL));
123     PetscCallCeed(ctx->ceed, CeedOperatorLinearAssemblePointBlockDiagonal(ctx->op_mult, ctx->coo_values_pbd, CEED_REQUEST_IMMEDIATE));
124     PetscCall(PetscLogEventEnd(MATCEED_ASSEMBLE_PBDIAGONAL_CEEDOP, mat_ceed, mat_coo, NULL, NULL));
125     PetscCallCeed(ctx->ceed, CeedVectorGetArrayRead(ctx->coo_values_pbd, mem_type, &values));
126     PetscCall(MatSetValuesCOO(mat_coo, values, INSERT_VALUES));
127     PetscCall(MatIsSPDKnown(mat_ceed, &is_spd_known, &is_spd));
128     if (is_spd_known) PetscCall(MatSetOption(mat_coo, MAT_SPD, is_spd));
129     PetscCallCeed(ctx->ceed, CeedVectorRestoreArrayRead(ctx->coo_values_pbd, &values));
130   }
131   PetscCall(MatAssemblyEnd(mat_coo, MAT_FINAL_ASSEMBLY));
132   PetscCall(PetscLogEventEnd(MATCEED_ASSEMBLE_PBDIAGONAL, mat_ceed, mat_coo, NULL, NULL));
133   PetscFunctionReturn(PETSC_SUCCESS);
134 }
135 
136 /**
137   @brief Assemble inner `Mat` for diagonal `PC` operations
138 
139   Collective across MPI processes.
140 
141   @param[in]   mat_ceed      `MATCEED` to invert
142   @param[in]   use_ceed_pbd  Boolean flag to use libCEED PBD assembly
143   @param[out]  mat_inner     Inner `Mat` for diagonal operations
144 
145   @return An error code: 0 - success, otherwise - failure
146 **/
147 static PetscErrorCode MatCeedAssembleInnerBlockDiagonalMat(Mat mat_ceed, PetscBool use_ceed_pbd, Mat *mat_inner) {
148   MatCeedContext ctx;
149 
150   PetscFunctionBeginUser;
151   PetscCall(MatShellGetContext(mat_ceed, &ctx));
152   if (use_ceed_pbd) {
153     // Check if COO pattern set
154     if (!ctx->mat_assembled_pbd_internal) PetscCall(MatCeedCreateMatCOO(mat_ceed, &ctx->mat_assembled_pbd_internal));
155 
156     // Assemble mat_assembled_full_internal
157     PetscCall(MatCeedAssemblePointBlockDiagonalCOO(mat_ceed, ctx->mat_assembled_pbd_internal));
158     if (mat_inner) *mat_inner = ctx->mat_assembled_pbd_internal;
159   } else {
160     // Check if COO pattern set
161     if (!ctx->mat_assembled_full_internal) PetscCall(MatCeedCreateMatCOO(mat_ceed, &ctx->mat_assembled_full_internal));
162 
163     // Assemble mat_assembled_full_internal
164     PetscCall(MatCeedAssembleCOO(mat_ceed, ctx->mat_assembled_full_internal));
165     if (mat_inner) *mat_inner = ctx->mat_assembled_full_internal;
166   }
167   PetscFunctionReturn(PETSC_SUCCESS);
168 }
169 
170 /**
171   @brief Get on-process diagonal block of `MATCEED`
172 
173   This is a block per-process of the diagonal of the global matrix.
174   This is NOT the diagonal blocks associated with the block size of the matrix (i.e. `MatSetBlockSize()` has no effect on this function).
175 
176   Collective across MPI processes.
177 
178   @param[in]   mat_ceed   `MATCEED` to invert
179   @param[out]  mat_block  The diagonal block matrix
180 
181   @return An error code: 0 - success, otherwise - failure
182 **/
183 static PetscErrorCode MatGetDiagonalBlock_Ceed(Mat mat_ceed, Mat *mat_block) {
184   MatCeedContext ctx;
185 
186   PetscFunctionBeginUser;
187   PetscCall(MatShellGetContext(mat_ceed, &ctx));
188 
189   // Check if COO pattern set
190   if (!ctx->mat_assembled_full_internal) PetscCall(MatCeedCreateMatCOO(mat_ceed, &ctx->mat_assembled_full_internal));
191 
192   // Assemble mat_assembled_full_internal
193   PetscCall(MatCeedAssembleCOO(mat_ceed, ctx->mat_assembled_full_internal));
194 
195   // Get diagonal block
196   PetscCall(MatGetDiagonalBlock(ctx->mat_assembled_full_internal, mat_block));
197   PetscFunctionReturn(PETSC_SUCCESS);
198 }
199 
200 /**
201   @brief Invert `MATCEED` diagonal block for Jacobi.
202 
203   Collective across MPI processes.
204 
205   @param[in]   mat_ceed  `MATCEED` to invert
206   @param[out]  values    The block inverses in column major order
207 
208   @return An error code: 0 - success, otherwise - failure
209 **/
210 static PetscErrorCode MatInvertBlockDiagonal_Ceed(Mat mat_ceed, const PetscScalar **values) {
211   Mat            mat_inner = NULL;
212   MatCeedContext ctx;
213 
214   PetscFunctionBeginUser;
215   PetscCall(MatShellGetContext(mat_ceed, &ctx));
216 
217   // Assemble inner mat if needed
218   PetscCall(MatCeedAssembleInnerBlockDiagonalMat(mat_ceed, ctx->is_ceed_pbd_valid, &mat_inner));
219 
220   // Invert PB diagonal
221   PetscCall(MatInvertBlockDiagonal(mat_inner, values));
222   PetscFunctionReturn(PETSC_SUCCESS);
223 }
224 
225 /**
226   @brief Invert `MATCEED` variable diagonal block for Jacobi.
227 
228   Collective across MPI processes.
229 
230   @param[in]   mat_ceed     `MATCEED` to invert
231   @param[in]   num_blocks   The number of blocks on the process
232   @param[in]   block_sizes  The size of each block on the process
233   @param[out]  values       The block inverses in column major order
234 
235   @return An error code: 0 - success, otherwise - failure
236 **/
237 static PetscErrorCode MatInvertVariableBlockDiagonal_Ceed(Mat mat_ceed, PetscInt num_blocks, const PetscInt *block_sizes, PetscScalar *values) {
238   Mat            mat_inner = NULL;
239   MatCeedContext ctx;
240 
241   PetscFunctionBeginUser;
242   PetscCall(MatShellGetContext(mat_ceed, &ctx));
243 
244   // Assemble inner mat if needed
245   PetscCall(MatCeedAssembleInnerBlockDiagonalMat(mat_ceed, ctx->is_ceed_vpbd_valid, &mat_inner));
246 
247   // Invert PB diagonal
248   PetscCall(MatInvertVariableBlockDiagonal(mat_inner, num_blocks, block_sizes, values));
249   PetscFunctionReturn(PETSC_SUCCESS);
250 }
251 
252 /**
253   @brief View `MATCEED`.
254 
255   Collective across MPI processes.
256 
257   @param[in]   mat_ceed  `MATCEED` to view
258   @param[in]   viewer    The visualization context
259 
260   @return An error code: 0 - success, otherwise - failure
261 **/
262 static PetscErrorCode MatView_Ceed(Mat mat_ceed, PetscViewer viewer) {
263   PetscBool         is_ascii;
264   PetscViewerFormat format;
265   PetscMPIInt       size, rank;
266   MatCeedContext    ctx;
267 
268   PetscFunctionBeginUser;
269   PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2);
270   PetscCall(MatShellGetContext(mat_ceed, &ctx));
271   if (!viewer) PetscCall(PetscViewerASCIIGetStdout(PetscObjectComm((PetscObject)mat_ceed), &viewer));
272 
273   PetscCall(PetscViewerGetFormat(viewer, &format));
274   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)mat_ceed), &size));
275   if (size == 1 && format == PETSC_VIEWER_LOAD_BALANCE) PetscFunctionReturn(PETSC_SUCCESS);
276 
277   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat_ceed), &rank));
278   if (rank != 0) PetscFunctionReturn(PETSC_SUCCESS);
279 
280   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &is_ascii));
281   {
282     PetscBool is_detailed     = format == PETSC_VIEWER_ASCII_INFO_DETAIL;
283     char      rank_string[16] = {'\0'};
284     FILE     *file;
285 
286     PetscCall(PetscViewerASCIIPrintf(viewer, "MatCEED:\n"));
287     PetscCall(PetscViewerASCIIPushTab(viewer));  // MatCEED
288     PetscCall(PetscViewerASCIIPrintf(viewer, "Default COO MatType: %s\n", ctx->coo_mat_type));
289     PetscCall(PetscSNPrintf(rank_string, 16, "on Rank %d", rank));
290     PetscCall(PetscViewerASCIIPrintf(viewer, "CeedOperator Apply %s:\n", is_detailed ? rank_string : "Summary"));
291     PetscCall(PetscViewerASCIIGetPointer(viewer, &file));
292     PetscCall(PetscViewerASCIIPushTab(viewer));  // CeedOperator
293     if (is_detailed) PetscCallCeed(ctx->ceed, CeedOperatorView(ctx->op_mult, file));
294     else PetscCallCeed(ctx->ceed, CeedOperatorViewTerse(ctx->op_mult, file));
295     PetscCall(PetscViewerASCIIPopTab(viewer));  // CeedOperator
296     if (ctx->op_mult_transpose) {
297       PetscCall(PetscViewerASCIIPrintf(viewer, "CeedOperator ApplyTranspose %s:\n", is_detailed ? rank_string : "Summary"));
298       PetscCall(PetscViewerASCIIPushTab(viewer));  // CeedOperator
299       if (is_detailed) PetscCallCeed(ctx->ceed, CeedOperatorView(ctx->op_mult_transpose, file));
300       else PetscCallCeed(ctx->ceed, CeedOperatorViewTerse(ctx->op_mult_transpose, file));
301       PetscCall(PetscViewerASCIIPopTab(viewer));  // CeedOperator
302     }
303     PetscCall(PetscViewerASCIIPopTab(viewer));  // MatCEED
304   }
305   PetscFunctionReturn(PETSC_SUCCESS);
306 }
307 
308 // -----------------------------------------------------------------------------
309 // MatCeed
310 // -----------------------------------------------------------------------------
311 
312 /**
313   @brief Create PETSc `Mat` from libCEED operators.
314 
315   Collective across MPI processes.
316 
317   @param[in]   dm_x                      Input `DM`
318   @param[in]   dm_y                      Output `DM`
319   @param[in]   op_mult                   `CeedOperator` for forward evaluation
320   @param[in]   op_mult_transpose         `CeedOperator` for transpose evaluation
321   @param[out]  mat                        New MatCeed
322 
323   @return An error code: 0 - success, otherwise - failure
324 **/
325 PetscErrorCode MatCreateCeed(DM dm_x, DM dm_y, CeedOperator op_mult, CeedOperator op_mult_transpose, Mat *mat) {
326   PetscInt       X_l_size, X_g_size, Y_l_size, Y_g_size;
327   VecType        vec_type;
328   MatCeedContext ctx;
329 
330   PetscFunctionBeginUser;
331   PetscCall(MatCeedRegisterLogEvents());
332 
333   // Collect context data
334   PetscCall(DMGetVecType(dm_x, &vec_type));
335   {
336     Vec X;
337 
338     PetscCall(DMGetGlobalVector(dm_x, &X));
339     PetscCall(VecGetSize(X, &X_g_size));
340     PetscCall(VecGetLocalSize(X, &X_l_size));
341     PetscCall(DMRestoreGlobalVector(dm_x, &X));
342   }
343   if (dm_y) {
344     Vec Y;
345 
346     PetscCall(DMGetGlobalVector(dm_y, &Y));
347     PetscCall(VecGetSize(Y, &Y_g_size));
348     PetscCall(VecGetLocalSize(Y, &Y_l_size));
349     PetscCall(DMRestoreGlobalVector(dm_y, &Y));
350   } else {
351     dm_y     = dm_x;
352     Y_g_size = X_g_size;
353     Y_l_size = X_l_size;
354   }
355 
356   // Create context
357   {
358     Vec X_loc, Y_loc_transpose = NULL;
359 
360     PetscCall(DMCreateLocalVector(dm_x, &X_loc));
361     PetscCall(VecZeroEntries(X_loc));
362     if (op_mult_transpose) {
363       PetscCall(DMCreateLocalVector(dm_y, &Y_loc_transpose));
364       PetscCall(VecZeroEntries(Y_loc_transpose));
365     }
366     PetscCall(MatCeedContextCreate(dm_x, dm_y, X_loc, Y_loc_transpose, op_mult, op_mult_transpose, MATCEED_MULT, MATCEED_MULT_TRANSPOSE,
367                                    MATCEED_MULT_CEEDOP, MATCEED_MULT_TRANSPOSE_CEEDOP, &ctx));
368     PetscCall(VecDestroy(&X_loc));
369     PetscCall(VecDestroy(&Y_loc_transpose));
370   }
371 
372   // Create mat
373   PetscCall(MatCreateShell(PetscObjectComm((PetscObject)dm_x), Y_l_size, X_l_size, Y_g_size, X_g_size, ctx, mat));
374   PetscCall(PetscObjectChangeTypeName((PetscObject)*mat, MATCEED));
375   // -- Set block and variable block sizes
376   if (dm_x == dm_y) {
377     MatType dm_mat_type, dm_mat_type_copy;
378     Mat     temp_mat;
379 
380     PetscCall(DMGetMatType(dm_x, &dm_mat_type));
381     PetscCall(PetscStrallocpy(dm_mat_type, (char **)&dm_mat_type_copy));
382     PetscCall(DMSetMatType(dm_x, MATAIJ));
383     PetscCall(DMCreateMatrix(dm_x, &temp_mat));
384     PetscCall(DMSetMatType(dm_x, dm_mat_type_copy));
385     PetscCall(PetscFree(dm_mat_type_copy));
386 
387     {
388       PetscInt        block_size, num_blocks, max_vblock_size = PETSC_INT_MAX;
389       const PetscInt *vblock_sizes;
390 
391       // -- Get block sizes
392       PetscCall(MatGetBlockSize(temp_mat, &block_size));
393       PetscCall(MatGetVariableBlockSizes(temp_mat, &num_blocks, &vblock_sizes));
394       {
395         PetscInt local_min_max[2] = {0}, global_min_max[2] = {0, PETSC_INT_MAX};
396 
397         for (PetscInt i = 0; i < num_blocks; i++) local_min_max[1] = PetscMax(local_min_max[1], vblock_sizes[i]);
398         PetscCall(PetscGlobalMinMaxInt(PetscObjectComm((PetscObject)dm_x), local_min_max, global_min_max));
399         max_vblock_size = global_min_max[1];
400       }
401 
402       // -- Copy block sizes
403       if (block_size > 1) PetscCall(MatSetBlockSize(*mat, block_size));
404       if (num_blocks) PetscCall(MatSetVariableBlockSizes(*mat, num_blocks, (PetscInt *)vblock_sizes));
405 
406       // -- Check libCEED compatibility
407       {
408         bool is_composite;
409 
410         ctx->is_ceed_pbd_valid  = PETSC_TRUE;
411         ctx->is_ceed_vpbd_valid = PETSC_TRUE;
412         PetscCallCeed(ctx->ceed, CeedOperatorIsComposite(op_mult, &is_composite));
413         if (is_composite) {
414           CeedInt       num_sub_operators;
415           CeedOperator *sub_operators;
416 
417           PetscCallCeed(ctx->ceed, CeedCompositeOperatorGetNumSub(op_mult, &num_sub_operators));
418           PetscCallCeed(ctx->ceed, CeedCompositeOperatorGetSubList(op_mult, &sub_operators));
419           for (CeedInt i = 0; i < num_sub_operators; i++) {
420             CeedInt                  num_bases, num_comp;
421             CeedBasis               *active_bases;
422             CeedOperatorAssemblyData assembly_data;
423 
424             PetscCallCeed(ctx->ceed, CeedOperatorGetOperatorAssemblyData(sub_operators[i], &assembly_data));
425             PetscCallCeed(ctx->ceed, CeedOperatorAssemblyDataGetBases(assembly_data, &num_bases, &active_bases, NULL, NULL, NULL, NULL));
426             PetscCallCeed(ctx->ceed, CeedBasisGetNumComponents(active_bases[0], &num_comp));
427             if (num_bases > 1) {
428               ctx->is_ceed_pbd_valid  = PETSC_FALSE;
429               ctx->is_ceed_vpbd_valid = PETSC_FALSE;
430             }
431             if (num_comp != block_size) ctx->is_ceed_pbd_valid = PETSC_FALSE;
432             if (num_comp < max_vblock_size) ctx->is_ceed_vpbd_valid = PETSC_FALSE;
433           }
434         } else {
435           // LCOV_EXCL_START
436           CeedInt                  num_bases, num_comp;
437           CeedBasis               *active_bases;
438           CeedOperatorAssemblyData assembly_data;
439 
440           PetscCallCeed(ctx->ceed, CeedOperatorGetOperatorAssemblyData(op_mult, &assembly_data));
441           PetscCallCeed(ctx->ceed, CeedOperatorAssemblyDataGetBases(assembly_data, &num_bases, &active_bases, NULL, NULL, NULL, NULL));
442           PetscCallCeed(ctx->ceed, CeedBasisGetNumComponents(active_bases[0], &num_comp));
443           if (num_bases > 1) {
444             ctx->is_ceed_pbd_valid  = PETSC_FALSE;
445             ctx->is_ceed_vpbd_valid = PETSC_FALSE;
446           }
447           if (num_comp != block_size) ctx->is_ceed_pbd_valid = PETSC_FALSE;
448           if (num_comp < max_vblock_size) ctx->is_ceed_vpbd_valid = PETSC_FALSE;
449           // LCOV_EXCL_STOP
450         }
451         {
452           PetscInt local_is_valid[2], global_is_valid[2];
453 
454           local_is_valid[0] = local_is_valid[1] = ctx->is_ceed_pbd_valid;
455           PetscCall(PetscGlobalMinMaxInt(PetscObjectComm((PetscObject)dm_x), local_is_valid, global_is_valid));
456           ctx->is_ceed_pbd_valid = global_is_valid[0];
457           local_is_valid[0] = local_is_valid[1] = ctx->is_ceed_vpbd_valid;
458           PetscCall(PetscGlobalMinMaxInt(PetscObjectComm((PetscObject)dm_x), local_is_valid, global_is_valid));
459           ctx->is_ceed_vpbd_valid = global_is_valid[0];
460         }
461       }
462     }
463     PetscCall(MatDestroy(&temp_mat));
464   }
465   // -- Set internal mat type
466   {
467     VecType vec_type;
468     MatType coo_mat_type;
469 
470     PetscCall(VecGetType(ctx->X_loc, &vec_type));
471     if (strstr(vec_type, VECCUDA)) coo_mat_type = MATAIJCUSPARSE;
472     else if (strstr(vec_type, VECKOKKOS)) coo_mat_type = MATAIJKOKKOS;
473     else coo_mat_type = MATAIJ;
474     PetscCall(PetscStrallocpy(coo_mat_type, &ctx->coo_mat_type));
475   }
476   // -- Set mat operations
477   PetscCall(MatShellSetContextDestroy(*mat, (PetscErrorCode(*)(void *))MatCeedContextDestroy));
478   PetscCall(MatShellSetOperation(*mat, MATOP_VIEW, (void (*)(void))MatView_Ceed));
479   PetscCall(MatShellSetOperation(*mat, MATOP_MULT, (void (*)(void))MatMult_Ceed));
480   if (op_mult_transpose) PetscCall(MatShellSetOperation(*mat, MATOP_MULT_TRANSPOSE, (void (*)(void))MatMultTranspose_Ceed));
481   PetscCall(MatShellSetOperation(*mat, MATOP_GET_DIAGONAL, (void (*)(void))MatGetDiagonal_Ceed));
482   PetscCall(MatShellSetOperation(*mat, MATOP_GET_DIAGONAL_BLOCK, (void (*)(void))MatGetDiagonalBlock_Ceed));
483   PetscCall(MatShellSetOperation(*mat, MATOP_INVERT_BLOCK_DIAGONAL, (void (*)(void))MatInvertBlockDiagonal_Ceed));
484   PetscCall(MatShellSetOperation(*mat, MATOP_INVERT_VBLOCK_DIAGONAL, (void (*)(void))MatInvertVariableBlockDiagonal_Ceed));
485   PetscCall(MatShellSetVecType(*mat, vec_type));
486   PetscFunctionReturn(PETSC_SUCCESS);
487 }
488 
489 /**
490   @brief Copy `MATCEED` into a compatible `Mat` with type `MatShell` or `MATCEED`.
491 
492   Collective across MPI processes.
493 
494   @param[in]   mat_ceed   `MATCEED` to copy from
495   @param[out]  mat_other  `MatShell` or `MATCEED` to copy into
496 
497   @return An error code: 0 - success, otherwise - failure
498 **/
499 PetscErrorCode MatCeedCopy(Mat mat_ceed, Mat mat_other) {
500   PetscFunctionBeginUser;
501   PetscCall(MatCeedRegisterLogEvents());
502 
503   // Check type compatibility
504   {
505     PetscBool is_matceed = PETSC_FALSE, is_matshell = PETSC_FALSE;
506     MatType   mat_type_ceed, mat_type_other;
507 
508     PetscCall(MatGetType(mat_ceed, &mat_type_ceed));
509     PetscCall(PetscStrcmp(mat_type_ceed, MATCEED, &is_matceed));
510     PetscCheck(is_matceed, PETSC_COMM_SELF, PETSC_ERR_LIB, "mat_ceed must have type " MATCEED);
511     PetscCall(MatGetType(mat_other, &mat_type_other));
512     PetscCall(PetscStrcmp(mat_type_other, MATCEED, &is_matceed));
513     PetscCall(PetscStrcmp(mat_type_other, MATSHELL, &is_matceed));
514     PetscCheck(is_matceed || is_matshell, PETSC_COMM_SELF, PETSC_ERR_LIB, "mat_other must have type " MATCEED " or " MATSHELL);
515   }
516 
517   // Check dimension compatibility
518   {
519     PetscInt X_l_ceed_size, X_g_ceed_size, Y_l_ceed_size, Y_g_ceed_size, X_l_other_size, X_g_other_size, Y_l_other_size, Y_g_other_size;
520 
521     PetscCall(MatGetSize(mat_ceed, &Y_g_ceed_size, &X_g_ceed_size));
522     PetscCall(MatGetLocalSize(mat_ceed, &Y_l_ceed_size, &X_l_ceed_size));
523     PetscCall(MatGetSize(mat_ceed, &Y_g_other_size, &X_g_other_size));
524     PetscCall(MatGetLocalSize(mat_ceed, &Y_l_other_size, &X_l_other_size));
525     PetscCheck((Y_g_ceed_size == Y_g_other_size) && (X_g_ceed_size == X_g_other_size) && (Y_l_ceed_size == Y_l_other_size) &&
526                    (X_l_ceed_size == X_l_other_size),
527                PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ,
528                "mat_ceed and mat_other must have compatible sizes; found mat_ceed (Global: %" PetscInt_FMT ", %" PetscInt_FMT
529                "; Local: %" PetscInt_FMT ", %" PetscInt_FMT ") mat_other (Global: %" PetscInt_FMT ", %" PetscInt_FMT "; Local: %" PetscInt_FMT
530                ", %" PetscInt_FMT ")",
531                Y_g_ceed_size, X_g_ceed_size, Y_l_ceed_size, X_l_ceed_size, Y_g_other_size, X_g_other_size, Y_l_other_size, X_l_other_size);
532   }
533 
534   // Convert
535   {
536     VecType        vec_type;
537     MatCeedContext ctx;
538 
539     PetscCall(PetscObjectChangeTypeName((PetscObject)mat_other, MATCEED));
540     PetscCall(MatShellGetContext(mat_ceed, &ctx));
541     PetscCall(MatCeedContextReference(ctx));
542     PetscCall(MatShellSetContext(mat_other, ctx));
543     PetscCall(MatShellSetContextDestroy(mat_other, (PetscErrorCode(*)(void *))MatCeedContextDestroy));
544     PetscCall(MatShellSetOperation(mat_other, MATOP_VIEW, (void (*)(void))MatView_Ceed));
545     PetscCall(MatShellSetOperation(mat_other, MATOP_MULT, (void (*)(void))MatMult_Ceed));
546     if (ctx->op_mult_transpose) PetscCall(MatShellSetOperation(mat_other, MATOP_MULT_TRANSPOSE, (void (*)(void))MatMultTranspose_Ceed));
547     PetscCall(MatShellSetOperation(mat_other, MATOP_GET_DIAGONAL, (void (*)(void))MatGetDiagonal_Ceed));
548     PetscCall(MatShellSetOperation(mat_other, MATOP_GET_DIAGONAL_BLOCK, (void (*)(void))MatGetDiagonalBlock_Ceed));
549     PetscCall(MatShellSetOperation(mat_other, MATOP_INVERT_BLOCK_DIAGONAL, (void (*)(void))MatInvertBlockDiagonal_Ceed));
550     PetscCall(MatShellSetOperation(mat_other, MATOP_INVERT_VBLOCK_DIAGONAL, (void (*)(void))MatInvertVariableBlockDiagonal_Ceed));
551     {
552       PetscInt block_size;
553 
554       PetscCall(MatGetBlockSize(mat_ceed, &block_size));
555       if (block_size > 1) PetscCall(MatSetBlockSize(mat_other, block_size));
556     }
557     {
558       PetscInt        num_blocks;
559       const PetscInt *block_sizes;
560 
561       PetscCall(MatGetVariableBlockSizes(mat_ceed, &num_blocks, &block_sizes));
562       if (num_blocks) PetscCall(MatSetVariableBlockSizes(mat_other, num_blocks, (PetscInt *)block_sizes));
563     }
564     PetscCall(DMGetVecType(ctx->dm_x, &vec_type));
565     PetscCall(MatShellSetVecType(mat_other, vec_type));
566   }
567   PetscFunctionReturn(PETSC_SUCCESS);
568 }
569 
570 /**
571   @brief Mark `CeedQFunction` data as updated and the `CeedQFunction` as requiring re-assembly for a `MatCEED`.
572 
573   Collective across MPI processes.
574 
575   @param[in]   mat_ceed       `MATCEED`
576   @param[out]  update_needed  Boolean flag indicating `CeedQFunction` update needed
577 
578   @return An error code: 0 - success, otherwise - failure
579 **/
580 PetscErrorCode MatCeedSetAssemblyDataUpdateNeeded(Mat mat_ceed, PetscBool update_needed) {
581   MatCeedContext ctx;
582 
583   PetscFunctionBeginUser;
584   PetscCall(MatShellGetContext(mat_ceed, &ctx));
585   PetscCallCeed(ctx->ceed, CeedOperatorSetQFunctionAssemblyDataUpdateNeeded(ctx->op_mult, update_needed));
586   if (ctx->op_mult_transpose) {
587     PetscCallCeed(ctx->ceed, CeedOperatorSetQFunctionAssemblyDataUpdateNeeded(ctx->op_mult_transpose, update_needed));
588   }
589   if (update_needed) {
590     PetscCall(MatAssemblyBegin(mat_ceed, MAT_FINAL_ASSEMBLY));
591     PetscCall(MatAssemblyEnd(mat_ceed, MAT_FINAL_ASSEMBLY));
592   }
593   PetscFunctionReturn(PETSC_SUCCESS);
594 }
595 
596 /**
597   @brief Setup a `Mat` with the same COO pattern as a `MatCEED`.
598 
599   Collective across MPI processes.
600 
601   @param[in]   mat_ceed  `MATCEED`
602   @param[out]  mat_coo   Sparse `Mat` with same COO pattern
603 
604   @return An error code: 0 - success, otherwise - failure
605 **/
606 PetscErrorCode MatCeedCreateMatCOO(Mat mat_ceed, Mat *mat_coo) {
607   MatCeedContext ctx;
608 
609   PetscFunctionBeginUser;
610   PetscCall(MatShellGetContext(mat_ceed, &ctx));
611 
612   PetscCheck(ctx->dm_x == ctx->dm_y, PetscObjectComm((PetscObject)mat_ceed), PETSC_ERR_SUP, "COO assembly only supported for MATCEED on a single DM");
613 
614   // Check cl mat type
615   {
616     PetscBool is_coo_mat_type_cl = PETSC_FALSE;
617     char      coo_mat_type_cl[64];
618 
619     // Check for specific CL coo mat type for this Mat
620     {
621       const char *mat_ceed_prefix = NULL;
622 
623       PetscCall(MatGetOptionsPrefix(mat_ceed, &mat_ceed_prefix));
624       PetscOptionsBegin(PetscObjectComm((PetscObject)mat_ceed), mat_ceed_prefix, "", NULL);
625       PetscCall(PetscOptionsFList("-ceed_coo_mat_type", "Default MATCEED COO assembly MatType", NULL, MatList, coo_mat_type_cl, coo_mat_type_cl,
626                                   sizeof(coo_mat_type_cl), &is_coo_mat_type_cl));
627       PetscOptionsEnd();
628       if (is_coo_mat_type_cl) {
629         PetscCall(PetscFree(ctx->coo_mat_type));
630         PetscCall(PetscStrallocpy(coo_mat_type_cl, &ctx->coo_mat_type));
631       }
632     }
633   }
634 
635   // Create sparse matrix
636   {
637     MatType dm_mat_type, dm_mat_type_copy;
638 
639     PetscCall(DMGetMatType(ctx->dm_x, &dm_mat_type));
640     PetscCall(PetscStrallocpy(dm_mat_type, (char **)&dm_mat_type_copy));
641     PetscCall(DMSetMatType(ctx->dm_x, ctx->coo_mat_type));
642     PetscCall(DMCreateMatrix(ctx->dm_x, mat_coo));
643     PetscCall(DMSetMatType(ctx->dm_x, dm_mat_type_copy));
644     PetscCall(PetscFree(dm_mat_type_copy));
645   }
646   PetscFunctionReturn(PETSC_SUCCESS);
647 }
648 
649 /**
650   @brief Setup the COO preallocation `MATCEED` into a `MATAIJ` or similar.
651          The caller is responsible for assuring the global and local sizes are compatible, otherwise this function will fail.
652 
653   Collective across MPI processes.
654 
655   @param[in]      mat_ceed  `MATCEED` to assemble
656   @param[in,out]  mat_coo   `MATAIJ` or similar to assemble into
657 
658   @return An error code: 0 - success, otherwise - failure
659 **/
660 PetscErrorCode MatCeedSetPreallocationCOO(Mat mat_ceed, Mat mat_coo) {
661   MatCeedContext ctx;
662 
663   PetscFunctionBeginUser;
664   PetscCall(MatShellGetContext(mat_ceed, &ctx));
665 
666   {
667     PetscInt     *rows_petsc = NULL, *cols_petsc = NULL;
668     CeedInt      *rows_ceed, *cols_ceed;
669     PetscCount    num_entries;
670     PetscLogStage stage_amg_setup;
671 
672     // -- Assemble sparsity pattern if mat hasn't been assembled before
673     PetscCall(PetscLogStageGetId("MatCEED Asm Setup", &stage_amg_setup));
674     if (stage_amg_setup == -1) {
675       PetscCall(PetscLogStageRegister("MatCEED Asm Setup", &stage_amg_setup));
676     }
677     PetscCall(PetscLogStagePush(stage_amg_setup));
678     PetscCall(PetscLogEventBegin(MATCEED_SETUP_FULL, mat_ceed, mat_coo, NULL, NULL));
679     PetscCall(PetscLogEventBegin(MATCEED_SETUP_FULL_CEEDOP, mat_ceed, mat_coo, NULL, NULL));
680     PetscCallCeed(ctx->ceed, CeedOperatorLinearAssembleSymbolic(ctx->op_mult, &num_entries, &rows_ceed, &cols_ceed));
681     PetscCall(PetscLogEventEnd(MATCEED_SETUP_FULL_CEEDOP, mat_ceed, mat_coo, NULL, NULL));
682     PetscCall(IntArrayCeedToPetsc(num_entries, &rows_ceed, &rows_petsc));
683     PetscCall(IntArrayCeedToPetsc(num_entries, &cols_ceed, &cols_petsc));
684     PetscCall(MatSetPreallocationCOOLocal(mat_coo, num_entries, rows_petsc, cols_petsc));
685     free(rows_petsc);
686     free(cols_petsc);
687     if (!ctx->coo_values_full) PetscCallCeed(ctx->ceed, CeedVectorCreate(ctx->ceed, num_entries, &ctx->coo_values_full));
688     PetscCall(PetscRealloc(++ctx->num_mats_assembled_full * sizeof(Mat), &ctx->mats_assembled_full));
689     ctx->mats_assembled_full[ctx->num_mats_assembled_full - 1] = mat_coo;
690     PetscCall(PetscLogEventEnd(MATCEED_SETUP_FULL, mat_ceed, mat_coo, NULL, NULL));
691     PetscCall(PetscLogStagePop());
692   }
693   PetscFunctionReturn(PETSC_SUCCESS);
694 }
695 
696 /**
697   @brief Assemble a `MATCEED` into a `MATAIJ` or similar.
698          The `mat_coo` preallocation is set to match the sparsity pattern of `mat_ceed`.
699          The caller is responsible for assuring the global and local sizes are compatible, otherwise this function will fail.
700 
701   Collective across MPI processes.
702 
703   @param[in]      mat_ceed  `MATCEED` to assemble
704   @param[in,out]  mat_coo   `MATAIJ` or similar to assemble into
705 
706   @return An error code: 0 - success, otherwise - failure
707 **/
PetscErrorCode MatCeedAssembleCOO(Mat mat_ceed, Mat mat_coo) {
  MatCeedContext ctx;

  PetscFunctionBeginUser;
  PetscCall(MatShellGetContext(mat_ceed, &ctx));

  // Set COO pattern if needed
  {
    CeedInt index = -1;

    // Linear scan of previously set-up mats; the list is expected to stay small
    for (PetscInt i = 0; i < ctx->num_mats_assembled_full; i++) {
      if (ctx->mats_assembled_full[i] == mat_coo) index = i;
    }
    if (index == -1) PetscCall(MatCeedSetPreallocationCOO(mat_ceed, mat_coo));
  }

  // Assemble mat_ceed
  PetscCall(PetscLogEventBegin(MATCEED_ASSEMBLE_FULL, mat_ceed, mat_coo, NULL, NULL));
  PetscCall(MatAssemblyBegin(mat_coo, MAT_FINAL_ASSEMBLY));
  {
    const CeedScalar *values;
    MatType           mat_type;
    CeedMemType       mem_type = CEED_MEM_HOST;
    PetscBool         is_spd, is_spd_known;

    // For GPU matrix types request a device pointer so MatSetValuesCOO() avoids a host round trip
    PetscCall(MatGetType(mat_coo, &mat_type));
    if (strstr(mat_type, "cusparse")) mem_type = CEED_MEM_DEVICE;
    else if (strstr(mat_type, "kokkos")) mem_type = CEED_MEM_DEVICE;
    else mem_type = CEED_MEM_HOST;

    PetscCall(PetscLogEventBegin(MATCEED_ASSEMBLE_FULL_CEEDOP, mat_ceed, mat_coo, NULL, NULL));
    // Numeric assembly into the COO values vector created by MatCeedSetPreallocationCOO()
    PetscCallCeed(ctx->ceed, CeedOperatorLinearAssemble(ctx->op_mult, ctx->coo_values_full));
    PetscCall(PetscLogEventEnd(MATCEED_ASSEMBLE_FULL_CEEDOP, mat_ceed, mat_coo, NULL, NULL));
    PetscCallCeed(ctx->ceed, CeedVectorGetArrayRead(ctx->coo_values_full, mem_type, &values));
    PetscCall(MatSetValuesCOO(mat_coo, values, INSERT_VALUES));
    // Propagate SPD information so downstream solvers can pick appropriate algorithms
    PetscCall(MatIsSPDKnown(mat_ceed, &is_spd_known, &is_spd));
    if (is_spd_known) PetscCall(MatSetOption(mat_coo, MAT_SPD, is_spd));
    PetscCallCeed(ctx->ceed, CeedVectorRestoreArrayRead(ctx->coo_values_full, &values));
  }
  PetscCall(MatAssemblyEnd(mat_coo, MAT_FINAL_ASSEMBLY));
  PetscCall(PetscLogEventEnd(MATCEED_ASSEMBLE_FULL, mat_ceed, mat_coo, NULL, NULL));
  PetscFunctionReturn(PETSC_SUCCESS);
}
751 
752 /**
753   @brief Set the current value of a context field for a `MatCEED`.
754 
755   Not collective across MPI processes.
756 
757   @param[in,out]  mat    `MatCEED`
758   @param[in]      name   Name of the context field
759   @param[in]      value  New context field value
760 
761   @return An error code: 0 - success, otherwise - failure
762 **/
PetscErrorCode MatCeedSetContextDouble(Mat mat, const char *name, double value) {
  PetscBool      was_updated = PETSC_FALSE;
  MatCeedContext ctx;

  PetscFunctionBeginUser;
  PetscCall(MatShellGetContext(mat, &ctx));
  {
    CeedContextFieldLabel label = NULL;

    PetscCallCeed(ctx->ceed, CeedOperatorGetContextFieldLabel(ctx->op_mult, name, &label));
    if (label) {
      // Seed set_value with something that differs from `value` (for typical inputs),
      // so the field is written if the read below does not overwrite it
      double set_value = 2 * value + 1.0;

      PetscCall(MatCeedGetContextDouble(mat, name, &set_value));
      if (set_value != value) {
        PetscCallCeed(ctx->ceed, CeedOperatorSetContextDouble(ctx->op_mult, label, &value));
        was_updated = PETSC_TRUE;
      }
    }
    if (ctx->op_mult_transpose) {
      label = NULL;
      PetscCallCeed(ctx->ceed, CeedOperatorGetContextFieldLabel(ctx->op_mult_transpose, name, &label));
      if (label) {
        double set_value = 2 * value + 1.0;

        // NOTE(review): MatCeedGetContextDouble() reads from op_mult first, so after the update above this
        // read returns the new value and the transpose set is skipped; that is only correct when both
        // operators share the same QFunction context object -- confirm that assumption holds for all callers
        PetscCall(MatCeedGetContextDouble(mat, name, &set_value));
        if (set_value != value) {
          PetscCallCeed(ctx->ceed, CeedOperatorSetContextDouble(ctx->op_mult_transpose, label, &value));
          was_updated = PETSC_TRUE;
        }
      }
    }
  }
  // Bump the matrix assembly state so cached assemblies/preconditioners see the operator changed
  if (was_updated) {
    PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
802 
803 /**
804   @brief Get the current value of a context field for a `MatCEED`.
805 
806   Not collective across MPI processes.
807 
808   @param[in]   mat    `MatCEED`
809   @param[in]   name   Name of the context field
810   @param[out]  value  Current context field value
811 
812   @return An error code: 0 - success, otherwise - failure
813 **/
814 PetscErrorCode MatCeedGetContextDouble(Mat mat, const char *name, double *value) {
815   MatCeedContext ctx;
816 
817   PetscFunctionBeginUser;
818   PetscCall(MatShellGetContext(mat, &ctx));
819   {
820     CeedContextFieldLabel label = NULL;
821     CeedOperator          op    = ctx->op_mult;
822 
823     PetscCallCeed(ctx->ceed, CeedOperatorGetContextFieldLabel(op, name, &label));
824     if (!label && ctx->op_mult_transpose) {
825       op = ctx->op_mult_transpose;
826       PetscCallCeed(ctx->ceed, CeedOperatorGetContextFieldLabel(op, name, &label));
827     }
828     if (label) {
829       PetscSizeT    num_values;
830       const double *values_ceed;
831 
832       PetscCallCeed(ctx->ceed, CeedOperatorGetContextDoubleRead(op, label, &num_values, &values_ceed));
833       *value = values_ceed[0];
834       PetscCallCeed(ctx->ceed, CeedOperatorRestoreContextDoubleRead(op, label, &values_ceed));
835     }
836   }
837   PetscFunctionReturn(PETSC_SUCCESS);
838 }
839 
840 /**
841   @brief Set the current `PetscReal` value of a context field for a `MatCEED`.
842 
843   Not collective across MPI processes.
844 
845   @param[in,out]  mat    `MatCEED`
846   @param[in]      name   Name of the context field
847   @param[in]      value  New context field value
848 
849   @return An error code: 0 - success, otherwise - failure
850 **/
851 PetscErrorCode MatCeedSetContextReal(Mat mat, const char *name, PetscReal value) {
852   double value_double = value;
853 
854   PetscFunctionBeginUser;
855   PetscCall(MatCeedSetContextDouble(mat, name, value_double));
856   PetscFunctionReturn(PETSC_SUCCESS);
857 }
858 
859 /**
860   @brief Get the current `PetscReal` value of a context field for a `MatCEED`.
861 
862   Not collective across MPI processes.
863 
864   @param[in]   mat    `MatCEED`
865   @param[in]   name   Name of the context field
866   @param[out]  value  Current context field value
867 
868   @return An error code: 0 - success, otherwise - failure
869 **/
870 PetscErrorCode MatCeedGetContextReal(Mat mat, const char *name, PetscReal *value) {
871   double value_double = 0.0;
872 
873   PetscFunctionBeginUser;
874   PetscCall(MatCeedGetContextDouble(mat, name, &value_double));
875   *value = value_double;
876   PetscFunctionReturn(PETSC_SUCCESS);
877 }
878 
879 /**
880   @brief Set the current time for a `MatCEED`.
881 
882   Not collective across MPI processes.
883 
884   @param[in,out]  mat   `MatCEED`
885   @param[in]      time  Current time
886 
887   @return An error code: 0 - success, otherwise - failure
888 **/
889 PetscErrorCode MatCeedSetTime(Mat mat, PetscReal time) {
890   PetscFunctionBeginUser;
891   {
892     double time_ceed = time;
893 
894     PetscCall(MatCeedSetContextDouble(mat, "time", time_ceed));
895   }
896   PetscFunctionReturn(PETSC_SUCCESS);
897 }
898 
899 /**
900   @brief Get the current time for a `MatCEED`.
901 
902   Not collective across MPI processes.
903 
904   @param[in]   mat   `MatCEED`
905   @param[out]  time  Current time, or -1.0 if the boundary evaluator has no time field
906 
907   @return An error code: 0 - success, otherwise - failure
908 **/
909 PetscErrorCode MatCeedGetTime(Mat mat, PetscReal *time) {
910   PetscFunctionBeginUser;
911   *time = -1.0;
912   {
913     double time_ceed = -1.0;
914 
915     PetscCall(MatCeedGetContextDouble(mat, "time", &time_ceed));
916     *time = time_ceed;
917   }
918   PetscFunctionReturn(PETSC_SUCCESS);
919 }
920 
921 /**
922   @brief Set the current time step for a `MatCEED`.
923 
924   Not collective across MPI processes.
925 
926   @param[in,out]  mat  `MatCEED`
927   @param[in]      dt   Current time step
928 
929   @return An error code: 0 - success, otherwise - failure
930 **/
931 PetscErrorCode MatCeedSetDt(Mat mat, PetscReal dt) {
932   PetscFunctionBeginUser;
933   {
934     double dt_ceed = dt;
935 
936     PetscCall(MatCeedSetContextDouble(mat, "dt", dt_ceed));
937   }
938   PetscFunctionReturn(PETSC_SUCCESS);
939 }
940 
941 /**
942   @brief Set the Jacobian shifts for a `MatCEED`.
943 
944   Not collective across MPI processes.
945 
946   @param[in,out]  mat      `MatCEED`
947   @param[in]      shift_v  Velocity shift
948   @param[in]      shift_a  Acceleration shift
949 
950   @return An error code: 0 - success, otherwise - failure
951 **/
952 PetscErrorCode MatCeedSetShifts(Mat mat, PetscReal shift_v, PetscReal shift_a) {
953   PetscFunctionBeginUser;
954   {
955     double shift_v_ceed = shift_v;
956 
957     PetscCall(MatCeedSetContextDouble(mat, "shift v", shift_v_ceed));
958   }
959   if (shift_a) {
960     double shift_a_ceed = shift_a;
961 
962     PetscCall(MatCeedSetContextDouble(mat, "shift a", shift_a_ceed));
963   }
964   PetscFunctionReturn(PETSC_SUCCESS);
965 }
966 
967 /**
968   @brief Set user context for a `MATCEED`.
969 
970   Collective across MPI processes.
971 
972   @param[in,out]  mat  `MATCEED`
973   @param[in]      f    The context destroy function, or NULL
974   @param[in]      ctx  User context, or NULL to unset
975 
976   @return An error code: 0 - success, otherwise - failure
977 **/
978 PetscErrorCode MatCeedSetContext(Mat mat, PetscErrorCode (*f)(void *), void *ctx) {
979   PetscContainer user_ctx = NULL;
980 
981   PetscFunctionBeginUser;
982   if (ctx) {
983     PetscCall(PetscContainerCreate(PetscObjectComm((PetscObject)mat), &user_ctx));
984     PetscCall(PetscContainerSetPointer(user_ctx, ctx));
985     PetscCall(PetscContainerSetUserDestroy(user_ctx, f));
986   }
987   PetscCall(PetscObjectCompose((PetscObject)mat, "MatCeed user context", (PetscObject)user_ctx));
988   PetscCall(PetscContainerDestroy(&user_ctx));
989   PetscFunctionReturn(PETSC_SUCCESS);
990 }
991 
992 /**
993   @brief Retrieve the user context for a `MATCEED`.
994 
995   Collective across MPI processes.
996 
997   @param[in,out]  mat  `MATCEED`
  @param[out]     ctx  User context, set to NULL if no context was attached
999 
1000   @return An error code: 0 - success, otherwise - failure
1001 **/
1002 PetscErrorCode MatCeedGetContext(Mat mat, void *ctx) {
1003   PetscContainer user_ctx;
1004 
1005   PetscFunctionBeginUser;
1006   PetscCall(PetscObjectQuery((PetscObject)mat, "MatCeed user context", (PetscObject *)&user_ctx));
1007   if (user_ctx) PetscCall(PetscContainerGetPointer(user_ctx, (void **)ctx));
1008   else *(void **)ctx = NULL;
1009   PetscFunctionReturn(PETSC_SUCCESS);
1010 }
1011 /**
1012   @brief Set a user defined matrix operation for a `MATCEED` matrix.
1013 
1014   Within each user-defined routine, the user should call `MatCeedGetContext()` to obtain the user-defined context that was set by
1015 `MatCeedSetContext()`.
1016 
1017   Collective across MPI processes.
1018 
1019   @param[in,out]  mat  `MATCEED`
1020   @param[in]      op   Name of the `MatOperation`
1021   @param[in]      g    Function that provides the operation
1022 
1023   @return An error code: 0 - success, otherwise - failure
1024 **/
1025 PetscErrorCode MatCeedSetOperation(Mat mat, MatOperation op, void (*g)(void)) {
1026   PetscFunctionBeginUser;
1027   PetscCall(MatShellSetOperation(mat, op, g));
1028   PetscFunctionReturn(PETSC_SUCCESS);
1029 }
1030 
1031 /**
1032   @brief Sets the default COO matrix type as a string from the `MATCEED`.
1033 
1034   Collective across MPI processes.
1035 
1036   @param[in,out]  mat   `MATCEED`
1037   @param[in]      type  COO `MatType` to set
1038 
1039   @return An error code: 0 - success, otherwise - failure
1040 **/
PetscErrorCode MatCeedSetCOOMatType(Mat mat, MatType type) {
  MatCeedContext ctx;

  PetscFunctionBeginUser;
  PetscCall(MatShellGetContext(mat, &ctx));
  // Check if same
  {
    size_t    len_old, len_new;
    PetscBool is_same = PETSC_FALSE;

    PetscCall(PetscStrlen(ctx->coo_mat_type, &len_old));
    PetscCall(PetscStrlen(type, &len_new));
    if (len_old == len_new) PetscCall(PetscStrncmp(ctx->coo_mat_type, type, len_old, &is_same));
    // Early return: nothing to do when the requested type matches the current one
    if (is_same) PetscFunctionReturn(PETSC_SUCCESS);
  }
  // Clean up old mats in different format
  // Remove the internally owned assembled mat from the tracking list by shifting later entries left.
  // The internal mat appears at most once, so the shifted entry skipped by the loop increment is harmless.
  // LCOV_EXCL_START
  if (ctx->mat_assembled_full_internal) {
    for (PetscInt i = 0; i < ctx->num_mats_assembled_full; i++) {
      if (ctx->mats_assembled_full[i] == ctx->mat_assembled_full_internal) {
        for (PetscInt j = i + 1; j < ctx->num_mats_assembled_full; j++) {
          ctx->mats_assembled_full[j - 1] = ctx->mats_assembled_full[j];
        }
        ctx->num_mats_assembled_full--;
        // Note: we'll realloc this array again, so no need to shrink the allocation
        PetscCall(MatDestroy(&ctx->mat_assembled_full_internal));
      }
    }
  }
  // Same compaction for the point-block-diagonal assembled mat
  if (ctx->mat_assembled_pbd_internal) {
    for (PetscInt i = 0; i < ctx->num_mats_assembled_pbd; i++) {
      if (ctx->mats_assembled_pbd[i] == ctx->mat_assembled_pbd_internal) {
        for (PetscInt j = i + 1; j < ctx->num_mats_assembled_pbd; j++) {
          ctx->mats_assembled_pbd[j - 1] = ctx->mats_assembled_pbd[j];
        }
        // Note: we'll realloc this array again, so no need to shrink the allocation
        ctx->num_mats_assembled_pbd--;
        PetscCall(MatDestroy(&ctx->mat_assembled_pbd_internal));
      }
    }
  }
  // Store a private copy of the new type string
  PetscCall(PetscFree(ctx->coo_mat_type));
  PetscCall(PetscStrallocpy(type, &ctx->coo_mat_type));
  PetscFunctionReturn(PETSC_SUCCESS);
  // LCOV_EXCL_STOP
}
1087 
1088 /**
1089   @brief Gets the default COO matrix type as a string from the `MATCEED`.
1090 
1091   Collective across MPI processes.
1092 
1093   @param[in,out]  mat   `MATCEED`
1094   @param[in]      type  COO `MatType`
1095 
1096   @return An error code: 0 - success, otherwise - failure
1097 **/
1098 PetscErrorCode MatCeedGetCOOMatType(Mat mat, MatType *type) {
1099   MatCeedContext ctx;
1100 
1101   PetscFunctionBeginUser;
1102   PetscCall(MatShellGetContext(mat, &ctx));
1103   *type = ctx->coo_mat_type;
1104   PetscFunctionReturn(PETSC_SUCCESS);
1105 }
1106 
1107 /**
1108   @brief Set input local vectors for `MATCEED` `MatMult()` and `MatMultTranspose()` operations.
1109 
1110   Not collective across MPI processes.
1111 
1112   @param[in,out]  mat              `MATCEED`
1113   @param[in]      X_loc            Input PETSc local vector, or NULL
1114   @param[in]      Y_loc_transpose  Input PETSc local vector for transpose operation, or NULL
1115 
1116   @return An error code: 0 - success, otherwise - failure
1117 **/
1118 PetscErrorCode MatCeedSetLocalVectors(Mat mat, Vec X_loc, Vec Y_loc_transpose) {
1119   MatCeedContext ctx;
1120 
1121   PetscFunctionBeginUser;
1122   PetscCall(MatShellGetContext(mat, &ctx));
1123   if (X_loc) {
1124     PetscInt len_old, len_new;
1125 
1126     PetscCall(VecGetSize(ctx->X_loc, &len_old));
1127     PetscCall(VecGetSize(X_loc, &len_new));
1128     PetscCheck(len_old == len_new, PETSC_COMM_SELF, PETSC_ERR_LIB, "new X_loc length %" PetscInt_FMT " should match old X_loc length %" PetscInt_FMT,
1129                len_new, len_old);
1130     PetscCall(VecReferenceCopy(X_loc, &ctx->X_loc));
1131   }
1132   if (Y_loc_transpose) {
1133     PetscInt len_old, len_new;
1134 
1135     PetscCall(VecGetSize(ctx->Y_loc_transpose, &len_old));
1136     PetscCall(VecGetSize(Y_loc_transpose, &len_new));
1137     PetscCheck(len_old == len_new, PETSC_COMM_SELF, PETSC_ERR_LIB,
1138                "new Y_loc_transpose length %" PetscInt_FMT " should match old Y_loc_transpose length %" PetscInt_FMT, len_new, len_old);
1139     PetscCall(VecReferenceCopy(Y_loc_transpose, &ctx->Y_loc_transpose));
1140   }
1141   PetscFunctionReturn(PETSC_SUCCESS);
1142 }
1143 
1144 /**
1145   @brief Get input local vectors for `MATCEED` `MatMult()` and `MatMultTranspose()` operations.
1146 
1147   Not collective across MPI processes.
1148 
1149   @param[in,out]  mat              `MATCEED`
1150   @param[out]     X_loc            Input PETSc local vector, or NULL
1151   @param[out]     Y_loc_transpose  Input PETSc local vector for transpose operation, or NULL
1152 
1153   @return An error code: 0 - success, otherwise - failure
1154 **/
1155 PetscErrorCode MatCeedGetLocalVectors(Mat mat, Vec *X_loc, Vec *Y_loc_transpose) {
1156   MatCeedContext ctx;
1157 
1158   PetscFunctionBeginUser;
1159   PetscCall(MatShellGetContext(mat, &ctx));
1160   if (X_loc) {
1161     *X_loc = NULL;
1162     PetscCall(VecReferenceCopy(ctx->X_loc, X_loc));
1163   }
1164   if (Y_loc_transpose) {
1165     *Y_loc_transpose = NULL;
1166     PetscCall(VecReferenceCopy(ctx->Y_loc_transpose, Y_loc_transpose));
1167   }
1168   PetscFunctionReturn(PETSC_SUCCESS);
1169 }
1170 
1171 /**
1172   @brief Restore input local vectors for `MATCEED` `MatMult()` and `MatMultTranspose()` operations.
1173 
1174   Not collective across MPI processes.
1175 
1176   @param[in,out]  mat              MatCeed
  @param[in,out]  X_loc            Input PETSc local vector to release, or NULL
  @param[in,out]  Y_loc_transpose  Input PETSc local vector for transpose operation to release, or NULL
1179 
1180   @return An error code: 0 - success, otherwise - failure
1181 **/
1182 PetscErrorCode MatCeedRestoreLocalVectors(Mat mat, Vec *X_loc, Vec *Y_loc_transpose) {
1183   PetscFunctionBeginUser;
1184   if (X_loc) PetscCall(VecDestroy(X_loc));
1185   if (Y_loc_transpose) PetscCall(VecDestroy(Y_loc_transpose));
1186   PetscFunctionReturn(PETSC_SUCCESS);
1187 }
1188 
1189 /**
1190   @brief Get libCEED `CeedOperator` for `MATCEED` `MatMult()` and `MatMultTranspose()` operations.
1191 
1192   Not collective across MPI processes.
1193 
1194   @param[in,out]  mat                MatCeed
1195   @param[out]     op_mult            libCEED `CeedOperator` for `MatMult()`, or NULL
1196   @param[out]     op_mult_transpose  libCEED `CeedOperator` for `MatMultTranspose()`, or NULL
1197 
1198   @return An error code: 0 - success, otherwise - failure
1199 **/
1200 PetscErrorCode MatCeedGetCeedOperators(Mat mat, CeedOperator *op_mult, CeedOperator *op_mult_transpose) {
1201   MatCeedContext ctx;
1202 
1203   PetscFunctionBeginUser;
1204   PetscCall(MatShellGetContext(mat, &ctx));
1205   if (op_mult) {
1206     *op_mult = NULL;
1207     PetscCallCeed(ctx->ceed, CeedOperatorReferenceCopy(ctx->op_mult, op_mult));
1208   }
1209   if (op_mult_transpose) {
1210     *op_mult_transpose = NULL;
1211     PetscCallCeed(ctx->ceed, CeedOperatorReferenceCopy(ctx->op_mult_transpose, op_mult_transpose));
1212   }
1213   PetscFunctionReturn(PETSC_SUCCESS);
1214 }
1215 
1216 /**
1217   @brief Restore libCEED `CeedOperator` for `MATCEED` `MatMult()` and `MatMultTranspose()` operations.
1218 
1219   Not collective across MPI processes.
1220 
1221   @param[in,out]  mat                MatCeed
  @param[in,out]  op_mult            libCEED `CeedOperator` for `MatMult()` to release, or NULL
  @param[in,out]  op_mult_transpose  libCEED `CeedOperator` for `MatMultTranspose()` to release, or NULL
1224 
1225   @return An error code: 0 - success, otherwise - failure
1226 **/
1227 PetscErrorCode MatCeedRestoreCeedOperators(Mat mat, CeedOperator *op_mult, CeedOperator *op_mult_transpose) {
1228   MatCeedContext ctx;
1229 
1230   PetscFunctionBeginUser;
1231   PetscCall(MatShellGetContext(mat, &ctx));
1232   if (op_mult) PetscCallCeed(ctx->ceed, CeedOperatorDestroy(op_mult));
1233   if (op_mult_transpose) PetscCallCeed(ctx->ceed, CeedOperatorDestroy(op_mult_transpose));
1234   PetscFunctionReturn(PETSC_SUCCESS);
1235 }
1236 
1237 /**
1238   @brief Set `PetscLogEvent` for `MATCEED` `MatMult()` and `MatMultTranspose()` operators.
1239 
1240   Not collective across MPI processes.
1241 
1242   @param[in,out]  mat                       MatCeed
  @param[in]      log_event_mult            `PetscLogEvent` for forward evaluation, or 0 to leave unchanged
  @param[in]      log_event_mult_transpose  `PetscLogEvent` for transpose evaluation, or 0 to leave unchanged
1245 
1246   @return An error code: 0 - success, otherwise - failure
1247 **/
1248 PetscErrorCode MatCeedSetLogEvents(Mat mat, PetscLogEvent log_event_mult, PetscLogEvent log_event_mult_transpose) {
1249   MatCeedContext ctx;
1250 
1251   PetscFunctionBeginUser;
1252   PetscCall(MatShellGetContext(mat, &ctx));
1253   if (log_event_mult) ctx->log_event_mult = log_event_mult;
1254   if (log_event_mult_transpose) ctx->log_event_mult_transpose = log_event_mult_transpose;
1255   PetscFunctionReturn(PETSC_SUCCESS);
1256 }
1257 
1258 /**
1259   @brief Get `PetscLogEvent` for `MATCEED` `MatMult()` and `MatMultTranspose()` operators.
1260 
1261   Not collective across MPI processes.
1262 
1263   @param[in,out]  mat                       MatCeed
1264   @param[out]     log_event_mult            `PetscLogEvent` for forward evaluation, or NULL
1265   @param[out]     log_event_mult_transpose  `PetscLogEvent` for transpose evaluation, or NULL
1266 
1267   @return An error code: 0 - success, otherwise - failure
1268 **/
1269 PetscErrorCode MatCeedGetLogEvents(Mat mat, PetscLogEvent *log_event_mult, PetscLogEvent *log_event_mult_transpose) {
1270   MatCeedContext ctx;
1271 
1272   PetscFunctionBeginUser;
1273   PetscCall(MatShellGetContext(mat, &ctx));
1274   if (log_event_mult) *log_event_mult = ctx->log_event_mult;
1275   if (log_event_mult_transpose) *log_event_mult_transpose = ctx->log_event_mult_transpose;
1276   PetscFunctionReturn(PETSC_SUCCESS);
1277 }
1278 
1279 /**
1280   @brief Set `CeedOperator` `PetscLogEvent` for `MATCEED` `MatMult()` and `MatMultTranspose()` operators.
1281 
1282   Not collective across MPI processes.
1283 
1284   @param[in,out]  mat                       MatCeed
  @param[in]      log_event_mult            `PetscLogEvent` for forward `CeedOperator` evaluation, or 0 to leave unchanged
  @param[in]      log_event_mult_transpose  `PetscLogEvent` for transpose `CeedOperator` evaluation, or 0 to leave unchanged
1287 
1288   @return An error code: 0 - success, otherwise - failure
1289 **/
1290 PetscErrorCode MatCeedSetCeedOperatorLogEvents(Mat mat, PetscLogEvent log_event_mult, PetscLogEvent log_event_mult_transpose) {
1291   MatCeedContext ctx;
1292 
1293   PetscFunctionBeginUser;
1294   PetscCall(MatShellGetContext(mat, &ctx));
1295   if (log_event_mult) ctx->log_event_ceed_mult = log_event_mult;
1296   if (log_event_mult_transpose) ctx->log_event_ceed_mult_transpose = log_event_mult_transpose;
1297   PetscFunctionReturn(PETSC_SUCCESS);
1298 }
1299 
1300 /**
1301   @brief Get `CeedOperator` `PetscLogEvent` for `MATCEED` `MatMult()` and `MatMultTranspose()` operators.
1302 
1303   Not collective across MPI processes.
1304 
1305   @param[in,out]  mat                       MatCeed
1306   @param[out]     log_event_mult            `PetscLogEvent` for forward `CeedOperator` evaluation, or NULL
1307   @param[out]     log_event_mult_transpose  `PetscLogEvent` for transpose `CeedOperator` evaluation, or NULL
1308 
1309   @return An error code: 0 - success, otherwise - failure
1310 **/
1311 PetscErrorCode MatCeedGetCeedOperatorLogEvents(Mat mat, PetscLogEvent *log_event_mult, PetscLogEvent *log_event_mult_transpose) {
1312   MatCeedContext ctx;
1313 
1314   PetscFunctionBeginUser;
1315   PetscCall(MatShellGetContext(mat, &ctx));
1316   if (log_event_mult) *log_event_mult = ctx->log_event_ceed_mult;
1317   if (log_event_mult_transpose) *log_event_mult_transpose = ctx->log_event_ceed_mult_transpose;
1318   PetscFunctionReturn(PETSC_SUCCESS);
1319 }
1320 
1321 // -----------------------------------------------------------------------------
1322 // Operator context data
1323 // -----------------------------------------------------------------------------
1324 
1325 /**
1326   @brief Setup context data for operator application.
1327 
1328   Collective across MPI processes.
1329 
1330   @param[in]   dm_x                           Input `DM`
1331   @param[in]   dm_y                           Output `DM`
1332   @param[in]   X_loc                          Input PETSc local vector, or NULL
1333   @param[in]   Y_loc_transpose                Input PETSc local vector for transpose operation, or NULL
1334   @param[in]   op_mult                        `CeedOperator` for forward evaluation
1335   @param[in]   op_mult_transpose              `CeedOperator` for transpose evaluation
1336   @param[in]   log_event_mult                 `PetscLogEvent` for forward evaluation
1337   @param[in]   log_event_mult_transpose       `PetscLogEvent` for transpose evaluation
1338   @param[in]   log_event_ceed_mult            `PetscLogEvent` for forward `CeedOperator` evaluation
1339   @param[in]   log_event_ceed_mult_transpose  `PetscLogEvent` for transpose `CeedOperator` evaluation
1340   @param[out]  ctx                            Context data for operator evaluation
1341 
1342   @return An error code: 0 - success, otherwise - failure
1343 **/
PetscErrorCode MatCeedContextCreate(DM dm_x, DM dm_y, Vec X_loc, Vec Y_loc_transpose, CeedOperator op_mult, CeedOperator op_mult_transpose,
                                    PetscLogEvent log_event_mult, PetscLogEvent log_event_mult_transpose, PetscLogEvent log_event_ceed_mult,
                                    PetscLogEvent log_event_ceed_mult_transpose, MatCeedContext *ctx) {
  CeedSize x_loc_len, y_loc_len;

  PetscFunctionBeginUser;

  // Allocate
  PetscCall(PetscNew(ctx));
  (*ctx)->ref_count = 1;

  // Logging
  (*ctx)->log_event_mult                = log_event_mult;
  (*ctx)->log_event_mult_transpose      = log_event_mult_transpose;
  (*ctx)->log_event_ceed_mult           = log_event_ceed_mult;
  (*ctx)->log_event_ceed_mult_transpose = log_event_ceed_mult_transpose;

  // PETSc objects
  // The context takes its own references; local vectors are optional
  PetscCall(DMReferenceCopy(dm_x, &(*ctx)->dm_x));
  PetscCall(DMReferenceCopy(dm_y, &(*ctx)->dm_y));
  if (X_loc) PetscCall(VecReferenceCopy(X_loc, &(*ctx)->X_loc));
  if (Y_loc_transpose) PetscCall(VecReferenceCopy(Y_loc_transpose, &(*ctx)->Y_loc_transpose));

  // Memtype
  // Probe a throwaway local vector to discover where dm_x vectors live (host or device)
  {
    const PetscScalar *x;
    Vec                X;

    PetscCall(DMGetLocalVector(dm_x, &X));
    PetscCall(VecGetArrayReadAndMemType(X, &x, &(*ctx)->mem_type));
    PetscCall(VecRestoreArrayReadAndMemType(X, &x));
    PetscCall(DMRestoreLocalVector(dm_x, &X));
  }

  // libCEED objects
  // Take our own reference on the Ceed so the context outlives the caller's handle
  PetscCheck(CeedOperatorGetCeed(op_mult, &(*ctx)->ceed) == CEED_ERROR_SUCCESS, PETSC_COMM_SELF, PETSC_ERR_LIB,
             "retrieving Ceed context object failed");
  PetscCallCeed((*ctx)->ceed, CeedReference((*ctx)->ceed));
  PetscCallCeed((*ctx)->ceed, CeedOperatorGetActiveVectorLengths(op_mult, &x_loc_len, &y_loc_len));
  PetscCallCeed((*ctx)->ceed, CeedOperatorReferenceCopy(op_mult, &(*ctx)->op_mult));
  // The transpose operator is optional; (*ctx)->op_mult_transpose stays NULL when absent
  if (op_mult_transpose) PetscCallCeed((*ctx)->ceed, CeedOperatorReferenceCopy(op_mult_transpose, &(*ctx)->op_mult_transpose));
  // Work vectors sized to the operator's active input/output lengths
  PetscCallCeed((*ctx)->ceed, CeedVectorCreate((*ctx)->ceed, x_loc_len, &(*ctx)->x_loc));
  PetscCallCeed((*ctx)->ceed, CeedVectorCreate((*ctx)->ceed, y_loc_len, &(*ctx)->y_loc));

  // Flop counting
  {
    CeedSize ceed_flops_estimate = 0;

    PetscCallCeed((*ctx)->ceed, CeedOperatorGetFlopsEstimate(op_mult, &ceed_flops_estimate));
    (*ctx)->flops_mult = ceed_flops_estimate;
    if (op_mult_transpose) {
      PetscCallCeed((*ctx)->ceed, CeedOperatorGetFlopsEstimate(op_mult_transpose, &ceed_flops_estimate));
      (*ctx)->flops_mult_transpose = ceed_flops_estimate;
    }
  }

  // Check sizes
  // Skipped when the operator reports no active vectors on this rank
  if (x_loc_len > 0 || y_loc_len > 0) {
    CeedSize ctx_x_loc_len, ctx_y_loc_len;
    PetscInt X_loc_len, dm_x_loc_len, Y_loc_len, dm_y_loc_len;
    Vec      dm_X_loc, dm_Y_loc;

    // -- Input
    PetscCall(DMGetLocalVector(dm_x, &dm_X_loc));
    PetscCall(VecGetLocalSize(dm_X_loc, &dm_x_loc_len));
    PetscCall(DMRestoreLocalVector(dm_x, &dm_X_loc));
    PetscCallCeed((*ctx)->ceed, CeedVectorGetLength((*ctx)->x_loc, &ctx_x_loc_len));
    if (X_loc) {
      PetscCall(VecGetLocalSize(X_loc, &X_loc_len));
      PetscCheck(X_loc_len == dm_x_loc_len, PETSC_COMM_SELF, PETSC_ERR_LIB,
                 "X_loc (%" PetscInt_FMT ") must match dm_x (%" PetscInt_FMT ") dimensions", X_loc_len, dm_x_loc_len);
    }
    PetscCheck(x_loc_len == dm_x_loc_len, PETSC_COMM_SELF, PETSC_ERR_LIB, "op (%" CeedSize_FMT ") must match dm_x (%" PetscInt_FMT ") dimensions",
               x_loc_len, dm_x_loc_len);
    PetscCheck(x_loc_len == ctx_x_loc_len, PETSC_COMM_SELF, PETSC_ERR_LIB, "x_loc (%" CeedSize_FMT ") must match op dimensions (%" CeedSize_FMT ")",
               x_loc_len, ctx_x_loc_len);

    // -- Output
    PetscCall(DMGetLocalVector(dm_y, &dm_Y_loc));
    PetscCall(VecGetLocalSize(dm_Y_loc, &dm_y_loc_len));
    PetscCall(DMRestoreLocalVector(dm_y, &dm_Y_loc));
    PetscCallCeed((*ctx)->ceed, CeedVectorGetLength((*ctx)->y_loc, &ctx_y_loc_len));
    PetscCheck(ctx_y_loc_len == dm_y_loc_len, PETSC_COMM_SELF, PETSC_ERR_LIB, "op (%" CeedSize_FMT ") must match dm_y (%" PetscInt_FMT ") dimensions",
               ctx_y_loc_len, dm_y_loc_len);

    // -- Transpose
    if (Y_loc_transpose) {
      PetscCall(VecGetLocalSize(Y_loc_transpose, &Y_loc_len));
      PetscCheck(Y_loc_len == dm_y_loc_len, PETSC_COMM_SELF, PETSC_ERR_LIB,
                 "Y_loc_transpose (%" PetscInt_FMT ") must match dm_y (%" PetscInt_FMT ") dimensions", Y_loc_len, dm_y_loc_len);
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
1438 
1439 /**
1440   @brief Increment reference counter for `MATCEED` context.
1441 
1442   Not collective across MPI processes.
1443 
1444   @param[in,out]  ctx  Context data
1445 
1446   @return An error code: 0 - success, otherwise - failure
1447 **/
1448 PetscErrorCode MatCeedContextReference(MatCeedContext ctx) {
1449   PetscFunctionBeginUser;
1450   ctx->ref_count++;
1451   PetscFunctionReturn(PETSC_SUCCESS);
1452 }
1453 
/**
  @brief Copy reference for `MATCEED`.
         Note: If `ctx_copy` is non-null, it is assumed to be a valid pointer to a `MatCeedContext`.

  Not collective across MPI processes.

  @param[in]   ctx       Context data
  @param[out]  ctx_copy  Copy of pointer to context data

  @return An error code: 0 - success, otherwise - failure
**/
PetscErrorCode MatCeedContextReferenceCopy(MatCeedContext ctx, MatCeedContext *ctx_copy) {
  PetscFunctionBeginUser;
  // Reference before destroying the old target so the call is safe when ctx == *ctx_copy
  // (mirrors the PetscObjectReference()/PetscObjectDestroy() reference-copy idiom)
  PetscCall(MatCeedContextReference(ctx));
  PetscCall(MatCeedContextDestroy(*ctx_copy));
  *ctx_copy = ctx;
  PetscFunctionReturn(PETSC_SUCCESS);
}
1472 
/**
  @brief Destroy context data for operator application.

  Collective across MPI processes.

  @param[in,out]  ctx  Context data for operator evaluation

  @return An error code: 0 - success, otherwise - failure
**/
PetscErrorCode MatCeedContextDestroy(MatCeedContext ctx) {
  PetscFunctionBeginUser;
  // Decrement the reference count; tear down only when the last reference is released
  if (!ctx || --ctx->ref_count > 0) PetscFunctionReturn(PETSC_SUCCESS);

  // PETSc objects
  PetscCall(DMDestroy(&ctx->dm_x));
  PetscCall(DMDestroy(&ctx->dm_y));
  PetscCall(VecDestroy(&ctx->X_loc));
  PetscCall(VecDestroy(&ctx->Y_loc_transpose));
  PetscCall(MatDestroy(&ctx->mat_assembled_full_internal));
  PetscCall(MatDestroy(&ctx->mat_assembled_pbd_internal));
  PetscCall(PetscFree(ctx->coo_mat_type));
  PetscCall(PetscFree(ctx->mats_assembled_full));
  PetscCall(PetscFree(ctx->mats_assembled_pbd));

  // libCEED objects
  PetscCallCeed(ctx->ceed, CeedVectorDestroy(&ctx->x_loc));
  PetscCallCeed(ctx->ceed, CeedVectorDestroy(&ctx->y_loc));
  PetscCallCeed(ctx->ceed, CeedVectorDestroy(&ctx->coo_values_full));
  PetscCallCeed(ctx->ceed, CeedVectorDestroy(&ctx->coo_values_pbd));
  PetscCallCeed(ctx->ceed, CeedOperatorDestroy(&ctx->op_mult));
  PetscCallCeed(ctx->ceed, CeedOperatorDestroy(&ctx->op_mult_transpose));
  // The Ceed object itself goes last (the PetscCallCeed calls above still need it);
  // PetscCallCeed cannot be used here since it would reference the object being destroyed
  PetscCheck(CeedDestroy(&ctx->ceed) == CEED_ERROR_SUCCESS, PETSC_COMM_SELF, PETSC_ERR_LIB, "destroying libCEED context object failed");

  // Deallocate
  // NOTE(review): this flag is written immediately before PetscFree(ctx); any stale
  // reference reading it after the free is use-after-free — confirm the intent here
  ctx->is_destroyed = PETSC_TRUE;  // Flag as destroyed in case someone has stale ref
  PetscCall(PetscFree(ctx));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1511 
1512 /**
1513   @brief Compute the diagonal of an operator via libCEED.
1514 
1515   Collective across MPI processes.
1516 
1517   @param[in]   A  `MATCEED`
1518   @param[out]  D  Vector holding operator diagonal
1519 
1520   @return An error code: 0 - success, otherwise - failure
1521 **/
1522 PetscErrorCode MatGetDiagonal_Ceed(Mat A, Vec D) {
1523   PetscMemType   mem_type;
1524   Vec            D_loc;
1525   MatCeedContext ctx;
1526 
1527   PetscFunctionBeginUser;
1528   PetscCall(MatShellGetContext(A, &ctx));
1529 
1530   // Place PETSc vector in libCEED vector
1531   PetscCall(PetscLogEventBegin(MATCEED_ASSEMBLE_DIAGONAL, A, D, NULL, NULL));
1532   PetscCall(DMGetLocalVector(ctx->dm_x, &D_loc));
1533   PetscCall(VecPetscToCeed(D_loc, &mem_type, ctx->x_loc));
1534 
1535   // Compute Diagonal
1536   PetscCall(PetscLogEventBegin(MATCEED_ASSEMBLE_DIAGONAL_CEEDOP, A, D, NULL, NULL));
1537   PetscCallCeed(ctx->ceed, CeedOperatorLinearAssembleDiagonal(ctx->op_mult, ctx->x_loc, CEED_REQUEST_IMMEDIATE));
1538   PetscCall(PetscLogEventEnd(MATCEED_ASSEMBLE_DIAGONAL_CEEDOP, A, D, NULL, NULL));
1539 
1540   // Restore PETSc vector
1541   PetscCall(VecCeedToPetsc(ctx->x_loc, mem_type, D_loc));
1542 
1543   // Local-to-Global
1544   PetscCall(VecZeroEntries(D));
1545   PetscCall(DMLocalToGlobal(ctx->dm_x, D_loc, ADD_VALUES, D));
1546   PetscCall(DMRestoreLocalVector(ctx->dm_x, &D_loc));
1547   PetscCall(PetscLogEventEnd(MATCEED_ASSEMBLE_DIAGONAL, A, D, NULL, NULL));
1548   PetscFunctionReturn(PETSC_SUCCESS);
1549 }
1550 
1551 /**
1552   @brief Compute `A X = Y` for a `MATCEED`.
1553 
1554   Collective across MPI processes.
1555 
1556   @param[in]   A  `MATCEED`
1557   @param[in]   X  Input PETSc vector
1558   @param[out]  Y  Output PETSc vector
1559 
1560   @return An error code: 0 - success, otherwise - failure
1561 **/
1562 PetscErrorCode MatMult_Ceed(Mat A, Vec X, Vec Y) {
1563   MatCeedContext ctx;
1564 
1565   PetscFunctionBeginUser;
1566   PetscCall(MatShellGetContext(A, &ctx));
1567   PetscCall(PetscLogEventBegin(ctx->log_event_mult, A, X, Y, NULL));
1568 
1569   {
1570     PetscMemType x_mem_type, y_mem_type;
1571     Vec          X_loc = ctx->X_loc, Y_loc;
1572 
1573     // Get local vectors
1574     if (!ctx->X_loc) PetscCall(DMGetLocalVector(ctx->dm_x, &X_loc));
1575     PetscCall(DMGetLocalVector(ctx->dm_y, &Y_loc));
1576 
1577     // Global-to-local
1578     PetscCall(DMGlobalToLocal(ctx->dm_x, X, INSERT_VALUES, X_loc));
1579 
1580     // Setup libCEED vectors
1581     PetscCall(VecReadPetscToCeed(X_loc, &x_mem_type, ctx->x_loc));
1582     PetscCall(VecZeroEntries(Y_loc));
1583     PetscCall(VecPetscToCeed(Y_loc, &y_mem_type, ctx->y_loc));
1584 
1585     // Apply libCEED operator
1586     PetscCall(PetscLogEventBegin(ctx->log_event_ceed_mult, A, X, Y, NULL));
1587     PetscCall(PetscLogGpuTimeBegin());
1588     PetscCallCeed(ctx->ceed, CeedOperatorApplyAdd(ctx->op_mult, ctx->x_loc, ctx->y_loc, CEED_REQUEST_IMMEDIATE));
1589     PetscCall(PetscLogGpuTimeEnd());
1590     PetscCall(PetscLogEventEnd(ctx->log_event_ceed_mult, A, X, Y, NULL));
1591 
1592     // Restore PETSc vectors
1593     PetscCall(VecReadCeedToPetsc(ctx->x_loc, x_mem_type, X_loc));
1594     PetscCall(VecCeedToPetsc(ctx->y_loc, y_mem_type, Y_loc));
1595 
1596     // Local-to-global
1597     PetscCall(VecZeroEntries(Y));
1598     PetscCall(DMLocalToGlobal(ctx->dm_y, Y_loc, ADD_VALUES, Y));
1599 
1600     // Restore local vectors, as needed
1601     if (!ctx->X_loc) PetscCall(DMRestoreLocalVector(ctx->dm_x, &X_loc));
1602     PetscCall(DMRestoreLocalVector(ctx->dm_y, &Y_loc));
1603   }
1604 
1605   // Log flops
1606   if (PetscMemTypeDevice(ctx->mem_type)) PetscCall(PetscLogGpuFlops(ctx->flops_mult));
1607   else PetscCall(PetscLogFlops(ctx->flops_mult));
1608   PetscCall(PetscLogEventEnd(ctx->log_event_mult, A, X, Y, NULL));
1609   PetscFunctionReturn(PETSC_SUCCESS);
1610 }
1611 
1612 /**
1613   @brief Compute `A^T Y = X` for a `MATCEED`.
1614 
1615   Collective across MPI processes.
1616 
1617   @param[in]   A  `MATCEED`
1618   @param[in]   Y  Input PETSc vector
1619   @param[out]  X  Output PETSc vector
1620 
1621   @return An error code: 0 - success, otherwise - failure
1622 **/
1623 PetscErrorCode MatMultTranspose_Ceed(Mat A, Vec Y, Vec X) {
1624   MatCeedContext ctx;
1625 
1626   PetscFunctionBeginUser;
1627   PetscCall(MatShellGetContext(A, &ctx));
1628   PetscCall(PetscLogEventBegin(ctx->log_event_mult_transpose, A, Y, X, NULL));
1629 
1630   {
1631     PetscMemType x_mem_type, y_mem_type;
1632     Vec          X_loc, Y_loc = ctx->Y_loc_transpose;
1633 
1634     // Get local vectors
1635     if (!ctx->Y_loc_transpose) PetscCall(DMGetLocalVector(ctx->dm_y, &Y_loc));
1636     PetscCall(DMGetLocalVector(ctx->dm_x, &X_loc));
1637 
1638     // Global-to-local
1639     PetscCall(DMGlobalToLocal(ctx->dm_y, Y, INSERT_VALUES, Y_loc));
1640 
1641     // Setup libCEED vectors
1642     PetscCall(VecReadPetscToCeed(Y_loc, &y_mem_type, ctx->y_loc));
1643     PetscCall(VecZeroEntries(X_loc));
1644     PetscCall(VecPetscToCeed(X_loc, &x_mem_type, ctx->x_loc));
1645 
1646     // Apply libCEED operator
1647     PetscCall(PetscLogEventBegin(ctx->log_event_ceed_mult_transpose, A, Y, X, NULL));
1648     PetscCall(PetscLogGpuTimeBegin());
1649     PetscCallCeed(ctx->ceed, CeedOperatorApplyAdd(ctx->op_mult_transpose, ctx->y_loc, ctx->x_loc, CEED_REQUEST_IMMEDIATE));
1650     PetscCall(PetscLogGpuTimeEnd());
1651     PetscCall(PetscLogEventEnd(ctx->log_event_ceed_mult_transpose, A, Y, X, NULL));
1652 
1653     // Restore PETSc vectors
1654     PetscCall(VecReadCeedToPetsc(ctx->y_loc, y_mem_type, Y_loc));
1655     PetscCall(VecCeedToPetsc(ctx->x_loc, x_mem_type, X_loc));
1656 
1657     // Local-to-global
1658     PetscCall(VecZeroEntries(X));
1659     PetscCall(DMLocalToGlobal(ctx->dm_x, X_loc, ADD_VALUES, X));
1660 
1661     // Restore local vectors, as needed
1662     if (!ctx->Y_loc_transpose) PetscCall(DMRestoreLocalVector(ctx->dm_y, &Y_loc));
1663     PetscCall(DMRestoreLocalVector(ctx->dm_x, &X_loc));
1664   }
1665 
1666   // Log flops
1667   if (PetscMemTypeDevice(ctx->mem_type)) PetscCall(PetscLogGpuFlops(ctx->flops_mult_transpose));
1668   else PetscCall(PetscLogFlops(ctx->flops_mult_transpose));
1669   PetscCall(PetscLogEventEnd(ctx->log_event_mult_transpose, A, Y, X, NULL));
1670   PetscFunctionReturn(PETSC_SUCCESS);
1671 }
1672