xref: /honee/src/mat-ceed.c (revision 481d14cbe61cedfaf9e39a526b7a6439011a8661)
1 // SPDX-FileCopyrightText: Copyright (c) 2017-2024, HONEE contributors.
2 // SPDX-License-Identifier: Apache-2.0 OR BSD-2-Clause
3 /// @file
4 /// MatCEED implementation
5 
6 #include <ceed.h>
7 #include <ceed/backend.h>
8 #include <mat-ceed-impl.h>
9 #include <mat-ceed.h>
10 #include <petsc-ceed-utils.h>
11 #include <petsc-ceed.h>
12 #include <petscdm.h>
13 #include <petscmat.h>
14 #include <stdbool.h>
15 #include <stdio.h>
16 #include <stdlib.h>
17 #include <string.h>
18 
19 PetscClassId  MATCEED_CLASSID;
20 PetscLogEvent MATCEED_MULT, MATCEED_MULT_CEEDOP, MATCEED_MULT_TRANSPOSE, MATCEED_MULT_TRANSPOSE_CEEDOP, MATCEED_ASSEMBLE_DIAGONAL,
21     MATCEED_ASSEMBLE_DIAGONAL_CEEDOP, MATCEED_SETUP_PBDIAGONAL, MATCEED_SETUP_PBDIAGONAL_CEEDOP, MATCEED_ASSEMBLE_PBDIAGONAL,
22     MATCEED_ASSEMBLE_PBDIAGONAL_CEEDOP, MATCEED_SETUP_FULL, MATCEED_SETUP_FULL_CEEDOP, MATCEED_ASSEMBLE_FULL, MATCEED_ASSEMBLE_FULL_CEEDOP;
23 
24 /**
25   @brief Register MATCEED log events.
26 
27   Not collective across MPI processes.
28 
29   @return An error code: 0 - success, otherwise - failure
30 **/
31 static PetscErrorCode MatCeedRegisterLogEvents() {
32   static PetscBool registered = PETSC_FALSE;
33 
34   PetscFunctionBeginUser;
35   if (registered) PetscFunctionReturn(PETSC_SUCCESS);
36   PetscCall(PetscClassIdRegister("MatCEED", &MATCEED_CLASSID));
37   PetscCall(PetscLogEventRegister("MatCEEDMul", MATCEED_CLASSID, &MATCEED_MULT));
38   PetscCall(PetscLogEventRegister("MatCEEDMulCeed", MATCEED_CLASSID, &MATCEED_MULT_CEEDOP));
39   PetscCall(PetscLogEventRegister("MatCEEDMulT", MATCEED_CLASSID, &MATCEED_MULT_TRANSPOSE));
40   PetscCall(PetscLogEventRegister("MatCEEDMulTCeed", MATCEED_CLASSID, &MATCEED_MULT_TRANSPOSE_CEEDOP));
41   PetscCall(PetscLogEventRegister("MatCEEDAsmDiag", MATCEED_CLASSID, &MATCEED_ASSEMBLE_DIAGONAL));
42   PetscCall(PetscLogEventRegister("MatCEEDAsmPBDSU", MATCEED_CLASSID, &MATCEED_SETUP_PBDIAGONAL));
43   PetscCall(PetscLogEventRegister("MatCEEDAsmPBDSUCeed", MATCEED_CLASSID, &MATCEED_SETUP_PBDIAGONAL_CEEDOP));
44   PetscCall(PetscLogEventRegister("MatCEEDAsmPBD", MATCEED_CLASSID, &MATCEED_ASSEMBLE_PBDIAGONAL));
45   PetscCall(PetscLogEventRegister("MatCEEDAsmPBDCeed", MATCEED_CLASSID, &MATCEED_ASSEMBLE_PBDIAGONAL_CEEDOP));
46   PetscCall(PetscLogEventRegister("MatCEEDAsmSU", MATCEED_CLASSID, &MATCEED_SETUP_FULL));
47   PetscCall(PetscLogEventRegister("MatCEEDAsmSUCeed", MATCEED_CLASSID, &MATCEED_SETUP_FULL_CEEDOP));
48   PetscCall(PetscLogEventRegister("MatCEEDAsm", MATCEED_CLASSID, &MATCEED_ASSEMBLE_FULL));
49   PetscCall(PetscLogEventRegister("MatCEEDAsmCeed", MATCEED_CLASSID, &MATCEED_ASSEMBLE_FULL_CEEDOP));
50   registered = PETSC_TRUE;
51   PetscFunctionReturn(PETSC_SUCCESS);
52 }
53 
54 /**
55   @brief Assemble the point block diagonal of a `MATCEED` into a `MATAIJ` or similar.
56          The `mat_coo` preallocation is set to match the sparsity pattern of `mat_ceed`.
57          The caller is responsible for assuring the global and local sizes are compatible, otherwise this function will fail.
58 
59   Collective across MPI processes.
60 
61   @param[in]      mat_ceed  `MATCEED` to assemble
62   @param[in,out]  mat_coo   `MATAIJ` or similar to assemble into
63 
64   @return An error code: 0 - success, otherwise - failure
65 **/
static PetscErrorCode MatCeedAssemblePointBlockDiagonalCOO(Mat mat_ceed, Mat mat_coo) {
  MatCeedContext ctx;

  PetscFunctionBeginUser;
  PetscCall(MatShellGetContext(mat_ceed, &ctx));

  // Check if COO pattern set
  {
    PetscInt index = -1;

    // Linear scan over mats previously assembled into; index != -1 means the COO pattern is already set for mat_coo
    for (PetscInt i = 0; i < ctx->num_mats_assembled_pbd; i++) {
      if (ctx->mats_assembled_pbd[i] == mat_coo) index = i;
    }
    if (index == -1) {
      PetscInt     *rows_petsc = NULL, *cols_petsc = NULL;
      CeedInt      *rows_ceed, *cols_ceed;
      PetscCount    num_entries;
      PetscLogStage stage_amg_setup;

      // -- Assemble sparsity pattern if mat hasn't been assembled before
      // Reuse the named log stage if some other code path already registered it
      PetscCall(PetscLogStageGetId("MatCEED Asm Setup", &stage_amg_setup));
      if (stage_amg_setup == -1) {
        PetscCall(PetscLogStageRegister("MatCEED Asm Setup", &stage_amg_setup));
      }
      PetscCall(PetscLogStagePush(stage_amg_setup));
      PetscCall(PetscLogEventBegin(MATCEED_SETUP_PBDIAGONAL, mat_ceed, mat_coo, NULL, NULL));
      PetscCall(PetscLogEventBegin(MATCEED_SETUP_PBDIAGONAL_CEEDOP, mat_ceed, mat_coo, NULL, NULL));
      // libCEED computes the (row, col) index pattern of the point block diagonal
      PetscCallCeed(ctx->ceed, CeedOperatorLinearAssemblePointBlockDiagonalSymbolic(ctx->op_mult, &num_entries, &rows_ceed, &cols_ceed));
      PetscCall(PetscLogEventEnd(MATCEED_SETUP_PBDIAGONAL_CEEDOP, mat_ceed, mat_coo, NULL, NULL));
      // Convert CeedInt index arrays to PetscInt; converted arrays are freed with free() below
      PetscCall(IntArrayCeedToPetsc(num_entries, &rows_ceed, &rows_petsc));
      PetscCall(IntArrayCeedToPetsc(num_entries, &cols_ceed, &cols_petsc));
      PetscCall(MatSetPreallocationCOOLocal(mat_coo, num_entries, rows_petsc, cols_petsc));
      free(rows_petsc);
      free(cols_petsc);
      // Lazily create the CeedVector that receives the assembled values on every call
      if (!ctx->coo_values_pbd) PetscCallCeed(ctx->ceed, CeedVectorCreate(ctx->ceed, num_entries, &ctx->coo_values_pbd));
      // Record mat_coo so the symbolic setup is skipped on subsequent assemblies into it
      PetscCall(PetscRealloc(++ctx->num_mats_assembled_pbd * sizeof(Mat), &ctx->mats_assembled_pbd));
      ctx->mats_assembled_pbd[ctx->num_mats_assembled_pbd - 1] = mat_coo;
      PetscCall(PetscLogEventEnd(MATCEED_SETUP_PBDIAGONAL, mat_ceed, mat_coo, NULL, NULL));
      PetscCall(PetscLogStagePop());
    }
  }

  // Assemble mat_ceed
  PetscCall(PetscLogEventBegin(MATCEED_ASSEMBLE_PBDIAGONAL, mat_ceed, mat_coo, NULL, NULL));
  PetscCall(MatAssemblyBegin(mat_coo, MAT_FINAL_ASSEMBLY));
  {
    const CeedScalar *values;
    MatType           mat_type;
    CeedMemType       mem_type = CEED_MEM_HOST;
    PetscBool         is_spd, is_spd_known;

    // Device-capable Mat types (CUDA/Kokkos backends) can consume assembled values directly from device memory
    PetscCall(MatGetType(mat_coo, &mat_type));
    if (strstr(mat_type, "cusparse")) mem_type = CEED_MEM_DEVICE;
    else if (strstr(mat_type, "kokkos")) mem_type = CEED_MEM_DEVICE;
    else mem_type = CEED_MEM_HOST;

    PetscCall(PetscLogEventBegin(MATCEED_ASSEMBLE_PBDIAGONAL_CEEDOP, mat_ceed, mat_coo, NULL, NULL));
    PetscCallCeed(ctx->ceed, CeedOperatorLinearAssemblePointBlockDiagonal(ctx->op_mult, ctx->coo_values_pbd, CEED_REQUEST_IMMEDIATE));
    PetscCall(PetscLogEventEnd(MATCEED_ASSEMBLE_PBDIAGONAL_CEEDOP, mat_ceed, mat_coo, NULL, NULL));
    PetscCallCeed(ctx->ceed, CeedVectorGetArrayRead(ctx->coo_values_pbd, mem_type, &values));
    PetscCall(MatSetValuesCOO(mat_coo, values, INSERT_VALUES));
    // Propagate SPD information from the shell Mat, when known
    PetscCall(MatIsSPDKnown(mat_ceed, &is_spd_known, &is_spd));
    if (is_spd_known) PetscCall(MatSetOption(mat_coo, MAT_SPD, is_spd));
    PetscCallCeed(ctx->ceed, CeedVectorRestoreArrayRead(ctx->coo_values_pbd, &values));
  }
  PetscCall(MatAssemblyEnd(mat_coo, MAT_FINAL_ASSEMBLY));
  PetscCall(PetscLogEventEnd(MATCEED_ASSEMBLE_PBDIAGONAL, mat_ceed, mat_coo, NULL, NULL));
  PetscFunctionReturn(PETSC_SUCCESS);
}
135 
136 /**
137   @brief Assemble inner `Mat` for diagonal `PC` operations
138 
139   Collective across MPI processes.
140 
  @param[in]   mat_ceed      `MATCEED` to assemble from
142   @param[in]   use_ceed_pbd  Boolean flag to use libCEED PBD assembly
143   @param[out]  mat_inner     Inner `Mat` for diagonal operations
144 
145   @return An error code: 0 - success, otherwise - failure
146 **/
147 static PetscErrorCode MatCeedAssembleInnerBlockDiagonalMat(Mat mat_ceed, PetscBool use_ceed_pbd, Mat *mat_inner) {
148   MatCeedContext ctx;
149 
150   PetscFunctionBeginUser;
151   PetscCall(MatShellGetContext(mat_ceed, &ctx));
152   if (use_ceed_pbd) {
153     // Check if COO pattern set
154     if (!ctx->mat_assembled_pbd_internal) PetscCall(MatCeedCreateMatCOO(mat_ceed, &ctx->mat_assembled_pbd_internal));
155 
156     // Assemble mat_assembled_full_internal
157     PetscCall(MatCeedAssemblePointBlockDiagonalCOO(mat_ceed, ctx->mat_assembled_pbd_internal));
158     if (mat_inner) *mat_inner = ctx->mat_assembled_pbd_internal;
159   } else {
160     // Check if COO pattern set
161     if (!ctx->mat_assembled_full_internal) PetscCall(MatCeedCreateMatCOO(mat_ceed, &ctx->mat_assembled_full_internal));
162 
163     // Assemble mat_assembled_full_internal
164     PetscCall(MatCeedAssembleCOO(mat_ceed, ctx->mat_assembled_full_internal));
165     if (mat_inner) *mat_inner = ctx->mat_assembled_full_internal;
166   }
167   PetscFunctionReturn(PETSC_SUCCESS);
168 }
169 
170 /**
171   @brief Get `MATCEED` variable block diagonal for Jacobi.
172 
173   Collective across MPI processes.
174 
  @param[in]   mat_ceed    `MATCEED` to assemble from
  @param[out]  mat_vblock  The variable diagonal block matrix
177 
178   @return An error code: 0 - success, otherwise - failure
179 **/
180 static PetscErrorCode MatGetVariableBlockDiagonal_Ceed(Mat mat_ceed, Mat *mat_vblock) {
181   MatCeedContext ctx;
182 
183   PetscFunctionBeginUser;
184   PetscCall(MatShellGetContext(mat_ceed, &ctx));
185 
186   // Assemble inner mat if needed
187   PetscCall(MatCeedAssembleInnerBlockDiagonalMat(mat_ceed, ctx->is_ceed_vpbd_valid, mat_vblock));
188   PetscCall(PetscObjectReference((PetscObject)*mat_vblock));
189   PetscFunctionReturn(PETSC_SUCCESS);
190 }
191 
192 /**
193   @brief Get `MATCEED` block diagonal for Jacobi.
194 
195   Collective across MPI processes.
196 
  @param[in]   mat_ceed   `MATCEED` to assemble from
  @param[out]  mat_block  The point block diagonal matrix
199 
200   @return An error code: 0 - success, otherwise - failure
201 **/
202 static PetscErrorCode MatGetBlockDiagonal_Ceed(Mat mat_ceed, Mat *mat_block) {
203   MatCeedContext ctx;
204 
205   PetscFunctionBeginUser;
206   PetscCall(MatShellGetContext(mat_ceed, &ctx));
207 
208   // Assemble inner mat if needed
209   PetscCall(MatCeedAssembleInnerBlockDiagonalMat(mat_ceed, ctx->is_ceed_pbd_valid, mat_block));
210   PetscCall(PetscObjectReference((PetscObject)*mat_block));
211   PetscFunctionReturn(PETSC_SUCCESS);
212 }
213 
214 /**
215   @brief Get on-process diagonal block of `MATCEED`
216 
217   This is a block per-process of the diagonal of the global matrix.
218   This is NOT the diagonal blocks associated with the block size of the matrix (i.e. `MatSetBlockSize()` has no effect on this function).
219 
220   Collective across MPI processes.
221 
  @param[in]   mat_ceed   `MATCEED` to get the on-process diagonal block of
223   @param[out]  mat_block  The diagonal block matrix
224 
225   @return An error code: 0 - success, otherwise - failure
226 **/
227 static PetscErrorCode MatGetDiagonalBlock_Ceed(Mat mat_ceed, Mat *mat_block) {
228   MatCeedContext ctx;
229 
230   PetscFunctionBeginUser;
231   PetscCall(MatShellGetContext(mat_ceed, &ctx));
232 
233   // Check if COO pattern set
234   if (!ctx->mat_assembled_full_internal) PetscCall(MatCeedCreateMatCOO(mat_ceed, &ctx->mat_assembled_full_internal));
235 
236   // Assemble mat_assembled_full_internal
237   PetscCall(MatCeedAssembleCOO(mat_ceed, ctx->mat_assembled_full_internal));
238 
239   // Get diagonal block
240   PetscCall(MatGetDiagonalBlock(ctx->mat_assembled_full_internal, mat_block));
241   PetscFunctionReturn(PETSC_SUCCESS);
242 }
243 
244 /**
245   @brief View `MATCEED`.
246 
247   Collective across MPI processes.
248 
249   @param[in]   mat_ceed  `MATCEED` to view
250   @param[in]   viewer    The visualization context
251 
252   @return An error code: 0 - success, otherwise - failure
253 **/
254 static PetscErrorCode MatView_Ceed(Mat mat_ceed, PetscViewer viewer) {
255   PetscBool         is_ascii;
256   PetscViewerFormat format;
257   PetscMPIInt       size, rank;
258   MatCeedContext    ctx;
259 
260   PetscFunctionBeginUser;
261   PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2);
262   PetscCall(MatShellGetContext(mat_ceed, &ctx));
263   if (!viewer) PetscCall(PetscViewerASCIIGetStdout(PetscObjectComm((PetscObject)mat_ceed), &viewer));
264 
265   PetscCall(PetscViewerGetFormat(viewer, &format));
266   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)mat_ceed), &size));
267   if (size == 1 && format == PETSC_VIEWER_LOAD_BALANCE) PetscFunctionReturn(PETSC_SUCCESS);
268 
269   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat_ceed), &rank));
270   if (rank != 0) PetscFunctionReturn(PETSC_SUCCESS);
271 
272   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &is_ascii));
273   {
274     PetscBool is_detailed     = format == PETSC_VIEWER_ASCII_INFO_DETAIL;
275     char      rank_string[16] = {'\0'};
276     FILE     *file;
277 
278     PetscCall(PetscViewerASCIIPrintf(viewer, "MatCEED:\n"));
279     PetscCall(PetscViewerASCIIPushTab(viewer));  // MatCEED
280     PetscCall(PetscViewerASCIIPrintf(viewer, "Default COO MatType: %s\n", ctx->coo_mat_type));
281     PetscCall(PetscSNPrintf(rank_string, 16, "on Rank %d", rank));
282     PetscCall(PetscViewerASCIIPrintf(viewer, "CeedOperator Apply %s:\n", is_detailed ? rank_string : "Summary"));
283     PetscCall(PetscViewerASCIIPrintf(viewer, "libCEED PB Diagonal Assembly: %s\n", ctx->is_ceed_pbd_valid ? "True" : "False"));
284     PetscCall(PetscViewerASCIIPrintf(viewer, "libCEED VPB Diagonal Assembly: %s\n", ctx->is_ceed_vpbd_valid ? "True" : "False"));
285     PetscCall(PetscViewerASCIIGetPointer(viewer, &file));
286     PetscCall(PetscViewerASCIIPushTab(viewer));  // CeedOperator
287     if (is_detailed) PetscCallCeed(ctx->ceed, CeedOperatorView(ctx->op_mult, file));
288     else PetscCallCeed(ctx->ceed, CeedOperatorViewTerse(ctx->op_mult, file));
289     PetscCall(PetscViewerASCIIPopTab(viewer));  // CeedOperator
290     if (ctx->op_mult_transpose) {
291       PetscCall(PetscViewerASCIIPrintf(viewer, "CeedOperator ApplyTranspose %s:\n", is_detailed ? rank_string : "Summary"));
292       PetscCall(PetscViewerASCIIPushTab(viewer));  // CeedOperator
293       if (is_detailed) PetscCallCeed(ctx->ceed, CeedOperatorView(ctx->op_mult_transpose, file));
294       else PetscCallCeed(ctx->ceed, CeedOperatorViewTerse(ctx->op_mult_transpose, file));
295       PetscCall(PetscViewerASCIIPopTab(viewer));  // CeedOperator
296     }
297     PetscCall(PetscViewerASCIIPopTab(viewer));  // MatCEED
298   }
299   PetscFunctionReturn(PETSC_SUCCESS);
300 }
301 
302 // -----------------------------------------------------------------------------
303 // MatCeed
304 // -----------------------------------------------------------------------------
305 
306 /**
307   @brief Create PETSc `Mat` from libCEED operators.
308 
309   Collective across MPI processes.
310 
311   @param[in]   dm_x                      Input `DM`
312   @param[in]   dm_y                      Output `DM`
313   @param[in]   op_mult                   `CeedOperator` for forward evaluation
314   @param[in]   op_mult_transpose         `CeedOperator` for transpose evaluation
315   @param[out]  mat                        New MatCeed
316 
317   @return An error code: 0 - success, otherwise - failure
318 **/
PetscErrorCode MatCreateCeed(DM dm_x, DM dm_y, CeedOperator op_mult, CeedOperator op_mult_transpose, Mat *mat) {
  PetscInt       X_l_size, X_g_size, Y_l_size, Y_g_size;
  VecType        vec_type;
  MatCeedContext ctx;

  PetscFunctionBeginUser;
  PetscCall(MatCeedRegisterLogEvents());

  // Collect context data
  PetscCall(DMGetVecType(dm_x, &vec_type));
  // -- Input space sizes, taken from a borrowed global vector on dm_x
  {
    Vec X;

    PetscCall(DMGetGlobalVector(dm_x, &X));
    PetscCall(VecGetSize(X, &X_g_size));
    PetscCall(VecGetLocalSize(X, &X_l_size));
    PetscCall(DMRestoreGlobalVector(dm_x, &X));
  }
  // -- Output space sizes; a NULL dm_y means the operator is square on dm_x
  if (dm_y) {
    Vec Y;

    PetscCall(DMGetGlobalVector(dm_y, &Y));
    PetscCall(VecGetSize(Y, &Y_g_size));
    PetscCall(VecGetLocalSize(Y, &Y_l_size));
    PetscCall(DMRestoreGlobalVector(dm_y, &Y));
  } else {
    dm_y     = dm_x;
    Y_g_size = X_g_size;
    Y_l_size = X_l_size;
  }

  // Create context
  // NOTE(review): the local vector handles are destroyed right after MatCeedContextCreate —
  // presumably the context takes its own references; confirm against MatCeedContextCreate
  {
    Vec X_loc, Y_loc_transpose = NULL;

    PetscCall(DMCreateLocalVector(dm_x, &X_loc));
    PetscCall(VecZeroEntries(X_loc));
    if (op_mult_transpose) {
      PetscCall(DMCreateLocalVector(dm_y, &Y_loc_transpose));
      PetscCall(VecZeroEntries(Y_loc_transpose));
    }
    PetscCall(MatCeedContextCreate(dm_x, dm_y, X_loc, Y_loc_transpose, op_mult, op_mult_transpose, MATCEED_MULT, MATCEED_MULT_TRANSPOSE,
                                   MATCEED_MULT_CEEDOP, MATCEED_MULT_TRANSPOSE_CEEDOP, &ctx));
    PetscCall(VecDestroy(&X_loc));
    PetscCall(VecDestroy(&Y_loc_transpose));
  }

  // Create mat
  PetscCall(MatCreateShell(PetscObjectComm((PetscObject)dm_x), Y_l_size, X_l_size, Y_g_size, X_g_size, ctx, mat));
  PetscCall(PetscObjectChangeTypeName((PetscObject)*mat, MATCEED));
  // -- Set block and variable block sizes (only meaningful for square operators on a single DM)
  if (dm_x == dm_y) {
    MatType dm_mat_type, dm_mat_type_copy;
    Mat     temp_mat;

    // Build a temporary MATAIJ from the DM to discover its block structure; the DM's MatType is saved and restored
    PetscCall(DMGetMatType(dm_x, &dm_mat_type));
    PetscCall(PetscStrallocpy(dm_mat_type, (char **)&dm_mat_type_copy));
    PetscCall(DMSetMatType(dm_x, MATAIJ));
    PetscCall(DMCreateMatrix(dm_x, &temp_mat));
    PetscCall(DMSetMatType(dm_x, dm_mat_type_copy));
    PetscCall(PetscFree(dm_mat_type_copy));

    {
      PetscInt        block_size, num_blocks, max_vblock_size = PETSC_INT_MAX;
      const PetscInt *vblock_sizes;

      // -- Get block sizes
      PetscCall(MatGetBlockSize(temp_mat, &block_size));
      PetscCall(MatGetVariableBlockSizes(temp_mat, &num_blocks, &vblock_sizes));
      // -- Global maximum variable block size across all ranks
      {
        PetscInt local_min_max[2] = {0}, global_min_max[2] = {0, PETSC_INT_MAX};

        for (PetscInt i = 0; i < num_blocks; i++) local_min_max[1] = PetscMax(local_min_max[1], vblock_sizes[i]);
        PetscCall(PetscGlobalMinMaxInt(PetscObjectComm((PetscObject)dm_x), local_min_max, global_min_max));
        max_vblock_size = global_min_max[1];
      }

      // -- Copy block sizes
      if (block_size > 1) PetscCall(MatSetBlockSize(*mat, block_size));
      if (num_blocks) PetscCall(MatSetVariableBlockSizes(*mat, num_blocks, (PetscInt *)vblock_sizes));

      // -- Check libCEED compatibility
      // PBD assembly requires a single active basis whose component count equals the block size;
      // VPBD additionally requires the component count to cover the largest variable block size
      {
        bool is_composite;

        ctx->is_ceed_pbd_valid  = PETSC_TRUE;
        ctx->is_ceed_vpbd_valid = PETSC_TRUE;
        PetscCallCeed(ctx->ceed, CeedOperatorIsComposite(op_mult, &is_composite));
        if (is_composite) {
          CeedInt       num_sub_operators;
          CeedOperator *sub_operators;

          // Every sub-operator must individually satisfy the PBD/VPBD requirements
          PetscCallCeed(ctx->ceed, CeedCompositeOperatorGetNumSub(op_mult, &num_sub_operators));
          PetscCallCeed(ctx->ceed, CeedCompositeOperatorGetSubList(op_mult, &sub_operators));
          for (CeedInt i = 0; i < num_sub_operators; i++) {
            CeedInt                  num_bases, num_comp;
            CeedBasis               *active_bases;
            CeedOperatorAssemblyData assembly_data;

            PetscCallCeed(ctx->ceed, CeedOperatorGetOperatorAssemblyData(sub_operators[i], &assembly_data));
            PetscCallCeed(ctx->ceed, CeedOperatorAssemblyDataGetBases(assembly_data, &num_bases, &active_bases, NULL, NULL, NULL, NULL));
            PetscCallCeed(ctx->ceed, CeedBasisGetNumComponents(active_bases[0], &num_comp));
            if (num_bases > 1) {
              ctx->is_ceed_pbd_valid  = PETSC_FALSE;
              ctx->is_ceed_vpbd_valid = PETSC_FALSE;
            }
            if (num_comp != block_size) ctx->is_ceed_pbd_valid = PETSC_FALSE;
            if (num_comp < max_vblock_size) ctx->is_ceed_vpbd_valid = PETSC_FALSE;
          }
        } else {
          // LCOV_EXCL_START
          CeedInt                  num_bases, num_comp;
          CeedBasis               *active_bases;
          CeedOperatorAssemblyData assembly_data;

          // Same checks as above for a single (non-composite) operator
          PetscCallCeed(ctx->ceed, CeedOperatorGetOperatorAssemblyData(op_mult, &assembly_data));
          PetscCallCeed(ctx->ceed, CeedOperatorAssemblyDataGetBases(assembly_data, &num_bases, &active_bases, NULL, NULL, NULL, NULL));
          PetscCallCeed(ctx->ceed, CeedBasisGetNumComponents(active_bases[0], &num_comp));
          if (num_bases > 1) {
            ctx->is_ceed_pbd_valid  = PETSC_FALSE;
            ctx->is_ceed_vpbd_valid = PETSC_FALSE;
          }
          if (num_comp != block_size) ctx->is_ceed_pbd_valid = PETSC_FALSE;
          if (num_comp < max_vblock_size) ctx->is_ceed_vpbd_valid = PETSC_FALSE;
          // LCOV_EXCL_STOP
        }
        // Reduce validity flags across ranks; a single invalid rank disables the fast path everywhere
        {
          PetscInt local_is_valid[2], global_is_valid[2];

          local_is_valid[0] = local_is_valid[1] = ctx->is_ceed_pbd_valid;
          PetscCall(PetscGlobalMinMaxInt(PetscObjectComm((PetscObject)dm_x), local_is_valid, global_is_valid));
          ctx->is_ceed_pbd_valid = global_is_valid[0];
          local_is_valid[0] = local_is_valid[1] = ctx->is_ceed_vpbd_valid;
          PetscCall(PetscGlobalMinMaxInt(PetscObjectComm((PetscObject)dm_x), local_is_valid, global_is_valid));
          ctx->is_ceed_vpbd_valid = global_is_valid[0];
        }
      }
    }
    PetscCall(MatDestroy(&temp_mat));
  }
  // -- Set internal mat type
  {
    // NOTE(review): this vec_type shadows the outer vec_type used for MatShellSetVecType below — confirm intentional
    VecType vec_type;
    MatType coo_mat_type;

    // Choose a COO assembly MatType matching the vector backend of the context's local vector
    PetscCall(VecGetType(ctx->X_loc, &vec_type));
    if (strstr(vec_type, VECCUDA)) coo_mat_type = MATAIJCUSPARSE;
    else if (strstr(vec_type, VECKOKKOS)) coo_mat_type = MATAIJKOKKOS;
    else coo_mat_type = MATAIJ;
    PetscCall(PetscStrallocpy(coo_mat_type, &ctx->coo_mat_type));
  }
  // -- Set mat operations
  PetscCall(MatShellSetContextDestroy(*mat, (PetscCtxDestroyFn *)MatCeedContextDestroy));
  PetscCall(MatShellSetOperation(*mat, MATOP_VIEW, (void (*)(void))MatView_Ceed));
  PetscCall(MatShellSetOperation(*mat, MATOP_MULT, (void (*)(void))MatMult_Ceed));
  if (op_mult_transpose) PetscCall(MatShellSetOperation(*mat, MATOP_MULT_TRANSPOSE, (void (*)(void))MatMultTranspose_Ceed));
  PetscCall(MatShellSetOperation(*mat, MATOP_GET_DIAGONAL, (void (*)(void))MatGetDiagonal_Ceed));
  PetscCall(MatShellSetOperation(*mat, MATOP_GET_DIAGONAL_BLOCK, (void (*)(void))MatGetDiagonalBlock_Ceed));
  PetscCall(MatShellSetOperation(*mat, MATOP_GET_BLOCK_DIAGONAL, (void (*)(void))MatGetBlockDiagonal_Ceed));
  PetscCall(MatShellSetOperation(*mat, MATOP_GET_VBLOCK_DIAGONAL, (void (*)(void))MatGetVariableBlockDiagonal_Ceed));
  PetscCall(MatShellSetVecType(*mat, vec_type));
  PetscFunctionReturn(PETSC_SUCCESS);
}
482 
483 /**
484   @brief Copy `MATCEED` into a compatible `Mat` with type `MatShell` or `MATCEED`.
485 
486   Collective across MPI processes.
487 
488   @param[in]   mat_ceed   `MATCEED` to copy from
489   @param[out]  mat_other  `MatShell` or `MATCEED` to copy into
490 
491   @return An error code: 0 - success, otherwise - failure
492 **/
493 PetscErrorCode MatCeedCopy(Mat mat_ceed, Mat mat_other) {
494   PetscFunctionBeginUser;
495   PetscCall(MatCeedRegisterLogEvents());
496 
497   // Check type compatibility
498   {
499     PetscBool is_matceed = PETSC_FALSE, is_matshell = PETSC_FALSE;
500     MatType   mat_type_ceed, mat_type_other;
501 
502     PetscCall(MatGetType(mat_ceed, &mat_type_ceed));
503     PetscCall(PetscStrcmp(mat_type_ceed, MATCEED, &is_matceed));
504     PetscCheck(is_matceed, PETSC_COMM_SELF, PETSC_ERR_LIB, "mat_ceed must have type " MATCEED);
505     PetscCall(MatGetType(mat_other, &mat_type_other));
506     PetscCall(PetscStrcmp(mat_type_other, MATCEED, &is_matceed));
507     PetscCall(PetscStrcmp(mat_type_other, MATSHELL, &is_matceed));
508     PetscCheck(is_matceed || is_matshell, PETSC_COMM_SELF, PETSC_ERR_LIB, "mat_other must have type " MATCEED " or " MATSHELL);
509   }
510 
511   // Check dimension compatibility
512   {
513     PetscInt X_l_ceed_size, X_g_ceed_size, Y_l_ceed_size, Y_g_ceed_size, X_l_other_size, X_g_other_size, Y_l_other_size, Y_g_other_size;
514 
515     PetscCall(MatGetSize(mat_ceed, &Y_g_ceed_size, &X_g_ceed_size));
516     PetscCall(MatGetLocalSize(mat_ceed, &Y_l_ceed_size, &X_l_ceed_size));
517     PetscCall(MatGetSize(mat_ceed, &Y_g_other_size, &X_g_other_size));
518     PetscCall(MatGetLocalSize(mat_ceed, &Y_l_other_size, &X_l_other_size));
519     PetscCheck((Y_g_ceed_size == Y_g_other_size) && (X_g_ceed_size == X_g_other_size) && (Y_l_ceed_size == Y_l_other_size) &&
520                    (X_l_ceed_size == X_l_other_size),
521                PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ,
522                "mat_ceed and mat_other must have compatible sizes; found mat_ceed (Global: %" PetscInt_FMT ", %" PetscInt_FMT
523                "; Local: %" PetscInt_FMT ", %" PetscInt_FMT ") mat_other (Global: %" PetscInt_FMT ", %" PetscInt_FMT "; Local: %" PetscInt_FMT
524                ", %" PetscInt_FMT ")",
525                Y_g_ceed_size, X_g_ceed_size, Y_l_ceed_size, X_l_ceed_size, Y_g_other_size, X_g_other_size, Y_l_other_size, X_l_other_size);
526   }
527 
528   // Convert
529   {
530     VecType        vec_type;
531     MatCeedContext ctx;
532 
533     PetscCall(PetscObjectChangeTypeName((PetscObject)mat_other, MATCEED));
534     PetscCall(MatShellGetContext(mat_ceed, &ctx));
535     PetscCall(MatCeedContextReference(ctx));
536     PetscCall(MatShellSetContext(mat_other, ctx));
537     PetscCall(MatShellSetContextDestroy(mat_other, (PetscCtxDestroyFn *)MatCeedContextDestroy));
538     PetscCall(MatShellSetOperation(mat_other, MATOP_VIEW, (void (*)(void))MatView_Ceed));
539     PetscCall(MatShellSetOperation(mat_other, MATOP_MULT, (void (*)(void))MatMult_Ceed));
540     if (ctx->op_mult_transpose) PetscCall(MatShellSetOperation(mat_other, MATOP_MULT_TRANSPOSE, (void (*)(void))MatMultTranspose_Ceed));
541     PetscCall(MatShellSetOperation(mat_other, MATOP_GET_DIAGONAL, (void (*)(void))MatGetDiagonal_Ceed));
542     PetscCall(MatShellSetOperation(mat_other, MATOP_GET_DIAGONAL_BLOCK, (void (*)(void))MatGetDiagonalBlock_Ceed));
543     PetscCall(MatShellSetOperation(mat_other, MATOP_GET_BLOCK_DIAGONAL, (void (*)(void))MatGetBlockDiagonal_Ceed));
544     PetscCall(MatShellSetOperation(mat_other, MATOP_GET_VBLOCK_DIAGONAL, (void (*)(void))MatGetVariableBlockDiagonal_Ceed));
545     {
546       PetscInt block_size;
547 
548       PetscCall(MatGetBlockSize(mat_ceed, &block_size));
549       if (block_size > 1) PetscCall(MatSetBlockSize(mat_other, block_size));
550     }
551     {
552       PetscInt        num_blocks;
553       const PetscInt *block_sizes;
554 
555       PetscCall(MatGetVariableBlockSizes(mat_ceed, &num_blocks, &block_sizes));
556       if (num_blocks) PetscCall(MatSetVariableBlockSizes(mat_other, num_blocks, (PetscInt *)block_sizes));
557     }
558     PetscCall(DMGetVecType(ctx->dm_x, &vec_type));
559     PetscCall(MatShellSetVecType(mat_other, vec_type));
560   }
561   PetscFunctionReturn(PETSC_SUCCESS);
562 }
563 
564 /**
565   @brief Mark `CeedQFunction` data as updated and the `CeedQFunction` as requiring re-assembly for a `MatCEED`.
566 
567   Collective across MPI processes.
568 
569   @param[in]   mat_ceed       `MATCEED`
  @param[in]   update_needed  Boolean flag indicating `CeedQFunction` update needed
571 
572   @return An error code: 0 - success, otherwise - failure
573 **/
574 PetscErrorCode MatCeedSetAssemblyDataUpdateNeeded(Mat mat_ceed, PetscBool update_needed) {
575   MatCeedContext ctx;
576 
577   PetscFunctionBeginUser;
578   PetscCall(MatShellGetContext(mat_ceed, &ctx));
579   PetscCallCeed(ctx->ceed, CeedOperatorSetQFunctionAssemblyDataUpdateNeeded(ctx->op_mult, update_needed));
580   if (ctx->op_mult_transpose) {
581     PetscCallCeed(ctx->ceed, CeedOperatorSetQFunctionAssemblyDataUpdateNeeded(ctx->op_mult_transpose, update_needed));
582   }
583   if (update_needed) {
584     PetscCall(MatAssemblyBegin(mat_ceed, MAT_FINAL_ASSEMBLY));
585     PetscCall(MatAssemblyEnd(mat_ceed, MAT_FINAL_ASSEMBLY));
586   }
587   PetscFunctionReturn(PETSC_SUCCESS);
588 }
589 
590 /**
591   @brief Setup a `Mat` with the same COO pattern as a `MatCEED`.
592 
593   Collective across MPI processes.
594 
595   @param[in]   mat_ceed  `MATCEED`
596   @param[out]  mat_coo   Sparse `Mat` with same COO pattern
597 
598   @return An error code: 0 - success, otherwise - failure
599 **/
PetscErrorCode MatCeedCreateMatCOO(Mat mat_ceed, Mat *mat_coo) {
  MatCeedContext ctx;

  PetscFunctionBeginUser;
  PetscCall(MatShellGetContext(mat_ceed, &ctx));

  PetscCheck(ctx->dm_x == ctx->dm_y, PetscObjectComm((PetscObject)mat_ceed), PETSC_ERR_SUP, "COO assembly only supported for MATCEED on a single DM");

  // Check cl mat type
  {
    PetscBool is_coo_mat_type_cl = PETSC_FALSE;
    char      coo_mat_type_cl[64];

    // Check for specific CL coo mat type for this Mat; a command-line override replaces the context's default COO MatType
    {
      const char *mat_ceed_prefix = NULL;

      PetscCall(MatGetOptionsPrefix(mat_ceed, &mat_ceed_prefix));
      PetscOptionsBegin(PetscObjectComm((PetscObject)mat_ceed), mat_ceed_prefix, "", NULL);
      PetscCall(PetscOptionsFList("-ceed_coo_mat_type", "Default MATCEED COO assembly MatType", NULL, MatList, coo_mat_type_cl, coo_mat_type_cl,
                                  sizeof(coo_mat_type_cl), &is_coo_mat_type_cl));
      PetscOptionsEnd();
      if (is_coo_mat_type_cl) {
        // Replace the previously stored default with the command-line value
        PetscCall(PetscFree(ctx->coo_mat_type));
        PetscCall(PetscStrallocpy(coo_mat_type_cl, &ctx->coo_mat_type));
      }
    }
  }

  // Create sparse matrix
  // The DM's MatType is saved, temporarily swapped to the COO MatType for DMCreateMatrix, then restored
  {
    MatType dm_mat_type, dm_mat_type_copy;

    PetscCall(DMGetMatType(ctx->dm_x, &dm_mat_type));
    PetscCall(PetscStrallocpy(dm_mat_type, (char **)&dm_mat_type_copy));
    PetscCall(DMSetMatType(ctx->dm_x, ctx->coo_mat_type));
    PetscCall(DMCreateMatrix(ctx->dm_x, mat_coo));
    PetscCall(DMSetMatType(ctx->dm_x, dm_mat_type_copy));
    PetscCall(PetscFree(dm_mat_type_copy));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
642 
643 /**
644   @brief Setup the COO preallocation `MATCEED` into a `MATAIJ` or similar.
645          The caller is responsible for assuring the global and local sizes are compatible, otherwise this function will fail.
646 
647   Collective across MPI processes.
648 
649   @param[in]      mat_ceed  `MATCEED` to assemble
650   @param[in,out]  mat_coo   `MATAIJ` or similar to assemble into
651 
652   @return An error code: 0 - success, otherwise - failure
653 **/
PetscErrorCode MatCeedSetPreallocationCOO(Mat mat_ceed, Mat mat_coo) {
  MatCeedContext ctx;

  PetscFunctionBeginUser;
  PetscCall(MatShellGetContext(mat_ceed, &ctx));

  {
    PetscInt     *rows_petsc = NULL, *cols_petsc = NULL;
    CeedInt      *rows_ceed, *cols_ceed;
    PetscCount    num_entries;
    PetscLogStage stage_amg_setup;

    // -- Assemble sparsity pattern if mat hasn't been assembled before
    // Reuse the named log stage if some other code path already registered it
    PetscCall(PetscLogStageGetId("MatCEED Asm Setup", &stage_amg_setup));
    if (stage_amg_setup == -1) {
      PetscCall(PetscLogStageRegister("MatCEED Asm Setup", &stage_amg_setup));
    }
    PetscCall(PetscLogStagePush(stage_amg_setup));
    PetscCall(PetscLogEventBegin(MATCEED_SETUP_FULL, mat_ceed, mat_coo, NULL, NULL));
    PetscCall(PetscLogEventBegin(MATCEED_SETUP_FULL_CEEDOP, mat_ceed, mat_coo, NULL, NULL));
    // libCEED computes the full (row, col) index pattern of the operator
    PetscCallCeed(ctx->ceed, CeedOperatorLinearAssembleSymbolic(ctx->op_mult, &num_entries, &rows_ceed, &cols_ceed));
    PetscCall(PetscLogEventEnd(MATCEED_SETUP_FULL_CEEDOP, mat_ceed, mat_coo, NULL, NULL));
    // Convert CeedInt index arrays to PetscInt; converted arrays are freed with free() below
    PetscCall(IntArrayCeedToPetsc(num_entries, &rows_ceed, &rows_petsc));
    PetscCall(IntArrayCeedToPetsc(num_entries, &cols_ceed, &cols_petsc));
    PetscCall(MatSetPreallocationCOOLocal(mat_coo, num_entries, rows_petsc, cols_petsc));
    free(rows_petsc);
    free(cols_petsc);
    // Lazily create the CeedVector that receives the assembled values on every full assembly
    if (!ctx->coo_values_full) PetscCallCeed(ctx->ceed, CeedVectorCreate(ctx->ceed, num_entries, &ctx->coo_values_full));
    // Record mat_coo so the symbolic setup is skipped on subsequent assemblies into it
    PetscCall(PetscRealloc(++ctx->num_mats_assembled_full * sizeof(Mat), &ctx->mats_assembled_full));
    ctx->mats_assembled_full[ctx->num_mats_assembled_full - 1] = mat_coo;
    PetscCall(PetscLogEventEnd(MATCEED_SETUP_FULL, mat_ceed, mat_coo, NULL, NULL));
    PetscCall(PetscLogStagePop());
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
689 
690 /**
691   @brief Assemble a `MATCEED` into a `MATAIJ` or similar.
692          The `mat_coo` preallocation is set to match the sparsity pattern of `mat_ceed`.
693          The caller is responsible for assuring the global and local sizes are compatible, otherwise this function will fail.
694 
695   Collective across MPI processes.
696 
697   @param[in]      mat_ceed  `MATCEED` to assemble
698   @param[in,out]  mat_coo   `MATAIJ` or similar to assemble into
699 
700   @return An error code: 0 - success, otherwise - failure
701 **/
PetscErrorCode MatCeedAssembleCOO(Mat mat_ceed, Mat mat_coo) {
  MatCeedContext ctx;

  PetscFunctionBeginUser;
  PetscCall(MatShellGetContext(mat_ceed, &ctx));

  // Set COO pattern if needed
  {
    CeedInt index = -1;

    // Linear scan of matrices already preallocated via MatCeedSetPreallocationCOO()
    for (PetscInt i = 0; i < ctx->num_mats_assembled_full; i++) {
      if (ctx->mats_assembled_full[i] == mat_coo) index = i;
    }
    if (index == -1) PetscCall(MatCeedSetPreallocationCOO(mat_ceed, mat_coo));
  }

  // Assemble mat_ceed
  PetscCall(PetscLogEventBegin(MATCEED_ASSEMBLE_FULL, mat_ceed, mat_coo, NULL, NULL));
  PetscCall(MatAssemblyBegin(mat_coo, MAT_FINAL_ASSEMBLY));
  {
    const CeedScalar *values;
    MatType           mat_type;
    CeedMemType       mem_type = CEED_MEM_HOST;
    PetscBool         is_spd, is_spd_known;

    // Pass device-resident values when the COO matrix type is GPU-backed (cuSPARSE or Kokkos);
    // all other types receive host memory
    PetscCall(MatGetType(mat_coo, &mat_type));
    if (strstr(mat_type, "cusparse")) mem_type = CEED_MEM_DEVICE;
    else if (strstr(mat_type, "kokkos")) mem_type = CEED_MEM_DEVICE;
    else mem_type = CEED_MEM_HOST;

    PetscCall(PetscLogEventBegin(MATCEED_ASSEMBLE_FULL_CEEDOP, mat_ceed, mat_coo, NULL, NULL));
    // libCEED fills the COO value vector in the same ordering as the symbolic pattern set during preallocation
    PetscCallCeed(ctx->ceed, CeedOperatorLinearAssemble(ctx->op_mult, ctx->coo_values_full));
    PetscCall(PetscLogEventEnd(MATCEED_ASSEMBLE_FULL_CEEDOP, mat_ceed, mat_coo, NULL, NULL));
    PetscCallCeed(ctx->ceed, CeedVectorGetArrayRead(ctx->coo_values_full, mem_type, &values));
    PetscCall(MatSetValuesCOO(mat_coo, values, INSERT_VALUES));
    // Propagate the SPD flag from the MATCEED to the assembled matrix when it is known
    PetscCall(MatIsSPDKnown(mat_ceed, &is_spd_known, &is_spd));
    if (is_spd_known) PetscCall(MatSetOption(mat_coo, MAT_SPD, is_spd));
    PetscCallCeed(ctx->ceed, CeedVectorRestoreArrayRead(ctx->coo_values_full, &values));
  }
  PetscCall(MatAssemblyEnd(mat_coo, MAT_FINAL_ASSEMBLY));
  PetscCall(PetscLogEventEnd(MATCEED_ASSEMBLE_FULL, mat_ceed, mat_coo, NULL, NULL));
  PetscFunctionReturn(PETSC_SUCCESS);
}
745 
746 /**
747   @brief Set the current value of a context field for a `MatCEED`.
748 
749   Not collective across MPI processes.
750 
751   @param[in,out]  mat    `MatCEED`
752   @param[in]      name   Name of the context field
753   @param[in]      value  New context field value
754 
755   @return An error code: 0 - success, otherwise - failure
756 **/
PetscErrorCode MatCeedSetContextDouble(Mat mat, const char *name, double value) {
  PetscBool      was_updated = PETSC_FALSE;
  MatCeedContext ctx;

  PetscFunctionBeginUser;
  PetscCall(MatShellGetContext(mat, &ctx));
  {
    CeedContextFieldLabel label = NULL;

    // Operators whose QFunction context has no field with this name are silently skipped
    PetscCallCeed(ctx->ceed, CeedOperatorGetContextFieldLabel(ctx->op_mult, name, &label));
    if (label) {
      // Sentinel initializer chosen to differ from value (for any value != -1.0); harmless here since
      // the label exists, so the read below always overwrites it
      double set_value = 2 * value + 1.0;

      // Only write when the stored value actually differs, so reassembly below is not triggered spuriously
      PetscCall(MatCeedGetContextDouble(mat, name, &set_value));
      if (set_value != value) {
        PetscCallCeed(ctx->ceed, CeedOperatorSetContextDouble(ctx->op_mult, label, &value));
        was_updated = PETSC_TRUE;
      }
    }
    if (ctx->op_mult_transpose) {
      label = NULL;
      PetscCallCeed(ctx->ceed, CeedOperatorGetContextFieldLabel(ctx->op_mult_transpose, name, &label));
      if (label) {
        double set_value = 2 * value + 1.0;

        // NOTE(review): this read consults op_mult first; if op_mult was just updated above then
        // set_value == value here and op_mult_transpose is NOT written — presumably both operators
        // share the same context object, making the write redundant. Confirm.
        PetscCall(MatCeedGetContextDouble(mat, name, &set_value));
        if (set_value != value) {
          PetscCallCeed(ctx->ceed, CeedOperatorSetContextDouble(ctx->op_mult_transpose, label, &value));
          was_updated = PETSC_TRUE;
        }
      }
    }
  }
  // Run the shell matrix through assembly so the change is observable — presumably marks cached
  // assembled copies as stale; verify against the MATSHELL assembly hook
  if (was_updated) {
    PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
796 
797 /**
798   @brief Get the current value of a context field for a `MatCEED`.
799 
800   Not collective across MPI processes.
801 
802   @param[in]   mat    `MatCEED`
803   @param[in]   name   Name of the context field
804   @param[out]  value  Current context field value
805 
806   @return An error code: 0 - success, otherwise - failure
807 **/
PetscErrorCode MatCeedGetContextDouble(Mat mat, const char *name, double *value) {
  MatCeedContext ctx;

  PetscFunctionBeginUser;
  PetscCall(MatShellGetContext(mat, &ctx));
  {
    CeedContextFieldLabel label = NULL;
    CeedOperator          op    = ctx->op_mult;

    // Look the field up on the forward operator first, falling back to the transpose operator
    PetscCallCeed(ctx->ceed, CeedOperatorGetContextFieldLabel(op, name, &label));
    if (!label && ctx->op_mult_transpose) {
      op = ctx->op_mult_transpose;
      PetscCallCeed(ctx->ceed, CeedOperatorGetContextFieldLabel(op, name, &label));
    }
    // If neither operator has the field, *value is left unchanged — callers must pre-initialize it
    if (label) {
      PetscSizeT    num_values;
      const double *values_ceed;

      // The field may hold multiple doubles; only the first entry is returned
      PetscCallCeed(ctx->ceed, CeedOperatorGetContextDoubleRead(op, label, &num_values, &values_ceed));
      *value = values_ceed[0];
      PetscCallCeed(ctx->ceed, CeedOperatorRestoreContextDoubleRead(op, label, &values_ceed));
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
833 
834 /**
835   @brief Set the current `PetscReal` value of a context field for a `MatCEED`.
836 
837   Not collective across MPI processes.
838 
839   @param[in,out]  mat    `MatCEED`
840   @param[in]      name   Name of the context field
841   @param[in]      value  New context field value
842 
843   @return An error code: 0 - success, otherwise - failure
844 **/
845 PetscErrorCode MatCeedSetContextReal(Mat mat, const char *name, PetscReal value) {
846   double value_double = value;
847 
848   PetscFunctionBeginUser;
849   PetscCall(MatCeedSetContextDouble(mat, name, value_double));
850   PetscFunctionReturn(PETSC_SUCCESS);
851 }
852 
853 /**
854   @brief Get the current `PetscReal` value of a context field for a `MatCEED`.
855 
856   Not collective across MPI processes.
857 
858   @param[in]   mat    `MatCEED`
859   @param[in]   name   Name of the context field
860   @param[out]  value  Current context field value
861 
862   @return An error code: 0 - success, otherwise - failure
863 **/
864 PetscErrorCode MatCeedGetContextReal(Mat mat, const char *name, PetscReal *value) {
865   double value_double = 0.0;
866 
867   PetscFunctionBeginUser;
868   PetscCall(MatCeedGetContextDouble(mat, name, &value_double));
869   *value = value_double;
870   PetscFunctionReturn(PETSC_SUCCESS);
871 }
872 
873 /**
874   @brief Set the current time for a `MatCEED`.
875 
876   Not collective across MPI processes.
877 
878   @param[in,out]  mat   `MatCEED`
879   @param[in]      time  Current time
880 
881   @return An error code: 0 - success, otherwise - failure
882 **/
883 PetscErrorCode MatCeedSetTime(Mat mat, PetscReal time) {
884   PetscFunctionBeginUser;
885   {
886     double time_ceed = time;
887 
888     PetscCall(MatCeedSetContextDouble(mat, "time", time_ceed));
889   }
890   PetscFunctionReturn(PETSC_SUCCESS);
891 }
892 
893 /**
894   @brief Get the current time for a `MatCEED`.
895 
896   Not collective across MPI processes.
897 
898   @param[in]   mat   `MatCEED`
899   @param[out]  time  Current time, or -1.0 if the boundary evaluator has no time field
900 
901   @return An error code: 0 - success, otherwise - failure
902 **/
903 PetscErrorCode MatCeedGetTime(Mat mat, PetscReal *time) {
904   PetscFunctionBeginUser;
905   *time = -1.0;
906   {
907     double time_ceed = -1.0;
908 
909     PetscCall(MatCeedGetContextDouble(mat, "time", &time_ceed));
910     *time = time_ceed;
911   }
912   PetscFunctionReturn(PETSC_SUCCESS);
913 }
914 
915 /**
916   @brief Set the current time step for a `MatCEED`.
917 
918   Not collective across MPI processes.
919 
920   @param[in,out]  mat  `MatCEED`
921   @param[in]      dt   Current time step
922 
923   @return An error code: 0 - success, otherwise - failure
924 **/
925 PetscErrorCode MatCeedSetDt(Mat mat, PetscReal dt) {
926   PetscFunctionBeginUser;
927   {
928     double dt_ceed = dt;
929 
930     PetscCall(MatCeedSetContextDouble(mat, "dt", dt_ceed));
931   }
932   PetscFunctionReturn(PETSC_SUCCESS);
933 }
934 
935 /**
936   @brief Set the Jacobian shifts for a `MatCEED`.
937 
938   Not collective across MPI processes.
939 
940   @param[in,out]  mat      `MatCEED`
941   @param[in]      shift_v  Velocity shift
942   @param[in]      shift_a  Acceleration shift
943 
944   @return An error code: 0 - success, otherwise - failure
945 **/
PetscErrorCode MatCeedSetShifts(Mat mat, PetscReal shift_v, PetscReal shift_a) {
  PetscFunctionBeginUser;
  // Velocity shift is always written
  {
    double shift_v_ceed = shift_v;

    PetscCall(MatCeedSetContextDouble(mat, "shift v", shift_v_ceed));
  }
  // NOTE(review): "shift a" is only updated when shift_a is nonzero, unlike "shift v" above —
  // presumably to skip second-order (acceleration) terms, but a previously set nonzero "shift a"
  // would persist when 0.0 is passed. Confirm intent.
  if (shift_a) {
    double shift_a_ceed = shift_a;

    PetscCall(MatCeedSetContextDouble(mat, "shift a", shift_a_ceed));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
960 
961 /**
962   @brief Set user context for a `MATCEED`.
963 
964   Collective across MPI processes.
965 
966   @param[in,out]  mat  `MATCEED`
967   @param[in]      f    The context destroy function, or NULL
968   @param[in]      ctx  User context, or NULL to unset
969 
970   @return An error code: 0 - success, otherwise - failure
971 **/
972 PetscErrorCode MatCeedSetContext(Mat mat, PetscCtxDestroyFn f, void *ctx) {
973   PetscContainer user_ctx = NULL;
974 
975   PetscFunctionBeginUser;
976   if (ctx) {
977     PetscCall(PetscContainerCreate(PetscObjectComm((PetscObject)mat), &user_ctx));
978     PetscCall(PetscContainerSetPointer(user_ctx, ctx));
979     PetscCall(PetscContainerSetCtxDestroy(user_ctx, f));
980   }
981   PetscCall(PetscObjectCompose((PetscObject)mat, "MatCeed user context", (PetscObject)user_ctx));
982   PetscCall(PetscContainerDestroy(&user_ctx));
983   PetscFunctionReturn(PETSC_SUCCESS);
984 }
985 
986 /**
987   @brief Retrieve the user context for a `MATCEED`.
988 
989   Collective across MPI processes.
990 
991   @param[in,out]  mat  `MATCEED`
  @param[out]     ctx  Location to return the user context pointer, set to NULL if no context is composed
993 
994   @return An error code: 0 - success, otherwise - failure
995 **/
996 PetscErrorCode MatCeedGetContext(Mat mat, void *ctx) {
997   PetscContainer user_ctx;
998 
999   PetscFunctionBeginUser;
1000   PetscCall(PetscObjectQuery((PetscObject)mat, "MatCeed user context", (PetscObject *)&user_ctx));
1001   if (user_ctx) PetscCall(PetscContainerGetPointer(user_ctx, (void **)ctx));
1002   else *(void **)ctx = NULL;
1003   PetscFunctionReturn(PETSC_SUCCESS);
1004 }
1005 /**
1006   @brief Set a user defined matrix operation for a `MATCEED` matrix.
1007 
1008   Within each user-defined routine, the user should call `MatCeedGetContext()` to obtain the user-defined context that was set by
1009 `MatCeedSetContext()`.
1010 
1011   Collective across MPI processes.
1012 
1013   @param[in,out]  mat  `MATCEED`
1014   @param[in]      op   Name of the `MatOperation`
1015   @param[in]      g    Function that provides the operation
1016 
1017   @return An error code: 0 - success, otherwise - failure
1018 **/
1019 PetscErrorCode MatCeedSetOperation(Mat mat, MatOperation op, void (*g)(void)) {
1020   PetscFunctionBeginUser;
1021   PetscCall(MatShellSetOperation(mat, op, g));
1022   PetscFunctionReturn(PETSC_SUCCESS);
1023 }
1024 
1025 /**
  @brief Sets the default COO matrix type, given as a string, for the `MATCEED`.
1027 
1028   Collective across MPI processes.
1029 
1030   @param[in,out]  mat   `MATCEED`
1031   @param[in]      type  COO `MatType` to set
1032 
1033   @return An error code: 0 - success, otherwise - failure
1034 **/
PetscErrorCode MatCeedSetCOOMatType(Mat mat, MatType type) {
  MatCeedContext ctx;

  PetscFunctionBeginUser;
  PetscCall(MatShellGetContext(mat, &ctx));
  // Check if same
  {
    size_t    len_old, len_new;
    PetscBool is_same = PETSC_FALSE;

    // Early exit when the requested type matches the current one
    PetscCall(PetscStrlen(ctx->coo_mat_type, &len_old));
    PetscCall(PetscStrlen(type, &len_new));
    if (len_old == len_new) PetscCall(PetscStrncmp(ctx->coo_mat_type, type, len_old, &is_same));
    if (is_same) PetscFunctionReturn(PETSC_SUCCESS);
  }
  // Clean up old mats in different format
  // LCOV_EXCL_START
  if (ctx->mat_assembled_full_internal) {
    // Remove the internally assembled matrix from the tracking list by shifting later entries down
    for (PetscInt i = 0; i < ctx->num_mats_assembled_full; i++) {
      if (ctx->mats_assembled_full[i] == ctx->mat_assembled_full_internal) {
        for (PetscInt j = i + 1; j < ctx->num_mats_assembled_full; j++) {
          ctx->mats_assembled_full[j - 1] = ctx->mats_assembled_full[j];
        }
        ctx->num_mats_assembled_full--;
        // Note: we'll realloc this array again, so no need to shrink the allocation
        // NOTE(review): after the shift, index i holds an unexamined entry that the loop increment skips;
        // harmless if the internal matrix appears at most once in the list — confirm
        PetscCall(MatDestroy(&ctx->mat_assembled_full_internal));
      }
    }
  }
  if (ctx->mat_assembled_pbd_internal) {
    // Same removal for the point-block diagonal tracking list
    for (PetscInt i = 0; i < ctx->num_mats_assembled_pbd; i++) {
      if (ctx->mats_assembled_pbd[i] == ctx->mat_assembled_pbd_internal) {
        for (PetscInt j = i + 1; j < ctx->num_mats_assembled_pbd; j++) {
          ctx->mats_assembled_pbd[j - 1] = ctx->mats_assembled_pbd[j];
        }
        // Note: we'll realloc this array again, so no need to shrink the allocation
        ctx->num_mats_assembled_pbd--;
        PetscCall(MatDestroy(&ctx->mat_assembled_pbd_internal));
      }
    }
  }
  // Store a private copy of the new type string
  PetscCall(PetscFree(ctx->coo_mat_type));
  PetscCall(PetscStrallocpy(type, &ctx->coo_mat_type));
  PetscFunctionReturn(PETSC_SUCCESS);
  // LCOV_EXCL_STOP
}
1081 
1082 /**
1083   @brief Gets the default COO matrix type as a string from the `MATCEED`.
1084 
1085   Collective across MPI processes.
1086 
1087   @param[in,out]  mat   `MATCEED`
  @param[out]     type  Location to return the COO `MatType`
1089 
1090   @return An error code: 0 - success, otherwise - failure
1091 **/
1092 PetscErrorCode MatCeedGetCOOMatType(Mat mat, MatType *type) {
1093   MatCeedContext ctx;
1094 
1095   PetscFunctionBeginUser;
1096   PetscCall(MatShellGetContext(mat, &ctx));
1097   *type = ctx->coo_mat_type;
1098   PetscFunctionReturn(PETSC_SUCCESS);
1099 }
1100 
1101 /**
1102   @brief Set input local vectors for `MATCEED` `MatMult()` and `MatMultTranspose()` operations.
1103 
1104   Not collective across MPI processes.
1105 
1106   @param[in,out]  mat              `MATCEED`
1107   @param[in]      X_loc            Input PETSc local vector, or NULL
1108   @param[in]      Y_loc_transpose  Input PETSc local vector for transpose operation, or NULL
1109 
1110   @return An error code: 0 - success, otherwise - failure
1111 **/
1112 PetscErrorCode MatCeedSetLocalVectors(Mat mat, Vec X_loc, Vec Y_loc_transpose) {
1113   MatCeedContext ctx;
1114 
1115   PetscFunctionBeginUser;
1116   PetscCall(MatShellGetContext(mat, &ctx));
1117   if (X_loc) {
1118     PetscInt len_old, len_new;
1119 
1120     PetscCall(VecGetSize(ctx->X_loc, &len_old));
1121     PetscCall(VecGetSize(X_loc, &len_new));
1122     PetscCheck(len_old == len_new, PETSC_COMM_SELF, PETSC_ERR_LIB, "new X_loc length %" PetscInt_FMT " should match old X_loc length %" PetscInt_FMT,
1123                len_new, len_old);
1124     PetscCall(VecReferenceCopy(X_loc, &ctx->X_loc));
1125   }
1126   if (Y_loc_transpose) {
1127     PetscInt len_old, len_new;
1128 
1129     PetscCall(VecGetSize(ctx->Y_loc_transpose, &len_old));
1130     PetscCall(VecGetSize(Y_loc_transpose, &len_new));
1131     PetscCheck(len_old == len_new, PETSC_COMM_SELF, PETSC_ERR_LIB,
1132                "new Y_loc_transpose length %" PetscInt_FMT " should match old Y_loc_transpose length %" PetscInt_FMT, len_new, len_old);
1133     PetscCall(VecReferenceCopy(Y_loc_transpose, &ctx->Y_loc_transpose));
1134   }
1135   PetscFunctionReturn(PETSC_SUCCESS);
1136 }
1137 
1138 /**
1139   @brief Get input local vectors for `MATCEED` `MatMult()` and `MatMultTranspose()` operations.
1140 
1141   Not collective across MPI processes.
1142 
1143   @param[in,out]  mat              `MATCEED`
1144   @param[out]     X_loc            Input PETSc local vector, or NULL
1145   @param[out]     Y_loc_transpose  Input PETSc local vector for transpose operation, or NULL
1146 
1147   @return An error code: 0 - success, otherwise - failure
1148 **/
1149 PetscErrorCode MatCeedGetLocalVectors(Mat mat, Vec *X_loc, Vec *Y_loc_transpose) {
1150   MatCeedContext ctx;
1151 
1152   PetscFunctionBeginUser;
1153   PetscCall(MatShellGetContext(mat, &ctx));
1154   if (X_loc) {
1155     *X_loc = NULL;
1156     PetscCall(VecReferenceCopy(ctx->X_loc, X_loc));
1157   }
1158   if (Y_loc_transpose) {
1159     *Y_loc_transpose = NULL;
1160     PetscCall(VecReferenceCopy(ctx->Y_loc_transpose, Y_loc_transpose));
1161   }
1162   PetscFunctionReturn(PETSC_SUCCESS);
1163 }
1164 
1165 /**
1166   @brief Restore input local vectors for `MATCEED` `MatMult()` and `MatMultTranspose()` operations.
1167 
1168   Not collective across MPI processes.
1169 
1170   @param[in,out]  mat              MatCeed
1171   @param[out]     X_loc            Input PETSc local vector, or NULL
1172   @param[out]     Y_loc_transpose  Input PETSc local vector for transpose operation, or NULL
1173 
1174   @return An error code: 0 - success, otherwise - failure
1175 **/
1176 PetscErrorCode MatCeedRestoreLocalVectors(Mat mat, Vec *X_loc, Vec *Y_loc_transpose) {
1177   PetscFunctionBeginUser;
1178   if (X_loc) PetscCall(VecDestroy(X_loc));
1179   if (Y_loc_transpose) PetscCall(VecDestroy(Y_loc_transpose));
1180   PetscFunctionReturn(PETSC_SUCCESS);
1181 }
1182 
1183 /**
1184   @brief Get libCEED `CeedOperator` for `MATCEED` `MatMult()` and `MatMultTranspose()` operations.
1185 
1186   Not collective across MPI processes.
1187 
1188   @param[in,out]  mat                MatCeed
1189   @param[out]     op_mult            libCEED `CeedOperator` for `MatMult()`, or NULL
1190   @param[out]     op_mult_transpose  libCEED `CeedOperator` for `MatMultTranspose()`, or NULL
1191 
1192   @return An error code: 0 - success, otherwise - failure
1193 **/
1194 PetscErrorCode MatCeedGetCeedOperators(Mat mat, CeedOperator *op_mult, CeedOperator *op_mult_transpose) {
1195   MatCeedContext ctx;
1196 
1197   PetscFunctionBeginUser;
1198   PetscCall(MatShellGetContext(mat, &ctx));
1199   if (op_mult) {
1200     *op_mult = NULL;
1201     PetscCallCeed(ctx->ceed, CeedOperatorReferenceCopy(ctx->op_mult, op_mult));
1202   }
1203   if (op_mult_transpose) {
1204     *op_mult_transpose = NULL;
1205     PetscCallCeed(ctx->ceed, CeedOperatorReferenceCopy(ctx->op_mult_transpose, op_mult_transpose));
1206   }
1207   PetscFunctionReturn(PETSC_SUCCESS);
1208 }
1209 
1210 /**
1211   @brief Restore libCEED `CeedOperator` for `MATCEED` `MatMult()` and `MatMultTranspose()` operations.
1212 
1213   Not collective across MPI processes.
1214 
1215   @param[in,out]  mat                MatCeed
1216   @param[out]     op_mult            libCEED `CeedOperator` for `MatMult()`, or NULL
1217   @param[out]     op_mult_transpose  libCEED `CeedOperator` for `MatMultTranspose()`, or NULL
1218 
1219   @return An error code: 0 - success, otherwise - failure
1220 **/
1221 PetscErrorCode MatCeedRestoreCeedOperators(Mat mat, CeedOperator *op_mult, CeedOperator *op_mult_transpose) {
1222   MatCeedContext ctx;
1223 
1224   PetscFunctionBeginUser;
1225   PetscCall(MatShellGetContext(mat, &ctx));
1226   if (op_mult) PetscCallCeed(ctx->ceed, CeedOperatorDestroy(op_mult));
1227   if (op_mult_transpose) PetscCallCeed(ctx->ceed, CeedOperatorDestroy(op_mult_transpose));
1228   PetscFunctionReturn(PETSC_SUCCESS);
1229 }
1230 
1231 /**
1232   @brief Set `PetscLogEvent` for `MATCEED` `MatMult()` and `MatMultTranspose()` operators.
1233 
1234   Not collective across MPI processes.
1235 
1236   @param[in,out]  mat                       MatCeed
  @param[in]      log_event_mult            `PetscLogEvent` for forward evaluation, or NULL
  @param[in]      log_event_mult_transpose  `PetscLogEvent` for transpose evaluation, or NULL
1239 
1240   @return An error code: 0 - success, otherwise - failure
1241 **/
1242 PetscErrorCode MatCeedSetLogEvents(Mat mat, PetscLogEvent log_event_mult, PetscLogEvent log_event_mult_transpose) {
1243   MatCeedContext ctx;
1244 
1245   PetscFunctionBeginUser;
1246   PetscCall(MatShellGetContext(mat, &ctx));
1247   if (log_event_mult) ctx->log_event_mult = log_event_mult;
1248   if (log_event_mult_transpose) ctx->log_event_mult_transpose = log_event_mult_transpose;
1249   PetscFunctionReturn(PETSC_SUCCESS);
1250 }
1251 
1252 /**
1253   @brief Get `PetscLogEvent` for `MATCEED` `MatMult()` and `MatMultTranspose()` operators.
1254 
1255   Not collective across MPI processes.
1256 
1257   @param[in,out]  mat                       MatCeed
1258   @param[out]     log_event_mult            `PetscLogEvent` for forward evaluation, or NULL
1259   @param[out]     log_event_mult_transpose  `PetscLogEvent` for transpose evaluation, or NULL
1260 
1261   @return An error code: 0 - success, otherwise - failure
1262 **/
1263 PetscErrorCode MatCeedGetLogEvents(Mat mat, PetscLogEvent *log_event_mult, PetscLogEvent *log_event_mult_transpose) {
1264   MatCeedContext ctx;
1265 
1266   PetscFunctionBeginUser;
1267   PetscCall(MatShellGetContext(mat, &ctx));
1268   if (log_event_mult) *log_event_mult = ctx->log_event_mult;
1269   if (log_event_mult_transpose) *log_event_mult_transpose = ctx->log_event_mult_transpose;
1270   PetscFunctionReturn(PETSC_SUCCESS);
1271 }
1272 
1273 /**
1274   @brief Set `CeedOperator` `PetscLogEvent` for `MATCEED` `MatMult()` and `MatMultTranspose()` operators.
1275 
1276   Not collective across MPI processes.
1277 
1278   @param[in,out]  mat                       MatCeed
  @param[in]      log_event_mult            `PetscLogEvent` for forward `CeedOperator` evaluation, or NULL
  @param[in]      log_event_mult_transpose  `PetscLogEvent` for transpose `CeedOperator` evaluation, or NULL
1281 
1282   @return An error code: 0 - success, otherwise - failure
1283 **/
1284 PetscErrorCode MatCeedSetCeedOperatorLogEvents(Mat mat, PetscLogEvent log_event_mult, PetscLogEvent log_event_mult_transpose) {
1285   MatCeedContext ctx;
1286 
1287   PetscFunctionBeginUser;
1288   PetscCall(MatShellGetContext(mat, &ctx));
1289   if (log_event_mult) ctx->log_event_ceed_mult = log_event_mult;
1290   if (log_event_mult_transpose) ctx->log_event_ceed_mult_transpose = log_event_mult_transpose;
1291   PetscFunctionReturn(PETSC_SUCCESS);
1292 }
1293 
1294 /**
1295   @brief Get `CeedOperator` `PetscLogEvent` for `MATCEED` `MatMult()` and `MatMultTranspose()` operators.
1296 
1297   Not collective across MPI processes.
1298 
1299   @param[in,out]  mat                       MatCeed
1300   @param[out]     log_event_mult            `PetscLogEvent` for forward `CeedOperator` evaluation, or NULL
1301   @param[out]     log_event_mult_transpose  `PetscLogEvent` for transpose `CeedOperator` evaluation, or NULL
1302 
1303   @return An error code: 0 - success, otherwise - failure
1304 **/
1305 PetscErrorCode MatCeedGetCeedOperatorLogEvents(Mat mat, PetscLogEvent *log_event_mult, PetscLogEvent *log_event_mult_transpose) {
1306   MatCeedContext ctx;
1307 
1308   PetscFunctionBeginUser;
1309   PetscCall(MatShellGetContext(mat, &ctx));
1310   if (log_event_mult) *log_event_mult = ctx->log_event_ceed_mult;
1311   if (log_event_mult_transpose) *log_event_mult_transpose = ctx->log_event_ceed_mult_transpose;
1312   PetscFunctionReturn(PETSC_SUCCESS);
1313 }
1314 
1315 // -----------------------------------------------------------------------------
1316 // Operator context data
1317 // -----------------------------------------------------------------------------
1318 
1319 /**
1320   @brief Setup context data for operator application.
1321 
1322   Collective across MPI processes.
1323 
1324   @param[in]   dm_x                           Input `DM`
1325   @param[in]   dm_y                           Output `DM`
1326   @param[in]   X_loc                          Input PETSc local vector, or NULL
1327   @param[in]   Y_loc_transpose                Input PETSc local vector for transpose operation, or NULL
1328   @param[in]   op_mult                        `CeedOperator` for forward evaluation
1329   @param[in]   op_mult_transpose              `CeedOperator` for transpose evaluation
1330   @param[in]   log_event_mult                 `PetscLogEvent` for forward evaluation
1331   @param[in]   log_event_mult_transpose       `PetscLogEvent` for transpose evaluation
1332   @param[in]   log_event_ceed_mult            `PetscLogEvent` for forward `CeedOperator` evaluation
1333   @param[in]   log_event_ceed_mult_transpose  `PetscLogEvent` for transpose `CeedOperator` evaluation
1334   @param[out]  ctx                            Context data for operator evaluation
1335 
1336   @return An error code: 0 - success, otherwise - failure
1337 **/
PetscErrorCode MatCeedContextCreate(DM dm_x, DM dm_y, Vec X_loc, Vec Y_loc_transpose, CeedOperator op_mult, CeedOperator op_mult_transpose,
                                    PetscLogEvent log_event_mult, PetscLogEvent log_event_mult_transpose, PetscLogEvent log_event_ceed_mult,
                                    PetscLogEvent log_event_ceed_mult_transpose, MatCeedContext *ctx) {
  CeedSize x_loc_len, y_loc_len;

  PetscFunctionBeginUser;

  // Allocate
  PetscCall(PetscNew(ctx));
  (*ctx)->ref_count = 1;  // caller owns the initial reference; see MatCeedContextReference()/Destroy()

  // Logging
  (*ctx)->log_event_mult                = log_event_mult;
  (*ctx)->log_event_mult_transpose      = log_event_mult_transpose;
  (*ctx)->log_event_ceed_mult           = log_event_ceed_mult;
  (*ctx)->log_event_ceed_mult_transpose = log_event_ceed_mult_transpose;

  // PETSc objects
  // Take references on the DMs; local vectors are optional (NULL allowed)
  PetscCall(DMReferenceCopy(dm_x, &(*ctx)->dm_x));
  PetscCall(DMReferenceCopy(dm_y, &(*ctx)->dm_y));
  if (X_loc) PetscCall(VecReferenceCopy(X_loc, &(*ctx)->X_loc));
  if (Y_loc_transpose) PetscCall(VecReferenceCopy(Y_loc_transpose, &(*ctx)->Y_loc_transpose));

  // Memtype
  // Probe a borrowed local vector to learn which memory space PETSc uses for this DM's data
  {
    const PetscScalar *x;
    Vec                X;

    PetscCall(DMGetLocalVector(dm_x, &X));
    PetscCall(VecGetArrayReadAndMemType(X, &x, &(*ctx)->mem_type));
    PetscCall(VecRestoreArrayReadAndMemType(X, &x));
    PetscCall(DMRestoreLocalVector(dm_x, &X));
  }

  // libCEED objects
  // PetscCheck (not PetscCallCeed) because the Ceed handle is unavailable until this call succeeds
  PetscCheck(CeedOperatorGetCeed(op_mult, &(*ctx)->ceed) == CEED_ERROR_SUCCESS, PETSC_COMM_SELF, PETSC_ERR_LIB,
             "retrieving Ceed context object failed");
  PetscCallCeed((*ctx)->ceed, CeedOperatorGetActiveVectorLengths(op_mult, &x_loc_len, &y_loc_len));
  PetscCallCeed((*ctx)->ceed, CeedOperatorReferenceCopy(op_mult, &(*ctx)->op_mult));
  // The transpose operator is optional
  if (op_mult_transpose) PetscCallCeed((*ctx)->ceed, CeedOperatorReferenceCopy(op_mult_transpose, &(*ctx)->op_mult_transpose));
  // Work vectors sized to the operator's active input/output lengths
  PetscCallCeed((*ctx)->ceed, CeedVectorCreate((*ctx)->ceed, x_loc_len, &(*ctx)->x_loc));
  PetscCallCeed((*ctx)->ceed, CeedVectorCreate((*ctx)->ceed, y_loc_len, &(*ctx)->y_loc));

  // Flop counting
  {
    CeedSize ceed_flops_estimate = 0;

    PetscCallCeed((*ctx)->ceed, CeedOperatorGetFlopsEstimate(op_mult, &ceed_flops_estimate));
    (*ctx)->flops_mult = ceed_flops_estimate;
    if (op_mult_transpose) {
      PetscCallCeed((*ctx)->ceed, CeedOperatorGetFlopsEstimate(op_mult_transpose, &ceed_flops_estimate));
      (*ctx)->flops_mult_transpose = ceed_flops_estimate;
    }
  }

  // Check sizes
  // Validate that the operator, DMs, and optional user-supplied local vectors all agree on lengths
  if (x_loc_len > 0 || y_loc_len > 0) {
    CeedSize ctx_x_loc_len, ctx_y_loc_len;
    PetscInt X_loc_len, dm_x_loc_len, Y_loc_len, dm_y_loc_len;
    Vec      dm_X_loc, dm_Y_loc;

    // -- Input
    PetscCall(DMGetLocalVector(dm_x, &dm_X_loc));
    PetscCall(VecGetLocalSize(dm_X_loc, &dm_x_loc_len));
    PetscCall(DMRestoreLocalVector(dm_x, &dm_X_loc));
    PetscCallCeed((*ctx)->ceed, CeedVectorGetLength((*ctx)->x_loc, &ctx_x_loc_len));
    if (X_loc) {
      PetscCall(VecGetLocalSize(X_loc, &X_loc_len));
      PetscCheck(X_loc_len == dm_x_loc_len, PETSC_COMM_SELF, PETSC_ERR_LIB,
                 "X_loc (%" PetscInt_FMT ") must match dm_x (%" PetscInt_FMT ") dimensions", X_loc_len, dm_x_loc_len);
    }
    PetscCheck(x_loc_len == dm_x_loc_len, PETSC_COMM_SELF, PETSC_ERR_LIB, "op (%" CeedSize_FMT ") must match dm_x (%" PetscInt_FMT ") dimensions",
               x_loc_len, dm_x_loc_len);
    PetscCheck(x_loc_len == ctx_x_loc_len, PETSC_COMM_SELF, PETSC_ERR_LIB, "x_loc (%" CeedSize_FMT ") must match op dimensions (%" CeedSize_FMT ")",
               x_loc_len, ctx_x_loc_len);

    // -- Output
    PetscCall(DMGetLocalVector(dm_y, &dm_Y_loc));
    PetscCall(VecGetLocalSize(dm_Y_loc, &dm_y_loc_len));
    PetscCall(DMRestoreLocalVector(dm_y, &dm_Y_loc));
    PetscCallCeed((*ctx)->ceed, CeedVectorGetLength((*ctx)->y_loc, &ctx_y_loc_len));
    PetscCheck(ctx_y_loc_len == dm_y_loc_len, PETSC_COMM_SELF, PETSC_ERR_LIB, "op (%" CeedSize_FMT ") must match dm_y (%" PetscInt_FMT ") dimensions",
               ctx_y_loc_len, dm_y_loc_len);

    // -- Transpose
    if (Y_loc_transpose) {
      PetscCall(VecGetLocalSize(Y_loc_transpose, &Y_loc_len));
      PetscCheck(Y_loc_len == dm_y_loc_len, PETSC_COMM_SELF, PETSC_ERR_LIB,
                 "Y_loc_transpose (%" PetscInt_FMT ") must match dm_y (%" PetscInt_FMT ") dimensions", Y_loc_len, dm_y_loc_len);
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
1431 
1432 /**
1433   @brief Increment reference counter for `MATCEED` context.
1434 
1435   Not collective across MPI processes.
1436 
1437   @param[in,out]  ctx  Context data
1438 
1439   @return An error code: 0 - success, otherwise - failure
1440 **/
1441 PetscErrorCode MatCeedContextReference(MatCeedContext ctx) {
1442   PetscFunctionBeginUser;
1443   ctx->ref_count++;
1444   PetscFunctionReturn(PETSC_SUCCESS);
1445 }
1446 
1447 /**
1448   @brief Copy reference for `MATCEED`.
1449          Note: If `ctx_copy` is non-null, it is assumed to be a valid pointer to a `MatCeedContext`.
1450 
1451   Not collective across MPI processes.
1452 
1453   @param[in]   ctx       Context data
1454   @param[out]  ctx_copy  Copy of pointer to context data
1455 
1456   @return An error code: 0 - success, otherwise - failure
1457 **/
PetscErrorCode MatCeedContextReferenceCopy(MatCeedContext ctx, MatCeedContext *ctx_copy) {
  PetscFunctionBeginUser;
  // Reference BEFORE destroying the old target: this order makes self-copy (*ctx_copy == ctx)
  // safe, since destroying first could drop the last reference and free ctx
  PetscCall(MatCeedContextReference(ctx));
  PetscCall(MatCeedContextDestroy(ctx_copy));
  *ctx_copy = ctx;
  PetscFunctionReturn(PETSC_SUCCESS);
}
1465 
1466 /**
1467   @brief Destroy context data for operator application.
1468 
1469   Collective across MPI processes.
1470 
1471   @param[in,out]  ctx  Context data for operator evaluation
1472 
1473   @return An error code: 0 - success, otherwise - failure
1474 **/
1475 PetscErrorCode MatCeedContextDestroy(MatCeedContext *ctx) {
1476   PetscFunctionBeginUser;
1477   if (!ctx || --(*ctx)->ref_count > 0) PetscFunctionReturn(PETSC_SUCCESS);
1478 
1479   // PETSc objects
1480   PetscCall(DMDestroy(&(*ctx)->dm_x));
1481   PetscCall(DMDestroy(&(*ctx)->dm_y));
1482   PetscCall(VecDestroy(&(*ctx)->X_loc));
1483   PetscCall(VecDestroy(&(*ctx)->Y_loc_transpose));
1484   PetscCall(MatDestroy(&(*ctx)->mat_assembled_full_internal));
1485   PetscCall(MatDestroy(&(*ctx)->mat_assembled_pbd_internal));
1486   PetscCall(PetscFree((*ctx)->coo_mat_type));
1487   PetscCall(PetscFree((*ctx)->mats_assembled_full));
1488   PetscCall(PetscFree((*ctx)->mats_assembled_pbd));
1489 
1490   // libCEED objects
1491   PetscCallCeed((*ctx)->ceed, CeedVectorDestroy(&(*ctx)->x_loc));
1492   PetscCallCeed((*ctx)->ceed, CeedVectorDestroy(&(*ctx)->y_loc));
1493   PetscCallCeed((*ctx)->ceed, CeedVectorDestroy(&(*ctx)->coo_values_full));
1494   PetscCallCeed((*ctx)->ceed, CeedVectorDestroy(&(*ctx)->coo_values_pbd));
1495   PetscCallCeed((*ctx)->ceed, CeedOperatorDestroy(&(*ctx)->op_mult));
1496   PetscCallCeed((*ctx)->ceed, CeedOperatorDestroy(&(*ctx)->op_mult_transpose));
1497   PetscCheck(CeedDestroy(&(*ctx)->ceed) == CEED_ERROR_SUCCESS, PETSC_COMM_SELF, PETSC_ERR_LIB, "destroying libCEED context object failed");
1498 
1499   // Deallocate
1500   (*ctx)->is_destroyed = PETSC_TRUE;  // Flag as destroyed in case someone has stale ref
1501   PetscCall(PetscFree(*ctx));
1502   PetscFunctionReturn(PETSC_SUCCESS);
1503 }
1504 
1505 /**
1506   @brief Compute the diagonal of an operator via libCEED.
1507 
1508   Collective across MPI processes.
1509 
1510   @param[in]   A  `MATCEED`
1511   @param[out]  D  Vector holding operator diagonal
1512 
1513   @return An error code: 0 - success, otherwise - failure
1514 **/
1515 PetscErrorCode MatGetDiagonal_Ceed(Mat A, Vec D) {
1516   PetscMemType   mem_type;
1517   Vec            D_loc;
1518   MatCeedContext ctx;
1519 
1520   PetscFunctionBeginUser;
1521   PetscCall(MatShellGetContext(A, &ctx));
1522 
1523   // Place PETSc vector in libCEED vector
1524   PetscCall(PetscLogEventBegin(MATCEED_ASSEMBLE_DIAGONAL, A, D, NULL, NULL));
1525   PetscCall(DMGetLocalVector(ctx->dm_x, &D_loc));
1526   PetscCall(VecPetscToCeed(D_loc, &mem_type, ctx->x_loc));
1527 
1528   // Compute Diagonal
1529   PetscCall(PetscLogEventBegin(MATCEED_ASSEMBLE_DIAGONAL_CEEDOP, A, D, NULL, NULL));
1530   PetscCallCeed(ctx->ceed, CeedOperatorLinearAssembleDiagonal(ctx->op_mult, ctx->x_loc, CEED_REQUEST_IMMEDIATE));
1531   PetscCall(PetscLogEventEnd(MATCEED_ASSEMBLE_DIAGONAL_CEEDOP, A, D, NULL, NULL));
1532 
1533   // Restore PETSc vector
1534   PetscCall(VecCeedToPetsc(ctx->x_loc, mem_type, D_loc));
1535 
1536   // Local-to-Global
1537   PetscCall(VecZeroEntries(D));
1538   PetscCall(DMLocalToGlobal(ctx->dm_x, D_loc, ADD_VALUES, D));
1539   PetscCall(DMRestoreLocalVector(ctx->dm_x, &D_loc));
1540   PetscCall(PetscLogEventEnd(MATCEED_ASSEMBLE_DIAGONAL, A, D, NULL, NULL));
1541   PetscFunctionReturn(PETSC_SUCCESS);
1542 }
1543 
1544 /**
1545   @brief Compute `A X = Y` for a `MATCEED`.
1546 
1547   Collective across MPI processes.
1548 
1549   @param[in]   A  `MATCEED`
1550   @param[in]   X  Input PETSc vector
1551   @param[out]  Y  Output PETSc vector
1552 
1553   @return An error code: 0 - success, otherwise - failure
1554 **/
1555 PetscErrorCode MatMult_Ceed(Mat A, Vec X, Vec Y) {
1556   MatCeedContext ctx;
1557 
1558   PetscFunctionBeginUser;
1559   PetscCall(MatShellGetContext(A, &ctx));
1560   PetscCall(PetscLogEventBegin(ctx->log_event_mult, A, X, Y, NULL));
1561 
1562   {
1563     PetscMemType x_mem_type, y_mem_type;
1564     Vec          X_loc = ctx->X_loc, Y_loc;
1565 
1566     // Get local vectors
1567     if (!ctx->X_loc) PetscCall(DMGetLocalVector(ctx->dm_x, &X_loc));
1568     PetscCall(DMGetLocalVector(ctx->dm_y, &Y_loc));
1569 
1570     // Global-to-local
1571     PetscCall(DMGlobalToLocal(ctx->dm_x, X, INSERT_VALUES, X_loc));
1572 
1573     // Setup libCEED vectors
1574     PetscCall(VecReadPetscToCeed(X_loc, &x_mem_type, ctx->x_loc));
1575     PetscCall(VecZeroEntries(Y_loc));
1576     PetscCall(VecPetscToCeed(Y_loc, &y_mem_type, ctx->y_loc));
1577 
1578     // Apply libCEED operator
1579     PetscCall(PetscLogEventBegin(ctx->log_event_ceed_mult, A, X, Y, NULL));
1580     PetscCall(PetscLogGpuTimeBegin());
1581     PetscCallCeed(ctx->ceed, CeedOperatorApplyAdd(ctx->op_mult, ctx->x_loc, ctx->y_loc, CEED_REQUEST_IMMEDIATE));
1582     PetscCall(PetscLogGpuTimeEnd());
1583     PetscCall(PetscLogEventEnd(ctx->log_event_ceed_mult, A, X, Y, NULL));
1584 
1585     // Restore PETSc vectors
1586     PetscCall(VecReadCeedToPetsc(ctx->x_loc, x_mem_type, X_loc));
1587     PetscCall(VecCeedToPetsc(ctx->y_loc, y_mem_type, Y_loc));
1588 
1589     // Local-to-global
1590     PetscCall(VecZeroEntries(Y));
1591     PetscCall(DMLocalToGlobal(ctx->dm_y, Y_loc, ADD_VALUES, Y));
1592 
1593     // Restore local vectors, as needed
1594     if (!ctx->X_loc) PetscCall(DMRestoreLocalVector(ctx->dm_x, &X_loc));
1595     PetscCall(DMRestoreLocalVector(ctx->dm_y, &Y_loc));
1596   }
1597 
1598   // Log flops
1599   if (PetscMemTypeDevice(ctx->mem_type)) PetscCall(PetscLogGpuFlops(ctx->flops_mult));
1600   else PetscCall(PetscLogFlops(ctx->flops_mult));
1601   PetscCall(PetscLogEventEnd(ctx->log_event_mult, A, X, Y, NULL));
1602   PetscFunctionReturn(PETSC_SUCCESS);
1603 }
1604 
1605 /**
1606   @brief Compute `A^T Y = X` for a `MATCEED`.
1607 
1608   Collective across MPI processes.
1609 
1610   @param[in]   A  `MATCEED`
1611   @param[in]   Y  Input PETSc vector
1612   @param[out]  X  Output PETSc vector
1613 
1614   @return An error code: 0 - success, otherwise - failure
1615 **/
1616 PetscErrorCode MatMultTranspose_Ceed(Mat A, Vec Y, Vec X) {
1617   MatCeedContext ctx;
1618 
1619   PetscFunctionBeginUser;
1620   PetscCall(MatShellGetContext(A, &ctx));
1621   PetscCall(PetscLogEventBegin(ctx->log_event_mult_transpose, A, Y, X, NULL));
1622 
1623   {
1624     PetscMemType x_mem_type, y_mem_type;
1625     Vec          X_loc, Y_loc = ctx->Y_loc_transpose;
1626 
1627     // Get local vectors
1628     if (!ctx->Y_loc_transpose) PetscCall(DMGetLocalVector(ctx->dm_y, &Y_loc));
1629     PetscCall(DMGetLocalVector(ctx->dm_x, &X_loc));
1630 
1631     // Global-to-local
1632     PetscCall(DMGlobalToLocal(ctx->dm_y, Y, INSERT_VALUES, Y_loc));
1633 
1634     // Setup libCEED vectors
1635     PetscCall(VecReadPetscToCeed(Y_loc, &y_mem_type, ctx->y_loc));
1636     PetscCall(VecZeroEntries(X_loc));
1637     PetscCall(VecPetscToCeed(X_loc, &x_mem_type, ctx->x_loc));
1638 
1639     // Apply libCEED operator
1640     PetscCall(PetscLogEventBegin(ctx->log_event_ceed_mult_transpose, A, Y, X, NULL));
1641     PetscCall(PetscLogGpuTimeBegin());
1642     PetscCallCeed(ctx->ceed, CeedOperatorApplyAdd(ctx->op_mult_transpose, ctx->y_loc, ctx->x_loc, CEED_REQUEST_IMMEDIATE));
1643     PetscCall(PetscLogGpuTimeEnd());
1644     PetscCall(PetscLogEventEnd(ctx->log_event_ceed_mult_transpose, A, Y, X, NULL));
1645 
1646     // Restore PETSc vectors
1647     PetscCall(VecReadCeedToPetsc(ctx->y_loc, y_mem_type, Y_loc));
1648     PetscCall(VecCeedToPetsc(ctx->x_loc, x_mem_type, X_loc));
1649 
1650     // Local-to-global
1651     PetscCall(VecZeroEntries(X));
1652     PetscCall(DMLocalToGlobal(ctx->dm_x, X_loc, ADD_VALUES, X));
1653 
1654     // Restore local vectors, as needed
1655     if (!ctx->Y_loc_transpose) PetscCall(DMRestoreLocalVector(ctx->dm_y, &Y_loc));
1656     PetscCall(DMRestoreLocalVector(ctx->dm_x, &X_loc));
1657   }
1658 
1659   // Log flops
1660   if (PetscMemTypeDevice(ctx->mem_type)) PetscCall(PetscLogGpuFlops(ctx->flops_mult_transpose));
1661   else PetscCall(PetscLogFlops(ctx->flops_mult_transpose));
1662   PetscCall(PetscLogEventEnd(ctx->log_event_mult_transpose, A, Y, X, NULL));
1663   PetscFunctionReturn(PETSC_SUCCESS);
1664 }
1665