xref: /honee/src/mat-ceed.c (revision 5a58c7e9b4afbaa4da9fc751200e39c2273dd3bf)
1 // SPDX-FileCopyrightText: Copyright (c) 2017-2024, HONEE contributors.
2 // SPDX-License-Identifier: Apache-2.0 OR BSD-2-Clause
3 /// @file
4 /// MatCEED implementation
5 
6 #include <ceed.h>
7 #include <ceed/backend.h>
8 #include <mat-ceed-impl.h>
9 #include <mat-ceed.h>
10 #include <petsc-ceed-utils.h>
11 #include <petsc-ceed.h>
12 #include <petscdm.h>
13 #include <petscmat.h>
14 #include <stdbool.h>
15 #include <stdio.h>
16 #include <stdlib.h>
17 #include <string.h>
18 
// Class id and log events shared by all MatCEED instances; registered once per process in MatCeedRegisterLogEvents()
PetscClassId  MATCEED_CLASSID;
PetscLogEvent MATCEED_MULT, MATCEED_MULT_CEEDOP, MATCEED_MULT_TRANSPOSE, MATCEED_MULT_TRANSPOSE_CEEDOP, MATCEED_ASSEMBLE_DIAGONAL,
    MATCEED_ASSEMBLE_DIAGONAL_CEEDOP, MATCEED_SETUP_PBDIAGONAL, MATCEED_SETUP_PBDIAGONAL_CEEDOP, MATCEED_ASSEMBLE_PBDIAGONAL,
    MATCEED_ASSEMBLE_PBDIAGONAL_CEEDOP, MATCEED_SETUP_FULL, MATCEED_SETUP_FULL_CEEDOP, MATCEED_ASSEMBLE_FULL, MATCEED_ASSEMBLE_FULL_CEEDOP;
23 
24 /**
25   @brief Register MATCEED log events.
26 
27   Not collective across MPI processes.
28 
29   @return An error code: 0 - success, otherwise - failure
30 **/
31 static PetscErrorCode MatCeedRegisterLogEvents() {
32   static PetscBool registered = PETSC_FALSE;
33 
34   PetscFunctionBeginUser;
35   if (registered) PetscFunctionReturn(PETSC_SUCCESS);
36   PetscCall(PetscClassIdRegister("MatCEED", &MATCEED_CLASSID));
37   PetscCall(PetscLogEventRegister("MatCEEDMul", MATCEED_CLASSID, &MATCEED_MULT));
38   PetscCall(PetscLogEventRegister("MatCEEDMulCeed", MATCEED_CLASSID, &MATCEED_MULT_CEEDOP));
39   PetscCall(PetscLogEventRegister("MatCEEDMulT", MATCEED_CLASSID, &MATCEED_MULT_TRANSPOSE));
40   PetscCall(PetscLogEventRegister("MatCEEDMulTCeed", MATCEED_CLASSID, &MATCEED_MULT_TRANSPOSE_CEEDOP));
41   PetscCall(PetscLogEventRegister("MatCEEDAsmDiag", MATCEED_CLASSID, &MATCEED_ASSEMBLE_DIAGONAL));
42   PetscCall(PetscLogEventRegister("MatCEEDAsmPBDSU", MATCEED_CLASSID, &MATCEED_SETUP_PBDIAGONAL));
43   PetscCall(PetscLogEventRegister("MatCEEDAsmPBDSUCeed", MATCEED_CLASSID, &MATCEED_SETUP_PBDIAGONAL_CEEDOP));
44   PetscCall(PetscLogEventRegister("MatCEEDAsmPBD", MATCEED_CLASSID, &MATCEED_ASSEMBLE_PBDIAGONAL));
45   PetscCall(PetscLogEventRegister("MatCEEDAsmPBDCeed", MATCEED_CLASSID, &MATCEED_ASSEMBLE_PBDIAGONAL_CEEDOP));
46   PetscCall(PetscLogEventRegister("MatCEEDAsmSU", MATCEED_CLASSID, &MATCEED_SETUP_FULL));
47   PetscCall(PetscLogEventRegister("MatCEEDAsmSUCeed", MATCEED_CLASSID, &MATCEED_SETUP_FULL_CEEDOP));
48   PetscCall(PetscLogEventRegister("MatCEEDAsm", MATCEED_CLASSID, &MATCEED_ASSEMBLE_FULL));
49   PetscCall(PetscLogEventRegister("MatCEEDAsmCeed", MATCEED_CLASSID, &MATCEED_ASSEMBLE_FULL_CEEDOP));
50   registered = PETSC_TRUE;
51   PetscFunctionReturn(PETSC_SUCCESS);
52 }
53 
/**
  @brief Assemble the point block diagonal of a `MATCEED` into a `MATAIJ` or similar.
         The `mat_coo` preallocation is set to match the sparsity pattern of `mat_ceed`.
         The caller is responsible for assuring the global and local sizes are compatible, otherwise this function will fail.

  Collective across MPI processes.

  @param[in]      mat_ceed  `MATCEED` to assemble
  @param[in,out]  mat_coo   `MATAIJ` or similar to assemble into

  @return An error code: 0 - success, otherwise - failure
**/
static PetscErrorCode MatCeedAssemblePointBlockDiagonalCOO(Mat mat_ceed, Mat mat_coo) {
  MatCeedContext ctx;

  PetscFunctionBeginUser;
  PetscCall(MatShellGetContext(mat_ceed, &ctx));

  // Check if COO pattern set; mats_assembled_pbd tracks every Mat already preallocated for PBD assembly
  {
    PetscInt index = -1;

    for (PetscInt i = 0; i < ctx->num_mats_assembled_pbd; i++) {
      if (ctx->mats_assembled_pbd[i] == mat_coo) index = i;
    }
    if (index == -1) {
      PetscInt     *rows_petsc = NULL, *cols_petsc = NULL;
      CeedInt      *rows_ceed, *cols_ceed;
      PetscCount    num_entries;
      PetscLogStage stage_amg_setup;

      // -- Assemble sparsity pattern if mat hasn't been assembled before
      PetscCall(PetscLogStageGetId("MatCEED Asm Setup", &stage_amg_setup));
      if (stage_amg_setup == -1) {
        PetscCall(PetscLogStageRegister("MatCEED Asm Setup", &stage_amg_setup));
      }
      PetscCall(PetscLogStagePush(stage_amg_setup));
      PetscCall(PetscLogEventBegin(MATCEED_SETUP_PBDIAGONAL, mat_ceed, mat_coo, NULL, NULL));
      PetscCall(PetscLogEventBegin(MATCEED_SETUP_PBDIAGONAL_CEEDOP, mat_ceed, mat_coo, NULL, NULL));
      // libCEED computes the (row, col) COO pattern of the point block diagonal; arrays are malloc'd by libCEED
      PetscCallCeed(ctx->ceed, CeedOperatorLinearAssemblePointBlockDiagonalSymbolic(ctx->op_mult, &num_entries, &rows_ceed, &cols_ceed));
      PetscCall(PetscLogEventEnd(MATCEED_SETUP_PBDIAGONAL_CEEDOP, mat_ceed, mat_coo, NULL, NULL));
      // Convert CeedInt index arrays to PetscInt (consumes rows_ceed/cols_ceed)
      PetscCall(IntArrayCeedToPetsc(num_entries, &rows_ceed, &rows_petsc));
      PetscCall(IntArrayCeedToPetsc(num_entries, &cols_ceed, &cols_petsc));
      PetscCall(MatSetPreallocationCOOLocal(mat_coo, num_entries, rows_petsc, cols_petsc));
      // free() rather than PetscFree(): arrays originate from libCEED's malloc
      free(rows_petsc);
      free(cols_petsc);
      // Lazily create the reusable CeedVector holding assembled COO values
      if (!ctx->coo_values_pbd) PetscCallCeed(ctx->ceed, CeedVectorCreate(ctx->ceed, num_entries, &ctx->coo_values_pbd));
      PetscCall(PetscRealloc(++ctx->num_mats_assembled_pbd * sizeof(Mat), &ctx->mats_assembled_pbd));
      ctx->mats_assembled_pbd[ctx->num_mats_assembled_pbd - 1] = mat_coo;
      PetscCall(PetscLogEventEnd(MATCEED_SETUP_PBDIAGONAL, mat_ceed, mat_coo, NULL, NULL));
      PetscCall(PetscLogStagePop());
    }
  }

  // Assemble the point block diagonal values of mat_ceed into mat_coo
  PetscCall(PetscLogEventBegin(MATCEED_ASSEMBLE_PBDIAGONAL, mat_ceed, mat_coo, NULL, NULL));
  PetscCall(MatAssemblyBegin(mat_coo, MAT_FINAL_ASSEMBLY));
  {
    const CeedScalar *values;
    MatType           mat_type;
    CeedMemType       mem_type = CEED_MEM_HOST;
    PetscBool         is_spd, is_spd_known;

    // Keep values on the device when the target Mat type is device-backed
    PetscCall(MatGetType(mat_coo, &mat_type));
    if (strstr(mat_type, "cusparse")) mem_type = CEED_MEM_DEVICE;
    else if (strstr(mat_type, "kokkos")) mem_type = CEED_MEM_DEVICE;
    else mem_type = CEED_MEM_HOST;

    PetscCall(PetscLogEventBegin(MATCEED_ASSEMBLE_PBDIAGONAL_CEEDOP, mat_ceed, mat_coo, NULL, NULL));
    PetscCallCeed(ctx->ceed, CeedOperatorLinearAssemblePointBlockDiagonal(ctx->op_mult, ctx->coo_values_pbd, CEED_REQUEST_IMMEDIATE));
    PetscCall(PetscLogEventEnd(MATCEED_ASSEMBLE_PBDIAGONAL_CEEDOP, mat_ceed, mat_coo, NULL, NULL));
    PetscCallCeed(ctx->ceed, CeedVectorGetArrayRead(ctx->coo_values_pbd, mem_type, &values));
    PetscCall(MatSetValuesCOO(mat_coo, values, INSERT_VALUES));
    // Propagate the SPD flag from mat_ceed, if known, so preconditioners can exploit it
    PetscCall(MatIsSPDKnown(mat_ceed, &is_spd_known, &is_spd));
    if (is_spd_known) PetscCall(MatSetOption(mat_coo, MAT_SPD, is_spd));
    PetscCallCeed(ctx->ceed, CeedVectorRestoreArrayRead(ctx->coo_values_pbd, &values));
  }
  PetscCall(MatAssemblyEnd(mat_coo, MAT_FINAL_ASSEMBLY));
  PetscCall(PetscLogEventEnd(MATCEED_ASSEMBLE_PBDIAGONAL, mat_ceed, mat_coo, NULL, NULL));
  PetscFunctionReturn(PETSC_SUCCESS);
}
135 
/**
  @brief Assemble inner `Mat` for diagonal `PC` operations.

  Collective across MPI processes.

  @param[in]   mat_ceed      `MATCEED` to invert
  @param[in]   use_ceed_pbd  Boolean flag to use libCEED point block diagonal (PBD) assembly;
                             otherwise the full matrix is assembled
  @param[out]  mat_inner     Inner `Mat` for diagonal operations (may be NULL if not needed);
                             owned by the context, caller must not destroy

  @return An error code: 0 - success, otherwise - failure
**/
static PetscErrorCode MatCeedAssembleInnerBlockDiagonalMat(Mat mat_ceed, PetscBool use_ceed_pbd, Mat *mat_inner) {
  MatCeedContext ctx;

  PetscFunctionBeginUser;
  PetscCall(MatShellGetContext(mat_ceed, &ctx));
  if (use_ceed_pbd) {
    // Lazily create the internal PBD matrix with matching COO pattern
    if (!ctx->mat_assembled_pbd_internal) PetscCall(MatCeedCreateMatCOO(mat_ceed, &ctx->mat_assembled_pbd_internal));

    // Assemble mat_assembled_pbd_internal
    PetscCall(MatCeedAssemblePointBlockDiagonalCOO(mat_ceed, ctx->mat_assembled_pbd_internal));
    if (mat_inner) *mat_inner = ctx->mat_assembled_pbd_internal;
  } else {
    // Lazily create the internal fully-assembled matrix with matching COO pattern
    if (!ctx->mat_assembled_full_internal) PetscCall(MatCeedCreateMatCOO(mat_ceed, &ctx->mat_assembled_full_internal));

    // Assemble mat_assembled_full_internal
    PetscCall(MatCeedAssembleCOO(mat_ceed, ctx->mat_assembled_full_internal));
    if (mat_inner) *mat_inner = ctx->mat_assembled_full_internal;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
169 
/**
  @brief Get on-process diagonal block of `MATCEED`.

  This is a block per-process of the diagonal of the global matrix.
  This is NOT the diagonal blocks associated with the block size of the matrix (i.e. `MatSetBlockSize()` has no effect on this function).

  Collective across MPI processes.

  @param[in]   mat_ceed   `MATCEED` to query
  @param[out]  mat_block  The diagonal block matrix; owned by the internal assembled Mat, caller must not destroy

  @return An error code: 0 - success, otherwise - failure
**/
static PetscErrorCode MatGetDiagonalBlock_Ceed(Mat mat_ceed, Mat *mat_block) {
  MatCeedContext ctx;

  PetscFunctionBeginUser;
  PetscCall(MatShellGetContext(mat_ceed, &ctx));

  // Lazily create the internal fully-assembled matrix with matching COO pattern
  if (!ctx->mat_assembled_full_internal) PetscCall(MatCeedCreateMatCOO(mat_ceed, &ctx->mat_assembled_full_internal));

  // Assemble mat_assembled_full_internal with current operator values
  PetscCall(MatCeedAssembleCOO(mat_ceed, ctx->mat_assembled_full_internal));

  // Delegate to the assembled matrix's diagonal block extraction
  PetscCall(MatGetDiagonalBlock(ctx->mat_assembled_full_internal, mat_block));
  PetscFunctionReturn(PETSC_SUCCESS);
}
199 
/**
  @brief Invert `MATCEED` diagonal block for Jacobi.

  Collective across MPI processes.

  @param[in]   mat_ceed  `MATCEED` to invert
  @param[out]  values    The block inverses in column major order; owned by the inner Mat

  @return An error code: 0 - success, otherwise - failure
**/
static PetscErrorCode MatInvertBlockDiagonal_Ceed(Mat mat_ceed, const PetscScalar **values) {
  Mat            mat_inner = NULL;
  MatCeedContext ctx;

  PetscFunctionBeginUser;
  PetscCall(MatShellGetContext(mat_ceed, &ctx));

  // Assemble inner mat if needed; PBD assembly is used only when libCEED supports it for this operator
  PetscCall(MatCeedAssembleInnerBlockDiagonalMat(mat_ceed, ctx->is_ceed_pbd_valid, &mat_inner));

  // Invert PB diagonal via the assembled inner Mat
  PetscCall(MatInvertBlockDiagonal(mat_inner, values));
  PetscFunctionReturn(PETSC_SUCCESS);
}
224 
/**
  @brief Invert `MATCEED` variable diagonal block for Jacobi.

  Collective across MPI processes.

  @param[in]   mat_ceed     `MATCEED` to invert
  @param[in]   num_blocks   The number of blocks on the process
  @param[in]   block_sizes  The size of each block on the process
  @param[out]  values       The block inverses in column major order

  @return An error code: 0 - success, otherwise - failure
**/
static PetscErrorCode MatInvertVariableBlockDiagonal_Ceed(Mat mat_ceed, PetscInt num_blocks, const PetscInt *block_sizes, PetscScalar *values) {
  Mat            mat_inner = NULL;
  MatCeedContext ctx;

  PetscFunctionBeginUser;
  PetscCall(MatShellGetContext(mat_ceed, &ctx));

  // Assemble inner mat if needed; variable PBD assembly is used only when libCEED supports it for this operator
  PetscCall(MatCeedAssembleInnerBlockDiagonalMat(mat_ceed, ctx->is_ceed_vpbd_valid, &mat_inner));

  // Invert variable-size PB diagonal via the assembled inner Mat
  PetscCall(MatInvertVariableBlockDiagonal(mat_inner, num_blocks, block_sizes, values));
  PetscFunctionReturn(PETSC_SUCCESS);
}
251 
252 /**
253   @brief View `MATCEED`.
254 
255   Collective across MPI processes.
256 
257   @param[in]   mat_ceed  `MATCEED` to view
258   @param[in]   viewer    The visualization context
259 
260   @return An error code: 0 - success, otherwise - failure
261 **/
262 static PetscErrorCode MatView_Ceed(Mat mat_ceed, PetscViewer viewer) {
263   PetscBool         is_ascii;
264   PetscViewerFormat format;
265   PetscMPIInt       size, rank;
266   MatCeedContext    ctx;
267 
268   PetscFunctionBeginUser;
269   PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2);
270   PetscCall(MatShellGetContext(mat_ceed, &ctx));
271   if (!viewer) PetscCall(PetscViewerASCIIGetStdout(PetscObjectComm((PetscObject)mat_ceed), &viewer));
272 
273   PetscCall(PetscViewerGetFormat(viewer, &format));
274   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)mat_ceed), &size));
275   if (size == 1 && format == PETSC_VIEWER_LOAD_BALANCE) PetscFunctionReturn(PETSC_SUCCESS);
276 
277   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat_ceed), &rank));
278   if (rank != 0) PetscFunctionReturn(PETSC_SUCCESS);
279 
280   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &is_ascii));
281   {
282     PetscBool is_detailed     = format == PETSC_VIEWER_ASCII_INFO_DETAIL;
283     char      rank_string[16] = {'\0'};
284     FILE     *file;
285 
286     PetscCall(PetscViewerASCIIPrintf(viewer, "MatCEED:\n  Default COO MatType:%s\n", ctx->coo_mat_type));
287     PetscCall(PetscViewerASCIIPushTab(viewer));  // MatCEED
288     PetscCall(PetscSNPrintf(rank_string, 16, "on Rank %d", rank));
289     PetscCall(PetscViewerASCIIPrintf(viewer, "CeedOperator Apply %s:\n", is_detailed ? rank_string : "Summary"));
290     PetscCall(PetscViewerASCIIGetPointer(viewer, &file));
291     PetscCall(PetscViewerASCIIPushTab(viewer));  // CeedOperator
292     if (is_detailed) PetscCallCeed(ctx->ceed, CeedOperatorView(ctx->op_mult, file));
293     else PetscCallCeed(ctx->ceed, CeedOperatorViewTerse(ctx->op_mult, file));
294     PetscCall(PetscViewerASCIIPopTab(viewer));  // CeedOperator
295     if (ctx->op_mult_transpose) {
296       PetscCall(PetscViewerASCIIPrintf(viewer, "CeedOperator ApplyTranspose %s:\n", is_detailed ? rank_string : "Summary"));
297       PetscCall(PetscViewerASCIIPushTab(viewer));  // CeedOperator
298       if (is_detailed) PetscCallCeed(ctx->ceed, CeedOperatorView(ctx->op_mult_transpose, file));
299       else PetscCallCeed(ctx->ceed, CeedOperatorViewTerse(ctx->op_mult_transpose, file));
300       PetscCall(PetscViewerASCIIPopTab(viewer));  // CeedOperator
301     }
302     PetscCall(PetscViewerASCIIPopTab(viewer));  // MatCEED
303   }
304   PetscFunctionReturn(PETSC_SUCCESS);
305 }
306 
307 // -----------------------------------------------------------------------------
308 // MatCeed
309 // -----------------------------------------------------------------------------
310 
/**
  @brief Create PETSc `Mat` from libCEED operators.

  Collective across MPI processes.

  @param[in]   dm_x               Input `DM`
  @param[in]   dm_y               Output `DM`; NULL uses `dm_x` (square operator)
  @param[in]   op_mult            `CeedOperator` for forward evaluation
  @param[in]   op_mult_transpose  `CeedOperator` for transpose evaluation; may be NULL
  @param[out]  mat                New MatCeed

  @return An error code: 0 - success, otherwise - failure
**/
PetscErrorCode MatCreateCeed(DM dm_x, DM dm_y, CeedOperator op_mult, CeedOperator op_mult_transpose, Mat *mat) {
  PetscInt       X_l_size, X_g_size, Y_l_size, Y_g_size;
  VecType        vec_type;
  MatCeedContext ctx;

  PetscFunctionBeginUser;
  PetscCall(MatCeedRegisterLogEvents());

  // Collect context data: global/local sizes for input (X) and output (Y) spaces
  PetscCall(DMGetVecType(dm_x, &vec_type));
  {
    Vec X;

    PetscCall(DMGetGlobalVector(dm_x, &X));
    PetscCall(VecGetSize(X, &X_g_size));
    PetscCall(VecGetLocalSize(X, &X_l_size));
    PetscCall(DMRestoreGlobalVector(dm_x, &X));
  }
  if (dm_y) {
    Vec Y;

    PetscCall(DMGetGlobalVector(dm_y, &Y));
    PetscCall(VecGetSize(Y, &Y_g_size));
    PetscCall(VecGetLocalSize(Y, &Y_l_size));
    PetscCall(DMRestoreGlobalVector(dm_y, &Y));
  } else {
    // NULL dm_y means a square operator on dm_x
    dm_y     = dm_x;
    Y_g_size = X_g_size;
    Y_l_size = X_l_size;
  }

  // Create context; work vectors are copied by the context so the locals are destroyed after
  {
    Vec X_loc, Y_loc_transpose = NULL;

    PetscCall(DMCreateLocalVector(dm_x, &X_loc));
    PetscCall(VecZeroEntries(X_loc));
    if (op_mult_transpose) {
      PetscCall(DMCreateLocalVector(dm_y, &Y_loc_transpose));
      PetscCall(VecZeroEntries(Y_loc_transpose));
    }
    PetscCall(MatCeedContextCreate(dm_x, dm_y, X_loc, Y_loc_transpose, op_mult, op_mult_transpose, MATCEED_MULT, MATCEED_MULT_TRANSPOSE,
                                   MATCEED_MULT_CEEDOP, MATCEED_MULT_TRANSPOSE_CEEDOP, &ctx));
    PetscCall(VecDestroy(&X_loc));
    PetscCall(VecDestroy(&Y_loc_transpose));
  }

  // Create mat
  PetscCall(MatCreateShell(PetscObjectComm((PetscObject)dm_x), Y_l_size, X_l_size, Y_g_size, X_g_size, ctx, mat));
  PetscCall(PetscObjectChangeTypeName((PetscObject)*mat, MATCEED));
  // -- Set block and variable block sizes (only meaningful for square operators on a single DM)
  if (dm_x == dm_y) {
    MatType dm_mat_type, dm_mat_type_copy;
    Mat     temp_mat;

    // Temporarily force MATAIJ so DMCreateMatrix() reports block size info, then restore the DM's MatType
    PetscCall(DMGetMatType(dm_x, &dm_mat_type));
    PetscCall(PetscStrallocpy(dm_mat_type, (char **)&dm_mat_type_copy));
    PetscCall(DMSetMatType(dm_x, MATAIJ));
    PetscCall(DMCreateMatrix(dm_x, &temp_mat));
    PetscCall(DMSetMatType(dm_x, dm_mat_type_copy));
    PetscCall(PetscFree(dm_mat_type_copy));

    {
      PetscInt        block_size, num_blocks, max_vblock_size = PETSC_INT_MAX;
      const PetscInt *vblock_sizes;

      // -- Get block sizes
      PetscCall(MatGetBlockSize(temp_mat, &block_size));
      PetscCall(MatGetVariableBlockSizes(temp_mat, &num_blocks, &vblock_sizes));
      {
        // Global max of the per-process max variable block size
        PetscInt local_min_max[2] = {0}, global_min_max[2] = {0, PETSC_INT_MAX};

        for (PetscInt i = 0; i < num_blocks; i++) local_min_max[1] = PetscMax(local_min_max[1], vblock_sizes[i]);
        PetscCall(PetscGlobalMinMaxInt(PetscObjectComm((PetscObject)dm_x), local_min_max, global_min_max));
        max_vblock_size = global_min_max[1];
      }

      // -- Copy block sizes
      if (block_size > 1) PetscCall(MatSetBlockSize(*mat, block_size));
      if (num_blocks) PetscCall(MatSetVariableBlockSizes(*mat, num_blocks, (PetscInt *)vblock_sizes));

      // -- Check libCEED compatibility: (variable) point block diagonal assembly requires a single
      //    active basis per sub-operator with component count matching the block size
      {
        bool is_composite;

        ctx->is_ceed_pbd_valid  = PETSC_TRUE;
        ctx->is_ceed_vpbd_valid = PETSC_TRUE;
        PetscCallCeed(ctx->ceed, CeedOperatorIsComposite(op_mult, &is_composite));
        if (is_composite) {
          CeedInt       num_sub_operators;
          CeedOperator *sub_operators;

          PetscCallCeed(ctx->ceed, CeedCompositeOperatorGetNumSub(op_mult, &num_sub_operators));
          PetscCallCeed(ctx->ceed, CeedCompositeOperatorGetSubList(op_mult, &sub_operators));
          for (CeedInt i = 0; i < num_sub_operators; i++) {
            CeedInt                  num_bases, num_comp;
            CeedBasis               *active_bases;
            CeedOperatorAssemblyData assembly_data;

            PetscCallCeed(ctx->ceed, CeedOperatorGetOperatorAssemblyData(sub_operators[i], &assembly_data));
            PetscCallCeed(ctx->ceed, CeedOperatorAssemblyDataGetBases(assembly_data, &num_bases, &active_bases, NULL, NULL, NULL, NULL));
            PetscCallCeed(ctx->ceed, CeedBasisGetNumComponents(active_bases[0], &num_comp));
            if (num_bases > 1) {
              ctx->is_ceed_pbd_valid  = PETSC_FALSE;
              ctx->is_ceed_vpbd_valid = PETSC_FALSE;
            }
            if (num_comp != block_size) ctx->is_ceed_pbd_valid = PETSC_FALSE;
            if (num_comp < max_vblock_size) ctx->is_ceed_vpbd_valid = PETSC_FALSE;
          }
        } else {
          // LCOV_EXCL_START
          CeedInt                  num_bases, num_comp;
          CeedBasis               *active_bases;
          CeedOperatorAssemblyData assembly_data;

          PetscCallCeed(ctx->ceed, CeedOperatorGetOperatorAssemblyData(op_mult, &assembly_data));
          PetscCallCeed(ctx->ceed, CeedOperatorAssemblyDataGetBases(assembly_data, &num_bases, &active_bases, NULL, NULL, NULL, NULL));
          PetscCallCeed(ctx->ceed, CeedBasisGetNumComponents(active_bases[0], &num_comp));
          if (num_bases > 1) {
            ctx->is_ceed_pbd_valid  = PETSC_FALSE;
            ctx->is_ceed_vpbd_valid = PETSC_FALSE;
          }
          if (num_comp != block_size) ctx->is_ceed_pbd_valid = PETSC_FALSE;
          if (num_comp < max_vblock_size) ctx->is_ceed_vpbd_valid = PETSC_FALSE;
          // LCOV_EXCL_STOP
        }
        {
          // All ranks must agree; take the global minimum of the validity flags
          PetscInt local_is_valid[2], global_is_valid[2];

          local_is_valid[0] = local_is_valid[1] = ctx->is_ceed_pbd_valid;
          PetscCall(PetscGlobalMinMaxInt(PetscObjectComm((PetscObject)dm_x), local_is_valid, global_is_valid));
          ctx->is_ceed_pbd_valid = global_is_valid[0];
          local_is_valid[0] = local_is_valid[1] = ctx->is_ceed_vpbd_valid;
          PetscCall(PetscGlobalMinMaxInt(PetscObjectComm((PetscObject)dm_x), local_is_valid, global_is_valid));
          ctx->is_ceed_vpbd_valid = global_is_valid[0];
        }
      }
    }
    PetscCall(MatDestroy(&temp_mat));
  }
  // -- Set internal mat type for COO assembly, matching the local vector's memory space
  {
    VecType vec_type;  // NOTE(review): shadows the outer vec_type used below for MatShellSetVecType — intentional but fragile
    MatType coo_mat_type;

    PetscCall(VecGetType(ctx->X_loc, &vec_type));
    if (strstr(vec_type, VECCUDA)) coo_mat_type = MATAIJCUSPARSE;
    else if (strstr(vec_type, VECKOKKOS)) coo_mat_type = MATAIJKOKKOS;
    else coo_mat_type = MATAIJ;
    PetscCall(PetscStrallocpy(coo_mat_type, &ctx->coo_mat_type));
  }
  // -- Set mat operations
  PetscCall(MatShellSetContextDestroy(*mat, (PetscErrorCode(*)(void *))MatCeedContextDestroy));
  PetscCall(MatShellSetOperation(*mat, MATOP_VIEW, (void (*)(void))MatView_Ceed));
  PetscCall(MatShellSetOperation(*mat, MATOP_MULT, (void (*)(void))MatMult_Ceed));
  if (op_mult_transpose) PetscCall(MatShellSetOperation(*mat, MATOP_MULT_TRANSPOSE, (void (*)(void))MatMultTranspose_Ceed));
  PetscCall(MatShellSetOperation(*mat, MATOP_GET_DIAGONAL, (void (*)(void))MatGetDiagonal_Ceed));
  PetscCall(MatShellSetOperation(*mat, MATOP_GET_DIAGONAL_BLOCK, (void (*)(void))MatGetDiagonalBlock_Ceed));
  PetscCall(MatShellSetOperation(*mat, MATOP_INVERT_BLOCK_DIAGONAL, (void (*)(void))MatInvertBlockDiagonal_Ceed));
  PetscCall(MatShellSetOperation(*mat, MATOP_INVERT_VBLOCK_DIAGONAL, (void (*)(void))MatInvertVariableBlockDiagonal_Ceed));
  PetscCall(MatShellSetVecType(*mat, vec_type));
  PetscFunctionReturn(PETSC_SUCCESS);
}
487 
488 /**
489   @brief Copy `MATCEED` into a compatible `Mat` with type `MatShell` or `MATCEED`.
490 
491   Collective across MPI processes.
492 
493   @param[in]   mat_ceed   `MATCEED` to copy from
494   @param[out]  mat_other  `MatShell` or `MATCEED` to copy into
495 
496   @return An error code: 0 - success, otherwise - failure
497 **/
498 PetscErrorCode MatCeedCopy(Mat mat_ceed, Mat mat_other) {
499   PetscFunctionBeginUser;
500   PetscCall(MatCeedRegisterLogEvents());
501 
502   // Check type compatibility
503   {
504     PetscBool is_matceed = PETSC_FALSE, is_matshell = PETSC_FALSE;
505     MatType   mat_type_ceed, mat_type_other;
506 
507     PetscCall(MatGetType(mat_ceed, &mat_type_ceed));
508     PetscCall(PetscStrcmp(mat_type_ceed, MATCEED, &is_matceed));
509     PetscCheck(is_matceed, PETSC_COMM_SELF, PETSC_ERR_LIB, "mat_ceed must have type " MATCEED);
510     PetscCall(MatGetType(mat_other, &mat_type_other));
511     PetscCall(PetscStrcmp(mat_type_other, MATCEED, &is_matceed));
512     PetscCall(PetscStrcmp(mat_type_other, MATSHELL, &is_matceed));
513     PetscCheck(is_matceed || is_matshell, PETSC_COMM_SELF, PETSC_ERR_LIB, "mat_other must have type " MATCEED " or " MATSHELL);
514   }
515 
516   // Check dimension compatibility
517   {
518     PetscInt X_l_ceed_size, X_g_ceed_size, Y_l_ceed_size, Y_g_ceed_size, X_l_other_size, X_g_other_size, Y_l_other_size, Y_g_other_size;
519 
520     PetscCall(MatGetSize(mat_ceed, &Y_g_ceed_size, &X_g_ceed_size));
521     PetscCall(MatGetLocalSize(mat_ceed, &Y_l_ceed_size, &X_l_ceed_size));
522     PetscCall(MatGetSize(mat_ceed, &Y_g_other_size, &X_g_other_size));
523     PetscCall(MatGetLocalSize(mat_ceed, &Y_l_other_size, &X_l_other_size));
524     PetscCheck((Y_g_ceed_size == Y_g_other_size) && (X_g_ceed_size == X_g_other_size) && (Y_l_ceed_size == Y_l_other_size) &&
525                    (X_l_ceed_size == X_l_other_size),
526                PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ,
527                "mat_ceed and mat_other must have compatible sizes; found mat_ceed (Global: %" PetscInt_FMT ", %" PetscInt_FMT
528                "; Local: %" PetscInt_FMT ", %" PetscInt_FMT ") mat_other (Global: %" PetscInt_FMT ", %" PetscInt_FMT "; Local: %" PetscInt_FMT
529                ", %" PetscInt_FMT ")",
530                Y_g_ceed_size, X_g_ceed_size, Y_l_ceed_size, X_l_ceed_size, Y_g_other_size, X_g_other_size, Y_l_other_size, X_l_other_size);
531   }
532 
533   // Convert
534   {
535     VecType        vec_type;
536     MatCeedContext ctx;
537 
538     PetscCall(PetscObjectChangeTypeName((PetscObject)mat_other, MATCEED));
539     PetscCall(MatShellGetContext(mat_ceed, &ctx));
540     PetscCall(MatCeedContextReference(ctx));
541     PetscCall(MatShellSetContext(mat_other, ctx));
542     PetscCall(MatShellSetContextDestroy(mat_other, (PetscErrorCode(*)(void *))MatCeedContextDestroy));
543     PetscCall(MatShellSetOperation(mat_other, MATOP_VIEW, (void (*)(void))MatView_Ceed));
544     PetscCall(MatShellSetOperation(mat_other, MATOP_MULT, (void (*)(void))MatMult_Ceed));
545     if (ctx->op_mult_transpose) PetscCall(MatShellSetOperation(mat_other, MATOP_MULT_TRANSPOSE, (void (*)(void))MatMultTranspose_Ceed));
546     PetscCall(MatShellSetOperation(mat_other, MATOP_GET_DIAGONAL, (void (*)(void))MatGetDiagonal_Ceed));
547     PetscCall(MatShellSetOperation(mat_other, MATOP_GET_DIAGONAL_BLOCK, (void (*)(void))MatGetDiagonalBlock_Ceed));
548     PetscCall(MatShellSetOperation(mat_other, MATOP_INVERT_BLOCK_DIAGONAL, (void (*)(void))MatInvertBlockDiagonal_Ceed));
549     PetscCall(MatShellSetOperation(mat_other, MATOP_INVERT_VBLOCK_DIAGONAL, (void (*)(void))MatInvertVariableBlockDiagonal_Ceed));
550     {
551       PetscInt block_size;
552 
553       PetscCall(MatGetBlockSize(mat_ceed, &block_size));
554       if (block_size > 1) PetscCall(MatSetBlockSize(mat_other, block_size));
555     }
556     {
557       PetscInt        num_blocks;
558       const PetscInt *block_sizes;
559 
560       PetscCall(MatGetVariableBlockSizes(mat_ceed, &num_blocks, &block_sizes));
561       if (num_blocks) PetscCall(MatSetVariableBlockSizes(mat_other, num_blocks, (PetscInt *)block_sizes));
562     }
563     PetscCall(DMGetVecType(ctx->dm_x, &vec_type));
564     PetscCall(MatShellSetVecType(mat_other, vec_type));
565   }
566   PetscFunctionReturn(PETSC_SUCCESS);
567 }
568 
/**
  @brief Mark `CeedQFunction` data as updated and the `CeedQFunction` as requiring re-assembly for a `MATCEED`.

  Collective across MPI processes.

  @param[in]  mat_ceed       `MATCEED`
  @param[in]  update_needed  Boolean flag indicating `CeedQFunction` update needed

  @return An error code: 0 - success, otherwise - failure
**/
PetscErrorCode MatCeedSetAssemblyDataUpdateNeeded(Mat mat_ceed, PetscBool update_needed) {
  MatCeedContext ctx;

  PetscFunctionBeginUser;
  PetscCall(MatShellGetContext(mat_ceed, &ctx));
  // Flag both forward and (if present) transpose operators for re-assembly
  PetscCallCeed(ctx->ceed, CeedOperatorSetQFunctionAssemblyDataUpdateNeeded(ctx->op_mult, update_needed));
  if (ctx->op_mult_transpose) {
    PetscCallCeed(ctx->ceed, CeedOperatorSetQFunctionAssemblyDataUpdateNeeded(ctx->op_mult_transpose, update_needed));
  }
  if (update_needed) {
    // Bump the Mat's assembly state so PETSc knows the operator values changed
    PetscCall(MatAssemblyBegin(mat_ceed, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(mat_ceed, MAT_FINAL_ASSEMBLY));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
594 
/**
  @brief Setup a `Mat` with the same COO pattern as a `MATCEED`.

  Collective across MPI processes.

  @param[in]   mat_ceed  `MATCEED`
  @param[out]  mat_coo   Sparse `Mat` with same COO pattern; caller owns and must destroy

  @return An error code: 0 - success, otherwise - failure
**/
PetscErrorCode MatCeedCreateMatCOO(Mat mat_ceed, Mat *mat_coo) {
  MatCeedContext ctx;

  PetscFunctionBeginUser;
  PetscCall(MatShellGetContext(mat_ceed, &ctx));

  PetscCheck(ctx->dm_x == ctx->dm_y, PetscObjectComm((PetscObject)mat_ceed), PETSC_ERR_SUP, "COO assembly only supported for MATCEED on a single DM");

  // Check command line (cl) mat type override
  {
    PetscBool is_coo_mat_type_cl = PETSC_FALSE;
    char      coo_mat_type_cl[64];

    // Check for specific CL coo mat type for this Mat
    {
      const char *mat_ceed_prefix = NULL;

      PetscCall(MatGetOptionsPrefix(mat_ceed, &mat_ceed_prefix));
      PetscOptionsBegin(PetscObjectComm((PetscObject)mat_ceed), mat_ceed_prefix, "", NULL);
      PetscCall(PetscOptionsFList("-ceed_coo_mat_type", "Default MATCEED COO assembly MatType", NULL, MatList, coo_mat_type_cl, coo_mat_type_cl,
                                  sizeof(coo_mat_type_cl), &is_coo_mat_type_cl));
      PetscOptionsEnd();
      if (is_coo_mat_type_cl) {
        // Replace the stored default COO MatType with the command line choice
        PetscCall(PetscFree(ctx->coo_mat_type));
        PetscCall(PetscStrallocpy(coo_mat_type_cl, &ctx->coo_mat_type));
      }
    }
  }

  // Create sparse matrix; temporarily switch the DM's MatType, then restore it
  {
    MatType dm_mat_type, dm_mat_type_copy;

    PetscCall(DMGetMatType(ctx->dm_x, &dm_mat_type));
    PetscCall(PetscStrallocpy(dm_mat_type, (char **)&dm_mat_type_copy));
    PetscCall(DMSetMatType(ctx->dm_x, ctx->coo_mat_type));
    PetscCall(DMCreateMatrix(ctx->dm_x, mat_coo));
    PetscCall(DMSetMatType(ctx->dm_x, dm_mat_type_copy));
    PetscCall(PetscFree(dm_mat_type_copy));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
647 
/**
  @brief Setup the COO preallocation `MATCEED` into a `MATAIJ` or similar.
         The caller is responsible for assuring the global and local sizes are compatible, otherwise this function will fail.

  Collective across MPI processes.

  @param[in]      mat_ceed  `MATCEED` to assemble
  @param[in,out]  mat_coo   `MATAIJ` or similar to assemble into

  @return An error code: 0 - success, otherwise - failure
**/
PetscErrorCode MatCeedSetPreallocationCOO(Mat mat_ceed, Mat mat_coo) {
  MatCeedContext ctx;

  PetscFunctionBeginUser;
  PetscCall(MatShellGetContext(mat_ceed, &ctx));

  {
    PetscInt     *rows_petsc = NULL, *cols_petsc = NULL;
    CeedInt      *rows_ceed, *cols_ceed;
    PetscCount    num_entries;
    PetscLogStage stage_amg_setup;

    // -- Assemble sparsity pattern if mat hasn't been assembled before
    PetscCall(PetscLogStageGetId("MatCEED Asm Setup", &stage_amg_setup));
    if (stage_amg_setup == -1) {
      PetscCall(PetscLogStageRegister("MatCEED Asm Setup", &stage_amg_setup));
    }
    PetscCall(PetscLogStagePush(stage_amg_setup));
    PetscCall(PetscLogEventBegin(MATCEED_SETUP_FULL, mat_ceed, mat_coo, NULL, NULL));
    PetscCall(PetscLogEventBegin(MATCEED_SETUP_FULL_CEEDOP, mat_ceed, mat_coo, NULL, NULL));
    // libCEED computes the full (row, col) COO pattern; arrays are malloc'd by libCEED
    PetscCallCeed(ctx->ceed, CeedOperatorLinearAssembleSymbolic(ctx->op_mult, &num_entries, &rows_ceed, &cols_ceed));
    PetscCall(PetscLogEventEnd(MATCEED_SETUP_FULL_CEEDOP, mat_ceed, mat_coo, NULL, NULL));
    // Convert CeedInt index arrays to PetscInt (consumes rows_ceed/cols_ceed)
    PetscCall(IntArrayCeedToPetsc(num_entries, &rows_ceed, &rows_petsc));
    PetscCall(IntArrayCeedToPetsc(num_entries, &cols_ceed, &cols_petsc));
    PetscCall(MatSetPreallocationCOOLocal(mat_coo, num_entries, rows_petsc, cols_petsc));
    // free() rather than PetscFree(): arrays originate from libCEED's malloc
    free(rows_petsc);
    free(cols_petsc);
    // Lazily create the reusable CeedVector holding assembled COO values
    if (!ctx->coo_values_full) PetscCallCeed(ctx->ceed, CeedVectorCreate(ctx->ceed, num_entries, &ctx->coo_values_full));
    // Record mat_coo so MatCeedAssembleCOO() skips re-preallocation on subsequent calls
    PetscCall(PetscRealloc(++ctx->num_mats_assembled_full * sizeof(Mat), &ctx->mats_assembled_full));
    ctx->mats_assembled_full[ctx->num_mats_assembled_full - 1] = mat_coo;
    PetscCall(PetscLogEventEnd(MATCEED_SETUP_FULL, mat_ceed, mat_coo, NULL, NULL));
    PetscCall(PetscLogStagePop());
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
694 
695 /**
696   @brief Assemble a `MATCEED` into a `MATAIJ` or similar.
697          The `mat_coo` preallocation is set to match the sparsity pattern of `mat_ceed`.
698          The caller is responsible for assuring the global and local sizes are compatible, otherwise this function will fail.
699 
700   Collective across MPI processes.
701 
702   @param[in]      mat_ceed  `MATCEED` to assemble
703   @param[in,out]  mat_coo   `MATAIJ` or similar to assemble into
704 
705   @return An error code: 0 - success, otherwise - failure
706 **/
PetscErrorCode MatCeedAssembleCOO(Mat mat_ceed, Mat mat_coo) {
  MatCeedContext ctx;

  PetscFunctionBeginUser;
  PetscCall(MatShellGetContext(mat_ceed, &ctx));

  // Set COO pattern if needed
  {
    CeedInt index = -1;

    // Linear scan over the matrices whose COO pattern was already set by MatCeedSetPreallocationCOO()
    for (PetscInt i = 0; i < ctx->num_mats_assembled_full; i++) {
      if (ctx->mats_assembled_full[i] == mat_coo) index = i;
    }
    if (index == -1) PetscCall(MatCeedSetPreallocationCOO(mat_ceed, mat_coo));
  }

  // Assemble mat_ceed
  PetscCall(PetscLogEventBegin(MATCEED_ASSEMBLE_FULL, mat_ceed, mat_coo, NULL, NULL));
  PetscCall(MatAssemblyBegin(mat_coo, MAT_FINAL_ASSEMBLY));
  {
    const CeedScalar *values;
    MatType           mat_type;
    CeedMemType       mem_type = CEED_MEM_HOST;
    PetscBool         is_spd, is_spd_known;

    // For GPU matrix types, hand device memory directly to MatSetValuesCOO() to avoid a host round trip
    PetscCall(MatGetType(mat_coo, &mat_type));
    if (strstr(mat_type, "cusparse")) mem_type = CEED_MEM_DEVICE;
    else if (strstr(mat_type, "kokkos")) mem_type = CEED_MEM_DEVICE;
    else mem_type = CEED_MEM_HOST;

    PetscCall(PetscLogEventBegin(MATCEED_ASSEMBLE_FULL_CEEDOP, mat_ceed, mat_coo, NULL, NULL));
    // libCEED fills coo_values_full with the operator's nonzero values, matching the COO pattern set above
    PetscCallCeed(ctx->ceed, CeedOperatorLinearAssemble(ctx->op_mult, ctx->coo_values_full));
    PetscCall(PetscLogEventEnd(MATCEED_ASSEMBLE_FULL_CEEDOP, mat_ceed, mat_coo, NULL, NULL));
    PetscCallCeed(ctx->ceed, CeedVectorGetArrayRead(ctx->coo_values_full, mem_type, &values));
    PetscCall(MatSetValuesCOO(mat_coo, values, INSERT_VALUES));
    // Propagate the SPD flag from the MATCEED, when known
    PetscCall(MatIsSPDKnown(mat_ceed, &is_spd_known, &is_spd));
    if (is_spd_known) PetscCall(MatSetOption(mat_coo, MAT_SPD, is_spd));
    PetscCallCeed(ctx->ceed, CeedVectorRestoreArrayRead(ctx->coo_values_full, &values));
  }
  PetscCall(MatAssemblyEnd(mat_coo, MAT_FINAL_ASSEMBLY));
  PetscCall(PetscLogEventEnd(MATCEED_ASSEMBLE_FULL, mat_ceed, mat_coo, NULL, NULL));
  PetscFunctionReturn(PETSC_SUCCESS);
}
750 
751 /**
752   @brief Set the current value of a context field for a `MatCEED`.
753 
754   Not collective across MPI processes.
755 
756   @param[in,out]  mat    `MatCEED`
757   @param[in]      name   Name of the context field
758   @param[in]      value  New context field value
759 
760   @return An error code: 0 - success, otherwise - failure
761 **/
PetscErrorCode MatCeedSetContextDouble(Mat mat, const char *name, double value) {
  PetscBool      was_updated = PETSC_FALSE;
  MatCeedContext ctx;

  PetscFunctionBeginUser;
  PetscCall(MatShellGetContext(mat, &ctx));
  {
    CeedContextFieldLabel label = NULL;

    // Update the forward operator's context field, if it exists
    PetscCallCeed(ctx->ceed, CeedOperatorGetContextFieldLabel(ctx->op_mult, name, &label));
    if (label) {
      // Initialize to something that generally differs from value; overwritten by the Get below since the label exists
      double set_value = 2 * value + 1.0;

      PetscCall(MatCeedGetContextDouble(mat, name, &set_value));
      if (set_value != value) {  // skip the write when the stored value is already current
        PetscCallCeed(ctx->ceed, CeedOperatorSetContextDouble(ctx->op_mult, label, &value));
        was_updated = PETSC_TRUE;
      }
    }
    if (ctx->op_mult_transpose) {
      label = NULL;
      PetscCallCeed(ctx->ceed, CeedOperatorGetContextFieldLabel(ctx->op_mult_transpose, name, &label));
      if (label) {
        double set_value = 2 * value + 1.0;

        // NOTE(review): MatCeedGetContextDouble() reads from op_mult first; if op_mult was just
        // updated above, set_value == value here and the transpose operator is skipped — this is
        // only correct when both operators share the same context data; confirm that invariant
        PetscCall(MatCeedGetContextDouble(mat, name, &set_value));
        if (set_value != value) {
          PetscCallCeed(ctx->ceed, CeedOperatorSetContextDouble(ctx->op_mult_transpose, label, &value));
          was_updated = PETSC_TRUE;
        }
      }
    }
  }
  // Run the shell's assembly hooks after a change, presumably so cached/assembled state can react
  if (was_updated) {
    PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
801 
802 /**
803   @brief Get the current value of a context field for a `MatCEED`.
804 
805   Not collective across MPI processes.
806 
807   @param[in]   mat    `MatCEED`
808   @param[in]   name   Name of the context field
809   @param[out]  value  Current context field value
810 
811   @return An error code: 0 - success, otherwise - failure
812 **/
813 PetscErrorCode MatCeedGetContextDouble(Mat mat, const char *name, double *value) {
814   MatCeedContext ctx;
815 
816   PetscFunctionBeginUser;
817   PetscCall(MatShellGetContext(mat, &ctx));
818   {
819     CeedContextFieldLabel label = NULL;
820     CeedOperator          op    = ctx->op_mult;
821 
822     PetscCallCeed(ctx->ceed, CeedOperatorGetContextFieldLabel(op, name, &label));
823     if (!label && ctx->op_mult_transpose) {
824       op = ctx->op_mult_transpose;
825       PetscCallCeed(ctx->ceed, CeedOperatorGetContextFieldLabel(op, name, &label));
826     }
827     if (label) {
828       PetscSizeT    num_values;
829       const double *values_ceed;
830 
831       PetscCallCeed(ctx->ceed, CeedOperatorGetContextDoubleRead(op, label, &num_values, &values_ceed));
832       *value = values_ceed[0];
833       PetscCallCeed(ctx->ceed, CeedOperatorRestoreContextDoubleRead(op, label, &values_ceed));
834     }
835   }
836   PetscFunctionReturn(PETSC_SUCCESS);
837 }
838 
839 /**
840   @brief Set the current `PetscReal` value of a context field for a `MatCEED`.
841 
842   Not collective across MPI processes.
843 
844   @param[in,out]  mat    `MatCEED`
845   @param[in]      name   Name of the context field
846   @param[in]      value  New context field value
847 
848   @return An error code: 0 - success, otherwise - failure
849 **/
850 PetscErrorCode MatCeedSetContextReal(Mat mat, const char *name, PetscReal value) {
851   double value_double = value;
852 
853   PetscFunctionBeginUser;
854   PetscCall(MatCeedSetContextDouble(mat, name, value_double));
855   PetscFunctionReturn(PETSC_SUCCESS);
856 }
857 
858 /**
859   @brief Get the current `PetscReal` value of a context field for a `MatCEED`.
860 
861   Not collective across MPI processes.
862 
863   @param[in]   mat    `MatCEED`
864   @param[in]   name   Name of the context field
865   @param[out]  value  Current context field value
866 
867   @return An error code: 0 - success, otherwise - failure
868 **/
869 PetscErrorCode MatCeedGetContextReal(Mat mat, const char *name, PetscReal *value) {
870   double value_double = 0.0;
871 
872   PetscFunctionBeginUser;
873   PetscCall(MatCeedGetContextDouble(mat, name, &value_double));
874   *value = value_double;
875   PetscFunctionReturn(PETSC_SUCCESS);
876 }
877 
878 /**
879   @brief Set the current time for a `MatCEED`.
880 
881   Not collective across MPI processes.
882 
883   @param[in,out]  mat   `MatCEED`
884   @param[in]      time  Current time
885 
886   @return An error code: 0 - success, otherwise - failure
887 **/
888 PetscErrorCode MatCeedSetTime(Mat mat, PetscReal time) {
889   PetscFunctionBeginUser;
890   {
891     double time_ceed = time;
892 
893     PetscCall(MatCeedSetContextDouble(mat, "time", time_ceed));
894   }
895   PetscFunctionReturn(PETSC_SUCCESS);
896 }
897 
898 /**
899   @brief Get the current time for a `MatCEED`.
900 
901   Not collective across MPI processes.
902 
903   @param[in]   mat   `MatCEED`
904   @param[out]  time  Current time, or -1.0 if the boundary evaluator has no time field
905 
906   @return An error code: 0 - success, otherwise - failure
907 **/
908 PetscErrorCode MatCeedGetTime(Mat mat, PetscReal *time) {
909   PetscFunctionBeginUser;
910   *time = -1.0;
911   {
912     double time_ceed = -1.0;
913 
914     PetscCall(MatCeedGetContextDouble(mat, "time", &time_ceed));
915     *time = time_ceed;
916   }
917   PetscFunctionReturn(PETSC_SUCCESS);
918 }
919 
920 /**
921   @brief Set the current time step for a `MatCEED`.
922 
923   Not collective across MPI processes.
924 
925   @param[in,out]  mat  `MatCEED`
926   @param[in]      dt   Current time step
927 
928   @return An error code: 0 - success, otherwise - failure
929 **/
930 PetscErrorCode MatCeedSetDt(Mat mat, PetscReal dt) {
931   PetscFunctionBeginUser;
932   {
933     double dt_ceed = dt;
934 
935     PetscCall(MatCeedSetContextDouble(mat, "dt", dt_ceed));
936   }
937   PetscFunctionReturn(PETSC_SUCCESS);
938 }
939 
940 /**
941   @brief Set the Jacobian shifts for a `MatCEED`.
942 
943   Not collective across MPI processes.
944 
945   @param[in,out]  mat      `MatCEED`
946   @param[in]      shift_v  Velocity shift
947   @param[in]      shift_a  Acceleration shift
948 
949   @return An error code: 0 - success, otherwise - failure
950 **/
951 PetscErrorCode MatCeedSetShifts(Mat mat, PetscReal shift_v, PetscReal shift_a) {
952   PetscFunctionBeginUser;
953   {
954     double shift_v_ceed = shift_v;
955 
956     PetscCall(MatCeedSetContextDouble(mat, "shift v", shift_v_ceed));
957   }
958   if (shift_a) {
959     double shift_a_ceed = shift_a;
960 
961     PetscCall(MatCeedSetContextDouble(mat, "shift a", shift_a_ceed));
962   }
963   PetscFunctionReturn(PETSC_SUCCESS);
964 }
965 
966 /**
967   @brief Set user context for a `MATCEED`.
968 
969   Collective across MPI processes.
970 
971   @param[in,out]  mat  `MATCEED`
972   @param[in]      f    The context destroy function, or NULL
973   @param[in]      ctx  User context, or NULL to unset
974 
975   @return An error code: 0 - success, otherwise - failure
976 **/
977 PetscErrorCode MatCeedSetContext(Mat mat, PetscErrorCode (*f)(void *), void *ctx) {
978   PetscContainer user_ctx = NULL;
979 
980   PetscFunctionBeginUser;
981   if (ctx) {
982     PetscCall(PetscContainerCreate(PetscObjectComm((PetscObject)mat), &user_ctx));
983     PetscCall(PetscContainerSetPointer(user_ctx, ctx));
984     PetscCall(PetscContainerSetUserDestroy(user_ctx, f));
985   }
986   PetscCall(PetscObjectCompose((PetscObject)mat, "MatCeed user context", (PetscObject)user_ctx));
987   PetscCall(PetscContainerDestroy(&user_ctx));
988   PetscFunctionReturn(PETSC_SUCCESS);
989 }
990 
991 /**
992   @brief Retrieve the user context for a `MATCEED`.
993 
994   Collective across MPI processes.
995 
996   @param[in,out]  mat  `MATCEED`
  @param[out]     ctx  Pointer to hold the user context
998 
999   @return An error code: 0 - success, otherwise - failure
1000 **/
1001 PetscErrorCode MatCeedGetContext(Mat mat, void *ctx) {
1002   PetscContainer user_ctx;
1003 
1004   PetscFunctionBeginUser;
1005   PetscCall(PetscObjectQuery((PetscObject)mat, "MatCeed user context", (PetscObject *)&user_ctx));
1006   if (user_ctx) PetscCall(PetscContainerGetPointer(user_ctx, (void **)ctx));
1007   else *(void **)ctx = NULL;
1008   PetscFunctionReturn(PETSC_SUCCESS);
1009 }
1010 /**
1011   @brief Set a user defined matrix operation for a `MATCEED` matrix.
1012 
1013   Within each user-defined routine, the user should call `MatCeedGetContext()` to obtain the user-defined context that was set by
1014 `MatCeedSetContext()`.
1015 
1016   Collective across MPI processes.
1017 
1018   @param[in,out]  mat  `MATCEED`
1019   @param[in]      op   Name of the `MatOperation`
1020   @param[in]      g    Function that provides the operation
1021 
1022   @return An error code: 0 - success, otherwise - failure
1023 **/
1024 PetscErrorCode MatCeedSetOperation(Mat mat, MatOperation op, void (*g)(void)) {
1025   PetscFunctionBeginUser;
1026   PetscCall(MatShellSetOperation(mat, op, g));
1027   PetscFunctionReturn(PETSC_SUCCESS);
1028 }
1029 
1030 /**
  @brief Sets the default COO matrix type as a string for the `MATCEED`.
1032 
1033   Collective across MPI processes.
1034 
1035   @param[in,out]  mat   `MATCEED`
1036   @param[in]      type  COO `MatType` to set
1037 
1038   @return An error code: 0 - success, otherwise - failure
1039 **/
PetscErrorCode MatCeedSetCOOMatType(Mat mat, MatType type) {
  MatCeedContext ctx;

  PetscFunctionBeginUser;
  PetscCall(MatShellGetContext(mat, &ctx));
  // Check if same
  {
    size_t    len_old, len_new;
    PetscBool is_same = PETSC_FALSE;

    // Early return when the requested type matches the stored type string
    PetscCall(PetscStrlen(ctx->coo_mat_type, &len_old));
    PetscCall(PetscStrlen(type, &len_new));
    if (len_old == len_new) PetscCall(PetscStrncmp(ctx->coo_mat_type, type, len_old, &is_same));
    if (is_same) PetscFunctionReturn(PETSC_SUCCESS);
  }
  // Clean up old mats in different format
  // LCOV_EXCL_START
  // Remove the internal assembled matrix from the tracking array (compacting in place), then destroy it
  if (ctx->mat_assembled_full_internal) {
    for (PetscInt i = 0; i < ctx->num_mats_assembled_full; i++) {
      if (ctx->mats_assembled_full[i] == ctx->mat_assembled_full_internal) {
        for (PetscInt j = i + 1; j < ctx->num_mats_assembled_full; j++) {
          ctx->mats_assembled_full[j - 1] = ctx->mats_assembled_full[j];
        }
        ctx->num_mats_assembled_full--;
        // Note: we'll realloc this array again, so no need to shrink the allocation
        PetscCall(MatDestroy(&ctx->mat_assembled_full_internal));
      }
    }
  }
  // Same compaction for the point-block-diagonal tracking array
  if (ctx->mat_assembled_pbd_internal) {
    for (PetscInt i = 0; i < ctx->num_mats_assembled_pbd; i++) {
      if (ctx->mats_assembled_pbd[i] == ctx->mat_assembled_pbd_internal) {
        for (PetscInt j = i + 1; j < ctx->num_mats_assembled_pbd; j++) {
          ctx->mats_assembled_pbd[j - 1] = ctx->mats_assembled_pbd[j];
        }
        // Note: we'll realloc this array again, so no need to shrink the allocation
        ctx->num_mats_assembled_pbd--;
        PetscCall(MatDestroy(&ctx->mat_assembled_pbd_internal));
      }
    }
  }
  // Store a private copy of the new type string
  PetscCall(PetscFree(ctx->coo_mat_type));
  PetscCall(PetscStrallocpy(type, &ctx->coo_mat_type));
  PetscFunctionReturn(PETSC_SUCCESS);
  // LCOV_EXCL_STOP
}
1086 
1087 /**
1088   @brief Gets the default COO matrix type as a string from the `MATCEED`.
1089 
1090   Collective across MPI processes.
1091 
1092   @param[in,out]  mat   `MATCEED`
1093   @param[in]      type  COO `MatType`
1094 
1095   @return An error code: 0 - success, otherwise - failure
1096 **/
1097 PetscErrorCode MatCeedGetCOOMatType(Mat mat, MatType *type) {
1098   MatCeedContext ctx;
1099 
1100   PetscFunctionBeginUser;
1101   PetscCall(MatShellGetContext(mat, &ctx));
1102   *type = ctx->coo_mat_type;
1103   PetscFunctionReturn(PETSC_SUCCESS);
1104 }
1105 
1106 /**
1107   @brief Set input local vectors for `MATCEED` `MatMult()` and `MatMultTranspose()` operations.
1108 
1109   Not collective across MPI processes.
1110 
1111   @param[in,out]  mat              `MATCEED`
1112   @param[in]      X_loc            Input PETSc local vector, or NULL
1113   @param[in]      Y_loc_transpose  Input PETSc local vector for transpose operation, or NULL
1114 
1115   @return An error code: 0 - success, otherwise - failure
1116 **/
1117 PetscErrorCode MatCeedSetLocalVectors(Mat mat, Vec X_loc, Vec Y_loc_transpose) {
1118   MatCeedContext ctx;
1119 
1120   PetscFunctionBeginUser;
1121   PetscCall(MatShellGetContext(mat, &ctx));
1122   if (X_loc) {
1123     PetscInt len_old, len_new;
1124 
1125     PetscCall(VecGetSize(ctx->X_loc, &len_old));
1126     PetscCall(VecGetSize(X_loc, &len_new));
1127     PetscCheck(len_old == len_new, PETSC_COMM_SELF, PETSC_ERR_LIB, "new X_loc length %" PetscInt_FMT " should match old X_loc length %" PetscInt_FMT,
1128                len_new, len_old);
1129     PetscCall(VecReferenceCopy(X_loc, &ctx->X_loc));
1130   }
1131   if (Y_loc_transpose) {
1132     PetscInt len_old, len_new;
1133 
1134     PetscCall(VecGetSize(ctx->Y_loc_transpose, &len_old));
1135     PetscCall(VecGetSize(Y_loc_transpose, &len_new));
1136     PetscCheck(len_old == len_new, PETSC_COMM_SELF, PETSC_ERR_LIB,
1137                "new Y_loc_transpose length %" PetscInt_FMT " should match old Y_loc_transpose length %" PetscInt_FMT, len_new, len_old);
1138     PetscCall(VecReferenceCopy(Y_loc_transpose, &ctx->Y_loc_transpose));
1139   }
1140   PetscFunctionReturn(PETSC_SUCCESS);
1141 }
1142 
1143 /**
1144   @brief Get input local vectors for `MATCEED` `MatMult()` and `MatMultTranspose()` operations.
1145 
1146   Not collective across MPI processes.
1147 
1148   @param[in,out]  mat              `MATCEED`
1149   @param[out]     X_loc            Input PETSc local vector, or NULL
1150   @param[out]     Y_loc_transpose  Input PETSc local vector for transpose operation, or NULL
1151 
1152   @return An error code: 0 - success, otherwise - failure
1153 **/
1154 PetscErrorCode MatCeedGetLocalVectors(Mat mat, Vec *X_loc, Vec *Y_loc_transpose) {
1155   MatCeedContext ctx;
1156 
1157   PetscFunctionBeginUser;
1158   PetscCall(MatShellGetContext(mat, &ctx));
1159   if (X_loc) {
1160     *X_loc = NULL;
1161     PetscCall(VecReferenceCopy(ctx->X_loc, X_loc));
1162   }
1163   if (Y_loc_transpose) {
1164     *Y_loc_transpose = NULL;
1165     PetscCall(VecReferenceCopy(ctx->Y_loc_transpose, Y_loc_transpose));
1166   }
1167   PetscFunctionReturn(PETSC_SUCCESS);
1168 }
1169 
1170 /**
1171   @brief Restore input local vectors for `MATCEED` `MatMult()` and `MatMultTranspose()` operations.
1172 
1173   Not collective across MPI processes.
1174 
1175   @param[in,out]  mat              MatCeed
1176   @param[out]     X_loc            Input PETSc local vector, or NULL
1177   @param[out]     Y_loc_transpose  Input PETSc local vector for transpose operation, or NULL
1178 
1179   @return An error code: 0 - success, otherwise - failure
1180 **/
1181 PetscErrorCode MatCeedRestoreLocalVectors(Mat mat, Vec *X_loc, Vec *Y_loc_transpose) {
1182   PetscFunctionBeginUser;
1183   if (X_loc) PetscCall(VecDestroy(X_loc));
1184   if (Y_loc_transpose) PetscCall(VecDestroy(Y_loc_transpose));
1185   PetscFunctionReturn(PETSC_SUCCESS);
1186 }
1187 
1188 /**
1189   @brief Get libCEED `CeedOperator` for `MATCEED` `MatMult()` and `MatMultTranspose()` operations.
1190 
1191   Not collective across MPI processes.
1192 
1193   @param[in,out]  mat                MatCeed
1194   @param[out]     op_mult            libCEED `CeedOperator` for `MatMult()`, or NULL
1195   @param[out]     op_mult_transpose  libCEED `CeedOperator` for `MatMultTranspose()`, or NULL
1196 
1197   @return An error code: 0 - success, otherwise - failure
1198 **/
1199 PetscErrorCode MatCeedGetCeedOperators(Mat mat, CeedOperator *op_mult, CeedOperator *op_mult_transpose) {
1200   MatCeedContext ctx;
1201 
1202   PetscFunctionBeginUser;
1203   PetscCall(MatShellGetContext(mat, &ctx));
1204   if (op_mult) {
1205     *op_mult = NULL;
1206     PetscCallCeed(ctx->ceed, CeedOperatorReferenceCopy(ctx->op_mult, op_mult));
1207   }
1208   if (op_mult_transpose) {
1209     *op_mult_transpose = NULL;
1210     PetscCallCeed(ctx->ceed, CeedOperatorReferenceCopy(ctx->op_mult_transpose, op_mult_transpose));
1211   }
1212   PetscFunctionReturn(PETSC_SUCCESS);
1213 }
1214 
1215 /**
1216   @brief Restore libCEED `CeedOperator` for `MATCEED` `MatMult()` and `MatMultTranspose()` operations.
1217 
1218   Not collective across MPI processes.
1219 
1220   @param[in,out]  mat                MatCeed
1221   @param[out]     op_mult            libCEED `CeedOperator` for `MatMult()`, or NULL
1222   @param[out]     op_mult_transpose  libCEED `CeedOperator` for `MatMultTranspose()`, or NULL
1223 
1224   @return An error code: 0 - success, otherwise - failure
1225 **/
1226 PetscErrorCode MatCeedRestoreCeedOperators(Mat mat, CeedOperator *op_mult, CeedOperator *op_mult_transpose) {
1227   MatCeedContext ctx;
1228 
1229   PetscFunctionBeginUser;
1230   PetscCall(MatShellGetContext(mat, &ctx));
1231   if (op_mult) PetscCallCeed(ctx->ceed, CeedOperatorDestroy(op_mult));
1232   if (op_mult_transpose) PetscCallCeed(ctx->ceed, CeedOperatorDestroy(op_mult_transpose));
1233   PetscFunctionReturn(PETSC_SUCCESS);
1234 }
1235 
1236 /**
1237   @brief Set `PetscLogEvent` for `MATCEED` `MatMult()` and `MatMultTranspose()` operators.
1238 
1239   Not collective across MPI processes.
1240 
1241   @param[in,out]  mat                       MatCeed
  @param[in]      log_event_mult            `PetscLogEvent` for forward evaluation, or NULL
  @param[in]      log_event_mult_transpose  `PetscLogEvent` for transpose evaluation, or NULL
1244 
1245   @return An error code: 0 - success, otherwise - failure
1246 **/
1247 PetscErrorCode MatCeedSetLogEvents(Mat mat, PetscLogEvent log_event_mult, PetscLogEvent log_event_mult_transpose) {
1248   MatCeedContext ctx;
1249 
1250   PetscFunctionBeginUser;
1251   PetscCall(MatShellGetContext(mat, &ctx));
1252   if (log_event_mult) ctx->log_event_mult = log_event_mult;
1253   if (log_event_mult_transpose) ctx->log_event_mult_transpose = log_event_mult_transpose;
1254   PetscFunctionReturn(PETSC_SUCCESS);
1255 }
1256 
1257 /**
1258   @brief Get `PetscLogEvent` for `MATCEED` `MatMult()` and `MatMultTranspose()` operators.
1259 
1260   Not collective across MPI processes.
1261 
1262   @param[in,out]  mat                       MatCeed
1263   @param[out]     log_event_mult            `PetscLogEvent` for forward evaluation, or NULL
1264   @param[out]     log_event_mult_transpose  `PetscLogEvent` for transpose evaluation, or NULL
1265 
1266   @return An error code: 0 - success, otherwise - failure
1267 **/
1268 PetscErrorCode MatCeedGetLogEvents(Mat mat, PetscLogEvent *log_event_mult, PetscLogEvent *log_event_mult_transpose) {
1269   MatCeedContext ctx;
1270 
1271   PetscFunctionBeginUser;
1272   PetscCall(MatShellGetContext(mat, &ctx));
1273   if (log_event_mult) *log_event_mult = ctx->log_event_mult;
1274   if (log_event_mult_transpose) *log_event_mult_transpose = ctx->log_event_mult_transpose;
1275   PetscFunctionReturn(PETSC_SUCCESS);
1276 }
1277 
1278 /**
1279   @brief Set `CeedOperator` `PetscLogEvent` for `MATCEED` `MatMult()` and `MatMultTranspose()` operators.
1280 
1281   Not collective across MPI processes.
1282 
1283   @param[in,out]  mat                       MatCeed
  @param[in]      log_event_mult            `PetscLogEvent` for forward `CeedOperator` evaluation, or NULL
  @param[in]      log_event_mult_transpose  `PetscLogEvent` for transpose `CeedOperator` evaluation, or NULL
1286 
1287   @return An error code: 0 - success, otherwise - failure
1288 **/
1289 PetscErrorCode MatCeedSetCeedOperatorLogEvents(Mat mat, PetscLogEvent log_event_mult, PetscLogEvent log_event_mult_transpose) {
1290   MatCeedContext ctx;
1291 
1292   PetscFunctionBeginUser;
1293   PetscCall(MatShellGetContext(mat, &ctx));
1294   if (log_event_mult) ctx->log_event_ceed_mult = log_event_mult;
1295   if (log_event_mult_transpose) ctx->log_event_ceed_mult_transpose = log_event_mult_transpose;
1296   PetscFunctionReturn(PETSC_SUCCESS);
1297 }
1298 
1299 /**
1300   @brief Get `CeedOperator` `PetscLogEvent` for `MATCEED` `MatMult()` and `MatMultTranspose()` operators.
1301 
1302   Not collective across MPI processes.
1303 
1304   @param[in,out]  mat                       MatCeed
1305   @param[out]     log_event_mult            `PetscLogEvent` for forward `CeedOperator` evaluation, or NULL
1306   @param[out]     log_event_mult_transpose  `PetscLogEvent` for transpose `CeedOperator` evaluation, or NULL
1307 
1308   @return An error code: 0 - success, otherwise - failure
1309 **/
1310 PetscErrorCode MatCeedGetCeedOperatorLogEvents(Mat mat, PetscLogEvent *log_event_mult, PetscLogEvent *log_event_mult_transpose) {
1311   MatCeedContext ctx;
1312 
1313   PetscFunctionBeginUser;
1314   PetscCall(MatShellGetContext(mat, &ctx));
1315   if (log_event_mult) *log_event_mult = ctx->log_event_ceed_mult;
1316   if (log_event_mult_transpose) *log_event_mult_transpose = ctx->log_event_ceed_mult_transpose;
1317   PetscFunctionReturn(PETSC_SUCCESS);
1318 }
1319 
1320 // -----------------------------------------------------------------------------
1321 // Operator context data
1322 // -----------------------------------------------------------------------------
1323 
1324 /**
1325   @brief Setup context data for operator application.
1326 
1327   Collective across MPI processes.
1328 
1329   @param[in]   dm_x                           Input `DM`
1330   @param[in]   dm_y                           Output `DM`
1331   @param[in]   X_loc                          Input PETSc local vector, or NULL
1332   @param[in]   Y_loc_transpose                Input PETSc local vector for transpose operation, or NULL
1333   @param[in]   op_mult                        `CeedOperator` for forward evaluation
1334   @param[in]   op_mult_transpose              `CeedOperator` for transpose evaluation
1335   @param[in]   log_event_mult                 `PetscLogEvent` for forward evaluation
1336   @param[in]   log_event_mult_transpose       `PetscLogEvent` for transpose evaluation
1337   @param[in]   log_event_ceed_mult            `PetscLogEvent` for forward `CeedOperator` evaluation
1338   @param[in]   log_event_ceed_mult_transpose  `PetscLogEvent` for transpose `CeedOperator` evaluation
1339   @param[out]  ctx                            Context data for operator evaluation
1340 
1341   @return An error code: 0 - success, otherwise - failure
1342 **/
PetscErrorCode MatCeedContextCreate(DM dm_x, DM dm_y, Vec X_loc, Vec Y_loc_transpose, CeedOperator op_mult, CeedOperator op_mult_transpose,
                                    PetscLogEvent log_event_mult, PetscLogEvent log_event_mult_transpose, PetscLogEvent log_event_ceed_mult,
                                    PetscLogEvent log_event_ceed_mult_transpose, MatCeedContext *ctx) {
  CeedSize x_loc_len, y_loc_len;

  PetscFunctionBeginUser;

  // Allocate
  PetscCall(PetscNew(ctx));
  (*ctx)->ref_count = 1;  // caller owns the initial reference; see MatCeedContextDestroy()

  // Logging
  (*ctx)->log_event_mult                = log_event_mult;
  (*ctx)->log_event_mult_transpose      = log_event_mult_transpose;
  (*ctx)->log_event_ceed_mult           = log_event_ceed_mult;
  (*ctx)->log_event_ceed_mult_transpose = log_event_ceed_mult_transpose;

  // PETSc objects
  // The context takes its own references to the DMs and any user-supplied local vectors
  PetscCall(DMReferenceCopy(dm_x, &(*ctx)->dm_x));
  PetscCall(DMReferenceCopy(dm_y, &(*ctx)->dm_y));
  if (X_loc) PetscCall(VecReferenceCopy(X_loc, &(*ctx)->X_loc));
  if (Y_loc_transpose) PetscCall(VecReferenceCopy(Y_loc_transpose, &(*ctx)->Y_loc_transpose));

  // Memtype
  // Probe a temporary local vector to discover where dm_x's data lives (host vs device)
  {
    const PetscScalar *x;
    Vec                X;

    PetscCall(DMGetLocalVector(dm_x, &X));
    PetscCall(VecGetArrayReadAndMemType(X, &x, &(*ctx)->mem_type));
    PetscCall(VecRestoreArrayReadAndMemType(X, &x));
    PetscCall(DMRestoreLocalVector(dm_x, &X));
  }

  // libCEED objects
  // Hold a reference to the Ceed and both operators; create work vectors sized to the active vector lengths
  PetscCheck(CeedOperatorGetCeed(op_mult, &(*ctx)->ceed) == CEED_ERROR_SUCCESS, PETSC_COMM_SELF, PETSC_ERR_LIB,
             "retrieving Ceed context object failed");
  PetscCallCeed((*ctx)->ceed, CeedReference((*ctx)->ceed));
  PetscCallCeed((*ctx)->ceed, CeedOperatorGetActiveVectorLengths(op_mult, &x_loc_len, &y_loc_len));
  PetscCallCeed((*ctx)->ceed, CeedOperatorReferenceCopy(op_mult, &(*ctx)->op_mult));
  if (op_mult_transpose) PetscCallCeed((*ctx)->ceed, CeedOperatorReferenceCopy(op_mult_transpose, &(*ctx)->op_mult_transpose));
  PetscCallCeed((*ctx)->ceed, CeedVectorCreate((*ctx)->ceed, x_loc_len, &(*ctx)->x_loc));
  PetscCallCeed((*ctx)->ceed, CeedVectorCreate((*ctx)->ceed, y_loc_len, &(*ctx)->y_loc));

  // Flop counting
  // Cache libCEED's per-application flop estimates for PETSc performance logging
  {
    CeedSize ceed_flops_estimate = 0;

    PetscCallCeed((*ctx)->ceed, CeedOperatorGetFlopsEstimate(op_mult, &ceed_flops_estimate));
    (*ctx)->flops_mult = ceed_flops_estimate;
    if (op_mult_transpose) {
      PetscCallCeed((*ctx)->ceed, CeedOperatorGetFlopsEstimate(op_mult_transpose, &ceed_flops_estimate));
      (*ctx)->flops_mult_transpose = ceed_flops_estimate;
    }
  }

  // Check sizes
  // Validate that the operator's active vector lengths agree with the DMs and any user vectors
  if (x_loc_len > 0 || y_loc_len > 0) {
    CeedSize ctx_x_loc_len, ctx_y_loc_len;
    PetscInt X_loc_len, dm_x_loc_len, Y_loc_len, dm_y_loc_len;
    Vec      dm_X_loc, dm_Y_loc;

    // -- Input
    PetscCall(DMGetLocalVector(dm_x, &dm_X_loc));
    PetscCall(VecGetLocalSize(dm_X_loc, &dm_x_loc_len));
    PetscCall(DMRestoreLocalVector(dm_x, &dm_X_loc));
    PetscCallCeed((*ctx)->ceed, CeedVectorGetLength((*ctx)->x_loc, &ctx_x_loc_len));
    if (X_loc) {
      PetscCall(VecGetLocalSize(X_loc, &X_loc_len));
      PetscCheck(X_loc_len == dm_x_loc_len, PETSC_COMM_SELF, PETSC_ERR_LIB,
                 "X_loc (%" PetscInt_FMT ") must match dm_x (%" PetscInt_FMT ") dimensions", X_loc_len, dm_x_loc_len);
    }
    PetscCheck(x_loc_len == dm_x_loc_len, PETSC_COMM_SELF, PETSC_ERR_LIB, "op (%" CeedSize_FMT ") must match dm_x (%" PetscInt_FMT ") dimensions",
               x_loc_len, dm_x_loc_len);
    PetscCheck(x_loc_len == ctx_x_loc_len, PETSC_COMM_SELF, PETSC_ERR_LIB, "x_loc (%" CeedSize_FMT ") must match op dimensions (%" CeedSize_FMT ")",
               x_loc_len, ctx_x_loc_len);

    // -- Output
    PetscCall(DMGetLocalVector(dm_y, &dm_Y_loc));
    PetscCall(VecGetLocalSize(dm_Y_loc, &dm_y_loc_len));
    PetscCall(DMRestoreLocalVector(dm_y, &dm_Y_loc));
    PetscCallCeed((*ctx)->ceed, CeedVectorGetLength((*ctx)->y_loc, &ctx_y_loc_len));
    PetscCheck(ctx_y_loc_len == dm_y_loc_len, PETSC_COMM_SELF, PETSC_ERR_LIB, "op (%" CeedSize_FMT ") must match dm_y (%" PetscInt_FMT ") dimensions",
               ctx_y_loc_len, dm_y_loc_len);

    // -- Transpose
    if (Y_loc_transpose) {
      PetscCall(VecGetLocalSize(Y_loc_transpose, &Y_loc_len));
      PetscCheck(Y_loc_len == dm_y_loc_len, PETSC_COMM_SELF, PETSC_ERR_LIB,
                 "Y_loc_transpose (%" PetscInt_FMT ") must match dm_y (%" PetscInt_FMT ") dimensions", Y_loc_len, dm_y_loc_len);
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
1437 
1438 /**
1439   @brief Increment reference counter for `MATCEED` context.
1440 
1441   Not collective across MPI processes.
1442 
1443   @param[in,out]  ctx  Context data
1444 
1445   @return An error code: 0 - success, otherwise - failure
1446 **/
1447 PetscErrorCode MatCeedContextReference(MatCeedContext ctx) {
1448   PetscFunctionBeginUser;
1449   ctx->ref_count++;
1450   PetscFunctionReturn(PETSC_SUCCESS);
1451 }
1452 
1453 /**
1454   @brief Copy reference for `MATCEED`.
1455          Note: If `ctx_copy` is non-null, it is assumed to be a valid pointer to a `MatCeedContext`.
1456 
1457   Not collective across MPI processes.
1458 
1459   @param[in]   ctx       Context data
1460   @param[out]  ctx_copy  Copy of pointer to context data
1461 
1462   @return An error code: 0 - success, otherwise - failure
1463 **/
1464 PetscErrorCode MatCeedContextReferenceCopy(MatCeedContext ctx, MatCeedContext *ctx_copy) {
1465   PetscFunctionBeginUser;
1466   PetscCall(MatCeedContextReference(ctx));
1467   PetscCall(MatCeedContextDestroy(*ctx_copy));
1468   *ctx_copy = ctx;
1469   PetscFunctionReturn(PETSC_SUCCESS);
1470 }
1471 
1472 /**
1473   @brief Destroy context data for operator application.
1474 
1475   Collective across MPI processes.
1476 
1477   @param[in,out]  ctx  Context data for operator evaluation
1478 
1479   @return An error code: 0 - success, otherwise - failure
1480 **/
1481 PetscErrorCode MatCeedContextDestroy(MatCeedContext ctx) {
1482   PetscFunctionBeginUser;
1483   if (!ctx || --ctx->ref_count > 0) PetscFunctionReturn(PETSC_SUCCESS);
1484 
1485   // PETSc objects
1486   PetscCall(DMDestroy(&ctx->dm_x));
1487   PetscCall(DMDestroy(&ctx->dm_y));
1488   PetscCall(VecDestroy(&ctx->X_loc));
1489   PetscCall(VecDestroy(&ctx->Y_loc_transpose));
1490   PetscCall(MatDestroy(&ctx->mat_assembled_full_internal));
1491   PetscCall(MatDestroy(&ctx->mat_assembled_pbd_internal));
1492   PetscCall(PetscFree(ctx->coo_mat_type));
1493   PetscCall(PetscFree(ctx->mats_assembled_full));
1494   PetscCall(PetscFree(ctx->mats_assembled_pbd));
1495 
1496   // libCEED objects
1497   PetscCallCeed(ctx->ceed, CeedVectorDestroy(&ctx->x_loc));
1498   PetscCallCeed(ctx->ceed, CeedVectorDestroy(&ctx->y_loc));
1499   PetscCallCeed(ctx->ceed, CeedVectorDestroy(&ctx->coo_values_full));
1500   PetscCallCeed(ctx->ceed, CeedVectorDestroy(&ctx->coo_values_pbd));
1501   PetscCallCeed(ctx->ceed, CeedOperatorDestroy(&ctx->op_mult));
1502   PetscCallCeed(ctx->ceed, CeedOperatorDestroy(&ctx->op_mult_transpose));
1503   PetscCheck(CeedDestroy(&ctx->ceed) == CEED_ERROR_SUCCESS, PETSC_COMM_SELF, PETSC_ERR_LIB, "destroying libCEED context object failed");
1504 
1505   // Deallocate
1506   ctx->is_destroyed = PETSC_TRUE;  // Flag as destroyed in case someone has stale ref
1507   PetscCall(PetscFree(ctx));
1508   PetscFunctionReturn(PETSC_SUCCESS);
1509 }
1510 
1511 /**
1512   @brief Compute the diagonal of an operator via libCEED.
1513 
1514   Collective across MPI processes.
1515 
1516   @param[in]   A  `MATCEED`
1517   @param[out]  D  Vector holding operator diagonal
1518 
1519   @return An error code: 0 - success, otherwise - failure
1520 **/
1521 PetscErrorCode MatGetDiagonal_Ceed(Mat A, Vec D) {
1522   PetscMemType   mem_type;
1523   Vec            D_loc;
1524   MatCeedContext ctx;
1525 
1526   PetscFunctionBeginUser;
1527   PetscCall(MatShellGetContext(A, &ctx));
1528 
1529   // Place PETSc vector in libCEED vector
1530   PetscCall(PetscLogEventBegin(MATCEED_ASSEMBLE_DIAGONAL, A, D, NULL, NULL));
1531   PetscCall(DMGetLocalVector(ctx->dm_x, &D_loc));
1532   PetscCall(VecPetscToCeed(D_loc, &mem_type, ctx->x_loc));
1533 
1534   // Compute Diagonal
1535   PetscCall(PetscLogEventBegin(MATCEED_ASSEMBLE_DIAGONAL_CEEDOP, A, D, NULL, NULL));
1536   PetscCallCeed(ctx->ceed, CeedOperatorLinearAssembleDiagonal(ctx->op_mult, ctx->x_loc, CEED_REQUEST_IMMEDIATE));
1537   PetscCall(PetscLogEventEnd(MATCEED_ASSEMBLE_DIAGONAL_CEEDOP, A, D, NULL, NULL));
1538 
1539   // Restore PETSc vector
1540   PetscCall(VecCeedToPetsc(ctx->x_loc, mem_type, D_loc));
1541 
1542   // Local-to-Global
1543   PetscCall(VecZeroEntries(D));
1544   PetscCall(DMLocalToGlobal(ctx->dm_x, D_loc, ADD_VALUES, D));
1545   PetscCall(DMRestoreLocalVector(ctx->dm_x, &D_loc));
1546   PetscCall(PetscLogEventEnd(MATCEED_ASSEMBLE_DIAGONAL, A, D, NULL, NULL));
1547   PetscFunctionReturn(PETSC_SUCCESS);
1548 }
1549 
1550 /**
1551   @brief Compute `A X = Y` for a `MATCEED`.
1552 
1553   Collective across MPI processes.
1554 
1555   @param[in]   A  `MATCEED`
1556   @param[in]   X  Input PETSc vector
1557   @param[out]  Y  Output PETSc vector
1558 
1559   @return An error code: 0 - success, otherwise - failure
1560 **/
1561 PetscErrorCode MatMult_Ceed(Mat A, Vec X, Vec Y) {
1562   MatCeedContext ctx;
1563 
1564   PetscFunctionBeginUser;
1565   PetscCall(MatShellGetContext(A, &ctx));
1566   PetscCall(PetscLogEventBegin(ctx->log_event_mult, A, X, Y, NULL));
1567 
1568   {
1569     PetscMemType x_mem_type, y_mem_type;
1570     Vec          X_loc = ctx->X_loc, Y_loc;
1571 
1572     // Get local vectors
1573     if (!ctx->X_loc) PetscCall(DMGetLocalVector(ctx->dm_x, &X_loc));
1574     PetscCall(DMGetLocalVector(ctx->dm_y, &Y_loc));
1575 
1576     // Global-to-local
1577     PetscCall(DMGlobalToLocal(ctx->dm_x, X, INSERT_VALUES, X_loc));
1578 
1579     // Setup libCEED vectors
1580     PetscCall(VecReadPetscToCeed(X_loc, &x_mem_type, ctx->x_loc));
1581     PetscCall(VecZeroEntries(Y_loc));
1582     PetscCall(VecPetscToCeed(Y_loc, &y_mem_type, ctx->y_loc));
1583 
1584     // Apply libCEED operator
1585     PetscCall(PetscLogEventBegin(ctx->log_event_ceed_mult, A, X, Y, NULL));
1586     PetscCall(PetscLogGpuTimeBegin());
1587     PetscCallCeed(ctx->ceed, CeedOperatorApplyAdd(ctx->op_mult, ctx->x_loc, ctx->y_loc, CEED_REQUEST_IMMEDIATE));
1588     PetscCall(PetscLogGpuTimeEnd());
1589     PetscCall(PetscLogEventEnd(ctx->log_event_ceed_mult, A, X, Y, NULL));
1590 
1591     // Restore PETSc vectors
1592     PetscCall(VecReadCeedToPetsc(ctx->x_loc, x_mem_type, X_loc));
1593     PetscCall(VecCeedToPetsc(ctx->y_loc, y_mem_type, Y_loc));
1594 
1595     // Local-to-global
1596     PetscCall(VecZeroEntries(Y));
1597     PetscCall(DMLocalToGlobal(ctx->dm_y, Y_loc, ADD_VALUES, Y));
1598 
1599     // Restore local vectors, as needed
1600     if (!ctx->X_loc) PetscCall(DMRestoreLocalVector(ctx->dm_x, &X_loc));
1601     PetscCall(DMRestoreLocalVector(ctx->dm_y, &Y_loc));
1602   }
1603 
1604   // Log flops
1605   if (PetscMemTypeDevice(ctx->mem_type)) PetscCall(PetscLogGpuFlops(ctx->flops_mult));
1606   else PetscCall(PetscLogFlops(ctx->flops_mult));
1607   PetscCall(PetscLogEventEnd(ctx->log_event_mult, A, X, Y, NULL));
1608   PetscFunctionReturn(PETSC_SUCCESS);
1609 }
1610 
1611 /**
1612   @brief Compute `A^T Y = X` for a `MATCEED`.
1613 
1614   Collective across MPI processes.
1615 
1616   @param[in]   A  `MATCEED`
1617   @param[in]   Y  Input PETSc vector
1618   @param[out]  X  Output PETSc vector
1619 
1620   @return An error code: 0 - success, otherwise - failure
1621 **/
1622 PetscErrorCode MatMultTranspose_Ceed(Mat A, Vec Y, Vec X) {
1623   MatCeedContext ctx;
1624 
1625   PetscFunctionBeginUser;
1626   PetscCall(MatShellGetContext(A, &ctx));
1627   PetscCall(PetscLogEventBegin(ctx->log_event_mult_transpose, A, Y, X, NULL));
1628 
1629   {
1630     PetscMemType x_mem_type, y_mem_type;
1631     Vec          X_loc, Y_loc = ctx->Y_loc_transpose;
1632 
1633     // Get local vectors
1634     if (!ctx->Y_loc_transpose) PetscCall(DMGetLocalVector(ctx->dm_y, &Y_loc));
1635     PetscCall(DMGetLocalVector(ctx->dm_x, &X_loc));
1636 
1637     // Global-to-local
1638     PetscCall(DMGlobalToLocal(ctx->dm_y, Y, INSERT_VALUES, Y_loc));
1639 
1640     // Setup libCEED vectors
1641     PetscCall(VecReadPetscToCeed(Y_loc, &y_mem_type, ctx->y_loc));
1642     PetscCall(VecZeroEntries(X_loc));
1643     PetscCall(VecPetscToCeed(X_loc, &x_mem_type, ctx->x_loc));
1644 
1645     // Apply libCEED operator
1646     PetscCall(PetscLogEventBegin(ctx->log_event_ceed_mult_transpose, A, Y, X, NULL));
1647     PetscCall(PetscLogGpuTimeBegin());
1648     PetscCallCeed(ctx->ceed, CeedOperatorApplyAdd(ctx->op_mult_transpose, ctx->y_loc, ctx->x_loc, CEED_REQUEST_IMMEDIATE));
1649     PetscCall(PetscLogGpuTimeEnd());
1650     PetscCall(PetscLogEventEnd(ctx->log_event_ceed_mult_transpose, A, Y, X, NULL));
1651 
1652     // Restore PETSc vectors
1653     PetscCall(VecReadCeedToPetsc(ctx->y_loc, y_mem_type, Y_loc));
1654     PetscCall(VecCeedToPetsc(ctx->x_loc, x_mem_type, X_loc));
1655 
1656     // Local-to-global
1657     PetscCall(VecZeroEntries(X));
1658     PetscCall(DMLocalToGlobal(ctx->dm_x, X_loc, ADD_VALUES, X));
1659 
1660     // Restore local vectors, as needed
1661     if (!ctx->Y_loc_transpose) PetscCall(DMRestoreLocalVector(ctx->dm_y, &Y_loc));
1662     PetscCall(DMRestoreLocalVector(ctx->dm_x, &X_loc));
1663   }
1664 
1665   // Log flops
1666   if (PetscMemTypeDevice(ctx->mem_type)) PetscCall(PetscLogGpuFlops(ctx->flops_mult_transpose));
1667   else PetscCall(PetscLogFlops(ctx->flops_mult_transpose));
1668   PetscCall(PetscLogEventEnd(ctx->log_event_mult_transpose, A, Y, X, NULL));
1669   PetscFunctionReturn(PETSC_SUCCESS);
1670 }
1671