xref: /honee/src/mat-ceed.c (revision 8871369a2cddcf60c85a221df3d668293db189b4)
1 /// @file
2 /// MatCEED implementation
3 
4 #include <ceed.h>
5 #include <ceed/backend.h>
6 #include <mat-ceed-impl.h>
7 #include <mat-ceed.h>
8 #include <petsc-ceed-utils.h>
9 #include <petsc-ceed.h>
10 #include <petscdmplex.h>
11 #include <stdlib.h>
12 #include <string.h>
13 
14 PetscClassId  MATCEED_CLASSID;
15 PetscLogEvent MATCEED_MULT, MATCEED_MULT_TRANSPOSE;
16 
17 /**
18   @brief Register MATCEED log events.
19 
20   Not collective across MPI processes.
21 
22   @return An error code: 0 - success, otherwise - failure
23 **/
24 static PetscErrorCode MatCeedRegisterLogEvents() {
25   static PetscBool registered = PETSC_FALSE;
26 
27   PetscFunctionBeginUser;
28   if (registered) PetscFunctionReturn(PETSC_SUCCESS);
29   PetscCall(PetscClassIdRegister("MATCEED", &MATCEED_CLASSID));
30   PetscCall(PetscLogEventRegister("MATCEED Mult", MATCEED_CLASSID, &MATCEED_MULT));
31   PetscCall(PetscLogEventRegister("MATCEED Mult Transpose", MATCEED_CLASSID, &MATCEED_MULT_TRANSPOSE));
32   registered = PETSC_TRUE;
33   PetscFunctionReturn(PETSC_SUCCESS);
34 }
35 
/**
  @brief Assemble the point block diagonal of a `MATCEED` into a `MATAIJ` or similar.
         The `mat_coo` preallocation is set to match the sparsity pattern of `mat_ceed`.
         The caller is responsible for assuring the global and local sizes are compatible, otherwise this function will fail.

  Collective across MPI processes.

  @param[in]      mat_ceed  `MATCEED` to assemble
  @param[in,out]  mat_coo   `MATAIJ` or similar to assemble into

  @return An error code: 0 - success, otherwise - failure
**/
static PetscErrorCode MatCeedAssemblePointBlockDiagonalCOO(Mat mat_ceed, Mat mat_coo) {
  MatCeedContext ctx;

  PetscFunctionBeginUser;
  PetscCall(MatShellGetContext(mat_ceed, &ctx));

  // Check if COO pattern set
  {
    PetscInt index = -1;

    // Linear search over previously assembled targets; list is expected to be short
    for (PetscInt i = 0; i < ctx->num_mats_assembled_pbd; i++) {
      if (ctx->mats_assembled_pbd[i] == mat_coo) index = i;
    }
    if (index == -1) {
      PetscInt     *rows_petsc = NULL, *cols_petsc = NULL;
      CeedInt      *rows_ceed, *cols_ceed;
      PetscCount    num_entries;
      PetscLogStage stage_amg_setup;

      // -- Assemble sparsity pattern if mat hasn't been assembled before
      PetscCall(PetscLogStageGetId("MATCEED Assembly Setup", &stage_amg_setup));
      if (stage_amg_setup == -1) {
        PetscCall(PetscLogStageRegister("MATCEED Assembly Setup", &stage_amg_setup));
      }
      PetscCall(PetscLogStagePush(stage_amg_setup));
      // libCEED produces the (row, col) COO pattern of the point block diagonal
      PetscCallCeed(ctx->ceed, CeedOperatorLinearAssemblePointBlockDiagonalSymbolic(ctx->op_mult, &num_entries, &rows_ceed, &cols_ceed));
      // Convert CeedInt index arrays to PetscInt
      // NOTE(review): assumes IntArrayCeedToPetsc takes ownership of the CeedInt arrays -- confirm against its implementation
      PetscCall(IntArrayCeedToPetsc(num_entries, &rows_ceed, &rows_petsc));
      PetscCall(IntArrayCeedToPetsc(num_entries, &cols_ceed, &cols_petsc));
      PetscCall(MatSetPreallocationCOOLocal(mat_coo, num_entries, rows_petsc, cols_petsc));
      // Arrays were malloc-allocated (not PetscMalloc), so plain free is used
      free(rows_petsc);
      free(cols_petsc);
      // Reusable libCEED vector that will hold assembled values on every call
      if (!ctx->coo_values_pbd) PetscCallCeed(ctx->ceed, CeedVectorCreate(ctx->ceed, num_entries, &ctx->coo_values_pbd));
      // Record mat_coo so the symbolic assembly is not repeated for it
      PetscCall(PetscRealloc(++ctx->num_mats_assembled_pbd * sizeof(Mat), &ctx->mats_assembled_pbd));
      ctx->mats_assembled_pbd[ctx->num_mats_assembled_pbd - 1] = mat_coo;
      PetscCall(PetscLogStagePop());
    }
  }

  // Assemble mat_ceed
  PetscCall(MatAssemblyBegin(mat_coo, MAT_FINAL_ASSEMBLY));
  {
    const CeedScalar *values;
    MatType           mat_type;
    CeedMemType       mem_type = CEED_MEM_HOST;
    PetscBool         is_spd, is_spd_known;

    // Device-backed matrix types can read libCEED device memory directly
    PetscCall(MatGetType(mat_coo, &mat_type));
    if (strstr(mat_type, "cusparse")) mem_type = CEED_MEM_DEVICE;
    else if (strstr(mat_type, "kokkos")) mem_type = CEED_MEM_DEVICE;
    else mem_type = CEED_MEM_HOST;

    PetscCallCeed(ctx->ceed, CeedOperatorLinearAssemblePointBlockDiagonal(ctx->op_mult, ctx->coo_values_pbd, CEED_REQUEST_IMMEDIATE));
    PetscCallCeed(ctx->ceed, CeedVectorGetArrayRead(ctx->coo_values_pbd, mem_type, &values));
    PetscCall(MatSetValuesCOO(mat_coo, values, INSERT_VALUES));
    // Propagate SPD information from the MATCEED when known
    PetscCall(MatIsSPDKnown(mat_ceed, &is_spd_known, &is_spd));
    if (is_spd_known) PetscCall(MatSetOption(mat_coo, MAT_SPD, is_spd));
    PetscCallCeed(ctx->ceed, CeedVectorRestoreArrayRead(ctx->coo_values_pbd, &values));
  }
  PetscCall(MatAssemblyEnd(mat_coo, MAT_FINAL_ASSEMBLY));
  PetscFunctionReturn(PETSC_SUCCESS);
}
109 
110 /**
111   @brief Assemble inner `Mat` for diagonal `PC` operations
112 
113   Collective across MPI processes.
114 
115   @param[in]   mat_ceed      `MATCEED` to invert
116   @param[in]   use_ceed_pbd  Boolean flag to use libCEED PBD assembly
117   @param[out]  mat_inner     Inner `Mat` for diagonal operations
118 
119   @return An error code: 0 - success, otherwise - failure
120 **/
121 static PetscErrorCode MatCeedAssembleInnerBlockDiagonalMat(Mat mat_ceed, PetscBool use_ceed_pbd, Mat *mat_inner) {
122   MatCeedContext ctx;
123 
124   PetscFunctionBeginUser;
125   PetscCall(MatShellGetContext(mat_ceed, &ctx));
126   if (use_ceed_pbd) {
127     // Check if COO pattern set
128     if (!ctx->mat_assembled_pbd_internal) PetscCall(MatCeedCreateMatCOO(mat_ceed, &ctx->mat_assembled_pbd_internal));
129 
130     // Assemble mat_assembled_full_internal
131     PetscCall(MatCeedAssemblePointBlockDiagonalCOO(mat_ceed, ctx->mat_assembled_pbd_internal));
132     if (mat_inner) *mat_inner = ctx->mat_assembled_pbd_internal;
133   } else {
134     // Check if COO pattern set
135     if (!ctx->mat_assembled_full_internal) PetscCall(MatCeedCreateMatCOO(mat_ceed, &ctx->mat_assembled_full_internal));
136 
137     // Assemble mat_assembled_full_internal
138     PetscCall(MatCeedAssembleCOO(mat_ceed, ctx->mat_assembled_full_internal));
139     if (mat_inner) *mat_inner = ctx->mat_assembled_full_internal;
140   }
141   PetscFunctionReturn(PETSC_SUCCESS);
142 }
143 
144 /**
145   @brief Get `MATCEED` diagonal block for Jacobi.
146 
147   Collective across MPI processes.
148 
149   @param[in]   mat_ceed   `MATCEED` to invert
150   @param[out]  mat_block  The diagonal block matrix
151 
152   @return An error code: 0 - success, otherwise - failure
153 **/
154 static PetscErrorCode MatGetDiagonalBlock_Ceed(Mat mat_ceed, Mat *mat_block) {
155   Mat            mat_inner = NULL;
156   MatCeedContext ctx;
157 
158   PetscFunctionBeginUser;
159   PetscCall(MatShellGetContext(mat_ceed, &ctx));
160 
161   // Assemble inner mat if needed
162   PetscCall(MatCeedAssembleInnerBlockDiagonalMat(mat_ceed, ctx->is_ceed_pbd_valid, &mat_inner));
163 
164   // Get block diagonal
165   PetscCall(MatGetDiagonalBlock(mat_inner, mat_block));
166   PetscFunctionReturn(PETSC_SUCCESS);
167 }
168 
169 /**
170   @brief Invert `MATCEED` diagonal block for Jacobi.
171 
172   Collective across MPI processes.
173 
174   @param[in]   mat_ceed  `MATCEED` to invert
175   @param[out]  values    The block inverses in column major order
176 
177   @return An error code: 0 - success, otherwise - failure
178 **/
179 static PetscErrorCode MatInvertBlockDiagonal_Ceed(Mat mat_ceed, const PetscScalar **values) {
180   Mat            mat_inner = NULL;
181   MatCeedContext ctx;
182 
183   PetscFunctionBeginUser;
184   PetscCall(MatShellGetContext(mat_ceed, &ctx));
185 
186   // Assemble inner mat if needed
187   PetscCall(MatCeedAssembleInnerBlockDiagonalMat(mat_ceed, ctx->is_ceed_pbd_valid, &mat_inner));
188 
189   // Invert PB diagonal
190   PetscCall(MatInvertBlockDiagonal(mat_inner, values));
191   PetscFunctionReturn(PETSC_SUCCESS);
192 }
193 
194 /**
195   @brief Invert `MATCEED` variable diagonal block for Jacobi.
196 
197   Collective across MPI processes.
198 
199   @param[in]   mat_ceed     `MATCEED` to invert
200   @param[in]   num_blocks   The number of blocks on the process
201   @param[in]   block_sizes  The size of each block on the process
202   @param[out]  values       The block inverses in column major order
203 
204   @return An error code: 0 - success, otherwise - failure
205 **/
206 static PetscErrorCode MatInvertVariableBlockDiagonal_Ceed(Mat mat_ceed, PetscInt num_blocks, const PetscInt *block_sizes, PetscScalar *values) {
207   Mat            mat_inner = NULL;
208   MatCeedContext ctx;
209 
210   PetscFunctionBeginUser;
211   PetscCall(MatShellGetContext(mat_ceed, &ctx));
212 
213   // Assemble inner mat if needed
214   PetscCall(MatCeedAssembleInnerBlockDiagonalMat(mat_ceed, ctx->is_ceed_vpbd_valid, &mat_inner));
215 
216   // Invert PB diagonal
217   PetscCall(MatInvertVariableBlockDiagonal(mat_inner, num_blocks, block_sizes, values));
218   PetscFunctionReturn(PETSC_SUCCESS);
219 }
220 
221 /**
222   @brief View `MATCEED`.
223 
224   Collective across MPI processes.
225 
226   @param[in]   mat_ceed  `MATCEED` to view
227   @param[in]   viewer    The visualization context
228 
229   @return An error code: 0 - success, otherwise - failure
230 **/
231 static PetscErrorCode MatView_Ceed(Mat mat_ceed, PetscViewer viewer) {
232   PetscBool         is_ascii;
233   PetscViewerFormat format;
234   PetscMPIInt       size;
235   MatCeedContext    ctx;
236 
237   PetscFunctionBeginUser;
238   PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2);
239   PetscCall(MatShellGetContext(mat_ceed, &ctx));
240   if (!viewer) PetscCall(PetscViewerASCIIGetStdout(PetscObjectComm((PetscObject)mat_ceed), &viewer));
241 
242   PetscCall(PetscViewerGetFormat(viewer, &format));
243   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)mat_ceed), &size));
244   if (size == 1 && format == PETSC_VIEWER_LOAD_BALANCE) PetscFunctionReturn(PETSC_SUCCESS);
245 
246   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &is_ascii));
247   {
248     FILE *file;
249 
250     PetscCall(PetscViewerASCIIPrintf(viewer, "MatCEED:\n  Default COO MatType:%s\n", ctx->coo_mat_type));
251     PetscCall(PetscViewerASCIIGetPointer(viewer, &file));
252     PetscCall(PetscViewerASCIIPrintf(viewer, " libCEED Operator:\n"));
253     PetscCallCeed(ctx->ceed, CeedOperatorView(ctx->op_mult, file));
254     if (ctx->op_mult_transpose) {
255       PetscCall(PetscViewerASCIIPrintf(viewer, "  libCEED Transpose Operator:\n"));
256       PetscCallCeed(ctx->ceed, CeedOperatorView(ctx->op_mult_transpose, file));
257     }
258   }
259   PetscFunctionReturn(PETSC_SUCCESS);
260 }
261 
262 // -----------------------------------------------------------------------------
263 // MatCeed
264 // -----------------------------------------------------------------------------
265 
/**
  @brief Create PETSc `Mat` from libCEED operators.

  Collective across MPI processes.

  @param[in]   dm_x               Input `DM`
  @param[in]   dm_y               Output `DM`, or NULL to reuse `dm_x` (square operator)
  @param[in]   op_mult            `CeedOperator` for forward evaluation
  @param[in]   op_mult_transpose  `CeedOperator` for transpose evaluation, or NULL
  @param[out]  mat                New MatCeed

  @return An error code: 0 - success, otherwise - failure
**/
PetscErrorCode MatCeedCreate(DM dm_x, DM dm_y, CeedOperator op_mult, CeedOperator op_mult_transpose, Mat *mat) {
  PetscInt       X_l_size, X_g_size, Y_l_size, Y_g_size;
  VecType        vec_type;
  MatCeedContext ctx;

  PetscFunctionBeginUser;
  PetscCall(MatCeedRegisterLogEvents());

  // Collect context data
  PetscCall(DMGetVecType(dm_x, &vec_type));
  {
    Vec X;

    // Input (column) sizes from a temporary global vector on dm_x
    PetscCall(DMGetGlobalVector(dm_x, &X));
    PetscCall(VecGetSize(X, &X_g_size));
    PetscCall(VecGetLocalSize(X, &X_l_size));
    PetscCall(DMRestoreGlobalVector(dm_x, &X));
  }
  if (dm_y) {
    Vec Y;

    // Output (row) sizes from a temporary global vector on dm_y
    PetscCall(DMGetGlobalVector(dm_y, &Y));
    PetscCall(VecGetSize(Y, &Y_g_size));
    PetscCall(VecGetLocalSize(Y, &Y_l_size));
    PetscCall(DMRestoreGlobalVector(dm_y, &Y));
  } else {
    // Square operator: single DM for input and output
    dm_y     = dm_x;
    Y_g_size = X_g_size;
    Y_l_size = X_l_size;
  }

  // Create context
  {
    Vec X_loc, Y_loc_transpose = NULL;

    PetscCall(DMCreateLocalVector(dm_x, &X_loc));
    PetscCall(VecZeroEntries(X_loc));
    if (op_mult_transpose) {
      PetscCall(DMCreateLocalVector(dm_y, &Y_loc_transpose));
      PetscCall(VecZeroEntries(Y_loc_transpose));
    }
    // Context takes its own references; the local handles are destroyed here
    PetscCall(MatCeedContextCreate(dm_x, dm_y, X_loc, Y_loc_transpose, op_mult, op_mult_transpose, MATCEED_MULT, MATCEED_MULT_TRANSPOSE, &ctx));
    PetscCall(VecDestroy(&X_loc));
    PetscCall(VecDestroy(&Y_loc_transpose));
  }

  // Create mat
  PetscCall(MatCreateShell(PetscObjectComm((PetscObject)dm_x), Y_l_size, X_l_size, Y_g_size, X_g_size, ctx, mat));
  PetscCall(PetscObjectChangeTypeName((PetscObject)*mat, MATCEED));
  // -- Set block and variable block sizes
  if (dm_x == dm_y) {
    MatType dm_mat_type, dm_mat_type_copy;
    Mat     temp_mat;

    // Create a throwaway AIJ matrix only to query the DM's block structure,
    // restoring the DM's original MatType afterwards
    PetscCall(DMGetMatType(dm_x, &dm_mat_type));
    PetscCall(PetscStrallocpy(dm_mat_type, (char **)&dm_mat_type_copy));
    PetscCall(DMSetMatType(dm_x, MATAIJ));
    PetscCall(DMCreateMatrix(dm_x, &temp_mat));
    PetscCall(DMSetMatType(dm_x, dm_mat_type_copy));
    PetscCall(PetscFree(dm_mat_type_copy));

    {
      PetscInt        block_size, num_blocks, max_vblock_size = PETSC_INT_MAX;
      const PetscInt *vblock_sizes;

      // -- Get block sizes
      PetscCall(MatGetBlockSize(temp_mat, &block_size));
      PetscCall(MatGetVariableBlockSizes(temp_mat, &num_blocks, &vblock_sizes));
      {
        PetscInt local_min_max[2] = {0}, global_min_max[2] = {0, PETSC_INT_MAX};

        // Global maximum variable block size (only the max slot is populated/used)
        for (PetscInt i = 0; i < num_blocks; i++) local_min_max[1] = PetscMax(local_min_max[1], vblock_sizes[i]);
        PetscCall(PetscGlobalMinMaxInt(PetscObjectComm((PetscObject)dm_x), local_min_max, global_min_max));
        max_vblock_size = global_min_max[1];
      }

      // -- Copy block sizes
      if (block_size > 1) PetscCall(MatSetBlockSize(*mat, block_size));
      if (num_blocks) PetscCall(MatSetVariableBlockSizes(*mat, num_blocks, (PetscInt *)vblock_sizes));

      // -- Check libCEED compatibility
      // PBD assembly requires a single active basis whose component count matches
      // the block size; variable PBD requires at least max_vblock_size components
      {
        bool is_composite;

        ctx->is_ceed_pbd_valid  = PETSC_TRUE;
        ctx->is_ceed_vpbd_valid = PETSC_TRUE;
        PetscCallCeed(ctx->ceed, CeedOperatorIsComposite(op_mult, &is_composite));
        if (is_composite) {
          CeedInt       num_sub_operators;
          CeedOperator *sub_operators;

          // Every sub-operator must satisfy the compatibility conditions
          PetscCallCeed(ctx->ceed, CeedCompositeOperatorGetNumSub(op_mult, &num_sub_operators));
          PetscCallCeed(ctx->ceed, CeedCompositeOperatorGetSubList(op_mult, &sub_operators));
          for (CeedInt i = 0; i < num_sub_operators; i++) {
            CeedInt                  num_bases, num_comp;
            CeedBasis               *active_bases;
            CeedOperatorAssemblyData assembly_data;

            PetscCallCeed(ctx->ceed, CeedOperatorGetOperatorAssemblyData(sub_operators[i], &assembly_data));
            PetscCallCeed(ctx->ceed, CeedOperatorAssemblyDataGetBases(assembly_data, &num_bases, &active_bases, NULL, NULL, NULL, NULL));
            PetscCallCeed(ctx->ceed, CeedBasisGetNumComponents(active_bases[0], &num_comp));
            if (num_bases > 1) {
              ctx->is_ceed_pbd_valid  = PETSC_FALSE;
              ctx->is_ceed_vpbd_valid = PETSC_FALSE;
            }
            if (num_comp != block_size) ctx->is_ceed_pbd_valid = PETSC_FALSE;
            if (num_comp < max_vblock_size) ctx->is_ceed_vpbd_valid = PETSC_FALSE;
          }
        } else {
          // LCOV_EXCL_START
          CeedInt                  num_bases, num_comp;
          CeedBasis               *active_bases;
          CeedOperatorAssemblyData assembly_data;

          // Single (non-composite) operator: same checks on its assembly data
          PetscCallCeed(ctx->ceed, CeedOperatorGetOperatorAssemblyData(op_mult, &assembly_data));
          PetscCallCeed(ctx->ceed, CeedOperatorAssemblyDataGetBases(assembly_data, &num_bases, &active_bases, NULL, NULL, NULL, NULL));
          PetscCallCeed(ctx->ceed, CeedBasisGetNumComponents(active_bases[0], &num_comp));
          if (num_bases > 1) {
            ctx->is_ceed_pbd_valid  = PETSC_FALSE;
            ctx->is_ceed_vpbd_valid = PETSC_FALSE;
          }
          if (num_comp != block_size) ctx->is_ceed_pbd_valid = PETSC_FALSE;
          if (num_comp < max_vblock_size) ctx->is_ceed_vpbd_valid = PETSC_FALSE;
          // LCOV_EXCL_STOP
        }
        {
          PetscInt local_is_valid[2], global_is_valid[2];

          // Validity flags must agree across ranks; take the global minimum
          local_is_valid[0] = local_is_valid[1] = ctx->is_ceed_pbd_valid;
          PetscCall(PetscGlobalMinMaxInt(PetscObjectComm((PetscObject)dm_x), local_is_valid, global_is_valid));
          ctx->is_ceed_pbd_valid = global_is_valid[0];
          local_is_valid[0] = local_is_valid[1] = ctx->is_ceed_vpbd_valid;
          PetscCall(PetscGlobalMinMaxInt(PetscObjectComm((PetscObject)dm_x), local_is_valid, global_is_valid));
          ctx->is_ceed_vpbd_valid = global_is_valid[0];
        }
      }
    }
    PetscCall(MatDestroy(&temp_mat));
  }
  // -- Set internal mat type
  {
    VecType vec_type;  // NOTE(review): shadows the outer vec_type that MatShellSetVecType uses below -- intentional? confirm
    MatType coo_mat_type;

    // Pick a COO assembly MatType that matches the vector backend
    PetscCall(VecGetType(ctx->X_loc, &vec_type));
    if (strstr(vec_type, VECCUDA)) coo_mat_type = MATAIJCUSPARSE;
    else if (strstr(vec_type, VECKOKKOS)) coo_mat_type = MATAIJKOKKOS;
    else coo_mat_type = MATAIJ;
    PetscCall(PetscStrallocpy(coo_mat_type, &ctx->coo_mat_type));
  }
  // -- Set mat operations
  PetscCall(MatShellSetContextDestroy(*mat, (PetscErrorCode(*)(void *))MatCeedContextDestroy));
  PetscCall(MatShellSetOperation(*mat, MATOP_VIEW, (void (*)(void))MatView_Ceed));
  PetscCall(MatShellSetOperation(*mat, MATOP_MULT, (void (*)(void))MatMult_Ceed));
  if (op_mult_transpose) PetscCall(MatShellSetOperation(*mat, MATOP_MULT_TRANSPOSE, (void (*)(void))MatMultTranspose_Ceed));
  PetscCall(MatShellSetOperation(*mat, MATOP_GET_DIAGONAL, (void (*)(void))MatGetDiagonal_Ceed));
  PetscCall(MatShellSetOperation(*mat, MATOP_GET_DIAGONAL_BLOCK, (void (*)(void))MatGetDiagonalBlock_Ceed));
  PetscCall(MatShellSetOperation(*mat, MATOP_INVERT_BLOCK_DIAGONAL, (void (*)(void))MatInvertBlockDiagonal_Ceed));
  PetscCall(MatShellSetOperation(*mat, MATOP_INVERT_VBLOCK_DIAGONAL, (void (*)(void))MatInvertVariableBlockDiagonal_Ceed));
  PetscCall(MatShellSetVecType(*mat, vec_type));
  PetscFunctionReturn(PETSC_SUCCESS);
}
441 
442 /**
443   @brief Copy `MATCEED` into a compatible `Mat` with type `MatShell` or `MATCEED`.
444 
445   Collective across MPI processes.
446 
447   @param[in]   mat_ceed   `MATCEED` to copy from
448   @param[out]  mat_other  `MatShell` or `MATCEED` to copy into
449 
450   @return An error code: 0 - success, otherwise - failure
451 **/
452 PetscErrorCode MatCeedCopy(Mat mat_ceed, Mat mat_other) {
453   PetscFunctionBeginUser;
454   PetscCall(MatCeedRegisterLogEvents());
455 
456   // Check type compatibility
457   {
458     PetscBool is_matceed = PETSC_FALSE, is_matshell = PETSC_FALSE;
459     MatType   mat_type_ceed, mat_type_other;
460 
461     PetscCall(MatGetType(mat_ceed, &mat_type_ceed));
462     PetscCall(PetscStrcmp(mat_type_ceed, MATCEED, &is_matceed));
463     PetscCheck(is_matceed, PETSC_COMM_SELF, PETSC_ERR_LIB, "mat_ceed must have type " MATCEED);
464     PetscCall(MatGetType(mat_other, &mat_type_other));
465     PetscCall(PetscStrcmp(mat_type_other, MATCEED, &is_matceed));
466     PetscCall(PetscStrcmp(mat_type_other, MATSHELL, &is_matceed));
467     PetscCheck(is_matceed || is_matshell, PETSC_COMM_SELF, PETSC_ERR_LIB, "mat_other must have type " MATCEED " or " MATSHELL);
468   }
469 
470   // Check dimension compatibility
471   {
472     PetscInt X_l_ceed_size, X_g_ceed_size, Y_l_ceed_size, Y_g_ceed_size, X_l_other_size, X_g_other_size, Y_l_other_size, Y_g_other_size;
473 
474     PetscCall(MatGetSize(mat_ceed, &Y_g_ceed_size, &X_g_ceed_size));
475     PetscCall(MatGetLocalSize(mat_ceed, &Y_l_ceed_size, &X_l_ceed_size));
476     PetscCall(MatGetSize(mat_ceed, &Y_g_other_size, &X_g_other_size));
477     PetscCall(MatGetLocalSize(mat_ceed, &Y_l_other_size, &X_l_other_size));
478     PetscCheck((Y_g_ceed_size == Y_g_other_size) && (X_g_ceed_size == X_g_other_size) && (Y_l_ceed_size == Y_l_other_size) &&
479                    (X_l_ceed_size == X_l_other_size),
480                PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ,
481                "mat_ceed and mat_other must have compatible sizes; found mat_ceed (Global: %" PetscInt_FMT ", %" PetscInt_FMT
482                "; Local: %" PetscInt_FMT ", %" PetscInt_FMT ") mat_other (Global: %" PetscInt_FMT ", %" PetscInt_FMT "; Local: %" PetscInt_FMT
483                ", %" PetscInt_FMT ")",
484                Y_g_ceed_size, X_g_ceed_size, Y_l_ceed_size, X_l_ceed_size, Y_g_other_size, X_g_other_size, Y_l_other_size, X_l_other_size);
485   }
486 
487   // Convert
488   {
489     VecType        vec_type;
490     MatCeedContext ctx;
491 
492     PetscCall(PetscObjectChangeTypeName((PetscObject)mat_other, MATCEED));
493     PetscCall(MatShellGetContext(mat_ceed, &ctx));
494     PetscCall(MatCeedContextReference(ctx));
495     PetscCall(MatShellSetContext(mat_other, ctx));
496     PetscCall(MatShellSetContextDestroy(mat_other, (PetscErrorCode(*)(void *))MatCeedContextDestroy));
497     PetscCall(MatShellSetOperation(mat_other, MATOP_VIEW, (void (*)(void))MatView_Ceed));
498     PetscCall(MatShellSetOperation(mat_other, MATOP_MULT, (void (*)(void))MatMult_Ceed));
499     if (ctx->op_mult_transpose) PetscCall(MatShellSetOperation(mat_other, MATOP_MULT_TRANSPOSE, (void (*)(void))MatMultTranspose_Ceed));
500     PetscCall(MatShellSetOperation(mat_other, MATOP_GET_DIAGONAL, (void (*)(void))MatGetDiagonal_Ceed));
501     PetscCall(MatShellSetOperation(mat_other, MATOP_GET_DIAGONAL_BLOCK, (void (*)(void))MatGetDiagonalBlock_Ceed));
502     PetscCall(MatShellSetOperation(mat_other, MATOP_INVERT_BLOCK_DIAGONAL, (void (*)(void))MatInvertBlockDiagonal_Ceed));
503     PetscCall(MatShellSetOperation(mat_other, MATOP_INVERT_VBLOCK_DIAGONAL, (void (*)(void))MatInvertVariableBlockDiagonal_Ceed));
504     {
505       PetscInt block_size;
506 
507       PetscCall(MatGetBlockSize(mat_ceed, &block_size));
508       if (block_size > 1) PetscCall(MatSetBlockSize(mat_other, block_size));
509     }
510     {
511       PetscInt        num_blocks;
512       const PetscInt *block_sizes;
513 
514       PetscCall(MatGetVariableBlockSizes(mat_ceed, &num_blocks, &block_sizes));
515       if (num_blocks) PetscCall(MatSetVariableBlockSizes(mat_other, num_blocks, (PetscInt *)block_sizes));
516     }
517     PetscCall(DMGetVecType(ctx->dm_x, &vec_type));
518     PetscCall(MatShellSetVecType(mat_other, vec_type));
519   }
520   PetscFunctionReturn(PETSC_SUCCESS);
521 }
522 
/**
  @brief Setup a `Mat` with the same COO pattern as a `MatCEED`.

  Collective across MPI processes.

  @param[in]   mat_ceed  `MATCEED`
  @param[out]  mat_coo   Sparse `Mat` with same COO pattern; caller is responsible for destroying it

  @return An error code: 0 - success, otherwise - failure
**/
PetscErrorCode MatCeedCreateMatCOO(Mat mat_ceed, Mat *mat_coo) {
  MatCeedContext ctx;

  PetscFunctionBeginUser;
  PetscCall(MatShellGetContext(mat_ceed, &ctx));

  // COO assembly is only defined for square operators on a single DM
  PetscCheck(ctx->dm_x == ctx->dm_y, PetscObjectComm((PetscObject)mat_ceed), PETSC_ERR_SUP, "COO assembly only supported for MATCEED on a single DM");

  // Check cl mat type
  {
    PetscBool is_coo_mat_type_cl = PETSC_FALSE;
    char      coo_mat_type_cl[64];

    // Check for specific CL coo mat type for this Mat
    {
      const char *mat_ceed_prefix = NULL;

      // Command-line option (-ceed_coo_mat_type) overrides the context's default COO MatType
      PetscCall(MatGetOptionsPrefix(mat_ceed, &mat_ceed_prefix));
      PetscOptionsBegin(PetscObjectComm((PetscObject)mat_ceed), mat_ceed_prefix, "", NULL);
      PetscCall(PetscOptionsFList("-ceed_coo_mat_type", "Default MATCEED COO assembly MatType", NULL, MatList, coo_mat_type_cl, coo_mat_type_cl,
                                  sizeof(coo_mat_type_cl), &is_coo_mat_type_cl));
      PetscOptionsEnd();
      if (is_coo_mat_type_cl) {
        // Replace the stored default with the command-line choice
        PetscCall(PetscFree(ctx->coo_mat_type));
        PetscCall(PetscStrallocpy(coo_mat_type_cl, &ctx->coo_mat_type));
      }
    }
  }

  // Create sparse matrix
  {
    MatType dm_mat_type, dm_mat_type_copy;

    // Temporarily switch the DM's MatType to the COO type, create, then restore
    PetscCall(DMGetMatType(ctx->dm_x, &dm_mat_type));
    PetscCall(PetscStrallocpy(dm_mat_type, (char **)&dm_mat_type_copy));
    PetscCall(DMSetMatType(ctx->dm_x, ctx->coo_mat_type));
    PetscCall(DMCreateMatrix(ctx->dm_x, mat_coo));
    PetscCall(DMSetMatType(ctx->dm_x, dm_mat_type_copy));
    PetscCall(PetscFree(dm_mat_type_copy));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
575 
/**
  @brief Setup the COO preallocation of a `MATCEED` on a `MATAIJ` or similar.
         The caller is responsible for assuring the global and local sizes are compatible, otherwise this function will fail.

  Collective across MPI processes.

  @param[in]      mat_ceed  `MATCEED` to assemble
  @param[in,out]  mat_coo   `MATAIJ` or similar to assemble into

  @return An error code: 0 - success, otherwise - failure
**/
PetscErrorCode MatCeedSetPreallocationCOO(Mat mat_ceed, Mat mat_coo) {
  MatCeedContext ctx;

  PetscFunctionBeginUser;
  PetscCall(MatShellGetContext(mat_ceed, &ctx));

  {
    PetscInt     *rows_petsc = NULL, *cols_petsc = NULL;
    CeedInt      *rows_ceed, *cols_ceed;
    PetscCount    num_entries;
    PetscLogStage stage_amg_setup;

    // -- Assemble sparsity pattern if mat hasn't been assembled before
    PetscCall(PetscLogStageGetId("MATCEED Assembly Setup", &stage_amg_setup));
    if (stage_amg_setup == -1) {
      PetscCall(PetscLogStageRegister("MATCEED Assembly Setup", &stage_amg_setup));
    }
    PetscCall(PetscLogStagePush(stage_amg_setup));
    // libCEED produces the full (row, col) COO pattern of the operator
    PetscCallCeed(ctx->ceed, CeedOperatorLinearAssembleSymbolic(ctx->op_mult, &num_entries, &rows_ceed, &cols_ceed));
    // Convert CeedInt index arrays to PetscInt
    // NOTE(review): assumes IntArrayCeedToPetsc takes ownership of the CeedInt arrays -- confirm against its implementation
    PetscCall(IntArrayCeedToPetsc(num_entries, &rows_ceed, &rows_petsc));
    PetscCall(IntArrayCeedToPetsc(num_entries, &cols_ceed, &cols_petsc));
    PetscCall(MatSetPreallocationCOOLocal(mat_coo, num_entries, rows_petsc, cols_petsc));
    // Arrays were malloc-allocated (not PetscMalloc), so plain free is used
    free(rows_petsc);
    free(cols_petsc);
    // Reusable libCEED vector that will hold assembled values on every call
    if (!ctx->coo_values_full) PetscCallCeed(ctx->ceed, CeedVectorCreate(ctx->ceed, num_entries, &ctx->coo_values_full));
    // Record mat_coo so MatCeedAssembleCOO can skip repeat preallocation
    PetscCall(PetscRealloc(++ctx->num_mats_assembled_full * sizeof(Mat), &ctx->mats_assembled_full));
    ctx->mats_assembled_full[ctx->num_mats_assembled_full - 1] = mat_coo;
    PetscCall(PetscLogStagePop());
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
618 
619 /**
620   @brief Assemble a `MATCEED` into a `MATAIJ` or similar.
621          The `mat_coo` preallocation is set to match the sparsity pattern of `mat_ceed`.
622          The caller is responsible for assuring the global and local sizes are compatible, otherwise this function will fail.
623 
624   Collective across MPI processes.
625 
626   @param[in]      mat_ceed  `MATCEED` to assemble
627   @param[in,out]  mat_coo   `MATAIJ` or similar to assemble into
628 
629   @return An error code: 0 - success, otherwise - failure
630 **/
631 PetscErrorCode MatCeedAssembleCOO(Mat mat_ceed, Mat mat_coo) {
632   MatCeedContext ctx;
633 
634   PetscFunctionBeginUser;
635   PetscCall(MatShellGetContext(mat_ceed, &ctx));
636 
637   // Set COO pattern if needed
638   {
639     CeedInt index = -1;
640 
641     for (PetscInt i = 0; i < ctx->num_mats_assembled_full; i++) {
642       if (ctx->mats_assembled_full[i] == mat_coo) index = i;
643     }
644     if (index == -1) PetscCall(MatCeedSetPreallocationCOO(mat_ceed, mat_coo));
645   }
646 
647   // Assemble mat_ceed
648   PetscCall(MatAssemblyBegin(mat_coo, MAT_FINAL_ASSEMBLY));
649   {
650     const CeedScalar *values;
651     MatType           mat_type;
652     CeedMemType       mem_type = CEED_MEM_HOST;
653     PetscBool         is_spd, is_spd_known;
654 
655     PetscCall(MatGetType(mat_coo, &mat_type));
656     if (strstr(mat_type, "cusparse")) mem_type = CEED_MEM_DEVICE;
657     else if (strstr(mat_type, "kokkos")) mem_type = CEED_MEM_DEVICE;
658     else mem_type = CEED_MEM_HOST;
659 
660     PetscCallCeed(ctx->ceed, CeedOperatorLinearAssemble(ctx->op_mult, ctx->coo_values_full));
661     PetscCallCeed(ctx->ceed, CeedVectorGetArrayRead(ctx->coo_values_full, mem_type, &values));
662     PetscCall(MatSetValuesCOO(mat_coo, values, INSERT_VALUES));
663     PetscCall(MatIsSPDKnown(mat_ceed, &is_spd_known, &is_spd));
664     if (is_spd_known) PetscCall(MatSetOption(mat_coo, MAT_SPD, is_spd));
665     PetscCallCeed(ctx->ceed, CeedVectorRestoreArrayRead(ctx->coo_values_full, &values));
666   }
667   PetscCall(MatAssemblyEnd(mat_coo, MAT_FINAL_ASSEMBLY));
668   PetscFunctionReturn(PETSC_SUCCESS);
669 }
670 
671 /**
672   @brief Set the current value of a context field for a `MatCEED`.
673 
674   Not collective across MPI processes.
675 
676   @param[in,out]  mat    `MatCEED`
677   @param[in]      name   Name of the context field
678   @param[in]      value  New context field value
679 
680   @return An error code: 0 - success, otherwise - failure
681 **/
682 PetscErrorCode MatCeedSetContextDouble(Mat mat, const char *name, double value) {
683   PetscBool      was_updated = PETSC_FALSE;
684   MatCeedContext ctx;
685 
686   PetscFunctionBeginUser;
687   PetscCall(MatShellGetContext(mat, &ctx));
688   {
689     CeedContextFieldLabel label = NULL;
690 
691     PetscCallCeed(ctx->ceed, CeedOperatorGetContextFieldLabel(ctx->op_mult, name, &label));
692     if (label) {
693       PetscCallCeed(ctx->ceed, CeedOperatorSetContextDouble(ctx->op_mult, label, &value));
694       was_updated = PETSC_TRUE;
695     }
696     if (ctx->op_mult_transpose) {
697       label = NULL;
698       PetscCallCeed(ctx->ceed, CeedOperatorGetContextFieldLabel(ctx->op_mult_transpose, name, &label));
699       if (label) {
700         PetscCallCeed(ctx->ceed, CeedOperatorSetContextDouble(ctx->op_mult_transpose, label, &value));
701         was_updated = PETSC_TRUE;
702       }
703     }
704   }
705   if (was_updated) {
706     PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY));
707     PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY));
708   }
709   PetscFunctionReturn(PETSC_SUCCESS);
710 }
711 
712 /**
713   @brief Set the current `PetscReal` value of a context field for a `MatCEED`.
714 
715   Not collective across MPI processes.
716 
717   @param[in,out]  mat    `MatCEED`
718   @param[in]      name   Name of the context field
719   @param[in]      value  New context field value
720 
721   @return An error code: 0 - success, otherwise - failure
722 **/
723 PetscErrorCode MatCeedSetContextReal(Mat mat, const char *name, PetscReal value) {
724   double value_double = value;
725 
726   PetscFunctionBeginUser;
727   PetscCall(MatCeedSetContextDouble(mat, name, value_double));
728   PetscFunctionReturn(PETSC_SUCCESS);
729 }
730 
731 /**
732   @brief Get the current value of a context field for a `MatCEED`.
733 
734   Not collective across MPI processes.
735 
736   @param[in]   mat    `MatCEED`
737   @param[in]   name   Name of the context field
738   @param[out]  value  Current context field value
739 
740   @return An error code: 0 - success, otherwise - failure
741 **/
742 PetscErrorCode MatCeedGetContextDouble(Mat mat, const char *name, double *value) {
743   MatCeedContext ctx;
744 
745   PetscFunctionBeginUser;
746   PetscCall(MatShellGetContext(mat, &ctx));
747   {
748     CeedContextFieldLabel label = NULL;
749     CeedOperator          op    = ctx->op_mult;
750 
751     PetscCallCeed(ctx->ceed, CeedOperatorGetContextFieldLabel(op, name, &label));
752     if (!label && ctx->op_mult_transpose) {
753       op = ctx->op_mult_transpose;
754       PetscCallCeed(ctx->ceed, CeedOperatorGetContextFieldLabel(op, name, &label));
755     }
756     if (label) {
757       PetscSizeT    num_values;
758       const double *values_ceed;
759 
760       PetscCallCeed(ctx->ceed, CeedOperatorGetContextDoubleRead(op, label, &num_values, &values_ceed));
761       *value = values_ceed[0];
762       PetscCallCeed(ctx->ceed, CeedOperatorRestoreContextDoubleRead(op, label, &values_ceed));
763     }
764   }
765   PetscFunctionReturn(PETSC_SUCCESS);
766 }
767 
768 /**
769   @brief Get the current `PetscReal` value of a context field for a `MatCEED`.
770 
771   Not collective across MPI processes.
772 
773   @param[in]   mat    `MatCEED`
774   @param[in]   name   Name of the context field
775   @param[out]  value  Current context field value
776 
777   @return An error code: 0 - success, otherwise - failure
778 **/
779 PetscErrorCode MatCeedGetContextReal(Mat mat, const char *name, PetscReal *value) {
780   double value_double;
781 
782   PetscFunctionBeginUser;
783   PetscCall(MatCeedGetContextDouble(mat, name, &value_double));
784   *value = value_double;
785   PetscFunctionReturn(PETSC_SUCCESS);
786 }
787 
788 /**
789   @brief Set user context for a `MATCEED`.
790 
791   Collective across MPI processes.
792 
793   @param[in,out]  mat  `MATCEED`
794   @param[in]      f    The context destroy function, or NULL
795   @param[in]      ctx  User context, or NULL to unset
796 
797   @return An error code: 0 - success, otherwise - failure
798 **/
799 PetscErrorCode MatCeedSetContext(Mat mat, PetscErrorCode (*f)(void *), void *ctx) {
800   PetscContainer user_ctx = NULL;
801 
802   PetscFunctionBeginUser;
803   if (ctx) {
804     PetscCall(PetscContainerCreate(PetscObjectComm((PetscObject)mat), &user_ctx));
805     PetscCall(PetscContainerSetPointer(user_ctx, ctx));
806     PetscCall(PetscContainerSetUserDestroy(user_ctx, f));
807   }
808   PetscCall(PetscObjectCompose((PetscObject)mat, "MatCeed user context", (PetscObject)user_ctx));
809   PetscCall(PetscContainerDestroy(&user_ctx));
810   PetscFunctionReturn(PETSC_SUCCESS);
811 }
812 
813 /**
814   @brief Retrieve the user context for a `MATCEED`.
815 
816   Collective across MPI processes.
817 
818   @param[in,out]  mat  `MATCEED`
  @param[out]     ctx  Location to return the user context pointer (set to NULL if no context is composed)
820 
821   @return An error code: 0 - success, otherwise - failure
822 **/
823 PetscErrorCode MatCeedGetContext(Mat mat, void *ctx) {
824   PetscContainer user_ctx;
825 
826   PetscFunctionBeginUser;
827   PetscCall(PetscObjectQuery((PetscObject)mat, "MatCeed user context", (PetscObject *)&user_ctx));
828   if (user_ctx) PetscCall(PetscContainerGetPointer(user_ctx, (void **)ctx));
829   else *(void **)ctx = NULL;
830   PetscFunctionReturn(PETSC_SUCCESS);
831 }
832 /**
833   @brief Set a user defined matrix operation for a `MATCEED` matrix.
834 
835   Within each user-defined routine, the user should call `MatCeedGetContext()` to obtain the user-defined context that was set by
836 `MatCeedSetContext()`.
837 
838   Collective across MPI processes.
839 
840   @param[in,out]  mat  `MATCEED`
841   @param[in]      op   Name of the `MatOperation`
842   @param[in]      g    Function that provides the operation
843 
844   @return An error code: 0 - success, otherwise - failure
845 **/
846 PetscErrorCode MatCeedSetOperation(Mat mat, MatOperation op, void (*g)(void)) {
847   PetscFunctionBeginUser;
848   PetscCall(MatShellSetOperation(mat, op, g));
849   PetscFunctionReturn(PETSC_SUCCESS);
850 }
851 
852 /**
853   @brief Sets the default COO matrix type as a string from the `MATCEED`.
854 
855   Collective across MPI processes.
856 
857   @param[in,out]  mat   `MATCEED`
858   @param[in]      type  COO `MatType` to set
859 
860   @return An error code: 0 - success, otherwise - failure
861 **/
862 PetscErrorCode MatCeedSetCOOMatType(Mat mat, MatType type) {
863   MatCeedContext ctx;
864 
865   PetscFunctionBeginUser;
866   PetscCall(MatShellGetContext(mat, &ctx));
867   // Check if same
868   {
869     size_t    len_old, len_new;
870     PetscBool is_same = PETSC_FALSE;
871 
872     PetscCall(PetscStrlen(ctx->coo_mat_type, &len_old));
873     PetscCall(PetscStrlen(type, &len_new));
874     if (len_old == len_new) PetscCall(PetscStrncmp(ctx->coo_mat_type, type, len_old, &is_same));
875     if (is_same) PetscFunctionReturn(PETSC_SUCCESS);
876   }
877   // Clean up old mats in different format
878   // LCOV_EXCL_START
879   if (ctx->mat_assembled_full_internal) {
880     for (PetscInt i = 0; i < ctx->num_mats_assembled_full; i++) {
881       if (ctx->mats_assembled_full[i] == ctx->mat_assembled_full_internal) {
882         for (PetscInt j = i + 1; j < ctx->num_mats_assembled_full; j++) {
883           ctx->mats_assembled_full[j - 1] = ctx->mats_assembled_full[j];
884         }
885         ctx->num_mats_assembled_full--;
886         // Note: we'll realloc this array again, so no need to shrink the allocation
887         PetscCall(MatDestroy(&ctx->mat_assembled_full_internal));
888       }
889     }
890   }
891   if (ctx->mat_assembled_pbd_internal) {
892     for (PetscInt i = 0; i < ctx->num_mats_assembled_pbd; i++) {
893       if (ctx->mats_assembled_pbd[i] == ctx->mat_assembled_pbd_internal) {
894         for (PetscInt j = i + 1; j < ctx->num_mats_assembled_pbd; j++) {
895           ctx->mats_assembled_pbd[j - 1] = ctx->mats_assembled_pbd[j];
896         }
897         // Note: we'll realloc this array again, so no need to shrink the allocation
898         ctx->num_mats_assembled_pbd--;
899         PetscCall(MatDestroy(&ctx->mat_assembled_pbd_internal));
900       }
901     }
902   }
903   PetscCall(PetscFree(ctx->coo_mat_type));
904   PetscCall(PetscStrallocpy(type, &ctx->coo_mat_type));
905   PetscFunctionReturn(PETSC_SUCCESS);
906   // LCOV_EXCL_STOP
907 }
908 
909 /**
910   @brief Gets the default COO matrix type as a string from the `MATCEED`.
911 
912   Collective across MPI processes.
913 
914   @param[in,out]  mat   `MATCEED`
  @param[out]     type  COO `MatType`
916 
917   @return An error code: 0 - success, otherwise - failure
918 **/
919 PetscErrorCode MatCeedGetCOOMatType(Mat mat, MatType *type) {
920   MatCeedContext ctx;
921 
922   PetscFunctionBeginUser;
923   PetscCall(MatShellGetContext(mat, &ctx));
924   *type = ctx->coo_mat_type;
925   PetscFunctionReturn(PETSC_SUCCESS);
926 }
927 
928 /**
929   @brief Set input local vectors for `MATCEED` `MatMult()` and `MatMultTranspose()` operations.
930 
931   Not collective across MPI processes.
932 
933   @param[in,out]  mat              `MATCEED`
934   @param[in]      X_loc            Input PETSc local vector, or NULL
935   @param[in]      Y_loc_transpose  Input PETSc local vector for transpose operation, or NULL
936 
937   @return An error code: 0 - success, otherwise - failure
938 **/
939 PetscErrorCode MatCeedSetLocalVectors(Mat mat, Vec X_loc, Vec Y_loc_transpose) {
940   MatCeedContext ctx;
941 
942   PetscFunctionBeginUser;
943   PetscCall(MatShellGetContext(mat, &ctx));
944   if (X_loc) {
945     PetscInt len_old, len_new;
946 
947     PetscCall(VecGetSize(ctx->X_loc, &len_old));
948     PetscCall(VecGetSize(X_loc, &len_new));
949     PetscCheck(len_old == len_new, PETSC_COMM_SELF, PETSC_ERR_LIB, "new X_loc length %" PetscInt_FMT " should match old X_loc length %" PetscInt_FMT,
950                len_new, len_old);
951     PetscCall(VecReferenceCopy(X_loc, &ctx->X_loc));
952   }
953   if (Y_loc_transpose) {
954     PetscInt len_old, len_new;
955 
956     PetscCall(VecGetSize(ctx->Y_loc_transpose, &len_old));
957     PetscCall(VecGetSize(Y_loc_transpose, &len_new));
958     PetscCheck(len_old == len_new, PETSC_COMM_SELF, PETSC_ERR_LIB,
959                "new Y_loc_transpose length %" PetscInt_FMT " should match old Y_loc_transpose length %" PetscInt_FMT, len_new, len_old);
960     PetscCall(VecReferenceCopy(Y_loc_transpose, &ctx->Y_loc_transpose));
961   }
962   PetscFunctionReturn(PETSC_SUCCESS);
963 }
964 
965 /**
966   @brief Get input local vectors for `MATCEED` `MatMult()` and `MatMultTranspose()` operations.
967 
968   Not collective across MPI processes.
969 
970   @param[in,out]  mat              `MATCEED`
971   @param[out]     X_loc            Input PETSc local vector, or NULL
972   @param[out]     Y_loc_transpose  Input PETSc local vector for transpose operation, or NULL
973 
974   @return An error code: 0 - success, otherwise - failure
975 **/
976 PetscErrorCode MatCeedGetLocalVectors(Mat mat, Vec *X_loc, Vec *Y_loc_transpose) {
977   MatCeedContext ctx;
978 
979   PetscFunctionBeginUser;
980   PetscCall(MatShellGetContext(mat, &ctx));
981   if (X_loc) {
982     *X_loc = NULL;
983     PetscCall(VecReferenceCopy(ctx->X_loc, X_loc));
984   }
985   if (Y_loc_transpose) {
986     *Y_loc_transpose = NULL;
987     PetscCall(VecReferenceCopy(ctx->Y_loc_transpose, Y_loc_transpose));
988   }
989   PetscFunctionReturn(PETSC_SUCCESS);
990 }
991 
992 /**
993   @brief Restore input local vectors for `MATCEED` `MatMult()` and `MatMultTranspose()` operations.
994 
995   Not collective across MPI processes.
996 
997   @param[in,out]  mat              MatCeed
998   @param[out]     X_loc            Input PETSc local vector, or NULL
999   @param[out]     Y_loc_transpose  Input PETSc local vector for transpose operation, or NULL
1000 
1001   @return An error code: 0 - success, otherwise - failure
1002 **/
1003 PetscErrorCode MatCeedRestoreLocalVectors(Mat mat, Vec *X_loc, Vec *Y_loc_transpose) {
1004   PetscFunctionBeginUser;
1005   if (X_loc) PetscCall(VecDestroy(X_loc));
1006   if (Y_loc_transpose) PetscCall(VecDestroy(Y_loc_transpose));
1007   PetscFunctionReturn(PETSC_SUCCESS);
1008 }
1009 
1010 /**
1011   @brief Get libCEED `CeedOperator` for `MATCEED` `MatMult()` and `MatMultTranspose()` operations.
1012 
1013   Not collective across MPI processes.
1014 
1015   @param[in,out]  mat                MatCeed
1016   @param[out]     op_mult            libCEED `CeedOperator` for `MatMult()`, or NULL
1017   @param[out]     op_mult_transpose  libCEED `CeedOperator` for `MatMultTranspose()`, or NULL
1018 
1019   @return An error code: 0 - success, otherwise - failure
1020 **/
1021 PetscErrorCode MatCeedGetCeedOperators(Mat mat, CeedOperator *op_mult, CeedOperator *op_mult_transpose) {
1022   MatCeedContext ctx;
1023 
1024   PetscFunctionBeginUser;
1025   PetscCall(MatShellGetContext(mat, &ctx));
1026   if (op_mult) {
1027     *op_mult = NULL;
1028     PetscCallCeed(ctx->ceed, CeedOperatorReferenceCopy(ctx->op_mult, op_mult));
1029   }
1030   if (op_mult_transpose) {
1031     *op_mult_transpose = NULL;
1032     PetscCallCeed(ctx->ceed, CeedOperatorReferenceCopy(ctx->op_mult_transpose, op_mult_transpose));
1033   }
1034   PetscFunctionReturn(PETSC_SUCCESS);
1035 }
1036 
1037 /**
1038   @brief Restore libCEED `CeedOperator` for `MATCEED` `MatMult()` and `MatMultTranspose()` operations.
1039 
1040   Not collective across MPI processes.
1041 
1042   @param[in,out]  mat                MatCeed
1043   @param[out]     op_mult            libCEED `CeedOperator` for `MatMult()`, or NULL
1044   @param[out]     op_mult_transpose  libCEED `CeedOperator` for `MatMultTranspose()`, or NULL
1045 
1046   @return An error code: 0 - success, otherwise - failure
1047 **/
1048 PetscErrorCode MatCeedRestoreCeedOperators(Mat mat, CeedOperator *op_mult, CeedOperator *op_mult_transpose) {
1049   MatCeedContext ctx;
1050 
1051   PetscFunctionBeginUser;
1052   PetscCall(MatShellGetContext(mat, &ctx));
1053   if (op_mult) PetscCallCeed(ctx->ceed, CeedOperatorDestroy(op_mult));
1054   if (op_mult_transpose) PetscCallCeed(ctx->ceed, CeedOperatorDestroy(op_mult_transpose));
1055   PetscFunctionReturn(PETSC_SUCCESS);
1056 }
1057 
1058 /**
1059   @brief Set `PetscLogEvent` for `MATCEED` `MatMult()` and `MatMultTranspose()` operators.
1060 
1061   Not collective across MPI processes.
1062 
1063   @param[in,out]  mat                       MatCeed
  @param[in]      log_event_mult            `PetscLogEvent` for forward evaluation, or 0 to leave unchanged
  @param[in]      log_event_mult_transpose  `PetscLogEvent` for transpose evaluation, or 0 to leave unchanged
1066 
1067   @return An error code: 0 - success, otherwise - failure
1068 **/
1069 PetscErrorCode MatCeedSetLogEvents(Mat mat, PetscLogEvent log_event_mult, PetscLogEvent log_event_mult_transpose) {
1070   MatCeedContext ctx;
1071 
1072   PetscFunctionBeginUser;
1073   PetscCall(MatShellGetContext(mat, &ctx));
1074   if (log_event_mult) ctx->log_event_mult = log_event_mult;
1075   if (log_event_mult_transpose) ctx->log_event_mult_transpose = log_event_mult_transpose;
1076   PetscFunctionReturn(PETSC_SUCCESS);
1077 }
1078 
1079 /**
1080   @brief Get `PetscLogEvent` for `MATCEED` `MatMult()` and `MatMultTranspose()` operators.
1081 
1082   Not collective across MPI processes.
1083 
1084   @param[in,out]  mat                       MatCeed
1085   @param[out]     log_event_mult            `PetscLogEvent` for forward evaluation, or NULL
1086   @param[out]     log_event_mult_transpose  `PetscLogEvent` for transpose evaluation, or NULL
1087 
1088   @return An error code: 0 - success, otherwise - failure
1089 **/
1090 PetscErrorCode MatCeedGetLogEvents(Mat mat, PetscLogEvent *log_event_mult, PetscLogEvent *log_event_mult_transpose) {
1091   MatCeedContext ctx;
1092 
1093   PetscFunctionBeginUser;
1094   PetscCall(MatShellGetContext(mat, &ctx));
1095   if (log_event_mult) *log_event_mult = ctx->log_event_mult;
1096   if (log_event_mult_transpose) *log_event_mult_transpose = ctx->log_event_mult_transpose;
1097   PetscFunctionReturn(PETSC_SUCCESS);
1098 }
1099 
1100 // -----------------------------------------------------------------------------
1101 // Operator context data
1102 // -----------------------------------------------------------------------------
1103 
1104 /**
1105   @brief Setup context data for operator application.
1106 
1107   Collective across MPI processes.
1108 
1109   @param[in]   dm_x                      Input `DM`
1110   @param[in]   dm_y                      Output `DM`
1111   @param[in]   X_loc                     Input PETSc local vector, or NULL
1112   @param[in]   Y_loc_transpose           Input PETSc local vector for transpose operation, or NULL
1113   @param[in]   op_mult                   `CeedOperator` for forward evaluation
1114   @param[in]   op_mult_transpose         `CeedOperator` for transpose evaluation
1115   @param[in]   log_event_mult            `PetscLogEvent` for forward evaluation
1116   @param[in]   log_event_mult_transpose  `PetscLogEvent` for transpose evaluation
1117   @param[out]  ctx                       Context data for operator evaluation
1118 
1119   @return An error code: 0 - success, otherwise - failure
1120 **/
PetscErrorCode MatCeedContextCreate(DM dm_x, DM dm_y, Vec X_loc, Vec Y_loc_transpose, CeedOperator op_mult, CeedOperator op_mult_transpose,
                                    PetscLogEvent log_event_mult, PetscLogEvent log_event_mult_transpose, MatCeedContext *ctx) {
  CeedSize x_loc_len, y_loc_len;

  PetscFunctionBeginUser;

  // Allocate the context; the first reference belongs to the caller
  PetscCall(PetscNew(ctx));
  (*ctx)->ref_count = 1;

  // Logging events used by MatMult_Ceed()/MatMultTranspose_Ceed()
  (*ctx)->log_event_mult           = log_event_mult;
  (*ctx)->log_event_mult_transpose = log_event_mult_transpose;

  // PETSc objects - take references to the DMs and optional user local vectors
  PetscCall(DMReferenceCopy(dm_x, &(*ctx)->dm_x));
  PetscCall(DMReferenceCopy(dm_y, &(*ctx)->dm_y));
  if (X_loc) PetscCall(VecReferenceCopy(X_loc, &(*ctx)->X_loc));
  if (Y_loc_transpose) PetscCall(VecReferenceCopy(Y_loc_transpose, &(*ctx)->Y_loc_transpose));

  // Memtype - probe a throwaway local vector from dm_x to record whether the
  // data lives on host or device (used later to decide GPU vs CPU flop logging)
  {
    const PetscScalar *x;
    Vec                X;

    PetscCall(DMGetLocalVector(dm_x, &X));
    PetscCall(VecGetArrayReadAndMemType(X, &x, &(*ctx)->mem_type));
    PetscCall(VecRestoreArrayReadAndMemType(X, &x));
    PetscCall(DMRestoreLocalVector(dm_x, &X));
  }

  // libCEED objects - reference the Ceed and operators, and create work vectors
  // sized to the forward operator's active input/output lengths
  PetscCheck(CeedOperatorGetCeed(op_mult, &(*ctx)->ceed) == CEED_ERROR_SUCCESS, PETSC_COMM_SELF, PETSC_ERR_LIB,
             "retrieving Ceed context object failed");
  PetscCallCeed((*ctx)->ceed, CeedReference((*ctx)->ceed));
  PetscCallCeed((*ctx)->ceed, CeedOperatorGetActiveVectorLengths(op_mult, &x_loc_len, &y_loc_len));
  PetscCallCeed((*ctx)->ceed, CeedOperatorReferenceCopy(op_mult, &(*ctx)->op_mult));
  if (op_mult_transpose) PetscCallCeed((*ctx)->ceed, CeedOperatorReferenceCopy(op_mult_transpose, &(*ctx)->op_mult_transpose));
  PetscCallCeed((*ctx)->ceed, CeedVectorCreate((*ctx)->ceed, x_loc_len, &(*ctx)->x_loc));
  PetscCallCeed((*ctx)->ceed, CeedVectorCreate((*ctx)->ceed, y_loc_len, &(*ctx)->y_loc));

  // Flop counting - cache estimates consumed by PetscLog(Gpu)Flops during mults
  {
    CeedSize ceed_flops_estimate = 0;

    PetscCallCeed((*ctx)->ceed, CeedOperatorGetFlopsEstimate(op_mult, &ceed_flops_estimate));
    (*ctx)->flops_mult = ceed_flops_estimate;
    if (op_mult_transpose) {
      PetscCallCeed((*ctx)->ceed, CeedOperatorGetFlopsEstimate(op_mult_transpose, &ceed_flops_estimate));
      (*ctx)->flops_mult_transpose = ceed_flops_estimate;
    }
  }

  // Check sizes - verify operator, DM, and user-vector dimensions all agree
  if (x_loc_len > 0 || y_loc_len > 0) {
    CeedSize ctx_x_loc_len, ctx_y_loc_len;
    PetscInt X_loc_len, dm_x_loc_len, Y_loc_len, dm_y_loc_len;
    Vec      dm_X_loc, dm_Y_loc;

    // -- Input: X_loc (if given), op active input, and ctx->x_loc vs dm_x local size
    PetscCall(DMGetLocalVector(dm_x, &dm_X_loc));
    PetscCall(VecGetLocalSize(dm_X_loc, &dm_x_loc_len));
    PetscCall(DMRestoreLocalVector(dm_x, &dm_X_loc));
    PetscCallCeed((*ctx)->ceed, CeedVectorGetLength((*ctx)->x_loc, &ctx_x_loc_len));
    if (X_loc) {
      PetscCall(VecGetLocalSize(X_loc, &X_loc_len));
      PetscCheck(X_loc_len == dm_x_loc_len, PETSC_COMM_SELF, PETSC_ERR_LIB,
                 "X_loc (%" PetscInt_FMT ") must match dm_x (%" PetscInt_FMT ") dimensions", X_loc_len, dm_x_loc_len);
    }
    PetscCheck(x_loc_len == dm_x_loc_len, PETSC_COMM_SELF, PETSC_ERR_LIB, "op (%" CeedSize_FMT ") must match dm_x (%" PetscInt_FMT ") dimensions",
               x_loc_len, dm_x_loc_len);
    PetscCheck(x_loc_len == ctx_x_loc_len, PETSC_COMM_SELF, PETSC_ERR_LIB, "x_loc (%" CeedSize_FMT ") must match op dimensions (%" CeedSize_FMT ")",
               x_loc_len, ctx_x_loc_len);

    // -- Output: ctx->y_loc vs dm_y local size
    PetscCall(DMGetLocalVector(dm_y, &dm_Y_loc));
    PetscCall(VecGetLocalSize(dm_Y_loc, &dm_y_loc_len));
    PetscCall(DMRestoreLocalVector(dm_y, &dm_Y_loc));
    PetscCallCeed((*ctx)->ceed, CeedVectorGetLength((*ctx)->y_loc, &ctx_y_loc_len));
    PetscCheck(ctx_y_loc_len == dm_y_loc_len, PETSC_COMM_SELF, PETSC_ERR_LIB, "op (%" CeedSize_FMT ") must match dm_y (%" PetscInt_FMT ") dimensions",
               ctx_y_loc_len, dm_y_loc_len);

    // -- Transpose: Y_loc_transpose (if given) vs dm_y local size
    if (Y_loc_transpose) {
      PetscCall(VecGetLocalSize(Y_loc_transpose, &Y_loc_len));
      PetscCheck(Y_loc_len == dm_y_loc_len, PETSC_COMM_SELF, PETSC_ERR_LIB,
                 "Y_loc_transpose (%" PetscInt_FMT ") must match dm_y (%" PetscInt_FMT ") dimensions", Y_loc_len, dm_y_loc_len);
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
1212 
1213 /**
1214   @brief Increment reference counter for `MATCEED` context.
1215 
1216   Not collective across MPI processes.
1217 
1218   @param[in,out]  ctx  Context data
1219 
1220   @return An error code: 0 - success, otherwise - failure
1221 **/
1222 PetscErrorCode MatCeedContextReference(MatCeedContext ctx) {
1223   PetscFunctionBeginUser;
1224   ctx->ref_count++;
1225   PetscFunctionReturn(PETSC_SUCCESS);
1226 }
1227 
1228 /**
1229   @brief Copy reference for `MATCEED`.
1230          Note: If `ctx_copy` is non-null, it is assumed to be a valid pointer to a `MatCeedContext`.
1231 
1232   Not collective across MPI processes.
1233 
1234   @param[in]   ctx       Context data
1235   @param[out]  ctx_copy  Copy of pointer to context data
1236 
1237   @return An error code: 0 - success, otherwise - failure
1238 **/
PetscErrorCode MatCeedContextReferenceCopy(MatCeedContext ctx, MatCeedContext *ctx_copy) {
  PetscFunctionBeginUser;
  // Reference ctx BEFORE destroying *ctx_copy: if ctx == *ctx_copy (self-assignment),
  // the extra reference keeps the context alive across the destroy
  PetscCall(MatCeedContextReference(ctx));
  PetscCall(MatCeedContextDestroy(*ctx_copy));
  *ctx_copy = ctx;
  PetscFunctionReturn(PETSC_SUCCESS);
}
1246 
1247 /**
1248   @brief Destroy context data for operator application.
1249 
1250   Collective across MPI processes.
1251 
1252   @param[in,out]  ctx  Context data for operator evaluation
1253 
1254   @return An error code: 0 - success, otherwise - failure
1255 **/
PetscErrorCode MatCeedContextDestroy(MatCeedContext ctx) {
  PetscFunctionBeginUser;
  // NULL is a no-op; otherwise drop one reference and only tear down on the last
  if (!ctx || --ctx->ref_count > 0) PetscFunctionReturn(PETSC_SUCCESS);

  // PETSc objects
  PetscCall(DMDestroy(&ctx->dm_x));
  PetscCall(DMDestroy(&ctx->dm_y));
  PetscCall(VecDestroy(&ctx->X_loc));
  PetscCall(VecDestroy(&ctx->Y_loc_transpose));
  PetscCall(MatDestroy(&ctx->mat_assembled_full_internal));
  PetscCall(MatDestroy(&ctx->mat_assembled_pbd_internal));
  PetscCall(PetscFree(ctx->coo_mat_type));
  PetscCall(PetscFree(ctx->mats_assembled_full));
  PetscCall(PetscFree(ctx->mats_assembled_pbd));

  // libCEED objects - ctx->ceed is needed for error reporting on each destroy,
  // so it must be destroyed last
  PetscCallCeed(ctx->ceed, CeedVectorDestroy(&ctx->x_loc));
  PetscCallCeed(ctx->ceed, CeedVectorDestroy(&ctx->y_loc));
  PetscCallCeed(ctx->ceed, CeedVectorDestroy(&ctx->coo_values_full));
  PetscCallCeed(ctx->ceed, CeedVectorDestroy(&ctx->coo_values_pbd));
  PetscCallCeed(ctx->ceed, CeedOperatorDestroy(&ctx->op_mult));
  PetscCallCeed(ctx->ceed, CeedOperatorDestroy(&ctx->op_mult_transpose));
  PetscCheck(CeedDestroy(&ctx->ceed) == CEED_ERROR_SUCCESS, PETSC_COMM_SELF, PETSC_ERR_LIB, "destroying libCEED context object failed");

  // Deallocate
  ctx->is_destroyed = PETSC_TRUE;  // Flag as destroyed in case someone has stale ref
  PetscCall(PetscFree(ctx));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1285 
1286 /**
1287   @brief Compute the diagonal of an operator via libCEED.
1288 
1289   Collective across MPI processes.
1290 
1291   @param[in]   A  `MATCEED`
1292   @param[out]  D  Vector holding operator diagonal
1293 
1294   @return An error code: 0 - success, otherwise - failure
1295 **/
PetscErrorCode MatGetDiagonal_Ceed(Mat A, Vec D) {
  PetscMemType   mem_type;
  Vec            D_loc;
  MatCeedContext ctx;

  PetscFunctionBeginUser;
  PetscCall(MatShellGetContext(A, &ctx));

  // Place PETSc vector in libCEED vector so libCEED assembles directly into it
  PetscCall(DMGetLocalVector(ctx->dm_x, &D_loc));
  PetscCall(VecPetscToCeed(D_loc, &mem_type, ctx->x_loc));

  // Compute Diagonal of the forward operator into ctx->x_loc
  PetscCallCeed(ctx->ceed, CeedOperatorLinearAssembleDiagonal(ctx->op_mult, ctx->x_loc, CEED_REQUEST_IMMEDIATE));

  // Restore PETSc vector (must precede the local-to-global scatter)
  PetscCall(VecCeedToPetsc(ctx->x_loc, mem_type, D_loc));

  // Local-to-Global: zero D first, then sum local contributions across ranks
  PetscCall(VecZeroEntries(D));
  PetscCall(DMLocalToGlobal(ctx->dm_x, D_loc, ADD_VALUES, D));
  PetscCall(DMRestoreLocalVector(ctx->dm_x, &D_loc));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1320 
1321 /**
1322   @brief Compute `A X = Y` for a `MATCEED`.
1323 
1324   Collective across MPI processes.
1325 
1326   @param[in]   A  `MATCEED`
1327   @param[in]   X  Input PETSc vector
1328   @param[out]  Y  Output PETSc vector
1329 
1330   @return An error code: 0 - success, otherwise - failure
1331 **/
PetscErrorCode MatMult_Ceed(Mat A, Vec X, Vec Y) {
  MatCeedContext ctx;

  PetscFunctionBeginUser;
  PetscCall(MatShellGetContext(A, &ctx));
  PetscCall(PetscLogEventBegin(ctx->log_event_mult, A, X, Y, 0));

  {
    PetscMemType x_mem_type, y_mem_type;
    Vec          X_loc = ctx->X_loc, Y_loc;

    // Get local vectors - use the user-provided X_loc if one was set via
    // MatCeedSetLocalVectors(), otherwise borrow one from the DM
    if (!ctx->X_loc) PetscCall(DMGetLocalVector(ctx->dm_x, &X_loc));
    PetscCall(DMGetLocalVector(ctx->dm_y, &Y_loc));

    // Global-to-local
    PetscCall(DMGlobalToLocal(ctx->dm_x, X, INSERT_VALUES, X_loc));

    // Setup libCEED vectors - input is read-only; output is zeroed because the
    // operator is applied in add mode below
    PetscCall(VecReadPetscToCeed(X_loc, &x_mem_type, ctx->x_loc));
    PetscCall(VecZeroEntries(Y_loc));
    PetscCall(VecPetscToCeed(Y_loc, &y_mem_type, ctx->y_loc));

    // Apply libCEED operator
    PetscCall(PetscLogGpuTimeBegin());
    PetscCallCeed(ctx->ceed, CeedOperatorApplyAdd(ctx->op_mult, ctx->x_loc, ctx->y_loc, CEED_REQUEST_IMMEDIATE));
    PetscCall(PetscLogGpuTimeEnd());

    // Restore PETSc vectors (same memtype each was placed with)
    PetscCall(VecReadCeedToPetsc(ctx->x_loc, x_mem_type, X_loc));
    PetscCall(VecCeedToPetsc(ctx->y_loc, y_mem_type, Y_loc));

    // Local-to-global - zero Y first, then sum local contributions across ranks
    PetscCall(VecZeroEntries(Y));
    PetscCall(DMLocalToGlobal(ctx->dm_y, Y_loc, ADD_VALUES, Y));

    // Restore local vectors, as needed
    if (!ctx->X_loc) PetscCall(DMRestoreLocalVector(ctx->dm_x, &X_loc));
    PetscCall(DMRestoreLocalVector(ctx->dm_y, &Y_loc));
  }

  // Log flops - GPU or CPU depending on the memtype probed at context creation
  if (PetscMemTypeDevice(ctx->mem_type)) PetscCall(PetscLogGpuFlops(ctx->flops_mult));
  else PetscCall(PetscLogFlops(ctx->flops_mult));

  PetscCall(PetscLogEventEnd(ctx->log_event_mult, A, X, Y, 0));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1380 
1381 /**
1382   @brief Compute `A^T Y = X` for a `MATCEED`.
1383 
1384   Collective across MPI processes.
1385 
1386   @param[in]   A  `MATCEED`
1387   @param[in]   Y  Input PETSc vector
1388   @param[out]  X  Output PETSc vector
1389 
1390   @return An error code: 0 - success, otherwise - failure
1391 **/
PetscErrorCode MatMultTranspose_Ceed(Mat A, Vec Y, Vec X) {
  MatCeedContext ctx;

  PetscFunctionBeginUser;
  PetscCall(MatShellGetContext(A, &ctx));
  PetscCall(PetscLogEventBegin(ctx->log_event_mult_transpose, A, Y, X, 0));

  {
    PetscMemType x_mem_type, y_mem_type;
    Vec          X_loc, Y_loc = ctx->Y_loc_transpose;

    // Get local vectors - use the user-provided Y_loc_transpose if one was set
    // via MatCeedSetLocalVectors(), otherwise borrow one from the DM
    if (!ctx->Y_loc_transpose) PetscCall(DMGetLocalVector(ctx->dm_y, &Y_loc));
    PetscCall(DMGetLocalVector(ctx->dm_x, &X_loc));

    // Global-to-local
    PetscCall(DMGlobalToLocal(ctx->dm_y, Y, INSERT_VALUES, Y_loc));

    // Setup libCEED vectors - input is read-only; output is zeroed because the
    // operator is applied in add mode below
    PetscCall(VecReadPetscToCeed(Y_loc, &y_mem_type, ctx->y_loc));
    PetscCall(VecZeroEntries(X_loc));
    PetscCall(VecPetscToCeed(X_loc, &x_mem_type, ctx->x_loc));

    // Apply libCEED operator (note: transpose maps y_loc -> x_loc)
    PetscCall(PetscLogGpuTimeBegin());
    PetscCallCeed(ctx->ceed, CeedOperatorApplyAdd(ctx->op_mult_transpose, ctx->y_loc, ctx->x_loc, CEED_REQUEST_IMMEDIATE));
    PetscCall(PetscLogGpuTimeEnd());

    // Restore PETSc vectors (same memtype each was placed with)
    PetscCall(VecReadCeedToPetsc(ctx->y_loc, y_mem_type, Y_loc));
    PetscCall(VecCeedToPetsc(ctx->x_loc, x_mem_type, X_loc));

    // Local-to-global - zero X first, then sum local contributions across ranks
    PetscCall(VecZeroEntries(X));
    PetscCall(DMLocalToGlobal(ctx->dm_x, X_loc, ADD_VALUES, X));

    // Restore local vectors, as needed
    if (!ctx->Y_loc_transpose) PetscCall(DMRestoreLocalVector(ctx->dm_y, &Y_loc));
    PetscCall(DMRestoreLocalVector(ctx->dm_x, &X_loc));
  }

  // Log flops - GPU or CPU depending on the memtype probed at context creation
  if (PetscMemTypeDevice(ctx->mem_type)) PetscCall(PetscLogGpuFlops(ctx->flops_mult_transpose));
  else PetscCall(PetscLogFlops(ctx->flops_mult_transpose));

  PetscCall(PetscLogEventEnd(ctx->log_event_mult_transpose, A, Y, X, 0));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1440