xref: /libCEED/examples/fluids/src/mat-ceed.c (revision c86b9822c3a81bea0f9c3f871b41df743ee97bc4)
1c8564c30SJames Wright /// @file
2c8564c30SJames Wright /// MatCeed and it's related operators
3c8564c30SJames Wright 
4d0593705SJames Wright #include <ceed-utils.h>
5c8564c30SJames Wright #include <ceed.h>
6c8564c30SJames Wright #include <ceed/backend.h>
7c8564c30SJames Wright #include <mat-ceed-impl.h>
8c8564c30SJames Wright #include <mat-ceed.h>
9c8564c30SJames Wright #include <petscdmplex.h>
10c8564c30SJames Wright #include <stdlib.h>
11c8564c30SJames Wright #include <string.h>
12c8564c30SJames Wright 
13c8564c30SJames Wright PetscClassId  MATCEED_CLASSID;
14c8564c30SJames Wright PetscLogEvent MATCEED_MULT, MATCEED_MULT_TRANSPOSE;
15c8564c30SJames Wright 
16c8564c30SJames Wright /**
17c8564c30SJames Wright   @brief Register MATCEED log events.
18c8564c30SJames Wright 
19c8564c30SJames Wright   Not collective across MPI processes.
20c8564c30SJames Wright 
21c8564c30SJames Wright   @return An error code: 0 - success, otherwise - failure
22c8564c30SJames Wright **/
23c8564c30SJames Wright static PetscErrorCode MatCeedRegisterLogEvents() {
24c8564c30SJames Wright   static bool registered = false;
25c8564c30SJames Wright 
26c8564c30SJames Wright   PetscFunctionBeginUser;
27c8564c30SJames Wright   if (registered) PetscFunctionReturn(PETSC_SUCCESS);
28c8564c30SJames Wright   PetscCall(PetscClassIdRegister("MATCEED", &MATCEED_CLASSID));
29c8564c30SJames Wright   PetscCall(PetscLogEventRegister("MATCEED Mult", MATCEED_CLASSID, &MATCEED_MULT));
30c8564c30SJames Wright   PetscCall(PetscLogEventRegister("MATCEED Mult Transpose", MATCEED_CLASSID, &MATCEED_MULT_TRANSPOSE));
31c8564c30SJames Wright   registered = true;
32c8564c30SJames Wright   PetscFunctionReturn(PETSC_SUCCESS);
33c8564c30SJames Wright }
34c8564c30SJames Wright 
35c8564c30SJames Wright /**
36c8564c30SJames Wright   @brief Setup inner `Mat` for `PC` operations not directly supported by libCEED.
37c8564c30SJames Wright 
38c8564c30SJames Wright   Collective across MPI processes.
39c8564c30SJames Wright 
40c8564c30SJames Wright   @param[in]   mat_ceed   `MATCEED` to setup
41c8564c30SJames Wright   @param[out]  mat_inner  Inner `Mat`
42c8564c30SJames Wright 
43c8564c30SJames Wright   @return An error code: 0 - success, otherwise - failure
44c8564c30SJames Wright **/
45c8564c30SJames Wright static PetscErrorCode MatCeedSetupInnerMat(Mat mat_ceed, Mat *mat_inner) {
46c8564c30SJames Wright   MatCeedContext ctx;
47c8564c30SJames Wright 
48c8564c30SJames Wright   PetscFunctionBeginUser;
49c8564c30SJames Wright   PetscCall(MatShellGetContext(mat_ceed, &ctx));
50c8564c30SJames Wright 
51c8564c30SJames Wright   PetscCheck(ctx->dm_x == ctx->dm_y, PetscObjectComm((PetscObject)mat_ceed), PETSC_ERR_SUP, "PC only supported for MATCEED on a single DM");
52c8564c30SJames Wright 
53c8564c30SJames Wright   // Check cl mat type
54c8564c30SJames Wright   {
55c8564c30SJames Wright     PetscBool is_internal_mat_type_cl = PETSC_FALSE;
56c8564c30SJames Wright     char      internal_mat_type_cl[64];
57c8564c30SJames Wright 
58c8564c30SJames Wright     // Check for specific CL inner mat type for this Mat
59c8564c30SJames Wright     {
60c8564c30SJames Wright       const char *mat_ceed_prefix = NULL;
61c8564c30SJames Wright 
62c8564c30SJames Wright       PetscCall(MatGetOptionsPrefix(mat_ceed, &mat_ceed_prefix));
63c8564c30SJames Wright       PetscOptionsBegin(PetscObjectComm((PetscObject)mat_ceed), mat_ceed_prefix, "", NULL);
64c8564c30SJames Wright       PetscCall(PetscOptionsFList("-ceed_inner_mat_type", "MATCEED inner assembled MatType for PC support", NULL, MatList, internal_mat_type_cl,
65c8564c30SJames Wright                                   internal_mat_type_cl, sizeof(internal_mat_type_cl), &is_internal_mat_type_cl));
66c8564c30SJames Wright       PetscOptionsEnd();
67c8564c30SJames Wright       if (is_internal_mat_type_cl) {
68c8564c30SJames Wright         PetscCall(PetscFree(ctx->internal_mat_type));
69c8564c30SJames Wright         PetscCall(PetscStrallocpy(internal_mat_type_cl, &ctx->internal_mat_type));
70c8564c30SJames Wright       }
71c8564c30SJames Wright     }
72c8564c30SJames Wright   }
73c8564c30SJames Wright 
74c8564c30SJames Wright   // Create sparse matrix
75c8564c30SJames Wright   {
76c8564c30SJames Wright     MatType dm_mat_type, dm_mat_type_copy;
77c8564c30SJames Wright 
78c8564c30SJames Wright     PetscCall(DMGetMatType(ctx->dm_x, &dm_mat_type));
79c8564c30SJames Wright     PetscCall(PetscStrallocpy(dm_mat_type, (char **)&dm_mat_type_copy));
80c8564c30SJames Wright     PetscCall(DMSetMatType(ctx->dm_x, ctx->internal_mat_type));
81c8564c30SJames Wright     PetscCall(DMCreateMatrix(ctx->dm_x, mat_inner));
82c8564c30SJames Wright     PetscCall(DMSetMatType(ctx->dm_x, dm_mat_type_copy));
83c8564c30SJames Wright     PetscCall(PetscFree(dm_mat_type_copy));
84c8564c30SJames Wright   }
85c8564c30SJames Wright   PetscFunctionReturn(PETSC_SUCCESS);
86c8564c30SJames Wright }
87c8564c30SJames Wright 
88c8564c30SJames Wright /**
89c8564c30SJames Wright   @brief Assemble the point block diagonal of a `MATCEED` into a `MATAIJ` or similar.
90c8564c30SJames Wright          The `mat_coo` preallocation is set to match the sparsity pattern of `mat_ceed`.
91c8564c30SJames Wright          The caller is responsible for assuring the global and local sizes are compatible, otherwise this function will fail.
92c8564c30SJames Wright 
93c8564c30SJames Wright   Collective across MPI processes.
94c8564c30SJames Wright 
95c8564c30SJames Wright   @param[in]      mat_ceed  `MATCEED` to assemble
96c8564c30SJames Wright   @param[in,out]  mat_coo   `MATAIJ` or similar to assemble into
97c8564c30SJames Wright 
98c8564c30SJames Wright   @return An error code: 0 - success, otherwise - failure
99c8564c30SJames Wright **/
100c8564c30SJames Wright static PetscErrorCode MatCeedAssemblePointBlockDiagonalCOO(Mat mat_ceed, Mat mat_coo) {
101c8564c30SJames Wright   MatCeedContext ctx;
102c8564c30SJames Wright 
103c8564c30SJames Wright   PetscFunctionBeginUser;
104c8564c30SJames Wright   PetscCall(MatShellGetContext(mat_ceed, &ctx));
105c8564c30SJames Wright 
106c8564c30SJames Wright   // Check if COO pattern set
107c8564c30SJames Wright   {
108c8564c30SJames Wright     PetscInt index = -1;
109c8564c30SJames Wright 
110c8564c30SJames Wright     for (PetscInt i = 0; i < ctx->num_mats_assembled_pbd; i++) {
111c8564c30SJames Wright       if (ctx->mats_assembled_pbd[i] == mat_coo) index = i;
112c8564c30SJames Wright     }
113c8564c30SJames Wright     if (index == -1) {
114c8564c30SJames Wright       PetscInt     *rows_petsc = NULL, *cols_petsc = NULL;
115c8564c30SJames Wright       CeedInt      *rows_ceed, *cols_ceed;
116c8564c30SJames Wright       PetscCount    num_entries;
117c8564c30SJames Wright       PetscLogStage stage_amg_setup;
118c8564c30SJames Wright 
119c8564c30SJames Wright       // -- Assemble sparsity pattern if mat hasn't been assembled before
120c8564c30SJames Wright       PetscCall(PetscLogStageGetId("MATCEED Assembly Setup", &stage_amg_setup));
121c8564c30SJames Wright       if (stage_amg_setup == -1) {
122c8564c30SJames Wright         PetscCall(PetscLogStageRegister("MATCEED Assembly Setup", &stage_amg_setup));
123c8564c30SJames Wright       }
124c8564c30SJames Wright       PetscCall(PetscLogStagePush(stage_amg_setup));
12554831c5fSJames Wright       PetscCallCeed(ctx->ceed, CeedOperatorLinearAssemblePointBlockDiagonalSymbolic(ctx->op_mult, &num_entries, &rows_ceed, &cols_ceed));
126d0593705SJames Wright       PetscCall(IntArrayCeedToPetsc(num_entries, &rows_ceed, &rows_petsc));
127d0593705SJames Wright       PetscCall(IntArrayCeedToPetsc(num_entries, &cols_ceed, &cols_petsc));
128c8564c30SJames Wright       PetscCall(MatSetPreallocationCOOLocal(mat_coo, num_entries, rows_petsc, cols_petsc));
129c8564c30SJames Wright       free(rows_petsc);
130c8564c30SJames Wright       free(cols_petsc);
13154831c5fSJames Wright       if (!ctx->coo_values_pbd) PetscCallCeed(ctx->ceed, CeedVectorCreate(ctx->ceed, num_entries, &ctx->coo_values_pbd));
132c8564c30SJames Wright       PetscCall(PetscRealloc(++ctx->num_mats_assembled_pbd * sizeof(Mat), &ctx->mats_assembled_pbd));
133c8564c30SJames Wright       ctx->mats_assembled_pbd[ctx->num_mats_assembled_pbd - 1] = mat_coo;
134c8564c30SJames Wright       PetscCall(PetscLogStagePop());
135c8564c30SJames Wright     }
136c8564c30SJames Wright   }
137c8564c30SJames Wright 
138c8564c30SJames Wright   // Assemble mat_ceed
139c8564c30SJames Wright   PetscCall(MatAssemblyBegin(mat_coo, MAT_FINAL_ASSEMBLY));
140c8564c30SJames Wright   {
141c8564c30SJames Wright     const CeedScalar *values;
142c8564c30SJames Wright     MatType           mat_type;
143c8564c30SJames Wright     CeedMemType       mem_type = CEED_MEM_HOST;
144c8564c30SJames Wright     PetscBool         is_spd, is_spd_known;
145c8564c30SJames Wright 
146c8564c30SJames Wright     PetscCall(MatGetType(mat_coo, &mat_type));
147c8564c30SJames Wright     if (strstr(mat_type, "cusparse")) mem_type = CEED_MEM_DEVICE;
148c8564c30SJames Wright     else if (strstr(mat_type, "kokkos")) mem_type = CEED_MEM_DEVICE;
149c8564c30SJames Wright     else mem_type = CEED_MEM_HOST;
150c8564c30SJames Wright 
15154831c5fSJames Wright     PetscCallCeed(ctx->ceed, CeedOperatorLinearAssemblePointBlockDiagonal(ctx->op_mult, ctx->coo_values_pbd, CEED_REQUEST_IMMEDIATE));
15254831c5fSJames Wright     PetscCallCeed(ctx->ceed, CeedVectorGetArrayRead(ctx->coo_values_pbd, mem_type, &values));
153c8564c30SJames Wright     PetscCall(MatSetValuesCOO(mat_coo, values, INSERT_VALUES));
154c8564c30SJames Wright     PetscCall(MatIsSPDKnown(mat_ceed, &is_spd_known, &is_spd));
155c8564c30SJames Wright     if (is_spd_known) PetscCall(MatSetOption(mat_coo, MAT_SPD, is_spd));
15654831c5fSJames Wright     PetscCallCeed(ctx->ceed, CeedVectorRestoreArrayRead(ctx->coo_values_pbd, &values));
157c8564c30SJames Wright   }
158c8564c30SJames Wright   PetscCall(MatAssemblyEnd(mat_coo, MAT_FINAL_ASSEMBLY));
159c8564c30SJames Wright   PetscFunctionReturn(PETSC_SUCCESS);
160c8564c30SJames Wright }
161c8564c30SJames Wright 
162c8564c30SJames Wright /**
163c8564c30SJames Wright   @brief Assemble inner `Mat` for diagonal `PC` operations
164c8564c30SJames Wright 
165c8564c30SJames Wright   Collective across MPI processes.
166c8564c30SJames Wright 
167c8564c30SJames Wright   @param[in]   mat_ceed      `MATCEED` to invert
168c8564c30SJames Wright   @param[in]   use_ceed_pbd  Boolean flag to use libCEED PBD assembly
169c8564c30SJames Wright   @param[out]  mat_inner     Inner `Mat` for diagonal operations
170c8564c30SJames Wright 
171c8564c30SJames Wright   @return An error code: 0 - success, otherwise - failure
172c8564c30SJames Wright **/
173c8564c30SJames Wright static PetscErrorCode MatCeedAssembleInnerBlockDiagonalMat(Mat mat_ceed, PetscBool use_ceed_pbd, Mat *mat_inner) {
174c8564c30SJames Wright   MatCeedContext ctx;
175c8564c30SJames Wright 
176c8564c30SJames Wright   PetscFunctionBeginUser;
177c8564c30SJames Wright   PetscCall(MatShellGetContext(mat_ceed, &ctx));
178c8564c30SJames Wright   if (use_ceed_pbd) {
179c8564c30SJames Wright     // Check if COO pattern set
180c8564c30SJames Wright     if (!ctx->mat_assembled_pbd_internal) PetscCall(MatCeedSetupInnerMat(mat_ceed, &ctx->mat_assembled_pbd_internal));
181c8564c30SJames Wright 
182c8564c30SJames Wright     // Assemble mat_assembled_full_internal
183c8564c30SJames Wright     PetscCall(MatCeedAssemblePointBlockDiagonalCOO(mat_ceed, ctx->mat_assembled_pbd_internal));
184c8564c30SJames Wright     if (mat_inner) *mat_inner = ctx->mat_assembled_pbd_internal;
185c8564c30SJames Wright   } else {
186c8564c30SJames Wright     // Check if COO pattern set
187c8564c30SJames Wright     if (!ctx->mat_assembled_full_internal) PetscCall(MatCeedSetupInnerMat(mat_ceed, &ctx->mat_assembled_full_internal));
188c8564c30SJames Wright 
189c8564c30SJames Wright     // Assemble mat_assembled_full_internal
190c8564c30SJames Wright     PetscCall(MatCeedAssembleCOO(mat_ceed, ctx->mat_assembled_full_internal));
191c8564c30SJames Wright     if (mat_inner) *mat_inner = ctx->mat_assembled_full_internal;
192c8564c30SJames Wright   }
193c8564c30SJames Wright   PetscFunctionReturn(PETSC_SUCCESS);
194c8564c30SJames Wright }
195c8564c30SJames Wright 
196c8564c30SJames Wright /**
197c8564c30SJames Wright   @brief Get `MATCEED` diagonal block for Jacobi.
198c8564c30SJames Wright 
199c8564c30SJames Wright   Collective across MPI processes.
200c8564c30SJames Wright 
201c8564c30SJames Wright   @param[in]   mat_ceed   `MATCEED` to invert
202c8564c30SJames Wright   @param[out]  mat_block  The diagonal block matrix
203c8564c30SJames Wright 
204c8564c30SJames Wright   @return An error code: 0 - success, otherwise - failure
205c8564c30SJames Wright **/
206c8564c30SJames Wright static PetscErrorCode MatGetDiagonalBlock_Ceed(Mat mat_ceed, Mat *mat_block) {
207c8564c30SJames Wright   Mat            mat_inner = NULL;
208c8564c30SJames Wright   MatCeedContext ctx;
209c8564c30SJames Wright 
210c8564c30SJames Wright   PetscFunctionBeginUser;
211c8564c30SJames Wright   PetscCall(MatShellGetContext(mat_ceed, &ctx));
212c8564c30SJames Wright 
213c8564c30SJames Wright   // Assemble inner mat if needed
214c8564c30SJames Wright   PetscCall(MatCeedAssembleInnerBlockDiagonalMat(mat_ceed, ctx->is_ceed_pbd_valid, &mat_inner));
215c8564c30SJames Wright 
216c8564c30SJames Wright   // Get block diagonal
217c8564c30SJames Wright   PetscCall(MatGetDiagonalBlock(mat_inner, mat_block));
218c8564c30SJames Wright   PetscFunctionReturn(PETSC_SUCCESS);
219c8564c30SJames Wright }
220c8564c30SJames Wright 
221c8564c30SJames Wright /**
222c8564c30SJames Wright   @brief Invert `MATCEED` diagonal block for Jacobi.
223c8564c30SJames Wright 
224c8564c30SJames Wright   Collective across MPI processes.
225c8564c30SJames Wright 
226c8564c30SJames Wright   @param[in]   mat_ceed  `MATCEED` to invert
227c8564c30SJames Wright   @param[out]  values    The block inverses in column major order
228c8564c30SJames Wright 
229c8564c30SJames Wright   @return An error code: 0 - success, otherwise - failure
230c8564c30SJames Wright **/
231c8564c30SJames Wright static PetscErrorCode MatInvertBlockDiagonal_Ceed(Mat mat_ceed, const PetscScalar **values) {
232c8564c30SJames Wright   Mat            mat_inner = NULL;
233c8564c30SJames Wright   MatCeedContext ctx;
234c8564c30SJames Wright 
235c8564c30SJames Wright   PetscFunctionBeginUser;
236c8564c30SJames Wright   PetscCall(MatShellGetContext(mat_ceed, &ctx));
237c8564c30SJames Wright 
238c8564c30SJames Wright   // Assemble inner mat if needed
239c8564c30SJames Wright   PetscCall(MatCeedAssembleInnerBlockDiagonalMat(mat_ceed, ctx->is_ceed_pbd_valid, &mat_inner));
240c8564c30SJames Wright 
241c8564c30SJames Wright   // Invert PB diagonal
242c8564c30SJames Wright   PetscCall(MatInvertBlockDiagonal(mat_inner, values));
243c8564c30SJames Wright   PetscFunctionReturn(PETSC_SUCCESS);
244c8564c30SJames Wright }
245c8564c30SJames Wright 
246c8564c30SJames Wright /**
247c8564c30SJames Wright   @brief Invert `MATCEED` variable diagonal block for Jacobi.
248c8564c30SJames Wright 
249c8564c30SJames Wright   Collective across MPI processes.
250c8564c30SJames Wright 
251c8564c30SJames Wright   @param[in]   mat_ceed     `MATCEED` to invert
252c8564c30SJames Wright   @param[in]   num_blocks   The number of blocks on the process
253c8564c30SJames Wright   @param[in]   block_sizes  The size of each block on the process
254c8564c30SJames Wright   @param[out]  values       The block inverses in column major order
255c8564c30SJames Wright 
256c8564c30SJames Wright   @return An error code: 0 - success, otherwise - failure
257c8564c30SJames Wright **/
258c8564c30SJames Wright static PetscErrorCode MatInvertVariableBlockDiagonal_Ceed(Mat mat_ceed, PetscInt num_blocks, const PetscInt *block_sizes, PetscScalar *values) {
259c8564c30SJames Wright   Mat            mat_inner = NULL;
260c8564c30SJames Wright   MatCeedContext ctx;
261c8564c30SJames Wright 
262c8564c30SJames Wright   PetscFunctionBeginUser;
263c8564c30SJames Wright   PetscCall(MatShellGetContext(mat_ceed, &ctx));
264c8564c30SJames Wright 
265c8564c30SJames Wright   // Assemble inner mat if needed
266c8564c30SJames Wright   PetscCall(MatCeedAssembleInnerBlockDiagonalMat(mat_ceed, ctx->is_ceed_vpbd_valid, &mat_inner));
267c8564c30SJames Wright 
268c8564c30SJames Wright   // Invert PB diagonal
269c8564c30SJames Wright   PetscCall(MatInvertVariableBlockDiagonal(mat_inner, num_blocks, block_sizes, values));
270c8564c30SJames Wright   PetscFunctionReturn(PETSC_SUCCESS);
271c8564c30SJames Wright }
272c8564c30SJames Wright 
273c8564c30SJames Wright // -----------------------------------------------------------------------------
274c8564c30SJames Wright // MatCeed
275c8564c30SJames Wright // -----------------------------------------------------------------------------
276c8564c30SJames Wright 
277c8564c30SJames Wright /**
278c8564c30SJames Wright   @brief Create PETSc `Mat` from libCEED operators.
279c8564c30SJames Wright 
280c8564c30SJames Wright   Collective across MPI processes.
281c8564c30SJames Wright 
282c8564c30SJames Wright   @param[in]   dm_x                      Input `DM`
283c8564c30SJames Wright   @param[in]   dm_y                      Output `DM`
284c8564c30SJames Wright   @param[in]   op_mult                   `CeedOperator` for forward evaluation
285c8564c30SJames Wright   @param[in]   op_mult_transpose         `CeedOperator` for transpose evaluation
286c8564c30SJames Wright   @param[out]  mat                        New MatCeed
287c8564c30SJames Wright 
288c8564c30SJames Wright   @return An error code: 0 - success, otherwise - failure
289c8564c30SJames Wright **/
290c8564c30SJames Wright PetscErrorCode MatCeedCreate(DM dm_x, DM dm_y, CeedOperator op_mult, CeedOperator op_mult_transpose, Mat *mat) {
291c8564c30SJames Wright   PetscInt       X_l_size, X_g_size, Y_l_size, Y_g_size;
292c8564c30SJames Wright   VecType        vec_type;
293c8564c30SJames Wright   MatCeedContext ctx;
294c8564c30SJames Wright 
295c8564c30SJames Wright   PetscFunctionBeginUser;
296c8564c30SJames Wright   PetscCall(MatCeedRegisterLogEvents());
297c8564c30SJames Wright 
298c8564c30SJames Wright   // Collect context data
299c8564c30SJames Wright   PetscCall(DMGetVecType(dm_x, &vec_type));
300c8564c30SJames Wright   {
301c8564c30SJames Wright     Vec X;
302c8564c30SJames Wright 
303c8564c30SJames Wright     PetscCall(DMGetGlobalVector(dm_x, &X));
304c8564c30SJames Wright     PetscCall(VecGetSize(X, &X_g_size));
305c8564c30SJames Wright     PetscCall(VecGetLocalSize(X, &X_l_size));
306c8564c30SJames Wright     PetscCall(DMRestoreGlobalVector(dm_x, &X));
307c8564c30SJames Wright   }
308c8564c30SJames Wright   if (dm_y) {
309c8564c30SJames Wright     Vec Y;
310c8564c30SJames Wright 
311c8564c30SJames Wright     PetscCall(DMGetGlobalVector(dm_y, &Y));
312c8564c30SJames Wright     PetscCall(VecGetSize(Y, &Y_g_size));
313c8564c30SJames Wright     PetscCall(VecGetLocalSize(Y, &Y_l_size));
314c8564c30SJames Wright     PetscCall(DMRestoreGlobalVector(dm_y, &Y));
315c8564c30SJames Wright   } else {
316c8564c30SJames Wright     dm_y     = dm_x;
317c8564c30SJames Wright     Y_g_size = X_g_size;
318c8564c30SJames Wright     Y_l_size = X_l_size;
319c8564c30SJames Wright   }
320c8564c30SJames Wright   // Create context
321c8564c30SJames Wright   {
322c8564c30SJames Wright     Vec X_loc, Y_loc_transpose = NULL;
323c8564c30SJames Wright 
324c8564c30SJames Wright     PetscCall(DMCreateLocalVector(dm_x, &X_loc));
325c8564c30SJames Wright     PetscCall(VecZeroEntries(X_loc));
326c8564c30SJames Wright     if (op_mult_transpose) {
327c8564c30SJames Wright       PetscCall(DMCreateLocalVector(dm_y, &Y_loc_transpose));
328c8564c30SJames Wright       PetscCall(VecZeroEntries(Y_loc_transpose));
329c8564c30SJames Wright     }
330c8564c30SJames Wright     PetscCall(MatCeedContextCreate(dm_x, dm_y, X_loc, Y_loc_transpose, op_mult, op_mult_transpose, MATCEED_MULT, MATCEED_MULT_TRANSPOSE, &ctx));
331c8564c30SJames Wright     PetscCall(VecDestroy(&X_loc));
332c8564c30SJames Wright     PetscCall(VecDestroy(&Y_loc_transpose));
333c8564c30SJames Wright   }
334c8564c30SJames Wright 
335c8564c30SJames Wright   // Create mat
336c8564c30SJames Wright   PetscCall(MatCreateShell(PetscObjectComm((PetscObject)dm_x), Y_l_size, X_l_size, Y_g_size, X_g_size, ctx, mat));
337c8564c30SJames Wright   PetscCall(PetscObjectChangeTypeName((PetscObject)*mat, MATCEED));
338c8564c30SJames Wright   // -- Set block and variable block sizes
339c8564c30SJames Wright   if (dm_x == dm_y) {
340c8564c30SJames Wright     MatType dm_mat_type, dm_mat_type_copy;
341c8564c30SJames Wright     Mat     temp_mat;
342c8564c30SJames Wright 
343c8564c30SJames Wright     PetscCall(DMGetMatType(dm_x, &dm_mat_type));
344c8564c30SJames Wright     PetscCall(PetscStrallocpy(dm_mat_type, (char **)&dm_mat_type_copy));
345c8564c30SJames Wright     PetscCall(DMSetMatType(dm_x, MATAIJ));
346c8564c30SJames Wright     PetscCall(DMCreateMatrix(dm_x, &temp_mat));
347c8564c30SJames Wright     PetscCall(DMSetMatType(dm_x, dm_mat_type_copy));
348c8564c30SJames Wright     PetscCall(PetscFree(dm_mat_type_copy));
349c8564c30SJames Wright 
350c8564c30SJames Wright     {
351c8564c30SJames Wright       PetscInt        block_size, num_blocks, max_vblock_size = PETSC_INT_MAX;
352c8564c30SJames Wright       const PetscInt *vblock_sizes;
353c8564c30SJames Wright 
354c8564c30SJames Wright       // -- Get block sizes
355c8564c30SJames Wright       PetscCall(MatGetBlockSize(temp_mat, &block_size));
356c8564c30SJames Wright       PetscCall(MatGetVariableBlockSizes(temp_mat, &num_blocks, &vblock_sizes));
357c8564c30SJames Wright       {
358c8564c30SJames Wright         PetscInt local_min_max[2] = {0}, global_min_max[2] = {0, PETSC_INT_MAX};
359c8564c30SJames Wright 
360c8564c30SJames Wright         for (PetscInt i = 0; i < num_blocks; i++) local_min_max[1] = PetscMax(local_min_max[1], vblock_sizes[i]);
361c8564c30SJames Wright         PetscCall(PetscGlobalMinMaxInt(PetscObjectComm((PetscObject)dm_x), local_min_max, global_min_max));
362c8564c30SJames Wright         max_vblock_size = global_min_max[1];
363c8564c30SJames Wright       }
364c8564c30SJames Wright 
365c8564c30SJames Wright       // -- Copy block sizes
366c8564c30SJames Wright       if (block_size > 1) PetscCall(MatSetBlockSize(*mat, block_size));
367c8564c30SJames Wright       if (num_blocks) PetscCall(MatSetVariableBlockSizes(*mat, num_blocks, (PetscInt *)vblock_sizes));
368c8564c30SJames Wright 
369c8564c30SJames Wright       // -- Check libCEED compatibility
370c8564c30SJames Wright       {
371c8564c30SJames Wright         bool is_composite;
372c8564c30SJames Wright 
373c8564c30SJames Wright         ctx->is_ceed_pbd_valid  = PETSC_TRUE;
374c8564c30SJames Wright         ctx->is_ceed_vpbd_valid = PETSC_TRUE;
37554831c5fSJames Wright         PetscCallCeed(ctx->ceed, CeedOperatorIsComposite(op_mult, &is_composite));
376c8564c30SJames Wright         if (is_composite) {
377c8564c30SJames Wright           CeedInt       num_sub_operators;
378c8564c30SJames Wright           CeedOperator *sub_operators;
379c8564c30SJames Wright 
38054831c5fSJames Wright           PetscCallCeed(ctx->ceed, CeedCompositeOperatorGetNumSub(op_mult, &num_sub_operators));
38154831c5fSJames Wright           PetscCallCeed(ctx->ceed, CeedCompositeOperatorGetSubList(op_mult, &sub_operators));
382c8564c30SJames Wright           for (CeedInt i = 0; i < num_sub_operators; i++) {
383c8564c30SJames Wright             CeedInt                  num_bases, num_comp;
384c8564c30SJames Wright             CeedBasis               *active_bases;
385c8564c30SJames Wright             CeedOperatorAssemblyData assembly_data;
386c8564c30SJames Wright 
38754831c5fSJames Wright             PetscCallCeed(ctx->ceed, CeedOperatorGetOperatorAssemblyData(sub_operators[i], &assembly_data));
38854831c5fSJames Wright             PetscCallCeed(ctx->ceed, CeedOperatorAssemblyDataGetBases(assembly_data, &num_bases, &active_bases, NULL, NULL, NULL, NULL));
38954831c5fSJames Wright             PetscCallCeed(ctx->ceed, CeedBasisGetNumComponents(active_bases[0], &num_comp));
390c8564c30SJames Wright             if (num_bases > 1) {
391c8564c30SJames Wright               ctx->is_ceed_pbd_valid  = PETSC_FALSE;
392c8564c30SJames Wright               ctx->is_ceed_vpbd_valid = PETSC_FALSE;
393c8564c30SJames Wright             }
394c8564c30SJames Wright             if (num_comp != block_size) ctx->is_ceed_pbd_valid = PETSC_FALSE;
395c8564c30SJames Wright             if (num_comp < max_vblock_size) ctx->is_ceed_vpbd_valid = PETSC_FALSE;
396c8564c30SJames Wright           }
397c8564c30SJames Wright         } else {
398c8564c30SJames Wright           // LCOV_EXCL_START
399c8564c30SJames Wright           CeedInt                  num_bases, num_comp;
400c8564c30SJames Wright           CeedBasis               *active_bases;
401c8564c30SJames Wright           CeedOperatorAssemblyData assembly_data;
402c8564c30SJames Wright 
40354831c5fSJames Wright           PetscCallCeed(ctx->ceed, CeedOperatorGetOperatorAssemblyData(op_mult, &assembly_data));
40454831c5fSJames Wright           PetscCallCeed(ctx->ceed, CeedOperatorAssemblyDataGetBases(assembly_data, &num_bases, &active_bases, NULL, NULL, NULL, NULL));
40554831c5fSJames Wright           PetscCallCeed(ctx->ceed, CeedBasisGetNumComponents(active_bases[0], &num_comp));
406c8564c30SJames Wright           if (num_bases > 1) {
407c8564c30SJames Wright             ctx->is_ceed_pbd_valid  = PETSC_FALSE;
408c8564c30SJames Wright             ctx->is_ceed_vpbd_valid = PETSC_FALSE;
409c8564c30SJames Wright           }
410c8564c30SJames Wright           if (num_comp != block_size) ctx->is_ceed_pbd_valid = PETSC_FALSE;
411c8564c30SJames Wright           if (num_comp < max_vblock_size) ctx->is_ceed_vpbd_valid = PETSC_FALSE;
412c8564c30SJames Wright           // LCOV_EXCL_STOP
413c8564c30SJames Wright         }
414c8564c30SJames Wright         {
415c8564c30SJames Wright           PetscInt local_is_valid[2], global_is_valid[2];
416c8564c30SJames Wright 
417c8564c30SJames Wright           local_is_valid[0] = local_is_valid[1] = ctx->is_ceed_pbd_valid;
418c8564c30SJames Wright           PetscCall(PetscGlobalMinMaxInt(PetscObjectComm((PetscObject)dm_x), local_is_valid, global_is_valid));
419c8564c30SJames Wright           ctx->is_ceed_pbd_valid = global_is_valid[0];
420c8564c30SJames Wright           local_is_valid[0] = local_is_valid[1] = ctx->is_ceed_vpbd_valid;
421c8564c30SJames Wright           PetscCall(PetscGlobalMinMaxInt(PetscObjectComm((PetscObject)dm_x), local_is_valid, global_is_valid));
422c8564c30SJames Wright           ctx->is_ceed_vpbd_valid = global_is_valid[0];
423c8564c30SJames Wright         }
424c8564c30SJames Wright       }
425c8564c30SJames Wright     }
426c8564c30SJames Wright     PetscCall(MatDestroy(&temp_mat));
427c8564c30SJames Wright   }
428c8564c30SJames Wright   // -- Set internal mat type
429c8564c30SJames Wright   {
430c8564c30SJames Wright     VecType vec_type;
431c8564c30SJames Wright     MatType internal_mat_type = MATAIJ;
432c8564c30SJames Wright 
433c8564c30SJames Wright     PetscCall(VecGetType(ctx->X_loc, &vec_type));
434c8564c30SJames Wright     if (strstr(vec_type, VECCUDA)) internal_mat_type = MATAIJCUSPARSE;
435c8564c30SJames Wright     else if (strstr(vec_type, VECKOKKOS)) internal_mat_type = MATAIJKOKKOS;
436c8564c30SJames Wright     else internal_mat_type = MATAIJ;
437c8564c30SJames Wright     PetscCall(PetscStrallocpy(internal_mat_type, &ctx->internal_mat_type));
438c8564c30SJames Wright   }
439c8564c30SJames Wright   // -- Set mat operations
440c8564c30SJames Wright   PetscCall(MatShellSetContextDestroy(*mat, (PetscErrorCode(*)(void *))MatCeedContextDestroy));
441c8564c30SJames Wright   PetscCall(MatShellSetOperation(*mat, MATOP_MULT, (void (*)(void))MatMult_Ceed));
442c8564c30SJames Wright   if (op_mult_transpose) PetscCall(MatShellSetOperation(*mat, MATOP_MULT_TRANSPOSE, (void (*)(void))MatMultTranspose_Ceed));
443c8564c30SJames Wright   PetscCall(MatShellSetOperation(*mat, MATOP_GET_DIAGONAL, (void (*)(void))MatGetDiagonal_Ceed));
444c8564c30SJames Wright   PetscCall(MatShellSetOperation(*mat, MATOP_GET_DIAGONAL_BLOCK, (void (*)(void))MatGetDiagonalBlock_Ceed));
445c8564c30SJames Wright   PetscCall(MatShellSetOperation(*mat, MATOP_INVERT_BLOCK_DIAGONAL, (void (*)(void))MatInvertBlockDiagonal_Ceed));
446c8564c30SJames Wright   PetscCall(MatShellSetOperation(*mat, MATOP_INVERT_VBLOCK_DIAGONAL, (void (*)(void))MatInvertVariableBlockDiagonal_Ceed));
447c8564c30SJames Wright   PetscCall(MatShellSetVecType(*mat, vec_type));
448c8564c30SJames Wright   PetscFunctionReturn(PETSC_SUCCESS);
449c8564c30SJames Wright }
450c8564c30SJames Wright 
451c8564c30SJames Wright /**
452c8564c30SJames Wright   @brief Copy `MATCEED` into a compatible `Mat` with type `MatShell` or `MATCEED`.
453c8564c30SJames Wright 
454c8564c30SJames Wright   Collective across MPI processes.
455c8564c30SJames Wright 
456c8564c30SJames Wright   @param[in]   mat_ceed   `MATCEED` to copy from
457c8564c30SJames Wright   @param[out]  mat_other  `MatShell` or `MATCEED` to copy into
458c8564c30SJames Wright 
459c8564c30SJames Wright   @return An error code: 0 - success, otherwise - failure
460c8564c30SJames Wright **/
461c8564c30SJames Wright PetscErrorCode MatCeedCopy(Mat mat_ceed, Mat mat_other) {
462c8564c30SJames Wright   PetscFunctionBeginUser;
463c8564c30SJames Wright   PetscCall(MatCeedRegisterLogEvents());
464c8564c30SJames Wright 
465c8564c30SJames Wright   // Check type compatibility
466c8564c30SJames Wright   {
467c8564c30SJames Wright     MatType mat_type_ceed, mat_type_other;
468c8564c30SJames Wright 
469c8564c30SJames Wright     PetscCall(MatGetType(mat_ceed, &mat_type_ceed));
470c8564c30SJames Wright     PetscCheck(!strcmp(mat_type_ceed, MATCEED), PETSC_COMM_SELF, PETSC_ERR_LIB, "mat_ceed must have type " MATCEED);
471c8564c30SJames Wright     PetscCall(MatGetType(mat_ceed, &mat_type_other));
472c8564c30SJames Wright     PetscCheck(!strcmp(mat_type_other, MATCEED) || !strcmp(mat_type_other, MATSHELL), PETSC_COMM_SELF, PETSC_ERR_LIB,
473c8564c30SJames Wright                "mat_other must have type " MATCEED " or " MATSHELL);
474c8564c30SJames Wright   }
475c8564c30SJames Wright 
476c8564c30SJames Wright   // Check dimension compatibility
477c8564c30SJames Wright   {
478c8564c30SJames Wright     PetscInt X_l_ceed_size, X_g_ceed_size, Y_l_ceed_size, Y_g_ceed_size, X_l_other_size, X_g_other_size, Y_l_other_size, Y_g_other_size;
479c8564c30SJames Wright 
480c8564c30SJames Wright     PetscCall(MatGetSize(mat_ceed, &Y_g_ceed_size, &X_g_ceed_size));
481c8564c30SJames Wright     PetscCall(MatGetLocalSize(mat_ceed, &Y_l_ceed_size, &X_l_ceed_size));
482c8564c30SJames Wright     PetscCall(MatGetSize(mat_ceed, &Y_g_other_size, &X_g_other_size));
483c8564c30SJames Wright     PetscCall(MatGetLocalSize(mat_ceed, &Y_l_other_size, &X_l_other_size));
484c8564c30SJames Wright     PetscCheck((Y_g_ceed_size == Y_g_other_size) && (X_g_ceed_size == X_g_other_size) && (Y_l_ceed_size == Y_l_other_size) &&
485c8564c30SJames Wright                    (X_l_ceed_size == X_l_other_size),
486c8564c30SJames Wright                PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ,
487c8564c30SJames Wright                "mat_ceed and mat_other must have compatible sizes; found mat_ceed (Global: %" PetscInt_FMT ", %" PetscInt_FMT
488c8564c30SJames Wright                "; Local: %" PetscInt_FMT ", %" PetscInt_FMT ") mat_other (Global: %" PetscInt_FMT ", %" PetscInt_FMT "; Local: %" PetscInt_FMT
489c8564c30SJames Wright                ", %" PetscInt_FMT ")",
490c8564c30SJames Wright                Y_g_ceed_size, X_g_ceed_size, Y_l_ceed_size, X_l_ceed_size, Y_g_other_size, X_g_other_size, Y_l_other_size, X_l_other_size);
491c8564c30SJames Wright   }
492c8564c30SJames Wright 
493c8564c30SJames Wright   // Convert
494c8564c30SJames Wright   {
495c8564c30SJames Wright     VecType        vec_type;
496c8564c30SJames Wright     MatCeedContext ctx;
497c8564c30SJames Wright 
498c8564c30SJames Wright     PetscCall(PetscObjectChangeTypeName((PetscObject)mat_other, MATCEED));
499c8564c30SJames Wright     PetscCall(MatShellGetContext(mat_ceed, &ctx));
500c8564c30SJames Wright     PetscCall(MatCeedContextReference(ctx));
501c8564c30SJames Wright     PetscCall(MatShellSetContext(mat_other, ctx));
502c8564c30SJames Wright     PetscCall(MatShellSetContextDestroy(mat_other, (PetscErrorCode(*)(void *))MatCeedContextDestroy));
503c8564c30SJames Wright     PetscCall(MatShellSetOperation(mat_other, MATOP_MULT, (void (*)(void))MatMult_Ceed));
504c8564c30SJames Wright     if (ctx->op_mult_transpose) PetscCall(MatShellSetOperation(mat_other, MATOP_MULT_TRANSPOSE, (void (*)(void))MatMultTranspose_Ceed));
505c8564c30SJames Wright     PetscCall(MatShellSetOperation(mat_other, MATOP_GET_DIAGONAL, (void (*)(void))MatGetDiagonal_Ceed));
506c8564c30SJames Wright     PetscCall(MatShellSetOperation(mat_other, MATOP_GET_DIAGONAL_BLOCK, (void (*)(void))MatGetDiagonalBlock_Ceed));
507c8564c30SJames Wright     PetscCall(MatShellSetOperation(mat_other, MATOP_INVERT_BLOCK_DIAGONAL, (void (*)(void))MatInvertBlockDiagonal_Ceed));
508c8564c30SJames Wright     PetscCall(MatShellSetOperation(mat_other, MATOP_INVERT_VBLOCK_DIAGONAL, (void (*)(void))MatInvertVariableBlockDiagonal_Ceed));
509c8564c30SJames Wright     {
510c8564c30SJames Wright       PetscInt block_size;
511c8564c30SJames Wright 
512c8564c30SJames Wright       PetscCall(MatGetBlockSize(mat_ceed, &block_size));
513c8564c30SJames Wright       if (block_size > 1) PetscCall(MatSetBlockSize(mat_other, block_size));
514c8564c30SJames Wright     }
515c8564c30SJames Wright     {
516c8564c30SJames Wright       PetscInt        num_blocks;
517c8564c30SJames Wright       const PetscInt *block_sizes;
518c8564c30SJames Wright 
519c8564c30SJames Wright       PetscCall(MatGetVariableBlockSizes(mat_ceed, &num_blocks, &block_sizes));
520c8564c30SJames Wright       if (num_blocks) PetscCall(MatSetVariableBlockSizes(mat_other, num_blocks, (PetscInt *)block_sizes));
521c8564c30SJames Wright     }
522c8564c30SJames Wright     PetscCall(DMGetVecType(ctx->dm_x, &vec_type));
523c8564c30SJames Wright     PetscCall(MatShellSetVecType(mat_other, vec_type));
524c8564c30SJames Wright   }
525c8564c30SJames Wright   PetscFunctionReturn(PETSC_SUCCESS);
526c8564c30SJames Wright }
527c8564c30SJames Wright 
528c8564c30SJames Wright /**
529c8564c30SJames Wright   @brief Assemble a `MATCEED` into a `MATAIJ` or similar.
530c8564c30SJames Wright          The `mat_coo` preallocation is set to match the sparsity pattern of `mat_ceed`.
531c8564c30SJames Wright          The caller is responsible for assuring the global and local sizes are compatible, otherwise this function will fail.
532c8564c30SJames Wright 
533c8564c30SJames Wright   Collective across MPI processes.
534c8564c30SJames Wright 
535c8564c30SJames Wright   @param[in]      mat_ceed  `MATCEED` to assemble
536c8564c30SJames Wright   @param[in,out]  mat_coo   `MATAIJ` or similar to assemble into
537c8564c30SJames Wright 
538c8564c30SJames Wright   @return An error code: 0 - success, otherwise - failure
539c8564c30SJames Wright **/
540c8564c30SJames Wright PetscErrorCode MatCeedAssembleCOO(Mat mat_ceed, Mat mat_coo) {
541c8564c30SJames Wright   MatCeedContext ctx;
542c8564c30SJames Wright 
543c8564c30SJames Wright   PetscFunctionBeginUser;
544c8564c30SJames Wright   PetscCall(MatShellGetContext(mat_ceed, &ctx));
545c8564c30SJames Wright 
546c8564c30SJames Wright   // Check if COO pattern set
547c8564c30SJames Wright   {
548c8564c30SJames Wright     PetscInt index = -1;
549c8564c30SJames Wright 
550c8564c30SJames Wright     for (PetscInt i = 0; i < ctx->num_mats_assembled_full; i++) {
551c8564c30SJames Wright       if (ctx->mats_assembled_full[i] == mat_coo) index = i;
552c8564c30SJames Wright     }
553c8564c30SJames Wright     if (index == -1) {
554c8564c30SJames Wright       PetscInt     *rows_petsc = NULL, *cols_petsc = NULL;
555c8564c30SJames Wright       CeedInt      *rows_ceed, *cols_ceed;
556c8564c30SJames Wright       PetscCount    num_entries;
557c8564c30SJames Wright       PetscLogStage stage_amg_setup;
558c8564c30SJames Wright 
559c8564c30SJames Wright       // -- Assemble sparsity pattern if mat hasn't been assembled before
560c8564c30SJames Wright       PetscCall(PetscLogStageGetId("MATCEED Assembly Setup", &stage_amg_setup));
561c8564c30SJames Wright       if (stage_amg_setup == -1) {
562c8564c30SJames Wright         PetscCall(PetscLogStageRegister("MATCEED Assembly Setup", &stage_amg_setup));
563c8564c30SJames Wright       }
564c8564c30SJames Wright       PetscCall(PetscLogStagePush(stage_amg_setup));
56554831c5fSJames Wright       PetscCallCeed(ctx->ceed, CeedOperatorLinearAssembleSymbolic(ctx->op_mult, &num_entries, &rows_ceed, &cols_ceed));
566d0593705SJames Wright       PetscCall(IntArrayCeedToPetsc(num_entries, &rows_ceed, &rows_petsc));
567d0593705SJames Wright       PetscCall(IntArrayCeedToPetsc(num_entries, &cols_ceed, &cols_petsc));
568c8564c30SJames Wright       PetscCall(MatSetPreallocationCOOLocal(mat_coo, num_entries, rows_petsc, cols_petsc));
569c8564c30SJames Wright       free(rows_petsc);
570c8564c30SJames Wright       free(cols_petsc);
57154831c5fSJames Wright       if (!ctx->coo_values_full) PetscCallCeed(ctx->ceed, CeedVectorCreate(ctx->ceed, num_entries, &ctx->coo_values_full));
572c8564c30SJames Wright       PetscCall(PetscRealloc(++ctx->num_mats_assembled_full * sizeof(Mat), &ctx->mats_assembled_full));
573c8564c30SJames Wright       ctx->mats_assembled_full[ctx->num_mats_assembled_full - 1] = mat_coo;
574c8564c30SJames Wright       PetscCall(PetscLogStagePop());
575c8564c30SJames Wright     }
576c8564c30SJames Wright   }
577c8564c30SJames Wright 
578c8564c30SJames Wright   // Assemble mat_ceed
579c8564c30SJames Wright   PetscCall(MatAssemblyBegin(mat_coo, MAT_FINAL_ASSEMBLY));
580c8564c30SJames Wright   {
581c8564c30SJames Wright     const CeedScalar *values;
582c8564c30SJames Wright     MatType           mat_type;
583c8564c30SJames Wright     CeedMemType       mem_type = CEED_MEM_HOST;
584c8564c30SJames Wright     PetscBool         is_spd, is_spd_known;
585c8564c30SJames Wright 
586c8564c30SJames Wright     PetscCall(MatGetType(mat_coo, &mat_type));
587c8564c30SJames Wright     if (strstr(mat_type, "cusparse")) mem_type = CEED_MEM_DEVICE;
588c8564c30SJames Wright     else if (strstr(mat_type, "kokkos")) mem_type = CEED_MEM_DEVICE;
589c8564c30SJames Wright     else mem_type = CEED_MEM_HOST;
590c8564c30SJames Wright 
59154831c5fSJames Wright     PetscCallCeed(ctx->ceed, CeedOperatorLinearAssemble(ctx->op_mult, ctx->coo_values_full));
59254831c5fSJames Wright     PetscCallCeed(ctx->ceed, CeedVectorGetArrayRead(ctx->coo_values_full, mem_type, &values));
593c8564c30SJames Wright     PetscCall(MatSetValuesCOO(mat_coo, values, INSERT_VALUES));
594c8564c30SJames Wright     PetscCall(MatIsSPDKnown(mat_ceed, &is_spd_known, &is_spd));
595c8564c30SJames Wright     if (is_spd_known) PetscCall(MatSetOption(mat_coo, MAT_SPD, is_spd));
59654831c5fSJames Wright     PetscCallCeed(ctx->ceed, CeedVectorRestoreArrayRead(ctx->coo_values_full, &values));
597c8564c30SJames Wright   }
598c8564c30SJames Wright   PetscCall(MatAssemblyEnd(mat_coo, MAT_FINAL_ASSEMBLY));
599c8564c30SJames Wright   PetscFunctionReturn(PETSC_SUCCESS);
600c8564c30SJames Wright }
601c8564c30SJames Wright 
602c8564c30SJames Wright /**
603c8564c30SJames Wright   @brief Set user context for a `MATCEED`.
604c8564c30SJames Wright 
605c8564c30SJames Wright   Collective across MPI processes.
606c8564c30SJames Wright 
607c8564c30SJames Wright   @param[in,out]  mat  `MATCEED`
608c8564c30SJames Wright   @param[in]      f    The context destroy function, or NULL
609c8564c30SJames Wright   @param[in]      ctx  User context, or NULL to unset
610c8564c30SJames Wright 
611c8564c30SJames Wright   @return An error code: 0 - success, otherwise - failure
612c8564c30SJames Wright **/
613c8564c30SJames Wright PetscErrorCode MatCeedSetContext(Mat mat, PetscErrorCode (*f)(void *), void *ctx) {
614c8564c30SJames Wright   PetscContainer user_ctx = NULL;
615c8564c30SJames Wright 
616c8564c30SJames Wright   PetscFunctionBeginUser;
617c8564c30SJames Wright   if (ctx) {
618c8564c30SJames Wright     PetscCall(PetscContainerCreate(PetscObjectComm((PetscObject)mat), &user_ctx));
619c8564c30SJames Wright     PetscCall(PetscContainerSetPointer(user_ctx, ctx));
620c8564c30SJames Wright     PetscCall(PetscContainerSetUserDestroy(user_ctx, f));
621c8564c30SJames Wright   }
622c8564c30SJames Wright   PetscCall(PetscObjectCompose((PetscObject)mat, "MatCeed user context", (PetscObject)user_ctx));
623c8564c30SJames Wright   PetscCall(PetscContainerDestroy(&user_ctx));
624c8564c30SJames Wright   PetscFunctionReturn(PETSC_SUCCESS);
625c8564c30SJames Wright }
626c8564c30SJames Wright 
627c8564c30SJames Wright /**
628c8564c30SJames Wright   @brief Retrieve the user context for a `MATCEED`.
629c8564c30SJames Wright 
630c8564c30SJames Wright   Collective across MPI processes.
631c8564c30SJames Wright 
632c8564c30SJames Wright   @param[in,out]  mat  `MATCEED`
633c8564c30SJames Wright   @param[in]      ctx  User context
634c8564c30SJames Wright 
635c8564c30SJames Wright   @return An error code: 0 - success, otherwise - failure
636c8564c30SJames Wright **/
637c8564c30SJames Wright PetscErrorCode MatCeedGetContext(Mat mat, void *ctx) {
638c8564c30SJames Wright   PetscContainer user_ctx;
639c8564c30SJames Wright 
640c8564c30SJames Wright   PetscFunctionBeginUser;
641c8564c30SJames Wright   PetscCall(PetscObjectQuery((PetscObject)mat, "MatCeed user context", (PetscObject *)&user_ctx));
642c8564c30SJames Wright   if (user_ctx) PetscCall(PetscContainerGetPointer(user_ctx, (void **)ctx));
643c8564c30SJames Wright   else *(void **)ctx = NULL;
644c8564c30SJames Wright   PetscFunctionReturn(PETSC_SUCCESS);
645c8564c30SJames Wright }
646c8564c30SJames Wright 
647c8564c30SJames Wright /**
648c8564c30SJames Wright   @brief Sets the inner matrix type as a string from the `MATCEED`.
649c8564c30SJames Wright 
650c8564c30SJames Wright   Collective across MPI processes.
651c8564c30SJames Wright 
652c8564c30SJames Wright   @param[in,out]  mat   `MATCEED`
653c8564c30SJames Wright   @param[in]      type  Inner `MatType` to set
654c8564c30SJames Wright 
655c8564c30SJames Wright   @return An error code: 0 - success, otherwise - failure
656c8564c30SJames Wright **/
657c8564c30SJames Wright PetscErrorCode MatCeedSetInnerMatType(Mat mat, MatType type) {
658c8564c30SJames Wright   MatCeedContext ctx;
659c8564c30SJames Wright 
660c8564c30SJames Wright   PetscFunctionBeginUser;
661c8564c30SJames Wright   PetscCall(MatShellGetContext(mat, &ctx));
662c8564c30SJames Wright   // Check if same
663c8564c30SJames Wright   {
664c8564c30SJames Wright     size_t    len_old, len_new;
665c8564c30SJames Wright     PetscBool is_same = PETSC_FALSE;
666c8564c30SJames Wright 
667c8564c30SJames Wright     PetscCall(PetscStrlen(ctx->internal_mat_type, &len_old));
668c8564c30SJames Wright     PetscCall(PetscStrlen(type, &len_new));
669c8564c30SJames Wright     if (len_old == len_new) PetscCall(PetscStrncmp(ctx->internal_mat_type, type, len_old, &is_same));
670c8564c30SJames Wright     if (is_same) PetscFunctionReturn(PETSC_SUCCESS);
671c8564c30SJames Wright   }
672c8564c30SJames Wright   // Clean up old mats in different format
673c8564c30SJames Wright   // LCOV_EXCL_START
674c8564c30SJames Wright   if (ctx->mat_assembled_full_internal) {
675c8564c30SJames Wright     for (PetscInt i = 0; i < ctx->num_mats_assembled_full; i++) {
676c8564c30SJames Wright       if (ctx->mats_assembled_full[i] == ctx->mat_assembled_full_internal) {
677c8564c30SJames Wright         for (PetscInt j = i + 1; j < ctx->num_mats_assembled_full; j++) {
678c8564c30SJames Wright           ctx->mats_assembled_full[j - 1] = ctx->mats_assembled_full[j];
679c8564c30SJames Wright         }
680c8564c30SJames Wright         ctx->num_mats_assembled_full--;
681c8564c30SJames Wright         // Note: we'll realloc this array again, so no need to shrink the allocation
682c8564c30SJames Wright         PetscCall(MatDestroy(&ctx->mat_assembled_full_internal));
683c8564c30SJames Wright       }
684c8564c30SJames Wright     }
685c8564c30SJames Wright   }
686c8564c30SJames Wright   if (ctx->mat_assembled_pbd_internal) {
687c8564c30SJames Wright     for (PetscInt i = 0; i < ctx->num_mats_assembled_pbd; i++) {
688c8564c30SJames Wright       if (ctx->mats_assembled_pbd[i] == ctx->mat_assembled_pbd_internal) {
689c8564c30SJames Wright         for (PetscInt j = i + 1; j < ctx->num_mats_assembled_pbd; j++) {
690c8564c30SJames Wright           ctx->mats_assembled_pbd[j - 1] = ctx->mats_assembled_pbd[j];
691c8564c30SJames Wright         }
692c8564c30SJames Wright         // Note: we'll realloc this array again, so no need to shrink the allocation
693c8564c30SJames Wright         ctx->num_mats_assembled_pbd--;
694c8564c30SJames Wright         PetscCall(MatDestroy(&ctx->mat_assembled_pbd_internal));
695c8564c30SJames Wright       }
696c8564c30SJames Wright     }
697c8564c30SJames Wright   }
698c8564c30SJames Wright   PetscCall(PetscFree(ctx->internal_mat_type));
699c8564c30SJames Wright   PetscCall(PetscStrallocpy(type, &ctx->internal_mat_type));
700c8564c30SJames Wright   PetscFunctionReturn(PETSC_SUCCESS);
701c8564c30SJames Wright   // LCOV_EXCL_STOP
702c8564c30SJames Wright }
703c8564c30SJames Wright 
704c8564c30SJames Wright /**
705c8564c30SJames Wright   @brief Gets the inner matrix type as a string from the `MATCEED`.
706c8564c30SJames Wright 
707c8564c30SJames Wright   Collective across MPI processes.
708c8564c30SJames Wright 
709c8564c30SJames Wright   @param[in,out]  mat   `MATCEED`
710c8564c30SJames Wright   @param[in]      type  Inner `MatType`
711c8564c30SJames Wright 
712c8564c30SJames Wright   @return An error code: 0 - success, otherwise - failure
713c8564c30SJames Wright **/
714c8564c30SJames Wright PetscErrorCode MatCeedGetInnerMatType(Mat mat, MatType *type) {
715c8564c30SJames Wright   MatCeedContext ctx;
716c8564c30SJames Wright 
717c8564c30SJames Wright   PetscFunctionBeginUser;
718c8564c30SJames Wright   PetscCall(MatShellGetContext(mat, &ctx));
719c8564c30SJames Wright   *type = ctx->internal_mat_type;
720c8564c30SJames Wright   PetscFunctionReturn(PETSC_SUCCESS);
721c8564c30SJames Wright }
722c8564c30SJames Wright 
723c8564c30SJames Wright /**
724c8564c30SJames Wright   @brief Set a user defined matrix operation for a `MATCEED` matrix.
725c8564c30SJames Wright 
726c8564c30SJames Wright   Within each user-defined routine, the user should call `MatCeedGetContext()` to obtain the user-defined context that was set by
727c8564c30SJames Wright `MatCeedSetContext()`.
728c8564c30SJames Wright 
729c8564c30SJames Wright   Collective across MPI processes.
730c8564c30SJames Wright 
731c8564c30SJames Wright   @param[in,out]  mat  `MATCEED`
732c8564c30SJames Wright   @param[in]      op   Name of the `MatOperation`
733c8564c30SJames Wright   @param[in]      g    Function that provides the operation
734c8564c30SJames Wright 
735c8564c30SJames Wright   @return An error code: 0 - success, otherwise - failure
736c8564c30SJames Wright **/
737c8564c30SJames Wright PetscErrorCode MatCeedSetOperation(Mat mat, MatOperation op, void (*g)(void)) {
738c8564c30SJames Wright   PetscFunctionBeginUser;
739c8564c30SJames Wright   PetscCall(MatShellSetOperation(mat, op, g));
740c8564c30SJames Wright   PetscFunctionReturn(PETSC_SUCCESS);
741c8564c30SJames Wright }
742c8564c30SJames Wright 
743c8564c30SJames Wright /**
744c8564c30SJames Wright   @brief Set input local vectors for `MATCEED` `MatMult()` and `MatMultTranspose()` operations.
745c8564c30SJames Wright 
746c8564c30SJames Wright   Not collective across MPI processes.
747c8564c30SJames Wright 
748c8564c30SJames Wright   @param[in,out]  mat              `MATCEED`
749c8564c30SJames Wright   @param[in]      X_loc            Input PETSc local vector, or NULL
750c8564c30SJames Wright   @param[in]      Y_loc_transpose  Input PETSc local vector for transpose operation, or NULL
751c8564c30SJames Wright 
752c8564c30SJames Wright   @return An error code: 0 - success, otherwise - failure
753c8564c30SJames Wright **/
754c8564c30SJames Wright PetscErrorCode MatCeedSetLocalVectors(Mat mat, Vec X_loc, Vec Y_loc_transpose) {
755c8564c30SJames Wright   MatCeedContext ctx;
756c8564c30SJames Wright 
757c8564c30SJames Wright   PetscFunctionBeginUser;
758c8564c30SJames Wright   PetscCall(MatShellGetContext(mat, &ctx));
759c8564c30SJames Wright   if (X_loc) {
760c8564c30SJames Wright     PetscInt len_old, len_new;
761c8564c30SJames Wright 
762c8564c30SJames Wright     PetscCall(VecGetSize(ctx->X_loc, &len_old));
763c8564c30SJames Wright     PetscCall(VecGetSize(X_loc, &len_new));
764c8564c30SJames Wright     PetscCheck(len_old == len_new, PETSC_COMM_SELF, PETSC_ERR_LIB, "new X_loc length %" PetscInt_FMT " should match old X_loc length %" PetscInt_FMT,
765c8564c30SJames Wright                len_new, len_old);
766c8564c30SJames Wright     PetscCall(VecDestroy(&ctx->X_loc));
767c8564c30SJames Wright     ctx->X_loc = X_loc;
768c8564c30SJames Wright     PetscCall(PetscObjectReference((PetscObject)X_loc));
769c8564c30SJames Wright   }
770c8564c30SJames Wright   if (Y_loc_transpose) {
771c8564c30SJames Wright     PetscInt len_old, len_new;
772c8564c30SJames Wright 
773c8564c30SJames Wright     PetscCall(VecGetSize(ctx->Y_loc_transpose, &len_old));
774c8564c30SJames Wright     PetscCall(VecGetSize(Y_loc_transpose, &len_new));
775c8564c30SJames Wright     PetscCheck(len_old == len_new, PETSC_COMM_SELF, PETSC_ERR_LIB,
776c8564c30SJames Wright                "new Y_loc_transpose length %" PetscInt_FMT " should match old Y_loc_transpose length %" PetscInt_FMT, len_new, len_old);
777c8564c30SJames Wright     PetscCall(VecDestroy(&ctx->Y_loc_transpose));
778c8564c30SJames Wright     ctx->Y_loc_transpose = Y_loc_transpose;
779c8564c30SJames Wright     PetscCall(PetscObjectReference((PetscObject)Y_loc_transpose));
780c8564c30SJames Wright   }
781c8564c30SJames Wright   PetscFunctionReturn(PETSC_SUCCESS);
782c8564c30SJames Wright }
783c8564c30SJames Wright 
784c8564c30SJames Wright /**
785c8564c30SJames Wright   @brief Get input local vectors for `MATCEED` `MatMult()` and `MatMultTranspose()` operations.
786c8564c30SJames Wright 
787c8564c30SJames Wright   Not collective across MPI processes.
788c8564c30SJames Wright 
789c8564c30SJames Wright   @param[in,out]  mat              `MATCEED`
790c8564c30SJames Wright   @param[out]     X_loc            Input PETSc local vector, or NULL
791c8564c30SJames Wright   @param[out]     Y_loc_transpose  Input PETSc local vector for transpose operation, or NULL
792c8564c30SJames Wright 
793c8564c30SJames Wright   @return An error code: 0 - success, otherwise - failure
794c8564c30SJames Wright **/
795c8564c30SJames Wright PetscErrorCode MatCeedGetLocalVectors(Mat mat, Vec *X_loc, Vec *Y_loc_transpose) {
796c8564c30SJames Wright   MatCeedContext ctx;
797c8564c30SJames Wright 
798c8564c30SJames Wright   PetscFunctionBeginUser;
799c8564c30SJames Wright   PetscCall(MatShellGetContext(mat, &ctx));
800c8564c30SJames Wright   if (X_loc) {
801c8564c30SJames Wright     *X_loc = ctx->X_loc;
802c8564c30SJames Wright     PetscCall(PetscObjectReference((PetscObject)*X_loc));
803c8564c30SJames Wright   }
804c8564c30SJames Wright   if (Y_loc_transpose) {
805c8564c30SJames Wright     *Y_loc_transpose = ctx->Y_loc_transpose;
806c8564c30SJames Wright     PetscCall(PetscObjectReference((PetscObject)*Y_loc_transpose));
807c8564c30SJames Wright   }
808c8564c30SJames Wright   PetscFunctionReturn(PETSC_SUCCESS);
809c8564c30SJames Wright }
810c8564c30SJames Wright 
811c8564c30SJames Wright /**
812c8564c30SJames Wright   @brief Restore input local vectors for `MATCEED` `MatMult()` and `MatMultTranspose()` operations.
813c8564c30SJames Wright 
814c8564c30SJames Wright   Not collective across MPI processes.
815c8564c30SJames Wright 
816c8564c30SJames Wright   @param[in,out]  mat              MatCeed
817c8564c30SJames Wright   @param[out]     X_loc            Input PETSc local vector, or NULL
818c8564c30SJames Wright   @param[out]     Y_loc_transpose  Input PETSc local vector for transpose operation, or NULL
819c8564c30SJames Wright 
820c8564c30SJames Wright   @return An error code: 0 - success, otherwise - failure
821c8564c30SJames Wright **/
822c8564c30SJames Wright PetscErrorCode MatCeedRestoreLocalVectors(Mat mat, Vec *X_loc, Vec *Y_loc_transpose) {
823c8564c30SJames Wright   PetscFunctionBeginUser;
824c8564c30SJames Wright   if (X_loc) PetscCall(VecDestroy(X_loc));
825c8564c30SJames Wright   if (Y_loc_transpose) PetscCall(VecDestroy(Y_loc_transpose));
826c8564c30SJames Wright   PetscFunctionReturn(PETSC_SUCCESS);
827c8564c30SJames Wright }
828c8564c30SJames Wright 
829c8564c30SJames Wright /**
830c8564c30SJames Wright   @brief Get libCEED `CeedOperator` for `MATCEED` `MatMult()` and `MatMultTranspose()` operations.
831c8564c30SJames Wright 
832c8564c30SJames Wright   Not collective across MPI processes.
833c8564c30SJames Wright 
834c8564c30SJames Wright   @param[in,out]  mat                MatCeed
835c8564c30SJames Wright   @param[out]     op_mult            libCEED `CeedOperator` for `MatMult()`, or NULL
836c8564c30SJames Wright   @param[out]     op_mult_transpose  libCEED `CeedOperator` for `MatMultTranspose()`, or NULL
837c8564c30SJames Wright 
838c8564c30SJames Wright   @return An error code: 0 - success, otherwise - failure
839c8564c30SJames Wright **/
840c8564c30SJames Wright PetscErrorCode MatCeedGetCeedOperators(Mat mat, CeedOperator *op_mult, CeedOperator *op_mult_transpose) {
841c8564c30SJames Wright   MatCeedContext ctx;
842c8564c30SJames Wright 
843c8564c30SJames Wright   PetscFunctionBeginUser;
844c8564c30SJames Wright   PetscCall(MatShellGetContext(mat, &ctx));
845c8564c30SJames Wright   if (op_mult) {
846c8564c30SJames Wright     *op_mult = NULL;
84754831c5fSJames Wright     PetscCallCeed(ctx->ceed, CeedOperatorReferenceCopy(ctx->op_mult, op_mult));
848c8564c30SJames Wright   }
849c8564c30SJames Wright   if (op_mult_transpose) {
850c8564c30SJames Wright     *op_mult_transpose = NULL;
85154831c5fSJames Wright     PetscCallCeed(ctx->ceed, CeedOperatorReferenceCopy(ctx->op_mult_transpose, op_mult_transpose));
852c8564c30SJames Wright   }
853c8564c30SJames Wright   PetscFunctionReturn(PETSC_SUCCESS);
854c8564c30SJames Wright }
855c8564c30SJames Wright 
856c8564c30SJames Wright /**
857c8564c30SJames Wright   @brief Restore libCEED `CeedOperator` for `MATCEED` `MatMult()` and `MatMultTranspose()` operations.
858c8564c30SJames Wright 
859c8564c30SJames Wright   Not collective across MPI processes.
860c8564c30SJames Wright 
861c8564c30SJames Wright   @param[in,out]  mat                MatCeed
862c8564c30SJames Wright   @param[out]     op_mult            libCEED `CeedOperator` for `MatMult()`, or NULL
863c8564c30SJames Wright   @param[out]     op_mult_transpose  libCEED `CeedOperator` for `MatMultTranspose()`, or NULL
864c8564c30SJames Wright 
865c8564c30SJames Wright   @return An error code: 0 - success, otherwise - failure
866c8564c30SJames Wright **/
867c8564c30SJames Wright PetscErrorCode MatCeedRestoreCeedOperators(Mat mat, CeedOperator *op_mult, CeedOperator *op_mult_transpose) {
868c8564c30SJames Wright   MatCeedContext ctx;
869c8564c30SJames Wright 
870c8564c30SJames Wright   PetscFunctionBeginUser;
871c8564c30SJames Wright   PetscCall(MatShellGetContext(mat, &ctx));
87254831c5fSJames Wright   if (op_mult) PetscCallCeed(ctx->ceed, CeedOperatorDestroy(op_mult));
87354831c5fSJames Wright   if (op_mult_transpose) PetscCallCeed(ctx->ceed, CeedOperatorDestroy(op_mult_transpose));
874c8564c30SJames Wright   PetscFunctionReturn(PETSC_SUCCESS);
875c8564c30SJames Wright }
876c8564c30SJames Wright 
877c8564c30SJames Wright /**
878c8564c30SJames Wright   @brief Set `PetscLogEvent` for `MATCEED` `MatMult()` and `MatMultTranspose()` operators.
879c8564c30SJames Wright 
880c8564c30SJames Wright   Not collective across MPI processes.
881c8564c30SJames Wright 
882c8564c30SJames Wright   @param[in,out]  mat                       MatCeed
883c8564c30SJames Wright   @param[out]     log_event_mult            `PetscLogEvent` for forward evaluation, or NULL
884c8564c30SJames Wright   @param[out]     log_event_mult_transpose  `PetscLogEvent` for transpose evaluation, or NULL
885c8564c30SJames Wright 
886c8564c30SJames Wright   @return An error code: 0 - success, otherwise - failure
887c8564c30SJames Wright **/
888c8564c30SJames Wright PetscErrorCode MatCeedSetLogEvents(Mat mat, PetscLogEvent log_event_mult, PetscLogEvent log_event_mult_transpose) {
889c8564c30SJames Wright   MatCeedContext ctx;
890c8564c30SJames Wright 
891c8564c30SJames Wright   PetscFunctionBeginUser;
892c8564c30SJames Wright   PetscCall(MatShellGetContext(mat, &ctx));
893c8564c30SJames Wright   if (log_event_mult) ctx->log_event_mult = log_event_mult;
894c8564c30SJames Wright   if (log_event_mult_transpose) ctx->log_event_mult_transpose = log_event_mult_transpose;
895c8564c30SJames Wright   PetscFunctionReturn(PETSC_SUCCESS);
896c8564c30SJames Wright }
897c8564c30SJames Wright 
898c8564c30SJames Wright /**
899c8564c30SJames Wright   @brief Get `PetscLogEvent` for `MATCEED` `MatMult()` and `MatMultTranspose()` operators.
900c8564c30SJames Wright 
901c8564c30SJames Wright   Not collective across MPI processes.
902c8564c30SJames Wright 
903c8564c30SJames Wright   @param[in,out]  mat                       MatCeed
904c8564c30SJames Wright   @param[out]     log_event_mult            `PetscLogEvent` for forward evaluation, or NULL
905c8564c30SJames Wright   @param[out]     log_event_mult_transpose  `PetscLogEvent` for transpose evaluation, or NULL
906c8564c30SJames Wright 
907c8564c30SJames Wright   @return An error code: 0 - success, otherwise - failure
908c8564c30SJames Wright **/
909c8564c30SJames Wright PetscErrorCode MatCeedGetLogEvents(Mat mat, PetscLogEvent *log_event_mult, PetscLogEvent *log_event_mult_transpose) {
910c8564c30SJames Wright   MatCeedContext ctx;
911c8564c30SJames Wright 
912c8564c30SJames Wright   PetscFunctionBeginUser;
913c8564c30SJames Wright   PetscCall(MatShellGetContext(mat, &ctx));
914c8564c30SJames Wright   if (log_event_mult) *log_event_mult = ctx->log_event_mult;
915c8564c30SJames Wright   if (log_event_mult_transpose) *log_event_mult_transpose = ctx->log_event_mult_transpose;
916c8564c30SJames Wright   PetscFunctionReturn(PETSC_SUCCESS);
917c8564c30SJames Wright }
918c8564c30SJames Wright 
919c8564c30SJames Wright // -----------------------------------------------------------------------------
920c8564c30SJames Wright // Operator context data
921c8564c30SJames Wright // -----------------------------------------------------------------------------
922c8564c30SJames Wright 
923c8564c30SJames Wright /**
924c8564c30SJames Wright   @brief Setup context data for operator application.
925c8564c30SJames Wright 
926c8564c30SJames Wright   Collective across MPI processes.
927c8564c30SJames Wright 
928c8564c30SJames Wright   @param[in]   dm_x                      Input `DM`
929c8564c30SJames Wright   @param[in]   dm_y                      Output `DM`
930c8564c30SJames Wright   @param[in]   X_loc                     Input PETSc local vector, or NULL
931c8564c30SJames Wright   @param[in]   Y_loc_transpose           Input PETSc local vector for transpose operation, or NULL
932c8564c30SJames Wright   @param[in]   op_mult                   `CeedOperator` for forward evaluation
933c8564c30SJames Wright   @param[in]   op_mult_transpose         `CeedOperator` for transpose evaluation
934c8564c30SJames Wright   @param[in]   log_event_mult            `PetscLogEvent` for forward evaluation
935c8564c30SJames Wright   @param[in]   log_event_mult_transpose  `PetscLogEvent` for transpose evaluation
936c8564c30SJames Wright   @param[out]  ctx                       Context data for operator evaluation
937c8564c30SJames Wright 
938c8564c30SJames Wright   @return An error code: 0 - success, otherwise - failure
939c8564c30SJames Wright **/
940c8564c30SJames Wright PetscErrorCode MatCeedContextCreate(DM dm_x, DM dm_y, Vec X_loc, Vec Y_loc_transpose, CeedOperator op_mult, CeedOperator op_mult_transpose,
941c8564c30SJames Wright                                     PetscLogEvent log_event_mult, PetscLogEvent log_event_mult_transpose, MatCeedContext *ctx) {
942c8564c30SJames Wright   CeedSize x_loc_len, y_loc_len;
943c8564c30SJames Wright 
944c8564c30SJames Wright   PetscFunctionBeginUser;
945c8564c30SJames Wright 
946c8564c30SJames Wright   // Allocate
947c8564c30SJames Wright   PetscCall(PetscNew(ctx));
948c8564c30SJames Wright   (*ctx)->ref_count = 1;
949c8564c30SJames Wright 
950c8564c30SJames Wright   // Logging
951c8564c30SJames Wright   (*ctx)->log_event_mult           = log_event_mult;
952c8564c30SJames Wright   (*ctx)->log_event_mult_transpose = log_event_mult_transpose;
953c8564c30SJames Wright 
954c8564c30SJames Wright   // PETSc objects
955c8564c30SJames Wright   PetscCall(PetscObjectReference((PetscObject)dm_x));
956c8564c30SJames Wright   (*ctx)->dm_x = dm_x;
957c8564c30SJames Wright   PetscCall(PetscObjectReference((PetscObject)dm_y));
958c8564c30SJames Wright   (*ctx)->dm_y = dm_y;
959c8564c30SJames Wright   if (X_loc) PetscCall(PetscObjectReference((PetscObject)X_loc));
960c8564c30SJames Wright   (*ctx)->X_loc = X_loc;
961c8564c30SJames Wright   if (Y_loc_transpose) PetscCall(PetscObjectReference((PetscObject)Y_loc_transpose));
962c8564c30SJames Wright   (*ctx)->Y_loc_transpose = Y_loc_transpose;
963c8564c30SJames Wright 
964c8564c30SJames Wright   // Memtype
965c8564c30SJames Wright   {
966c8564c30SJames Wright     const PetscScalar *x;
967c8564c30SJames Wright     Vec                X;
968c8564c30SJames Wright 
969c8564c30SJames Wright     PetscCall(DMGetLocalVector(dm_x, &X));
970c8564c30SJames Wright     PetscCall(VecGetArrayReadAndMemType(X, &x, &(*ctx)->mem_type));
971c8564c30SJames Wright     PetscCall(VecRestoreArrayReadAndMemType(X, &x));
972c8564c30SJames Wright     PetscCall(DMRestoreLocalVector(dm_x, &X));
973c8564c30SJames Wright   }
974c8564c30SJames Wright 
975c8564c30SJames Wright   // libCEED objects
976c8564c30SJames Wright   PetscCheck(CeedOperatorGetCeed(op_mult, &(*ctx)->ceed) == CEED_ERROR_SUCCESS, PETSC_COMM_SELF, PETSC_ERR_LIB,
977c8564c30SJames Wright              "retrieving Ceed context object failed");
97854831c5fSJames Wright   PetscCallCeed((*ctx)->ceed, CeedReference((*ctx)->ceed));
97954831c5fSJames Wright   PetscCallCeed((*ctx)->ceed, CeedOperatorGetActiveVectorLengths(op_mult, &x_loc_len, &y_loc_len));
98054831c5fSJames Wright   PetscCallCeed((*ctx)->ceed, CeedOperatorReferenceCopy(op_mult, &(*ctx)->op_mult));
98154831c5fSJames Wright   if (op_mult_transpose) PetscCallCeed((*ctx)->ceed, CeedOperatorReferenceCopy(op_mult_transpose, &(*ctx)->op_mult_transpose));
98254831c5fSJames Wright   PetscCallCeed((*ctx)->ceed, CeedVectorCreate((*ctx)->ceed, x_loc_len, &(*ctx)->x_loc));
98354831c5fSJames Wright   PetscCallCeed((*ctx)->ceed, CeedVectorCreate((*ctx)->ceed, y_loc_len, &(*ctx)->y_loc));
984c8564c30SJames Wright 
985c8564c30SJames Wright   // Flop counting
986c8564c30SJames Wright   {
987c8564c30SJames Wright     CeedSize ceed_flops_estimate = 0;
988c8564c30SJames Wright 
98954831c5fSJames Wright     PetscCallCeed((*ctx)->ceed, CeedOperatorGetFlopsEstimate(op_mult, &ceed_flops_estimate));
990c8564c30SJames Wright     (*ctx)->flops_mult = ceed_flops_estimate;
991c8564c30SJames Wright     if (op_mult_transpose) {
99254831c5fSJames Wright       PetscCallCeed((*ctx)->ceed, CeedOperatorGetFlopsEstimate(op_mult_transpose, &ceed_flops_estimate));
993c8564c30SJames Wright       (*ctx)->flops_mult_transpose = ceed_flops_estimate;
994c8564c30SJames Wright     }
995c8564c30SJames Wright   }
996c8564c30SJames Wright 
997c8564c30SJames Wright   // Check sizes
998c8564c30SJames Wright   if (x_loc_len > 0 || y_loc_len > 0) {
999c8564c30SJames Wright     CeedSize ctx_x_loc_len, ctx_y_loc_len;
1000c8564c30SJames Wright     PetscInt X_loc_len, dm_x_loc_len, Y_loc_len, dm_y_loc_len;
1001c8564c30SJames Wright     Vec      dm_X_loc, dm_Y_loc;
1002c8564c30SJames Wright 
1003c8564c30SJames Wright     // -- Input
1004c8564c30SJames Wright     PetscCall(DMGetLocalVector(dm_x, &dm_X_loc));
1005c8564c30SJames Wright     PetscCall(VecGetLocalSize(dm_X_loc, &dm_x_loc_len));
1006c8564c30SJames Wright     PetscCall(DMRestoreLocalVector(dm_x, &dm_X_loc));
100754831c5fSJames Wright     PetscCallCeed((*ctx)->ceed, CeedVectorGetLength((*ctx)->x_loc, &ctx_x_loc_len));
1008*c86b9822SJames Wright     if (X_loc) {
1009*c86b9822SJames Wright       PetscCall(VecGetLocalSize(X_loc, &X_loc_len));
1010*c86b9822SJames Wright       PetscCheck(X_loc_len == dm_x_loc_len, PETSC_COMM_SELF, PETSC_ERR_LIB,
1011*c86b9822SJames Wright                  "X_loc (%" PetscInt_FMT ") must match dm_x (%" PetscInt_FMT ") dimensions", X_loc_len, dm_x_loc_len);
1012*c86b9822SJames Wright     }
1013*c86b9822SJames Wright     PetscCheck(x_loc_len == dm_x_loc_len, PETSC_COMM_SELF, PETSC_ERR_LIB, "op (%" CeedSize_FMT ") must match dm_x (%" PetscInt_FMT ") dimensions",
1014*c86b9822SJames Wright                x_loc_len, dm_x_loc_len);
1015*c86b9822SJames Wright     PetscCheck(x_loc_len == ctx_x_loc_len, PETSC_COMM_SELF, PETSC_ERR_LIB, "x_loc (%" CeedSize_FMT ") must match op dimensions (%" CeedSize_FMT ")",
1016*c86b9822SJames Wright                x_loc_len, ctx_x_loc_len);
1017c8564c30SJames Wright 
1018c8564c30SJames Wright     // -- Output
1019c8564c30SJames Wright     PetscCall(DMGetLocalVector(dm_y, &dm_Y_loc));
1020c8564c30SJames Wright     PetscCall(VecGetLocalSize(dm_Y_loc, &dm_y_loc_len));
1021c8564c30SJames Wright     PetscCall(DMRestoreLocalVector(dm_y, &dm_Y_loc));
102254831c5fSJames Wright     PetscCallCeed((*ctx)->ceed, CeedVectorGetLength((*ctx)->y_loc, &ctx_y_loc_len));
1023*c86b9822SJames Wright     PetscCheck(ctx_y_loc_len == dm_y_loc_len, PETSC_COMM_SELF, PETSC_ERR_LIB, "op (%" CeedSize_FMT ") must match dm_y (%" PetscInt_FMT ") dimensions",
1024*c86b9822SJames Wright                ctx_y_loc_len, dm_y_loc_len);
1025c8564c30SJames Wright 
1026c8564c30SJames Wright     // -- Transpose
1027c8564c30SJames Wright     if (Y_loc_transpose) {
1028c8564c30SJames Wright       PetscCall(VecGetLocalSize(Y_loc_transpose, &Y_loc_len));
1029*c86b9822SJames Wright       PetscCheck(Y_loc_len == dm_y_loc_len, PETSC_COMM_SELF, PETSC_ERR_LIB,
1030*c86b9822SJames Wright                  "Y_loc_transpose (%" PetscInt_FMT ") must match dm_y (%" PetscInt_FMT ") dimensions", Y_loc_len, dm_y_loc_len);
1031c8564c30SJames Wright     }
1032c8564c30SJames Wright   }
1033c8564c30SJames Wright   PetscFunctionReturn(PETSC_SUCCESS);
1034c8564c30SJames Wright }
1035c8564c30SJames Wright 
1036c8564c30SJames Wright /**
1037c8564c30SJames Wright   @brief Increment reference counter for `MATCEED` context.
1038c8564c30SJames Wright 
1039c8564c30SJames Wright   Not collective across MPI processes.
1040c8564c30SJames Wright 
1041c8564c30SJames Wright   @param[in,out]  ctx  Context data
1042c8564c30SJames Wright 
1043c8564c30SJames Wright   @return An error code: 0 - success, otherwise - failure
1044c8564c30SJames Wright **/
1045c8564c30SJames Wright PetscErrorCode MatCeedContextReference(MatCeedContext ctx) {
1046c8564c30SJames Wright   PetscFunctionBeginUser;
1047c8564c30SJames Wright   ctx->ref_count++;
1048c8564c30SJames Wright   PetscFunctionReturn(PETSC_SUCCESS);
1049c8564c30SJames Wright }
1050c8564c30SJames Wright 
1051c8564c30SJames Wright /**
1052c8564c30SJames Wright   @brief Copy reference for `MATCEED`.
1053c8564c30SJames Wright          Note: If `ctx_copy` is non-null, it is assumed to be a valid pointer to a `MatCeedContext`.
1054c8564c30SJames Wright 
1055c8564c30SJames Wright   Not collective across MPI processes.
1056c8564c30SJames Wright 
1057c8564c30SJames Wright   @param[in]   ctx       Context data
1058c8564c30SJames Wright   @param[out]  ctx_copy  Copy of pointer to context data
1059c8564c30SJames Wright 
1060c8564c30SJames Wright   @return An error code: 0 - success, otherwise - failure
1061c8564c30SJames Wright **/
1062c8564c30SJames Wright PetscErrorCode MatCeedContextReferenceCopy(MatCeedContext ctx, MatCeedContext *ctx_copy) {
1063c8564c30SJames Wright   PetscFunctionBeginUser;
1064c8564c30SJames Wright   PetscCall(MatCeedContextReference(ctx));
1065c8564c30SJames Wright   PetscCall(MatCeedContextDestroy(*ctx_copy));
1066c8564c30SJames Wright   *ctx_copy = ctx;
1067c8564c30SJames Wright   PetscFunctionReturn(PETSC_SUCCESS);
1068c8564c30SJames Wright }
1069c8564c30SJames Wright 
1070c8564c30SJames Wright /**
1071c8564c30SJames Wright   @brief Destroy context data for operator application.
1072c8564c30SJames Wright 
1073c8564c30SJames Wright   Collective across MPI processes.
1074c8564c30SJames Wright 
1075c8564c30SJames Wright   @param[in,out]  ctx  Context data for operator evaluation
1076c8564c30SJames Wright 
1077c8564c30SJames Wright   @return An error code: 0 - success, otherwise - failure
1078c8564c30SJames Wright **/
1079c8564c30SJames Wright PetscErrorCode MatCeedContextDestroy(MatCeedContext ctx) {
1080c8564c30SJames Wright   PetscFunctionBeginUser;
1081c8564c30SJames Wright   if (!ctx || --ctx->ref_count > 0) PetscFunctionReturn(PETSC_SUCCESS);
1082c8564c30SJames Wright 
1083c8564c30SJames Wright   // PETSc objects
1084c8564c30SJames Wright   PetscCall(DMDestroy(&ctx->dm_x));
1085c8564c30SJames Wright   PetscCall(DMDestroy(&ctx->dm_y));
1086c8564c30SJames Wright   PetscCall(VecDestroy(&ctx->X_loc));
1087c8564c30SJames Wright   PetscCall(VecDestroy(&ctx->Y_loc_transpose));
1088c8564c30SJames Wright   PetscCall(MatDestroy(&ctx->mat_assembled_full_internal));
1089c8564c30SJames Wright   PetscCall(MatDestroy(&ctx->mat_assembled_pbd_internal));
1090c8564c30SJames Wright   PetscCall(PetscFree(ctx->internal_mat_type));
1091c8564c30SJames Wright   PetscCall(PetscFree(ctx->mats_assembled_full));
1092c8564c30SJames Wright   PetscCall(PetscFree(ctx->mats_assembled_pbd));
1093c8564c30SJames Wright 
1094c8564c30SJames Wright   // libCEED objects
109554831c5fSJames Wright   PetscCallCeed(ctx->ceed, CeedVectorDestroy(&ctx->x_loc));
109654831c5fSJames Wright   PetscCallCeed(ctx->ceed, CeedVectorDestroy(&ctx->y_loc));
109754831c5fSJames Wright   PetscCallCeed(ctx->ceed, CeedVectorDestroy(&ctx->coo_values_full));
109854831c5fSJames Wright   PetscCallCeed(ctx->ceed, CeedVectorDestroy(&ctx->coo_values_pbd));
109954831c5fSJames Wright   PetscCallCeed(ctx->ceed, CeedOperatorDestroy(&ctx->op_mult));
110054831c5fSJames Wright   PetscCallCeed(ctx->ceed, CeedOperatorDestroy(&ctx->op_mult_transpose));
1101c8564c30SJames Wright   PetscCheck(CeedDestroy(&ctx->ceed) == CEED_ERROR_SUCCESS, PETSC_COMM_SELF, PETSC_ERR_LIB, "destroying libCEED context object failed");
1102c8564c30SJames Wright 
1103c8564c30SJames Wright   // Deallocate
1104c8564c30SJames Wright   ctx->is_destroyed = PETSC_TRUE;  // Flag as destroyed in case someone has stale ref
1105c8564c30SJames Wright   PetscCall(PetscFree(ctx));
1106c8564c30SJames Wright   PetscFunctionReturn(PETSC_SUCCESS);
1107c8564c30SJames Wright }
1108c8564c30SJames Wright 
1109c8564c30SJames Wright /**
1110c8564c30SJames Wright   @brief Compute the diagonal of an operator via libCEED.
1111c8564c30SJames Wright 
1112c8564c30SJames Wright   Collective across MPI processes.
1113c8564c30SJames Wright 
1114c8564c30SJames Wright   @param[in]   A  `MATCEED`
1115c8564c30SJames Wright   @param[out]  D  Vector holding operator diagonal
1116c8564c30SJames Wright 
1117c8564c30SJames Wright   @return An error code: 0 - success, otherwise - failure
1118c8564c30SJames Wright **/
1119c8564c30SJames Wright PetscErrorCode MatGetDiagonal_Ceed(Mat A, Vec D) {
1120c8564c30SJames Wright   PetscMemType   mem_type;
1121c8564c30SJames Wright   Vec            D_loc;
1122c8564c30SJames Wright   MatCeedContext ctx;
1123c8564c30SJames Wright 
1124c8564c30SJames Wright   PetscFunctionBeginUser;
1125c8564c30SJames Wright   PetscCall(MatShellGetContext(A, &ctx));
1126c8564c30SJames Wright 
1127c8564c30SJames Wright   // Place PETSc vector in libCEED vector
1128c8564c30SJames Wright   PetscCall(DMGetLocalVector(ctx->dm_x, &D_loc));
1129d0593705SJames Wright   PetscCall(VecPetscToCeed(D_loc, &mem_type, ctx->x_loc));
1130c8564c30SJames Wright 
1131c8564c30SJames Wright   // Compute Diagonal
113254831c5fSJames Wright   PetscCallCeed(ctx->ceed, CeedOperatorLinearAssembleDiagonal(ctx->op_mult, ctx->x_loc, CEED_REQUEST_IMMEDIATE));
1133c8564c30SJames Wright 
1134c8564c30SJames Wright   // Restore PETSc vector
1135d0593705SJames Wright   PetscCall(VecCeedToPetsc(ctx->x_loc, mem_type, D_loc));
1136c8564c30SJames Wright 
1137c8564c30SJames Wright   // Local-to-Global
1138c8564c30SJames Wright   PetscCall(VecZeroEntries(D));
1139c8564c30SJames Wright   PetscCall(DMLocalToGlobal(ctx->dm_x, D_loc, ADD_VALUES, D));
1140c8564c30SJames Wright   PetscCall(DMRestoreLocalVector(ctx->dm_x, &D_loc));
1141c8564c30SJames Wright   PetscFunctionReturn(PETSC_SUCCESS);
1142c8564c30SJames Wright }
1143c8564c30SJames Wright 
1144c8564c30SJames Wright /**
1145c8564c30SJames Wright   @brief Compute `A X = Y` for a `MATCEED`.
1146c8564c30SJames Wright 
1147c8564c30SJames Wright   Collective across MPI processes.
1148c8564c30SJames Wright 
1149c8564c30SJames Wright   @param[in]   A  `MATCEED`
1150c8564c30SJames Wright   @param[in]   X  Input PETSc vector
1151c8564c30SJames Wright   @param[out]  Y  Output PETSc vector
1152c8564c30SJames Wright 
1153c8564c30SJames Wright   @return An error code: 0 - success, otherwise - failure
1154c8564c30SJames Wright **/
1155c8564c30SJames Wright PetscErrorCode MatMult_Ceed(Mat A, Vec X, Vec Y) {
1156c8564c30SJames Wright   MatCeedContext ctx;
1157c8564c30SJames Wright 
1158c8564c30SJames Wright   PetscFunctionBeginUser;
1159c8564c30SJames Wright   PetscCall(MatShellGetContext(A, &ctx));
1160c8564c30SJames Wright   PetscCall(PetscLogEventBegin(ctx->log_event_mult, A, X, Y, 0));
1161c8564c30SJames Wright 
1162c8564c30SJames Wright   {
1163c8564c30SJames Wright     PetscMemType x_mem_type, y_mem_type;
1164c8564c30SJames Wright     Vec          X_loc = ctx->X_loc, Y_loc;
1165c8564c30SJames Wright 
1166c8564c30SJames Wright     // Get local vectors
1167c8564c30SJames Wright     if (!ctx->X_loc) PetscCall(DMGetLocalVector(ctx->dm_x, &X_loc));
1168c8564c30SJames Wright     PetscCall(DMGetLocalVector(ctx->dm_y, &Y_loc));
1169c8564c30SJames Wright 
1170c8564c30SJames Wright     // Global-to-local
1171c8564c30SJames Wright     PetscCall(DMGlobalToLocal(ctx->dm_x, X, INSERT_VALUES, X_loc));
1172c8564c30SJames Wright 
1173c8564c30SJames Wright     // Setup libCEED vectors
1174d0593705SJames Wright     PetscCall(VecReadPetscToCeed(X_loc, &x_mem_type, ctx->x_loc));
1175c8564c30SJames Wright     PetscCall(VecZeroEntries(Y_loc));
1176d0593705SJames Wright     PetscCall(VecPetscToCeed(Y_loc, &y_mem_type, ctx->y_loc));
1177c8564c30SJames Wright 
1178c8564c30SJames Wright     // Apply libCEED operator
1179c8564c30SJames Wright     PetscCall(PetscLogGpuTimeBegin());
118054831c5fSJames Wright     PetscCallCeed(ctx->ceed, CeedOperatorApplyAdd(ctx->op_mult, ctx->x_loc, ctx->y_loc, CEED_REQUEST_IMMEDIATE));
1181c8564c30SJames Wright     PetscCall(PetscLogGpuTimeEnd());
1182c8564c30SJames Wright 
1183c8564c30SJames Wright     // Restore PETSc vectors
1184d0593705SJames Wright     PetscCall(VecReadCeedToPetsc(ctx->x_loc, x_mem_type, X_loc));
1185d0593705SJames Wright     PetscCall(VecCeedToPetsc(ctx->y_loc, y_mem_type, Y_loc));
1186c8564c30SJames Wright 
1187c8564c30SJames Wright     // Local-to-global
1188c8564c30SJames Wright     PetscCall(VecZeroEntries(Y));
1189c8564c30SJames Wright     PetscCall(DMLocalToGlobal(ctx->dm_y, Y_loc, ADD_VALUES, Y));
1190c8564c30SJames Wright 
1191c8564c30SJames Wright     // Restore local vectors, as needed
1192c8564c30SJames Wright     if (!ctx->X_loc) PetscCall(DMRestoreLocalVector(ctx->dm_x, &X_loc));
1193c8564c30SJames Wright     PetscCall(DMRestoreLocalVector(ctx->dm_y, &Y_loc));
1194c8564c30SJames Wright   }
1195c8564c30SJames Wright 
1196c8564c30SJames Wright   // Log flops
1197c8564c30SJames Wright   if (PetscMemTypeDevice(ctx->mem_type)) PetscCall(PetscLogGpuFlops(ctx->flops_mult));
1198c8564c30SJames Wright   else PetscCall(PetscLogFlops(ctx->flops_mult));
1199c8564c30SJames Wright 
1200c8564c30SJames Wright   PetscCall(PetscLogEventEnd(ctx->log_event_mult, A, X, Y, 0));
1201c8564c30SJames Wright   PetscFunctionReturn(PETSC_SUCCESS);
1202c8564c30SJames Wright }
1203c8564c30SJames Wright 
1204c8564c30SJames Wright /**
1205c8564c30SJames Wright   @brief Compute `A^T Y = X` for a `MATCEED`.
1206c8564c30SJames Wright 
1207c8564c30SJames Wright   Collective across MPI processes.
1208c8564c30SJames Wright 
1209c8564c30SJames Wright   @param[in]   A  `MATCEED`
1210c8564c30SJames Wright   @param[in]   Y  Input PETSc vector
1211c8564c30SJames Wright   @param[out]  X  Output PETSc vector
1212c8564c30SJames Wright 
1213c8564c30SJames Wright   @return An error code: 0 - success, otherwise - failure
1214c8564c30SJames Wright **/
1215c8564c30SJames Wright PetscErrorCode MatMultTranspose_Ceed(Mat A, Vec Y, Vec X) {
1216c8564c30SJames Wright   MatCeedContext ctx;
1217c8564c30SJames Wright 
1218c8564c30SJames Wright   PetscFunctionBeginUser;
1219c8564c30SJames Wright   PetscCall(MatShellGetContext(A, &ctx));
1220c8564c30SJames Wright   PetscCall(PetscLogEventBegin(ctx->log_event_mult_transpose, A, Y, X, 0));
1221c8564c30SJames Wright 
1222c8564c30SJames Wright   {
1223c8564c30SJames Wright     PetscMemType x_mem_type, y_mem_type;
1224c8564c30SJames Wright     Vec          X_loc, Y_loc = ctx->Y_loc_transpose;
1225c8564c30SJames Wright 
1226c8564c30SJames Wright     // Get local vectors
1227c8564c30SJames Wright     if (!ctx->Y_loc_transpose) PetscCall(DMGetLocalVector(ctx->dm_y, &Y_loc));
1228c8564c30SJames Wright     PetscCall(DMGetLocalVector(ctx->dm_x, &X_loc));
1229c8564c30SJames Wright 
1230c8564c30SJames Wright     // Global-to-local
1231c8564c30SJames Wright     PetscCall(DMGlobalToLocal(ctx->dm_y, Y, INSERT_VALUES, Y_loc));
1232c8564c30SJames Wright 
1233c8564c30SJames Wright     // Setup libCEED vectors
1234d0593705SJames Wright     PetscCall(VecReadPetscToCeed(Y_loc, &y_mem_type, ctx->y_loc));
1235c8564c30SJames Wright     PetscCall(VecZeroEntries(X_loc));
1236d0593705SJames Wright     PetscCall(VecPetscToCeed(X_loc, &x_mem_type, ctx->x_loc));
1237c8564c30SJames Wright 
1238c8564c30SJames Wright     // Apply libCEED operator
1239c8564c30SJames Wright     PetscCall(PetscLogGpuTimeBegin());
124054831c5fSJames Wright     PetscCallCeed(ctx->ceed, CeedOperatorApplyAdd(ctx->op_mult_transpose, ctx->y_loc, ctx->x_loc, CEED_REQUEST_IMMEDIATE));
1241c8564c30SJames Wright     PetscCall(PetscLogGpuTimeEnd());
1242c8564c30SJames Wright 
1243c8564c30SJames Wright     // Restore PETSc vectors
1244d0593705SJames Wright     PetscCall(VecReadCeedToPetsc(ctx->y_loc, y_mem_type, Y_loc));
1245d0593705SJames Wright     PetscCall(VecCeedToPetsc(ctx->x_loc, x_mem_type, X_loc));
1246c8564c30SJames Wright 
1247c8564c30SJames Wright     // Local-to-global
1248c8564c30SJames Wright     PetscCall(VecZeroEntries(X));
1249c8564c30SJames Wright     PetscCall(DMLocalToGlobal(ctx->dm_x, X_loc, ADD_VALUES, X));
1250c8564c30SJames Wright 
1251c8564c30SJames Wright     // Restore local vectors, as needed
1252c8564c30SJames Wright     if (!ctx->Y_loc_transpose) PetscCall(DMRestoreLocalVector(ctx->dm_y, &Y_loc));
1253c8564c30SJames Wright     PetscCall(DMRestoreLocalVector(ctx->dm_x, &X_loc));
1254c8564c30SJames Wright   }
1255c8564c30SJames Wright 
1256c8564c30SJames Wright   // Log flops
1257c8564c30SJames Wright   if (PetscMemTypeDevice(ctx->mem_type)) PetscCall(PetscLogGpuFlops(ctx->flops_mult_transpose));
1258c8564c30SJames Wright   else PetscCall(PetscLogFlops(ctx->flops_mult_transpose));
1259c8564c30SJames Wright 
1260c8564c30SJames Wright   PetscCall(PetscLogEventEnd(ctx->log_event_mult_transpose, A, Y, X, 0));
1261c8564c30SJames Wright   PetscFunctionReturn(PETSC_SUCCESS);
1262c8564c30SJames Wright }
1263