xref: /libCEED/examples/fluids/src/mat-ceed.c (revision 3933d9a06aeb299c1f872998cbdde051f6b53220)
1c8564c30SJames Wright /// @file
2c8564c30SJames Wright /// MatCeed and it's related operators
3c8564c30SJames Wright 
4d0593705SJames Wright #include <ceed-utils.h>
5c8564c30SJames Wright #include <ceed.h>
6c8564c30SJames Wright #include <ceed/backend.h>
7c8564c30SJames Wright #include <mat-ceed-impl.h>
8c8564c30SJames Wright #include <mat-ceed.h>
9c8564c30SJames Wright #include <petscdmplex.h>
10c8564c30SJames Wright #include <stdlib.h>
11c8564c30SJames Wright #include <string.h>
12c8564c30SJames Wright 
13c8564c30SJames Wright PetscClassId  MATCEED_CLASSID;
14c8564c30SJames Wright PetscLogEvent MATCEED_MULT, MATCEED_MULT_TRANSPOSE;
15c8564c30SJames Wright 
16c8564c30SJames Wright /**
17c8564c30SJames Wright   @brief Register MATCEED log events.
18c8564c30SJames Wright 
19c8564c30SJames Wright   Not collective across MPI processes.
20c8564c30SJames Wright 
21c8564c30SJames Wright   @return An error code: 0 - success, otherwise - failure
22c8564c30SJames Wright **/
23c8564c30SJames Wright static PetscErrorCode MatCeedRegisterLogEvents() {
24c8564c30SJames Wright   static bool registered = false;
25c8564c30SJames Wright 
26c8564c30SJames Wright   PetscFunctionBeginUser;
27c8564c30SJames Wright   if (registered) PetscFunctionReturn(PETSC_SUCCESS);
28c8564c30SJames Wright   PetscCall(PetscClassIdRegister("MATCEED", &MATCEED_CLASSID));
29c8564c30SJames Wright   PetscCall(PetscLogEventRegister("MATCEED Mult", MATCEED_CLASSID, &MATCEED_MULT));
30c8564c30SJames Wright   PetscCall(PetscLogEventRegister("MATCEED Mult Transpose", MATCEED_CLASSID, &MATCEED_MULT_TRANSPOSE));
31c8564c30SJames Wright   registered = true;
32c8564c30SJames Wright   PetscFunctionReturn(PETSC_SUCCESS);
33c8564c30SJames Wright }
34c8564c30SJames Wright 
35c8564c30SJames Wright /**
36c8564c30SJames Wright   @brief Setup inner `Mat` for `PC` operations not directly supported by libCEED.
37c8564c30SJames Wright 
38c8564c30SJames Wright   Collective across MPI processes.
39c8564c30SJames Wright 
40c8564c30SJames Wright   @param[in]   mat_ceed   `MATCEED` to setup
41c8564c30SJames Wright   @param[out]  mat_inner  Inner `Mat`
42c8564c30SJames Wright 
43c8564c30SJames Wright   @return An error code: 0 - success, otherwise - failure
44c8564c30SJames Wright **/
45c8564c30SJames Wright static PetscErrorCode MatCeedSetupInnerMat(Mat mat_ceed, Mat *mat_inner) {
46c8564c30SJames Wright   MatCeedContext ctx;
47c8564c30SJames Wright 
48c8564c30SJames Wright   PetscFunctionBeginUser;
49c8564c30SJames Wright   PetscCall(MatShellGetContext(mat_ceed, &ctx));
50c8564c30SJames Wright 
51c8564c30SJames Wright   PetscCheck(ctx->dm_x == ctx->dm_y, PetscObjectComm((PetscObject)mat_ceed), PETSC_ERR_SUP, "PC only supported for MATCEED on a single DM");
52c8564c30SJames Wright 
53c8564c30SJames Wright   // Check cl mat type
54c8564c30SJames Wright   {
55c8564c30SJames Wright     PetscBool is_internal_mat_type_cl = PETSC_FALSE;
56c8564c30SJames Wright     char      internal_mat_type_cl[64];
57c8564c30SJames Wright 
58c8564c30SJames Wright     // Check for specific CL inner mat type for this Mat
59c8564c30SJames Wright     {
60c8564c30SJames Wright       const char *mat_ceed_prefix = NULL;
61c8564c30SJames Wright 
62c8564c30SJames Wright       PetscCall(MatGetOptionsPrefix(mat_ceed, &mat_ceed_prefix));
63c8564c30SJames Wright       PetscOptionsBegin(PetscObjectComm((PetscObject)mat_ceed), mat_ceed_prefix, "", NULL);
64c8564c30SJames Wright       PetscCall(PetscOptionsFList("-ceed_inner_mat_type", "MATCEED inner assembled MatType for PC support", NULL, MatList, internal_mat_type_cl,
65c8564c30SJames Wright                                   internal_mat_type_cl, sizeof(internal_mat_type_cl), &is_internal_mat_type_cl));
66c8564c30SJames Wright       PetscOptionsEnd();
67c8564c30SJames Wright       if (is_internal_mat_type_cl) {
68c8564c30SJames Wright         PetscCall(PetscFree(ctx->internal_mat_type));
69c8564c30SJames Wright         PetscCall(PetscStrallocpy(internal_mat_type_cl, &ctx->internal_mat_type));
70c8564c30SJames Wright       }
71c8564c30SJames Wright     }
72c8564c30SJames Wright   }
73c8564c30SJames Wright 
74c8564c30SJames Wright   // Create sparse matrix
75c8564c30SJames Wright   {
76c8564c30SJames Wright     MatType dm_mat_type, dm_mat_type_copy;
77c8564c30SJames Wright 
78c8564c30SJames Wright     PetscCall(DMGetMatType(ctx->dm_x, &dm_mat_type));
79c8564c30SJames Wright     PetscCall(PetscStrallocpy(dm_mat_type, (char **)&dm_mat_type_copy));
80c8564c30SJames Wright     PetscCall(DMSetMatType(ctx->dm_x, ctx->internal_mat_type));
81c8564c30SJames Wright     PetscCall(DMCreateMatrix(ctx->dm_x, mat_inner));
82c8564c30SJames Wright     PetscCall(DMSetMatType(ctx->dm_x, dm_mat_type_copy));
83c8564c30SJames Wright     PetscCall(PetscFree(dm_mat_type_copy));
84c8564c30SJames Wright   }
85c8564c30SJames Wright   PetscFunctionReturn(PETSC_SUCCESS);
86c8564c30SJames Wright }
87c8564c30SJames Wright 
88c8564c30SJames Wright /**
89c8564c30SJames Wright   @brief Assemble the point block diagonal of a `MATCEED` into a `MATAIJ` or similar.
90c8564c30SJames Wright          The `mat_coo` preallocation is set to match the sparsity pattern of `mat_ceed`.
91c8564c30SJames Wright          The caller is responsible for assuring the global and local sizes are compatible, otherwise this function will fail.
92c8564c30SJames Wright 
93c8564c30SJames Wright   Collective across MPI processes.
94c8564c30SJames Wright 
95c8564c30SJames Wright   @param[in]      mat_ceed  `MATCEED` to assemble
96c8564c30SJames Wright   @param[in,out]  mat_coo   `MATAIJ` or similar to assemble into
97c8564c30SJames Wright 
98c8564c30SJames Wright   @return An error code: 0 - success, otherwise - failure
99c8564c30SJames Wright **/
100c8564c30SJames Wright static PetscErrorCode MatCeedAssemblePointBlockDiagonalCOO(Mat mat_ceed, Mat mat_coo) {
101c8564c30SJames Wright   MatCeedContext ctx;
102c8564c30SJames Wright 
103c8564c30SJames Wright   PetscFunctionBeginUser;
104c8564c30SJames Wright   PetscCall(MatShellGetContext(mat_ceed, &ctx));
105c8564c30SJames Wright 
106c8564c30SJames Wright   // Check if COO pattern set
107c8564c30SJames Wright   {
108c8564c30SJames Wright     PetscInt index = -1;
109c8564c30SJames Wright 
110c8564c30SJames Wright     for (PetscInt i = 0; i < ctx->num_mats_assembled_pbd; i++) {
111c8564c30SJames Wright       if (ctx->mats_assembled_pbd[i] == mat_coo) index = i;
112c8564c30SJames Wright     }
113c8564c30SJames Wright     if (index == -1) {
114c8564c30SJames Wright       PetscInt     *rows_petsc = NULL, *cols_petsc = NULL;
115c8564c30SJames Wright       CeedInt      *rows_ceed, *cols_ceed;
116c8564c30SJames Wright       PetscCount    num_entries;
117c8564c30SJames Wright       PetscLogStage stage_amg_setup;
118c8564c30SJames Wright 
119c8564c30SJames Wright       // -- Assemble sparsity pattern if mat hasn't been assembled before
120c8564c30SJames Wright       PetscCall(PetscLogStageGetId("MATCEED Assembly Setup", &stage_amg_setup));
121c8564c30SJames Wright       if (stage_amg_setup == -1) {
122c8564c30SJames Wright         PetscCall(PetscLogStageRegister("MATCEED Assembly Setup", &stage_amg_setup));
123c8564c30SJames Wright       }
124c8564c30SJames Wright       PetscCall(PetscLogStagePush(stage_amg_setup));
12554831c5fSJames Wright       PetscCallCeed(ctx->ceed, CeedOperatorLinearAssemblePointBlockDiagonalSymbolic(ctx->op_mult, &num_entries, &rows_ceed, &cols_ceed));
126d0593705SJames Wright       PetscCall(IntArrayCeedToPetsc(num_entries, &rows_ceed, &rows_petsc));
127d0593705SJames Wright       PetscCall(IntArrayCeedToPetsc(num_entries, &cols_ceed, &cols_petsc));
128c8564c30SJames Wright       PetscCall(MatSetPreallocationCOOLocal(mat_coo, num_entries, rows_petsc, cols_petsc));
129c8564c30SJames Wright       free(rows_petsc);
130c8564c30SJames Wright       free(cols_petsc);
13154831c5fSJames Wright       if (!ctx->coo_values_pbd) PetscCallCeed(ctx->ceed, CeedVectorCreate(ctx->ceed, num_entries, &ctx->coo_values_pbd));
132c8564c30SJames Wright       PetscCall(PetscRealloc(++ctx->num_mats_assembled_pbd * sizeof(Mat), &ctx->mats_assembled_pbd));
133c8564c30SJames Wright       ctx->mats_assembled_pbd[ctx->num_mats_assembled_pbd - 1] = mat_coo;
134c8564c30SJames Wright       PetscCall(PetscLogStagePop());
135c8564c30SJames Wright     }
136c8564c30SJames Wright   }
137c8564c30SJames Wright 
138c8564c30SJames Wright   // Assemble mat_ceed
139c8564c30SJames Wright   PetscCall(MatAssemblyBegin(mat_coo, MAT_FINAL_ASSEMBLY));
140c8564c30SJames Wright   {
141c8564c30SJames Wright     const CeedScalar *values;
142c8564c30SJames Wright     MatType           mat_type;
143c8564c30SJames Wright     CeedMemType       mem_type = CEED_MEM_HOST;
144c8564c30SJames Wright     PetscBool         is_spd, is_spd_known;
145c8564c30SJames Wright 
146c8564c30SJames Wright     PetscCall(MatGetType(mat_coo, &mat_type));
147c8564c30SJames Wright     if (strstr(mat_type, "cusparse")) mem_type = CEED_MEM_DEVICE;
148c8564c30SJames Wright     else if (strstr(mat_type, "kokkos")) mem_type = CEED_MEM_DEVICE;
149c8564c30SJames Wright     else mem_type = CEED_MEM_HOST;
150c8564c30SJames Wright 
15154831c5fSJames Wright     PetscCallCeed(ctx->ceed, CeedOperatorLinearAssemblePointBlockDiagonal(ctx->op_mult, ctx->coo_values_pbd, CEED_REQUEST_IMMEDIATE));
15254831c5fSJames Wright     PetscCallCeed(ctx->ceed, CeedVectorGetArrayRead(ctx->coo_values_pbd, mem_type, &values));
153c8564c30SJames Wright     PetscCall(MatSetValuesCOO(mat_coo, values, INSERT_VALUES));
154c8564c30SJames Wright     PetscCall(MatIsSPDKnown(mat_ceed, &is_spd_known, &is_spd));
155c8564c30SJames Wright     if (is_spd_known) PetscCall(MatSetOption(mat_coo, MAT_SPD, is_spd));
15654831c5fSJames Wright     PetscCallCeed(ctx->ceed, CeedVectorRestoreArrayRead(ctx->coo_values_pbd, &values));
157c8564c30SJames Wright   }
158c8564c30SJames Wright   PetscCall(MatAssemblyEnd(mat_coo, MAT_FINAL_ASSEMBLY));
159c8564c30SJames Wright   PetscFunctionReturn(PETSC_SUCCESS);
160c8564c30SJames Wright }
161c8564c30SJames Wright 
162c8564c30SJames Wright /**
163c8564c30SJames Wright   @brief Assemble inner `Mat` for diagonal `PC` operations
164c8564c30SJames Wright 
165c8564c30SJames Wright   Collective across MPI processes.
166c8564c30SJames Wright 
167c8564c30SJames Wright   @param[in]   mat_ceed      `MATCEED` to invert
168c8564c30SJames Wright   @param[in]   use_ceed_pbd  Boolean flag to use libCEED PBD assembly
169c8564c30SJames Wright   @param[out]  mat_inner     Inner `Mat` for diagonal operations
170c8564c30SJames Wright 
171c8564c30SJames Wright   @return An error code: 0 - success, otherwise - failure
172c8564c30SJames Wright **/
173c8564c30SJames Wright static PetscErrorCode MatCeedAssembleInnerBlockDiagonalMat(Mat mat_ceed, PetscBool use_ceed_pbd, Mat *mat_inner) {
174c8564c30SJames Wright   MatCeedContext ctx;
175c8564c30SJames Wright 
176c8564c30SJames Wright   PetscFunctionBeginUser;
177c8564c30SJames Wright   PetscCall(MatShellGetContext(mat_ceed, &ctx));
178c8564c30SJames Wright   if (use_ceed_pbd) {
179c8564c30SJames Wright     // Check if COO pattern set
180c8564c30SJames Wright     if (!ctx->mat_assembled_pbd_internal) PetscCall(MatCeedSetupInnerMat(mat_ceed, &ctx->mat_assembled_pbd_internal));
181c8564c30SJames Wright 
182c8564c30SJames Wright     // Assemble mat_assembled_full_internal
183c8564c30SJames Wright     PetscCall(MatCeedAssemblePointBlockDiagonalCOO(mat_ceed, ctx->mat_assembled_pbd_internal));
184c8564c30SJames Wright     if (mat_inner) *mat_inner = ctx->mat_assembled_pbd_internal;
185c8564c30SJames Wright   } else {
186c8564c30SJames Wright     // Check if COO pattern set
187c8564c30SJames Wright     if (!ctx->mat_assembled_full_internal) PetscCall(MatCeedSetupInnerMat(mat_ceed, &ctx->mat_assembled_full_internal));
188c8564c30SJames Wright 
189c8564c30SJames Wright     // Assemble mat_assembled_full_internal
190c8564c30SJames Wright     PetscCall(MatCeedAssembleCOO(mat_ceed, ctx->mat_assembled_full_internal));
191c8564c30SJames Wright     if (mat_inner) *mat_inner = ctx->mat_assembled_full_internal;
192c8564c30SJames Wright   }
193c8564c30SJames Wright   PetscFunctionReturn(PETSC_SUCCESS);
194c8564c30SJames Wright }
195c8564c30SJames Wright 
196c8564c30SJames Wright /**
197c8564c30SJames Wright   @brief Get `MATCEED` diagonal block for Jacobi.
198c8564c30SJames Wright 
199c8564c30SJames Wright   Collective across MPI processes.
200c8564c30SJames Wright 
201c8564c30SJames Wright   @param[in]   mat_ceed   `MATCEED` to invert
202c8564c30SJames Wright   @param[out]  mat_block  The diagonal block matrix
203c8564c30SJames Wright 
204c8564c30SJames Wright   @return An error code: 0 - success, otherwise - failure
205c8564c30SJames Wright **/
206c8564c30SJames Wright static PetscErrorCode MatGetDiagonalBlock_Ceed(Mat mat_ceed, Mat *mat_block) {
207c8564c30SJames Wright   Mat            mat_inner = NULL;
208c8564c30SJames Wright   MatCeedContext ctx;
209c8564c30SJames Wright 
210c8564c30SJames Wright   PetscFunctionBeginUser;
211c8564c30SJames Wright   PetscCall(MatShellGetContext(mat_ceed, &ctx));
212c8564c30SJames Wright 
213c8564c30SJames Wright   // Assemble inner mat if needed
214c8564c30SJames Wright   PetscCall(MatCeedAssembleInnerBlockDiagonalMat(mat_ceed, ctx->is_ceed_pbd_valid, &mat_inner));
215c8564c30SJames Wright 
216c8564c30SJames Wright   // Get block diagonal
217c8564c30SJames Wright   PetscCall(MatGetDiagonalBlock(mat_inner, mat_block));
218c8564c30SJames Wright   PetscFunctionReturn(PETSC_SUCCESS);
219c8564c30SJames Wright }
220c8564c30SJames Wright 
221c8564c30SJames Wright /**
222c8564c30SJames Wright   @brief Invert `MATCEED` diagonal block for Jacobi.
223c8564c30SJames Wright 
224c8564c30SJames Wright   Collective across MPI processes.
225c8564c30SJames Wright 
226c8564c30SJames Wright   @param[in]   mat_ceed  `MATCEED` to invert
227c8564c30SJames Wright   @param[out]  values    The block inverses in column major order
228c8564c30SJames Wright 
229c8564c30SJames Wright   @return An error code: 0 - success, otherwise - failure
230c8564c30SJames Wright **/
231c8564c30SJames Wright static PetscErrorCode MatInvertBlockDiagonal_Ceed(Mat mat_ceed, const PetscScalar **values) {
232c8564c30SJames Wright   Mat            mat_inner = NULL;
233c8564c30SJames Wright   MatCeedContext ctx;
234c8564c30SJames Wright 
235c8564c30SJames Wright   PetscFunctionBeginUser;
236c8564c30SJames Wright   PetscCall(MatShellGetContext(mat_ceed, &ctx));
237c8564c30SJames Wright 
238c8564c30SJames Wright   // Assemble inner mat if needed
239c8564c30SJames Wright   PetscCall(MatCeedAssembleInnerBlockDiagonalMat(mat_ceed, ctx->is_ceed_pbd_valid, &mat_inner));
240c8564c30SJames Wright 
241c8564c30SJames Wright   // Invert PB diagonal
242c8564c30SJames Wright   PetscCall(MatInvertBlockDiagonal(mat_inner, values));
243c8564c30SJames Wright   PetscFunctionReturn(PETSC_SUCCESS);
244c8564c30SJames Wright }
245c8564c30SJames Wright 
246c8564c30SJames Wright /**
247c8564c30SJames Wright   @brief Invert `MATCEED` variable diagonal block for Jacobi.
248c8564c30SJames Wright 
249c8564c30SJames Wright   Collective across MPI processes.
250c8564c30SJames Wright 
251c8564c30SJames Wright   @param[in]   mat_ceed     `MATCEED` to invert
252c8564c30SJames Wright   @param[in]   num_blocks   The number of blocks on the process
253c8564c30SJames Wright   @param[in]   block_sizes  The size of each block on the process
254c8564c30SJames Wright   @param[out]  values       The block inverses in column major order
255c8564c30SJames Wright 
256c8564c30SJames Wright   @return An error code: 0 - success, otherwise - failure
257c8564c30SJames Wright **/
258c8564c30SJames Wright static PetscErrorCode MatInvertVariableBlockDiagonal_Ceed(Mat mat_ceed, PetscInt num_blocks, const PetscInt *block_sizes, PetscScalar *values) {
259c8564c30SJames Wright   Mat            mat_inner = NULL;
260c8564c30SJames Wright   MatCeedContext ctx;
261c8564c30SJames Wright 
262c8564c30SJames Wright   PetscFunctionBeginUser;
263c8564c30SJames Wright   PetscCall(MatShellGetContext(mat_ceed, &ctx));
264c8564c30SJames Wright 
265c8564c30SJames Wright   // Assemble inner mat if needed
266c8564c30SJames Wright   PetscCall(MatCeedAssembleInnerBlockDiagonalMat(mat_ceed, ctx->is_ceed_vpbd_valid, &mat_inner));
267c8564c30SJames Wright 
268c8564c30SJames Wright   // Invert PB diagonal
269c8564c30SJames Wright   PetscCall(MatInvertVariableBlockDiagonal(mat_inner, num_blocks, block_sizes, values));
270c8564c30SJames Wright   PetscFunctionReturn(PETSC_SUCCESS);
271c8564c30SJames Wright }
272c8564c30SJames Wright 
273*3933d9a0SJames Wright /**
274*3933d9a0SJames Wright   @brief View `MATCEED`.
275*3933d9a0SJames Wright 
276*3933d9a0SJames Wright   Collective across MPI processes.
277*3933d9a0SJames Wright 
278*3933d9a0SJames Wright   @param[in]   mat_ceed  `MATCEED` to view
279*3933d9a0SJames Wright   @param[in]   viewer    The visualization context
280*3933d9a0SJames Wright 
281*3933d9a0SJames Wright   @return An error code: 0 - success, otherwise - failure
282*3933d9a0SJames Wright **/
283*3933d9a0SJames Wright static PetscErrorCode MatView_Ceed(Mat mat_ceed, PetscViewer viewer) {
284*3933d9a0SJames Wright   PetscBool         is_ascii;
285*3933d9a0SJames Wright   PetscViewerFormat format;
286*3933d9a0SJames Wright   PetscMPIInt       size;
287*3933d9a0SJames Wright   MatCeedContext    ctx;
288*3933d9a0SJames Wright 
289*3933d9a0SJames Wright   PetscFunctionBeginUser;
290*3933d9a0SJames Wright   PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2);
291*3933d9a0SJames Wright   PetscCall(MatShellGetContext(mat_ceed, &ctx));
292*3933d9a0SJames Wright   if (!viewer) PetscCall(PetscViewerASCIIGetStdout(PetscObjectComm((PetscObject)mat_ceed), &viewer));
293*3933d9a0SJames Wright 
294*3933d9a0SJames Wright   PetscCall(PetscViewerGetFormat(viewer, &format));
295*3933d9a0SJames Wright   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)mat_ceed), &size));
296*3933d9a0SJames Wright   if (size == 1 && format == PETSC_VIEWER_LOAD_BALANCE) PetscFunctionReturn(PETSC_SUCCESS);
297*3933d9a0SJames Wright 
298*3933d9a0SJames Wright   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &is_ascii));
299*3933d9a0SJames Wright   {
300*3933d9a0SJames Wright     FILE *file;
301*3933d9a0SJames Wright 
302*3933d9a0SJames Wright     PetscCall(PetscViewerASCIIPrintf(viewer, "MatCEED:\n  Internal MatType:%s\n", ctx->internal_mat_type));
303*3933d9a0SJames Wright     PetscCall(PetscViewerASCIIGetPointer(viewer, &file));
304*3933d9a0SJames Wright     PetscCall(PetscViewerASCIIPrintf(viewer, " libCEED Operator:\n"));
305*3933d9a0SJames Wright     PetscCallCeed(ctx->ceed, CeedOperatorView(ctx->op_mult, file));
306*3933d9a0SJames Wright     if (ctx->op_mult_transpose) {
307*3933d9a0SJames Wright       PetscCall(PetscViewerASCIIPrintf(viewer, "  libCEED Transpose Operator:\n"));
308*3933d9a0SJames Wright       PetscCallCeed(ctx->ceed, CeedOperatorView(ctx->op_mult_transpose, file));
309*3933d9a0SJames Wright     }
310*3933d9a0SJames Wright   }
311*3933d9a0SJames Wright   PetscFunctionReturn(PETSC_SUCCESS);
312*3933d9a0SJames Wright }
313*3933d9a0SJames Wright 
314c8564c30SJames Wright // -----------------------------------------------------------------------------
315c8564c30SJames Wright // MatCeed
316c8564c30SJames Wright // -----------------------------------------------------------------------------
317c8564c30SJames Wright 
318c8564c30SJames Wright /**
319c8564c30SJames Wright   @brief Create PETSc `Mat` from libCEED operators.
320c8564c30SJames Wright 
321c8564c30SJames Wright   Collective across MPI processes.
322c8564c30SJames Wright 
323c8564c30SJames Wright   @param[in]   dm_x                      Input `DM`
324c8564c30SJames Wright   @param[in]   dm_y                      Output `DM`
325c8564c30SJames Wright   @param[in]   op_mult                   `CeedOperator` for forward evaluation
326c8564c30SJames Wright   @param[in]   op_mult_transpose         `CeedOperator` for transpose evaluation
327c8564c30SJames Wright   @param[out]  mat                        New MatCeed
328c8564c30SJames Wright 
329c8564c30SJames Wright   @return An error code: 0 - success, otherwise - failure
330c8564c30SJames Wright **/
331c8564c30SJames Wright PetscErrorCode MatCeedCreate(DM dm_x, DM dm_y, CeedOperator op_mult, CeedOperator op_mult_transpose, Mat *mat) {
332c8564c30SJames Wright   PetscInt       X_l_size, X_g_size, Y_l_size, Y_g_size;
333c8564c30SJames Wright   VecType        vec_type;
334c8564c30SJames Wright   MatCeedContext ctx;
335c8564c30SJames Wright 
336c8564c30SJames Wright   PetscFunctionBeginUser;
337c8564c30SJames Wright   PetscCall(MatCeedRegisterLogEvents());
338c8564c30SJames Wright 
339c8564c30SJames Wright   // Collect context data
340c8564c30SJames Wright   PetscCall(DMGetVecType(dm_x, &vec_type));
341c8564c30SJames Wright   {
342c8564c30SJames Wright     Vec X;
343c8564c30SJames Wright 
344c8564c30SJames Wright     PetscCall(DMGetGlobalVector(dm_x, &X));
345c8564c30SJames Wright     PetscCall(VecGetSize(X, &X_g_size));
346c8564c30SJames Wright     PetscCall(VecGetLocalSize(X, &X_l_size));
347c8564c30SJames Wright     PetscCall(DMRestoreGlobalVector(dm_x, &X));
348c8564c30SJames Wright   }
349c8564c30SJames Wright   if (dm_y) {
350c8564c30SJames Wright     Vec Y;
351c8564c30SJames Wright 
352c8564c30SJames Wright     PetscCall(DMGetGlobalVector(dm_y, &Y));
353c8564c30SJames Wright     PetscCall(VecGetSize(Y, &Y_g_size));
354c8564c30SJames Wright     PetscCall(VecGetLocalSize(Y, &Y_l_size));
355c8564c30SJames Wright     PetscCall(DMRestoreGlobalVector(dm_y, &Y));
356c8564c30SJames Wright   } else {
357c8564c30SJames Wright     dm_y     = dm_x;
358c8564c30SJames Wright     Y_g_size = X_g_size;
359c8564c30SJames Wright     Y_l_size = X_l_size;
360c8564c30SJames Wright   }
361c8564c30SJames Wright   // Create context
362c8564c30SJames Wright   {
363c8564c30SJames Wright     Vec X_loc, Y_loc_transpose = NULL;
364c8564c30SJames Wright 
365c8564c30SJames Wright     PetscCall(DMCreateLocalVector(dm_x, &X_loc));
366c8564c30SJames Wright     PetscCall(VecZeroEntries(X_loc));
367c8564c30SJames Wright     if (op_mult_transpose) {
368c8564c30SJames Wright       PetscCall(DMCreateLocalVector(dm_y, &Y_loc_transpose));
369c8564c30SJames Wright       PetscCall(VecZeroEntries(Y_loc_transpose));
370c8564c30SJames Wright     }
371c8564c30SJames Wright     PetscCall(MatCeedContextCreate(dm_x, dm_y, X_loc, Y_loc_transpose, op_mult, op_mult_transpose, MATCEED_MULT, MATCEED_MULT_TRANSPOSE, &ctx));
372c8564c30SJames Wright     PetscCall(VecDestroy(&X_loc));
373c8564c30SJames Wright     PetscCall(VecDestroy(&Y_loc_transpose));
374c8564c30SJames Wright   }
375c8564c30SJames Wright 
376c8564c30SJames Wright   // Create mat
377c8564c30SJames Wright   PetscCall(MatCreateShell(PetscObjectComm((PetscObject)dm_x), Y_l_size, X_l_size, Y_g_size, X_g_size, ctx, mat));
378c8564c30SJames Wright   PetscCall(PetscObjectChangeTypeName((PetscObject)*mat, MATCEED));
379c8564c30SJames Wright   // -- Set block and variable block sizes
380c8564c30SJames Wright   if (dm_x == dm_y) {
381c8564c30SJames Wright     MatType dm_mat_type, dm_mat_type_copy;
382c8564c30SJames Wright     Mat     temp_mat;
383c8564c30SJames Wright 
384c8564c30SJames Wright     PetscCall(DMGetMatType(dm_x, &dm_mat_type));
385c8564c30SJames Wright     PetscCall(PetscStrallocpy(dm_mat_type, (char **)&dm_mat_type_copy));
386c8564c30SJames Wright     PetscCall(DMSetMatType(dm_x, MATAIJ));
387c8564c30SJames Wright     PetscCall(DMCreateMatrix(dm_x, &temp_mat));
388c8564c30SJames Wright     PetscCall(DMSetMatType(dm_x, dm_mat_type_copy));
389c8564c30SJames Wright     PetscCall(PetscFree(dm_mat_type_copy));
390c8564c30SJames Wright 
391c8564c30SJames Wright     {
392c8564c30SJames Wright       PetscInt        block_size, num_blocks, max_vblock_size = PETSC_INT_MAX;
393c8564c30SJames Wright       const PetscInt *vblock_sizes;
394c8564c30SJames Wright 
395c8564c30SJames Wright       // -- Get block sizes
396c8564c30SJames Wright       PetscCall(MatGetBlockSize(temp_mat, &block_size));
397c8564c30SJames Wright       PetscCall(MatGetVariableBlockSizes(temp_mat, &num_blocks, &vblock_sizes));
398c8564c30SJames Wright       {
399c8564c30SJames Wright         PetscInt local_min_max[2] = {0}, global_min_max[2] = {0, PETSC_INT_MAX};
400c8564c30SJames Wright 
401c8564c30SJames Wright         for (PetscInt i = 0; i < num_blocks; i++) local_min_max[1] = PetscMax(local_min_max[1], vblock_sizes[i]);
402c8564c30SJames Wright         PetscCall(PetscGlobalMinMaxInt(PetscObjectComm((PetscObject)dm_x), local_min_max, global_min_max));
403c8564c30SJames Wright         max_vblock_size = global_min_max[1];
404c8564c30SJames Wright       }
405c8564c30SJames Wright 
406c8564c30SJames Wright       // -- Copy block sizes
407c8564c30SJames Wright       if (block_size > 1) PetscCall(MatSetBlockSize(*mat, block_size));
408c8564c30SJames Wright       if (num_blocks) PetscCall(MatSetVariableBlockSizes(*mat, num_blocks, (PetscInt *)vblock_sizes));
409c8564c30SJames Wright 
410c8564c30SJames Wright       // -- Check libCEED compatibility
411c8564c30SJames Wright       {
412c8564c30SJames Wright         bool is_composite;
413c8564c30SJames Wright 
414c8564c30SJames Wright         ctx->is_ceed_pbd_valid  = PETSC_TRUE;
415c8564c30SJames Wright         ctx->is_ceed_vpbd_valid = PETSC_TRUE;
41654831c5fSJames Wright         PetscCallCeed(ctx->ceed, CeedOperatorIsComposite(op_mult, &is_composite));
417c8564c30SJames Wright         if (is_composite) {
418c8564c30SJames Wright           CeedInt       num_sub_operators;
419c8564c30SJames Wright           CeedOperator *sub_operators;
420c8564c30SJames Wright 
42154831c5fSJames Wright           PetscCallCeed(ctx->ceed, CeedCompositeOperatorGetNumSub(op_mult, &num_sub_operators));
42254831c5fSJames Wright           PetscCallCeed(ctx->ceed, CeedCompositeOperatorGetSubList(op_mult, &sub_operators));
423c8564c30SJames Wright           for (CeedInt i = 0; i < num_sub_operators; i++) {
424c8564c30SJames Wright             CeedInt                  num_bases, num_comp;
425c8564c30SJames Wright             CeedBasis               *active_bases;
426c8564c30SJames Wright             CeedOperatorAssemblyData assembly_data;
427c8564c30SJames Wright 
42854831c5fSJames Wright             PetscCallCeed(ctx->ceed, CeedOperatorGetOperatorAssemblyData(sub_operators[i], &assembly_data));
42954831c5fSJames Wright             PetscCallCeed(ctx->ceed, CeedOperatorAssemblyDataGetBases(assembly_data, &num_bases, &active_bases, NULL, NULL, NULL, NULL));
43054831c5fSJames Wright             PetscCallCeed(ctx->ceed, CeedBasisGetNumComponents(active_bases[0], &num_comp));
431c8564c30SJames Wright             if (num_bases > 1) {
432c8564c30SJames Wright               ctx->is_ceed_pbd_valid  = PETSC_FALSE;
433c8564c30SJames Wright               ctx->is_ceed_vpbd_valid = PETSC_FALSE;
434c8564c30SJames Wright             }
435c8564c30SJames Wright             if (num_comp != block_size) ctx->is_ceed_pbd_valid = PETSC_FALSE;
436c8564c30SJames Wright             if (num_comp < max_vblock_size) ctx->is_ceed_vpbd_valid = PETSC_FALSE;
437c8564c30SJames Wright           }
438c8564c30SJames Wright         } else {
439c8564c30SJames Wright           // LCOV_EXCL_START
440c8564c30SJames Wright           CeedInt                  num_bases, num_comp;
441c8564c30SJames Wright           CeedBasis               *active_bases;
442c8564c30SJames Wright           CeedOperatorAssemblyData assembly_data;
443c8564c30SJames Wright 
44454831c5fSJames Wright           PetscCallCeed(ctx->ceed, CeedOperatorGetOperatorAssemblyData(op_mult, &assembly_data));
44554831c5fSJames Wright           PetscCallCeed(ctx->ceed, CeedOperatorAssemblyDataGetBases(assembly_data, &num_bases, &active_bases, NULL, NULL, NULL, NULL));
44654831c5fSJames Wright           PetscCallCeed(ctx->ceed, CeedBasisGetNumComponents(active_bases[0], &num_comp));
447c8564c30SJames Wright           if (num_bases > 1) {
448c8564c30SJames Wright             ctx->is_ceed_pbd_valid  = PETSC_FALSE;
449c8564c30SJames Wright             ctx->is_ceed_vpbd_valid = PETSC_FALSE;
450c8564c30SJames Wright           }
451c8564c30SJames Wright           if (num_comp != block_size) ctx->is_ceed_pbd_valid = PETSC_FALSE;
452c8564c30SJames Wright           if (num_comp < max_vblock_size) ctx->is_ceed_vpbd_valid = PETSC_FALSE;
453c8564c30SJames Wright           // LCOV_EXCL_STOP
454c8564c30SJames Wright         }
455c8564c30SJames Wright         {
456c8564c30SJames Wright           PetscInt local_is_valid[2], global_is_valid[2];
457c8564c30SJames Wright 
458c8564c30SJames Wright           local_is_valid[0] = local_is_valid[1] = ctx->is_ceed_pbd_valid;
459c8564c30SJames Wright           PetscCall(PetscGlobalMinMaxInt(PetscObjectComm((PetscObject)dm_x), local_is_valid, global_is_valid));
460c8564c30SJames Wright           ctx->is_ceed_pbd_valid = global_is_valid[0];
461c8564c30SJames Wright           local_is_valid[0] = local_is_valid[1] = ctx->is_ceed_vpbd_valid;
462c8564c30SJames Wright           PetscCall(PetscGlobalMinMaxInt(PetscObjectComm((PetscObject)dm_x), local_is_valid, global_is_valid));
463c8564c30SJames Wright           ctx->is_ceed_vpbd_valid = global_is_valid[0];
464c8564c30SJames Wright         }
465c8564c30SJames Wright       }
466c8564c30SJames Wright     }
467c8564c30SJames Wright     PetscCall(MatDestroy(&temp_mat));
468c8564c30SJames Wright   }
469c8564c30SJames Wright   // -- Set internal mat type
470c8564c30SJames Wright   {
471c8564c30SJames Wright     VecType vec_type;
472c8564c30SJames Wright     MatType internal_mat_type = MATAIJ;
473c8564c30SJames Wright 
474c8564c30SJames Wright     PetscCall(VecGetType(ctx->X_loc, &vec_type));
475c8564c30SJames Wright     if (strstr(vec_type, VECCUDA)) internal_mat_type = MATAIJCUSPARSE;
476c8564c30SJames Wright     else if (strstr(vec_type, VECKOKKOS)) internal_mat_type = MATAIJKOKKOS;
477c8564c30SJames Wright     else internal_mat_type = MATAIJ;
478c8564c30SJames Wright     PetscCall(PetscStrallocpy(internal_mat_type, &ctx->internal_mat_type));
479c8564c30SJames Wright   }
480c8564c30SJames Wright   // -- Set mat operations
481c8564c30SJames Wright   PetscCall(MatShellSetContextDestroy(*mat, (PetscErrorCode(*)(void *))MatCeedContextDestroy));
482*3933d9a0SJames Wright   PetscCall(MatShellSetOperation(*mat, MATOP_VIEW, (void (*)(void))MatView_Ceed));
483c8564c30SJames Wright   PetscCall(MatShellSetOperation(*mat, MATOP_MULT, (void (*)(void))MatMult_Ceed));
484c8564c30SJames Wright   if (op_mult_transpose) PetscCall(MatShellSetOperation(*mat, MATOP_MULT_TRANSPOSE, (void (*)(void))MatMultTranspose_Ceed));
485c8564c30SJames Wright   PetscCall(MatShellSetOperation(*mat, MATOP_GET_DIAGONAL, (void (*)(void))MatGetDiagonal_Ceed));
486c8564c30SJames Wright   PetscCall(MatShellSetOperation(*mat, MATOP_GET_DIAGONAL_BLOCK, (void (*)(void))MatGetDiagonalBlock_Ceed));
487c8564c30SJames Wright   PetscCall(MatShellSetOperation(*mat, MATOP_INVERT_BLOCK_DIAGONAL, (void (*)(void))MatInvertBlockDiagonal_Ceed));
488c8564c30SJames Wright   PetscCall(MatShellSetOperation(*mat, MATOP_INVERT_VBLOCK_DIAGONAL, (void (*)(void))MatInvertVariableBlockDiagonal_Ceed));
489c8564c30SJames Wright   PetscCall(MatShellSetVecType(*mat, vec_type));
490c8564c30SJames Wright   PetscFunctionReturn(PETSC_SUCCESS);
491c8564c30SJames Wright }
492c8564c30SJames Wright 
493c8564c30SJames Wright /**
494c8564c30SJames Wright   @brief Copy `MATCEED` into a compatible `Mat` with type `MatShell` or `MATCEED`.
495c8564c30SJames Wright 
496c8564c30SJames Wright   Collective across MPI processes.
497c8564c30SJames Wright 
498c8564c30SJames Wright   @param[in]   mat_ceed   `MATCEED` to copy from
499c8564c30SJames Wright   @param[out]  mat_other  `MatShell` or `MATCEED` to copy into
500c8564c30SJames Wright 
501c8564c30SJames Wright   @return An error code: 0 - success, otherwise - failure
502c8564c30SJames Wright **/
503c8564c30SJames Wright PetscErrorCode MatCeedCopy(Mat mat_ceed, Mat mat_other) {
504c8564c30SJames Wright   PetscFunctionBeginUser;
505c8564c30SJames Wright   PetscCall(MatCeedRegisterLogEvents());
506c8564c30SJames Wright 
507c8564c30SJames Wright   // Check type compatibility
508c8564c30SJames Wright   {
509c8564c30SJames Wright     MatType mat_type_ceed, mat_type_other;
510c8564c30SJames Wright 
511c8564c30SJames Wright     PetscCall(MatGetType(mat_ceed, &mat_type_ceed));
512c8564c30SJames Wright     PetscCheck(!strcmp(mat_type_ceed, MATCEED), PETSC_COMM_SELF, PETSC_ERR_LIB, "mat_ceed must have type " MATCEED);
513c8564c30SJames Wright     PetscCall(MatGetType(mat_ceed, &mat_type_other));
514c8564c30SJames Wright     PetscCheck(!strcmp(mat_type_other, MATCEED) || !strcmp(mat_type_other, MATSHELL), PETSC_COMM_SELF, PETSC_ERR_LIB,
515c8564c30SJames Wright                "mat_other must have type " MATCEED " or " MATSHELL);
516c8564c30SJames Wright   }
517c8564c30SJames Wright 
518c8564c30SJames Wright   // Check dimension compatibility
519c8564c30SJames Wright   {
520c8564c30SJames Wright     PetscInt X_l_ceed_size, X_g_ceed_size, Y_l_ceed_size, Y_g_ceed_size, X_l_other_size, X_g_other_size, Y_l_other_size, Y_g_other_size;
521c8564c30SJames Wright 
522c8564c30SJames Wright     PetscCall(MatGetSize(mat_ceed, &Y_g_ceed_size, &X_g_ceed_size));
523c8564c30SJames Wright     PetscCall(MatGetLocalSize(mat_ceed, &Y_l_ceed_size, &X_l_ceed_size));
524c8564c30SJames Wright     PetscCall(MatGetSize(mat_ceed, &Y_g_other_size, &X_g_other_size));
525c8564c30SJames Wright     PetscCall(MatGetLocalSize(mat_ceed, &Y_l_other_size, &X_l_other_size));
526c8564c30SJames Wright     PetscCheck((Y_g_ceed_size == Y_g_other_size) && (X_g_ceed_size == X_g_other_size) && (Y_l_ceed_size == Y_l_other_size) &&
527c8564c30SJames Wright                    (X_l_ceed_size == X_l_other_size),
528c8564c30SJames Wright                PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ,
529c8564c30SJames Wright                "mat_ceed and mat_other must have compatible sizes; found mat_ceed (Global: %" PetscInt_FMT ", %" PetscInt_FMT
530c8564c30SJames Wright                "; Local: %" PetscInt_FMT ", %" PetscInt_FMT ") mat_other (Global: %" PetscInt_FMT ", %" PetscInt_FMT "; Local: %" PetscInt_FMT
531c8564c30SJames Wright                ", %" PetscInt_FMT ")",
532c8564c30SJames Wright                Y_g_ceed_size, X_g_ceed_size, Y_l_ceed_size, X_l_ceed_size, Y_g_other_size, X_g_other_size, Y_l_other_size, X_l_other_size);
533c8564c30SJames Wright   }
534c8564c30SJames Wright 
535c8564c30SJames Wright   // Convert
536c8564c30SJames Wright   {
537c8564c30SJames Wright     VecType        vec_type;
538c8564c30SJames Wright     MatCeedContext ctx;
539c8564c30SJames Wright 
540c8564c30SJames Wright     PetscCall(PetscObjectChangeTypeName((PetscObject)mat_other, MATCEED));
541c8564c30SJames Wright     PetscCall(MatShellGetContext(mat_ceed, &ctx));
542c8564c30SJames Wright     PetscCall(MatCeedContextReference(ctx));
543c8564c30SJames Wright     PetscCall(MatShellSetContext(mat_other, ctx));
544c8564c30SJames Wright     PetscCall(MatShellSetContextDestroy(mat_other, (PetscErrorCode(*)(void *))MatCeedContextDestroy));
545*3933d9a0SJames Wright     PetscCall(MatShellSetOperation(mat_other, MATOP_VIEW, (void (*)(void))MatView_Ceed));
546c8564c30SJames Wright     PetscCall(MatShellSetOperation(mat_other, MATOP_MULT, (void (*)(void))MatMult_Ceed));
547c8564c30SJames Wright     if (ctx->op_mult_transpose) PetscCall(MatShellSetOperation(mat_other, MATOP_MULT_TRANSPOSE, (void (*)(void))MatMultTranspose_Ceed));
548c8564c30SJames Wright     PetscCall(MatShellSetOperation(mat_other, MATOP_GET_DIAGONAL, (void (*)(void))MatGetDiagonal_Ceed));
549c8564c30SJames Wright     PetscCall(MatShellSetOperation(mat_other, MATOP_GET_DIAGONAL_BLOCK, (void (*)(void))MatGetDiagonalBlock_Ceed));
550c8564c30SJames Wright     PetscCall(MatShellSetOperation(mat_other, MATOP_INVERT_BLOCK_DIAGONAL, (void (*)(void))MatInvertBlockDiagonal_Ceed));
551c8564c30SJames Wright     PetscCall(MatShellSetOperation(mat_other, MATOP_INVERT_VBLOCK_DIAGONAL, (void (*)(void))MatInvertVariableBlockDiagonal_Ceed));
552c8564c30SJames Wright     {
553c8564c30SJames Wright       PetscInt block_size;
554c8564c30SJames Wright 
555c8564c30SJames Wright       PetscCall(MatGetBlockSize(mat_ceed, &block_size));
556c8564c30SJames Wright       if (block_size > 1) PetscCall(MatSetBlockSize(mat_other, block_size));
557c8564c30SJames Wright     }
558c8564c30SJames Wright     {
559c8564c30SJames Wright       PetscInt        num_blocks;
560c8564c30SJames Wright       const PetscInt *block_sizes;
561c8564c30SJames Wright 
562c8564c30SJames Wright       PetscCall(MatGetVariableBlockSizes(mat_ceed, &num_blocks, &block_sizes));
563c8564c30SJames Wright       if (num_blocks) PetscCall(MatSetVariableBlockSizes(mat_other, num_blocks, (PetscInt *)block_sizes));
564c8564c30SJames Wright     }
565c8564c30SJames Wright     PetscCall(DMGetVecType(ctx->dm_x, &vec_type));
566c8564c30SJames Wright     PetscCall(MatShellSetVecType(mat_other, vec_type));
567c8564c30SJames Wright   }
568c8564c30SJames Wright   PetscFunctionReturn(PETSC_SUCCESS);
569c8564c30SJames Wright }
570c8564c30SJames Wright 
571c8564c30SJames Wright /**
572c8564c30SJames Wright   @brief Assemble a `MATCEED` into a `MATAIJ` or similar.
573c8564c30SJames Wright          The `mat_coo` preallocation is set to match the sparsity pattern of `mat_ceed`.
574c8564c30SJames Wright          The caller is responsible for assuring the global and local sizes are compatible, otherwise this function will fail.
575c8564c30SJames Wright 
576c8564c30SJames Wright   Collective across MPI processes.
577c8564c30SJames Wright 
578c8564c30SJames Wright   @param[in]      mat_ceed  `MATCEED` to assemble
579c8564c30SJames Wright   @param[in,out]  mat_coo   `MATAIJ` or similar to assemble into
580c8564c30SJames Wright 
581c8564c30SJames Wright   @return An error code: 0 - success, otherwise - failure
582c8564c30SJames Wright **/
583c8564c30SJames Wright PetscErrorCode MatCeedAssembleCOO(Mat mat_ceed, Mat mat_coo) {
584c8564c30SJames Wright   MatCeedContext ctx;
585c8564c30SJames Wright 
586c8564c30SJames Wright   PetscFunctionBeginUser;
587c8564c30SJames Wright   PetscCall(MatShellGetContext(mat_ceed, &ctx));
588c8564c30SJames Wright 
589c8564c30SJames Wright   // Check if COO pattern set
590c8564c30SJames Wright   {
591c8564c30SJames Wright     PetscInt index = -1;
592c8564c30SJames Wright 
593c8564c30SJames Wright     for (PetscInt i = 0; i < ctx->num_mats_assembled_full; i++) {
594c8564c30SJames Wright       if (ctx->mats_assembled_full[i] == mat_coo) index = i;
595c8564c30SJames Wright     }
596c8564c30SJames Wright     if (index == -1) {
597c8564c30SJames Wright       PetscInt     *rows_petsc = NULL, *cols_petsc = NULL;
598c8564c30SJames Wright       CeedInt      *rows_ceed, *cols_ceed;
599c8564c30SJames Wright       PetscCount    num_entries;
600c8564c30SJames Wright       PetscLogStage stage_amg_setup;
601c8564c30SJames Wright 
602c8564c30SJames Wright       // -- Assemble sparsity pattern if mat hasn't been assembled before
603c8564c30SJames Wright       PetscCall(PetscLogStageGetId("MATCEED Assembly Setup", &stage_amg_setup));
604c8564c30SJames Wright       if (stage_amg_setup == -1) {
605c8564c30SJames Wright         PetscCall(PetscLogStageRegister("MATCEED Assembly Setup", &stage_amg_setup));
606c8564c30SJames Wright       }
607c8564c30SJames Wright       PetscCall(PetscLogStagePush(stage_amg_setup));
60854831c5fSJames Wright       PetscCallCeed(ctx->ceed, CeedOperatorLinearAssembleSymbolic(ctx->op_mult, &num_entries, &rows_ceed, &cols_ceed));
609d0593705SJames Wright       PetscCall(IntArrayCeedToPetsc(num_entries, &rows_ceed, &rows_petsc));
610d0593705SJames Wright       PetscCall(IntArrayCeedToPetsc(num_entries, &cols_ceed, &cols_petsc));
611c8564c30SJames Wright       PetscCall(MatSetPreallocationCOOLocal(mat_coo, num_entries, rows_petsc, cols_petsc));
612c8564c30SJames Wright       free(rows_petsc);
613c8564c30SJames Wright       free(cols_petsc);
61454831c5fSJames Wright       if (!ctx->coo_values_full) PetscCallCeed(ctx->ceed, CeedVectorCreate(ctx->ceed, num_entries, &ctx->coo_values_full));
615c8564c30SJames Wright       PetscCall(PetscRealloc(++ctx->num_mats_assembled_full * sizeof(Mat), &ctx->mats_assembled_full));
616c8564c30SJames Wright       ctx->mats_assembled_full[ctx->num_mats_assembled_full - 1] = mat_coo;
617c8564c30SJames Wright       PetscCall(PetscLogStagePop());
618c8564c30SJames Wright     }
619c8564c30SJames Wright   }
620c8564c30SJames Wright 
621c8564c30SJames Wright   // Assemble mat_ceed
622c8564c30SJames Wright   PetscCall(MatAssemblyBegin(mat_coo, MAT_FINAL_ASSEMBLY));
623c8564c30SJames Wright   {
624c8564c30SJames Wright     const CeedScalar *values;
625c8564c30SJames Wright     MatType           mat_type;
626c8564c30SJames Wright     CeedMemType       mem_type = CEED_MEM_HOST;
627c8564c30SJames Wright     PetscBool         is_spd, is_spd_known;
628c8564c30SJames Wright 
629c8564c30SJames Wright     PetscCall(MatGetType(mat_coo, &mat_type));
630c8564c30SJames Wright     if (strstr(mat_type, "cusparse")) mem_type = CEED_MEM_DEVICE;
631c8564c30SJames Wright     else if (strstr(mat_type, "kokkos")) mem_type = CEED_MEM_DEVICE;
632c8564c30SJames Wright     else mem_type = CEED_MEM_HOST;
633c8564c30SJames Wright 
63454831c5fSJames Wright     PetscCallCeed(ctx->ceed, CeedOperatorLinearAssemble(ctx->op_mult, ctx->coo_values_full));
63554831c5fSJames Wright     PetscCallCeed(ctx->ceed, CeedVectorGetArrayRead(ctx->coo_values_full, mem_type, &values));
636c8564c30SJames Wright     PetscCall(MatSetValuesCOO(mat_coo, values, INSERT_VALUES));
637c8564c30SJames Wright     PetscCall(MatIsSPDKnown(mat_ceed, &is_spd_known, &is_spd));
638c8564c30SJames Wright     if (is_spd_known) PetscCall(MatSetOption(mat_coo, MAT_SPD, is_spd));
63954831c5fSJames Wright     PetscCallCeed(ctx->ceed, CeedVectorRestoreArrayRead(ctx->coo_values_full, &values));
640c8564c30SJames Wright   }
641c8564c30SJames Wright   PetscCall(MatAssemblyEnd(mat_coo, MAT_FINAL_ASSEMBLY));
642c8564c30SJames Wright   PetscFunctionReturn(PETSC_SUCCESS);
643c8564c30SJames Wright }
644c8564c30SJames Wright 
645c8564c30SJames Wright /**
646c8564c30SJames Wright   @brief Set user context for a `MATCEED`.
647c8564c30SJames Wright 
648c8564c30SJames Wright   Collective across MPI processes.
649c8564c30SJames Wright 
650c8564c30SJames Wright   @param[in,out]  mat  `MATCEED`
651c8564c30SJames Wright   @param[in]      f    The context destroy function, or NULL
652c8564c30SJames Wright   @param[in]      ctx  User context, or NULL to unset
653c8564c30SJames Wright 
654c8564c30SJames Wright   @return An error code: 0 - success, otherwise - failure
655c8564c30SJames Wright **/
656c8564c30SJames Wright PetscErrorCode MatCeedSetContext(Mat mat, PetscErrorCode (*f)(void *), void *ctx) {
657c8564c30SJames Wright   PetscContainer user_ctx = NULL;
658c8564c30SJames Wright 
659c8564c30SJames Wright   PetscFunctionBeginUser;
660c8564c30SJames Wright   if (ctx) {
661c8564c30SJames Wright     PetscCall(PetscContainerCreate(PetscObjectComm((PetscObject)mat), &user_ctx));
662c8564c30SJames Wright     PetscCall(PetscContainerSetPointer(user_ctx, ctx));
663c8564c30SJames Wright     PetscCall(PetscContainerSetUserDestroy(user_ctx, f));
664c8564c30SJames Wright   }
665c8564c30SJames Wright   PetscCall(PetscObjectCompose((PetscObject)mat, "MatCeed user context", (PetscObject)user_ctx));
666c8564c30SJames Wright   PetscCall(PetscContainerDestroy(&user_ctx));
667c8564c30SJames Wright   PetscFunctionReturn(PETSC_SUCCESS);
668c8564c30SJames Wright }
669c8564c30SJames Wright 
670c8564c30SJames Wright /**
671c8564c30SJames Wright   @brief Retrieve the user context for a `MATCEED`.
672c8564c30SJames Wright 
673c8564c30SJames Wright   Collective across MPI processes.
674c8564c30SJames Wright 
675c8564c30SJames Wright   @param[in,out]  mat  `MATCEED`
676c8564c30SJames Wright   @param[in]      ctx  User context
677c8564c30SJames Wright 
678c8564c30SJames Wright   @return An error code: 0 - success, otherwise - failure
679c8564c30SJames Wright **/
680c8564c30SJames Wright PetscErrorCode MatCeedGetContext(Mat mat, void *ctx) {
681c8564c30SJames Wright   PetscContainer user_ctx;
682c8564c30SJames Wright 
683c8564c30SJames Wright   PetscFunctionBeginUser;
684c8564c30SJames Wright   PetscCall(PetscObjectQuery((PetscObject)mat, "MatCeed user context", (PetscObject *)&user_ctx));
685c8564c30SJames Wright   if (user_ctx) PetscCall(PetscContainerGetPointer(user_ctx, (void **)ctx));
686c8564c30SJames Wright   else *(void **)ctx = NULL;
687c8564c30SJames Wright   PetscFunctionReturn(PETSC_SUCCESS);
688c8564c30SJames Wright }
689c8564c30SJames Wright 
690c8564c30SJames Wright /**
691c8564c30SJames Wright   @brief Sets the inner matrix type as a string from the `MATCEED`.
692c8564c30SJames Wright 
693c8564c30SJames Wright   Collective across MPI processes.
694c8564c30SJames Wright 
695c8564c30SJames Wright   @param[in,out]  mat   `MATCEED`
696c8564c30SJames Wright   @param[in]      type  Inner `MatType` to set
697c8564c30SJames Wright 
698c8564c30SJames Wright   @return An error code: 0 - success, otherwise - failure
699c8564c30SJames Wright **/
700c8564c30SJames Wright PetscErrorCode MatCeedSetInnerMatType(Mat mat, MatType type) {
701c8564c30SJames Wright   MatCeedContext ctx;
702c8564c30SJames Wright 
703c8564c30SJames Wright   PetscFunctionBeginUser;
704c8564c30SJames Wright   PetscCall(MatShellGetContext(mat, &ctx));
705c8564c30SJames Wright   // Check if same
706c8564c30SJames Wright   {
707c8564c30SJames Wright     size_t    len_old, len_new;
708c8564c30SJames Wright     PetscBool is_same = PETSC_FALSE;
709c8564c30SJames Wright 
710c8564c30SJames Wright     PetscCall(PetscStrlen(ctx->internal_mat_type, &len_old));
711c8564c30SJames Wright     PetscCall(PetscStrlen(type, &len_new));
712c8564c30SJames Wright     if (len_old == len_new) PetscCall(PetscStrncmp(ctx->internal_mat_type, type, len_old, &is_same));
713c8564c30SJames Wright     if (is_same) PetscFunctionReturn(PETSC_SUCCESS);
714c8564c30SJames Wright   }
715c8564c30SJames Wright   // Clean up old mats in different format
716c8564c30SJames Wright   // LCOV_EXCL_START
717c8564c30SJames Wright   if (ctx->mat_assembled_full_internal) {
718c8564c30SJames Wright     for (PetscInt i = 0; i < ctx->num_mats_assembled_full; i++) {
719c8564c30SJames Wright       if (ctx->mats_assembled_full[i] == ctx->mat_assembled_full_internal) {
720c8564c30SJames Wright         for (PetscInt j = i + 1; j < ctx->num_mats_assembled_full; j++) {
721c8564c30SJames Wright           ctx->mats_assembled_full[j - 1] = ctx->mats_assembled_full[j];
722c8564c30SJames Wright         }
723c8564c30SJames Wright         ctx->num_mats_assembled_full--;
724c8564c30SJames Wright         // Note: we'll realloc this array again, so no need to shrink the allocation
725c8564c30SJames Wright         PetscCall(MatDestroy(&ctx->mat_assembled_full_internal));
726c8564c30SJames Wright       }
727c8564c30SJames Wright     }
728c8564c30SJames Wright   }
729c8564c30SJames Wright   if (ctx->mat_assembled_pbd_internal) {
730c8564c30SJames Wright     for (PetscInt i = 0; i < ctx->num_mats_assembled_pbd; i++) {
731c8564c30SJames Wright       if (ctx->mats_assembled_pbd[i] == ctx->mat_assembled_pbd_internal) {
732c8564c30SJames Wright         for (PetscInt j = i + 1; j < ctx->num_mats_assembled_pbd; j++) {
733c8564c30SJames Wright           ctx->mats_assembled_pbd[j - 1] = ctx->mats_assembled_pbd[j];
734c8564c30SJames Wright         }
735c8564c30SJames Wright         // Note: we'll realloc this array again, so no need to shrink the allocation
736c8564c30SJames Wright         ctx->num_mats_assembled_pbd--;
737c8564c30SJames Wright         PetscCall(MatDestroy(&ctx->mat_assembled_pbd_internal));
738c8564c30SJames Wright       }
739c8564c30SJames Wright     }
740c8564c30SJames Wright   }
741c8564c30SJames Wright   PetscCall(PetscFree(ctx->internal_mat_type));
742c8564c30SJames Wright   PetscCall(PetscStrallocpy(type, &ctx->internal_mat_type));
743c8564c30SJames Wright   PetscFunctionReturn(PETSC_SUCCESS);
744c8564c30SJames Wright   // LCOV_EXCL_STOP
745c8564c30SJames Wright }
746c8564c30SJames Wright 
747c8564c30SJames Wright /**
748c8564c30SJames Wright   @brief Gets the inner matrix type as a string from the `MATCEED`.
749c8564c30SJames Wright 
750c8564c30SJames Wright   Collective across MPI processes.
751c8564c30SJames Wright 
752c8564c30SJames Wright   @param[in,out]  mat   `MATCEED`
753c8564c30SJames Wright   @param[in]      type  Inner `MatType`
754c8564c30SJames Wright 
755c8564c30SJames Wright   @return An error code: 0 - success, otherwise - failure
756c8564c30SJames Wright **/
757c8564c30SJames Wright PetscErrorCode MatCeedGetInnerMatType(Mat mat, MatType *type) {
758c8564c30SJames Wright   MatCeedContext ctx;
759c8564c30SJames Wright 
760c8564c30SJames Wright   PetscFunctionBeginUser;
761c8564c30SJames Wright   PetscCall(MatShellGetContext(mat, &ctx));
762c8564c30SJames Wright   *type = ctx->internal_mat_type;
763c8564c30SJames Wright   PetscFunctionReturn(PETSC_SUCCESS);
764c8564c30SJames Wright }
765c8564c30SJames Wright 
766c8564c30SJames Wright /**
767c8564c30SJames Wright   @brief Set a user defined matrix operation for a `MATCEED` matrix.
768c8564c30SJames Wright 
769c8564c30SJames Wright   Within each user-defined routine, the user should call `MatCeedGetContext()` to obtain the user-defined context that was set by
770c8564c30SJames Wright `MatCeedSetContext()`.
771c8564c30SJames Wright 
772c8564c30SJames Wright   Collective across MPI processes.
773c8564c30SJames Wright 
774c8564c30SJames Wright   @param[in,out]  mat  `MATCEED`
775c8564c30SJames Wright   @param[in]      op   Name of the `MatOperation`
776c8564c30SJames Wright   @param[in]      g    Function that provides the operation
777c8564c30SJames Wright 
778c8564c30SJames Wright   @return An error code: 0 - success, otherwise - failure
779c8564c30SJames Wright **/
780c8564c30SJames Wright PetscErrorCode MatCeedSetOperation(Mat mat, MatOperation op, void (*g)(void)) {
781c8564c30SJames Wright   PetscFunctionBeginUser;
782c8564c30SJames Wright   PetscCall(MatShellSetOperation(mat, op, g));
783c8564c30SJames Wright   PetscFunctionReturn(PETSC_SUCCESS);
784c8564c30SJames Wright }
785c8564c30SJames Wright 
786c8564c30SJames Wright /**
787c8564c30SJames Wright   @brief Set input local vectors for `MATCEED` `MatMult()` and `MatMultTranspose()` operations.
788c8564c30SJames Wright 
789c8564c30SJames Wright   Not collective across MPI processes.
790c8564c30SJames Wright 
791c8564c30SJames Wright   @param[in,out]  mat              `MATCEED`
792c8564c30SJames Wright   @param[in]      X_loc            Input PETSc local vector, or NULL
793c8564c30SJames Wright   @param[in]      Y_loc_transpose  Input PETSc local vector for transpose operation, or NULL
794c8564c30SJames Wright 
795c8564c30SJames Wright   @return An error code: 0 - success, otherwise - failure
796c8564c30SJames Wright **/
797c8564c30SJames Wright PetscErrorCode MatCeedSetLocalVectors(Mat mat, Vec X_loc, Vec Y_loc_transpose) {
798c8564c30SJames Wright   MatCeedContext ctx;
799c8564c30SJames Wright 
800c8564c30SJames Wright   PetscFunctionBeginUser;
801c8564c30SJames Wright   PetscCall(MatShellGetContext(mat, &ctx));
802c8564c30SJames Wright   if (X_loc) {
803c8564c30SJames Wright     PetscInt len_old, len_new;
804c8564c30SJames Wright 
805c8564c30SJames Wright     PetscCall(VecGetSize(ctx->X_loc, &len_old));
806c8564c30SJames Wright     PetscCall(VecGetSize(X_loc, &len_new));
807c8564c30SJames Wright     PetscCheck(len_old == len_new, PETSC_COMM_SELF, PETSC_ERR_LIB, "new X_loc length %" PetscInt_FMT " should match old X_loc length %" PetscInt_FMT,
808c8564c30SJames Wright                len_new, len_old);
809c8564c30SJames Wright     PetscCall(VecDestroy(&ctx->X_loc));
810c8564c30SJames Wright     ctx->X_loc = X_loc;
811c8564c30SJames Wright     PetscCall(PetscObjectReference((PetscObject)X_loc));
812c8564c30SJames Wright   }
813c8564c30SJames Wright   if (Y_loc_transpose) {
814c8564c30SJames Wright     PetscInt len_old, len_new;
815c8564c30SJames Wright 
816c8564c30SJames Wright     PetscCall(VecGetSize(ctx->Y_loc_transpose, &len_old));
817c8564c30SJames Wright     PetscCall(VecGetSize(Y_loc_transpose, &len_new));
818c8564c30SJames Wright     PetscCheck(len_old == len_new, PETSC_COMM_SELF, PETSC_ERR_LIB,
819c8564c30SJames Wright                "new Y_loc_transpose length %" PetscInt_FMT " should match old Y_loc_transpose length %" PetscInt_FMT, len_new, len_old);
820c8564c30SJames Wright     PetscCall(VecDestroy(&ctx->Y_loc_transpose));
821c8564c30SJames Wright     ctx->Y_loc_transpose = Y_loc_transpose;
822c8564c30SJames Wright     PetscCall(PetscObjectReference((PetscObject)Y_loc_transpose));
823c8564c30SJames Wright   }
824c8564c30SJames Wright   PetscFunctionReturn(PETSC_SUCCESS);
825c8564c30SJames Wright }
826c8564c30SJames Wright 
827c8564c30SJames Wright /**
828c8564c30SJames Wright   @brief Get input local vectors for `MATCEED` `MatMult()` and `MatMultTranspose()` operations.
829c8564c30SJames Wright 
830c8564c30SJames Wright   Not collective across MPI processes.
831c8564c30SJames Wright 
832c8564c30SJames Wright   @param[in,out]  mat              `MATCEED`
833c8564c30SJames Wright   @param[out]     X_loc            Input PETSc local vector, or NULL
834c8564c30SJames Wright   @param[out]     Y_loc_transpose  Input PETSc local vector for transpose operation, or NULL
835c8564c30SJames Wright 
836c8564c30SJames Wright   @return An error code: 0 - success, otherwise - failure
837c8564c30SJames Wright **/
838c8564c30SJames Wright PetscErrorCode MatCeedGetLocalVectors(Mat mat, Vec *X_loc, Vec *Y_loc_transpose) {
839c8564c30SJames Wright   MatCeedContext ctx;
840c8564c30SJames Wright 
841c8564c30SJames Wright   PetscFunctionBeginUser;
842c8564c30SJames Wright   PetscCall(MatShellGetContext(mat, &ctx));
843c8564c30SJames Wright   if (X_loc) {
844c8564c30SJames Wright     *X_loc = ctx->X_loc;
845c8564c30SJames Wright     PetscCall(PetscObjectReference((PetscObject)*X_loc));
846c8564c30SJames Wright   }
847c8564c30SJames Wright   if (Y_loc_transpose) {
848c8564c30SJames Wright     *Y_loc_transpose = ctx->Y_loc_transpose;
849c8564c30SJames Wright     PetscCall(PetscObjectReference((PetscObject)*Y_loc_transpose));
850c8564c30SJames Wright   }
851c8564c30SJames Wright   PetscFunctionReturn(PETSC_SUCCESS);
852c8564c30SJames Wright }
853c8564c30SJames Wright 
854c8564c30SJames Wright /**
855c8564c30SJames Wright   @brief Restore input local vectors for `MATCEED` `MatMult()` and `MatMultTranspose()` operations.
856c8564c30SJames Wright 
857c8564c30SJames Wright   Not collective across MPI processes.
858c8564c30SJames Wright 
859c8564c30SJames Wright   @param[in,out]  mat              MatCeed
860c8564c30SJames Wright   @param[out]     X_loc            Input PETSc local vector, or NULL
861c8564c30SJames Wright   @param[out]     Y_loc_transpose  Input PETSc local vector for transpose operation, or NULL
862c8564c30SJames Wright 
863c8564c30SJames Wright   @return An error code: 0 - success, otherwise - failure
864c8564c30SJames Wright **/
865c8564c30SJames Wright PetscErrorCode MatCeedRestoreLocalVectors(Mat mat, Vec *X_loc, Vec *Y_loc_transpose) {
866c8564c30SJames Wright   PetscFunctionBeginUser;
867c8564c30SJames Wright   if (X_loc) PetscCall(VecDestroy(X_loc));
868c8564c30SJames Wright   if (Y_loc_transpose) PetscCall(VecDestroy(Y_loc_transpose));
869c8564c30SJames Wright   PetscFunctionReturn(PETSC_SUCCESS);
870c8564c30SJames Wright }
871c8564c30SJames Wright 
872c8564c30SJames Wright /**
873c8564c30SJames Wright   @brief Get libCEED `CeedOperator` for `MATCEED` `MatMult()` and `MatMultTranspose()` operations.
874c8564c30SJames Wright 
875c8564c30SJames Wright   Not collective across MPI processes.
876c8564c30SJames Wright 
877c8564c30SJames Wright   @param[in,out]  mat                MatCeed
878c8564c30SJames Wright   @param[out]     op_mult            libCEED `CeedOperator` for `MatMult()`, or NULL
879c8564c30SJames Wright   @param[out]     op_mult_transpose  libCEED `CeedOperator` for `MatMultTranspose()`, or NULL
880c8564c30SJames Wright 
881c8564c30SJames Wright   @return An error code: 0 - success, otherwise - failure
882c8564c30SJames Wright **/
883c8564c30SJames Wright PetscErrorCode MatCeedGetCeedOperators(Mat mat, CeedOperator *op_mult, CeedOperator *op_mult_transpose) {
884c8564c30SJames Wright   MatCeedContext ctx;
885c8564c30SJames Wright 
886c8564c30SJames Wright   PetscFunctionBeginUser;
887c8564c30SJames Wright   PetscCall(MatShellGetContext(mat, &ctx));
888c8564c30SJames Wright   if (op_mult) {
889c8564c30SJames Wright     *op_mult = NULL;
89054831c5fSJames Wright     PetscCallCeed(ctx->ceed, CeedOperatorReferenceCopy(ctx->op_mult, op_mult));
891c8564c30SJames Wright   }
892c8564c30SJames Wright   if (op_mult_transpose) {
893c8564c30SJames Wright     *op_mult_transpose = NULL;
89454831c5fSJames Wright     PetscCallCeed(ctx->ceed, CeedOperatorReferenceCopy(ctx->op_mult_transpose, op_mult_transpose));
895c8564c30SJames Wright   }
896c8564c30SJames Wright   PetscFunctionReturn(PETSC_SUCCESS);
897c8564c30SJames Wright }
898c8564c30SJames Wright 
899c8564c30SJames Wright /**
900c8564c30SJames Wright   @brief Restore libCEED `CeedOperator` for `MATCEED` `MatMult()` and `MatMultTranspose()` operations.
901c8564c30SJames Wright 
902c8564c30SJames Wright   Not collective across MPI processes.
903c8564c30SJames Wright 
904c8564c30SJames Wright   @param[in,out]  mat                MatCeed
905c8564c30SJames Wright   @param[out]     op_mult            libCEED `CeedOperator` for `MatMult()`, or NULL
906c8564c30SJames Wright   @param[out]     op_mult_transpose  libCEED `CeedOperator` for `MatMultTranspose()`, or NULL
907c8564c30SJames Wright 
908c8564c30SJames Wright   @return An error code: 0 - success, otherwise - failure
909c8564c30SJames Wright **/
910c8564c30SJames Wright PetscErrorCode MatCeedRestoreCeedOperators(Mat mat, CeedOperator *op_mult, CeedOperator *op_mult_transpose) {
911c8564c30SJames Wright   MatCeedContext ctx;
912c8564c30SJames Wright 
913c8564c30SJames Wright   PetscFunctionBeginUser;
914c8564c30SJames Wright   PetscCall(MatShellGetContext(mat, &ctx));
91554831c5fSJames Wright   if (op_mult) PetscCallCeed(ctx->ceed, CeedOperatorDestroy(op_mult));
91654831c5fSJames Wright   if (op_mult_transpose) PetscCallCeed(ctx->ceed, CeedOperatorDestroy(op_mult_transpose));
917c8564c30SJames Wright   PetscFunctionReturn(PETSC_SUCCESS);
918c8564c30SJames Wright }
919c8564c30SJames Wright 
920c8564c30SJames Wright /**
921c8564c30SJames Wright   @brief Set `PetscLogEvent` for `MATCEED` `MatMult()` and `MatMultTranspose()` operators.
922c8564c30SJames Wright 
923c8564c30SJames Wright   Not collective across MPI processes.
924c8564c30SJames Wright 
925c8564c30SJames Wright   @param[in,out]  mat                       MatCeed
926c8564c30SJames Wright   @param[out]     log_event_mult            `PetscLogEvent` for forward evaluation, or NULL
927c8564c30SJames Wright   @param[out]     log_event_mult_transpose  `PetscLogEvent` for transpose evaluation, or NULL
928c8564c30SJames Wright 
929c8564c30SJames Wright   @return An error code: 0 - success, otherwise - failure
930c8564c30SJames Wright **/
931c8564c30SJames Wright PetscErrorCode MatCeedSetLogEvents(Mat mat, PetscLogEvent log_event_mult, PetscLogEvent log_event_mult_transpose) {
932c8564c30SJames Wright   MatCeedContext ctx;
933c8564c30SJames Wright 
934c8564c30SJames Wright   PetscFunctionBeginUser;
935c8564c30SJames Wright   PetscCall(MatShellGetContext(mat, &ctx));
936c8564c30SJames Wright   if (log_event_mult) ctx->log_event_mult = log_event_mult;
937c8564c30SJames Wright   if (log_event_mult_transpose) ctx->log_event_mult_transpose = log_event_mult_transpose;
938c8564c30SJames Wright   PetscFunctionReturn(PETSC_SUCCESS);
939c8564c30SJames Wright }
940c8564c30SJames Wright 
941c8564c30SJames Wright /**
942c8564c30SJames Wright   @brief Get `PetscLogEvent` for `MATCEED` `MatMult()` and `MatMultTranspose()` operators.
943c8564c30SJames Wright 
944c8564c30SJames Wright   Not collective across MPI processes.
945c8564c30SJames Wright 
946c8564c30SJames Wright   @param[in,out]  mat                       MatCeed
947c8564c30SJames Wright   @param[out]     log_event_mult            `PetscLogEvent` for forward evaluation, or NULL
948c8564c30SJames Wright   @param[out]     log_event_mult_transpose  `PetscLogEvent` for transpose evaluation, or NULL
949c8564c30SJames Wright 
950c8564c30SJames Wright   @return An error code: 0 - success, otherwise - failure
951c8564c30SJames Wright **/
952c8564c30SJames Wright PetscErrorCode MatCeedGetLogEvents(Mat mat, PetscLogEvent *log_event_mult, PetscLogEvent *log_event_mult_transpose) {
953c8564c30SJames Wright   MatCeedContext ctx;
954c8564c30SJames Wright 
955c8564c30SJames Wright   PetscFunctionBeginUser;
956c8564c30SJames Wright   PetscCall(MatShellGetContext(mat, &ctx));
957c8564c30SJames Wright   if (log_event_mult) *log_event_mult = ctx->log_event_mult;
958c8564c30SJames Wright   if (log_event_mult_transpose) *log_event_mult_transpose = ctx->log_event_mult_transpose;
959c8564c30SJames Wright   PetscFunctionReturn(PETSC_SUCCESS);
960c8564c30SJames Wright }
961c8564c30SJames Wright 
962c8564c30SJames Wright // -----------------------------------------------------------------------------
963c8564c30SJames Wright // Operator context data
964c8564c30SJames Wright // -----------------------------------------------------------------------------
965c8564c30SJames Wright 
966c8564c30SJames Wright /**
967c8564c30SJames Wright   @brief Setup context data for operator application.
968c8564c30SJames Wright 
969c8564c30SJames Wright   Collective across MPI processes.
970c8564c30SJames Wright 
971c8564c30SJames Wright   @param[in]   dm_x                      Input `DM`
972c8564c30SJames Wright   @param[in]   dm_y                      Output `DM`
973c8564c30SJames Wright   @param[in]   X_loc                     Input PETSc local vector, or NULL
974c8564c30SJames Wright   @param[in]   Y_loc_transpose           Input PETSc local vector for transpose operation, or NULL
975c8564c30SJames Wright   @param[in]   op_mult                   `CeedOperator` for forward evaluation
976c8564c30SJames Wright   @param[in]   op_mult_transpose         `CeedOperator` for transpose evaluation
977c8564c30SJames Wright   @param[in]   log_event_mult            `PetscLogEvent` for forward evaluation
978c8564c30SJames Wright   @param[in]   log_event_mult_transpose  `PetscLogEvent` for transpose evaluation
979c8564c30SJames Wright   @param[out]  ctx                       Context data for operator evaluation
980c8564c30SJames Wright 
981c8564c30SJames Wright   @return An error code: 0 - success, otherwise - failure
982c8564c30SJames Wright **/
983c8564c30SJames Wright PetscErrorCode MatCeedContextCreate(DM dm_x, DM dm_y, Vec X_loc, Vec Y_loc_transpose, CeedOperator op_mult, CeedOperator op_mult_transpose,
984c8564c30SJames Wright                                     PetscLogEvent log_event_mult, PetscLogEvent log_event_mult_transpose, MatCeedContext *ctx) {
985c8564c30SJames Wright   CeedSize x_loc_len, y_loc_len;
986c8564c30SJames Wright 
987c8564c30SJames Wright   PetscFunctionBeginUser;
988c8564c30SJames Wright 
989c8564c30SJames Wright   // Allocate
990c8564c30SJames Wright   PetscCall(PetscNew(ctx));
991c8564c30SJames Wright   (*ctx)->ref_count = 1;
992c8564c30SJames Wright 
993c8564c30SJames Wright   // Logging
994c8564c30SJames Wright   (*ctx)->log_event_mult           = log_event_mult;
995c8564c30SJames Wright   (*ctx)->log_event_mult_transpose = log_event_mult_transpose;
996c8564c30SJames Wright 
997c8564c30SJames Wright   // PETSc objects
998c8564c30SJames Wright   PetscCall(PetscObjectReference((PetscObject)dm_x));
999c8564c30SJames Wright   (*ctx)->dm_x = dm_x;
1000c8564c30SJames Wright   PetscCall(PetscObjectReference((PetscObject)dm_y));
1001c8564c30SJames Wright   (*ctx)->dm_y = dm_y;
1002c8564c30SJames Wright   if (X_loc) PetscCall(PetscObjectReference((PetscObject)X_loc));
1003c8564c30SJames Wright   (*ctx)->X_loc = X_loc;
1004c8564c30SJames Wright   if (Y_loc_transpose) PetscCall(PetscObjectReference((PetscObject)Y_loc_transpose));
1005c8564c30SJames Wright   (*ctx)->Y_loc_transpose = Y_loc_transpose;
1006c8564c30SJames Wright 
1007c8564c30SJames Wright   // Memtype
1008c8564c30SJames Wright   {
1009c8564c30SJames Wright     const PetscScalar *x;
1010c8564c30SJames Wright     Vec                X;
1011c8564c30SJames Wright 
1012c8564c30SJames Wright     PetscCall(DMGetLocalVector(dm_x, &X));
1013c8564c30SJames Wright     PetscCall(VecGetArrayReadAndMemType(X, &x, &(*ctx)->mem_type));
1014c8564c30SJames Wright     PetscCall(VecRestoreArrayReadAndMemType(X, &x));
1015c8564c30SJames Wright     PetscCall(DMRestoreLocalVector(dm_x, &X));
1016c8564c30SJames Wright   }
1017c8564c30SJames Wright 
1018c8564c30SJames Wright   // libCEED objects
1019c8564c30SJames Wright   PetscCheck(CeedOperatorGetCeed(op_mult, &(*ctx)->ceed) == CEED_ERROR_SUCCESS, PETSC_COMM_SELF, PETSC_ERR_LIB,
1020c8564c30SJames Wright              "retrieving Ceed context object failed");
102154831c5fSJames Wright   PetscCallCeed((*ctx)->ceed, CeedReference((*ctx)->ceed));
102254831c5fSJames Wright   PetscCallCeed((*ctx)->ceed, CeedOperatorGetActiveVectorLengths(op_mult, &x_loc_len, &y_loc_len));
102354831c5fSJames Wright   PetscCallCeed((*ctx)->ceed, CeedOperatorReferenceCopy(op_mult, &(*ctx)->op_mult));
102454831c5fSJames Wright   if (op_mult_transpose) PetscCallCeed((*ctx)->ceed, CeedOperatorReferenceCopy(op_mult_transpose, &(*ctx)->op_mult_transpose));
102554831c5fSJames Wright   PetscCallCeed((*ctx)->ceed, CeedVectorCreate((*ctx)->ceed, x_loc_len, &(*ctx)->x_loc));
102654831c5fSJames Wright   PetscCallCeed((*ctx)->ceed, CeedVectorCreate((*ctx)->ceed, y_loc_len, &(*ctx)->y_loc));
1027c8564c30SJames Wright 
1028c8564c30SJames Wright   // Flop counting
1029c8564c30SJames Wright   {
1030c8564c30SJames Wright     CeedSize ceed_flops_estimate = 0;
1031c8564c30SJames Wright 
103254831c5fSJames Wright     PetscCallCeed((*ctx)->ceed, CeedOperatorGetFlopsEstimate(op_mult, &ceed_flops_estimate));
1033c8564c30SJames Wright     (*ctx)->flops_mult = ceed_flops_estimate;
1034c8564c30SJames Wright     if (op_mult_transpose) {
103554831c5fSJames Wright       PetscCallCeed((*ctx)->ceed, CeedOperatorGetFlopsEstimate(op_mult_transpose, &ceed_flops_estimate));
1036c8564c30SJames Wright       (*ctx)->flops_mult_transpose = ceed_flops_estimate;
1037c8564c30SJames Wright     }
1038c8564c30SJames Wright   }
1039c8564c30SJames Wright 
1040c8564c30SJames Wright   // Check sizes
1041c8564c30SJames Wright   if (x_loc_len > 0 || y_loc_len > 0) {
1042c8564c30SJames Wright     CeedSize ctx_x_loc_len, ctx_y_loc_len;
1043c8564c30SJames Wright     PetscInt X_loc_len, dm_x_loc_len, Y_loc_len, dm_y_loc_len;
1044c8564c30SJames Wright     Vec      dm_X_loc, dm_Y_loc;
1045c8564c30SJames Wright 
1046c8564c30SJames Wright     // -- Input
1047c8564c30SJames Wright     PetscCall(DMGetLocalVector(dm_x, &dm_X_loc));
1048c8564c30SJames Wright     PetscCall(VecGetLocalSize(dm_X_loc, &dm_x_loc_len));
1049c8564c30SJames Wright     PetscCall(DMRestoreLocalVector(dm_x, &dm_X_loc));
105054831c5fSJames Wright     PetscCallCeed((*ctx)->ceed, CeedVectorGetLength((*ctx)->x_loc, &ctx_x_loc_len));
1051c86b9822SJames Wright     if (X_loc) {
1052c86b9822SJames Wright       PetscCall(VecGetLocalSize(X_loc, &X_loc_len));
1053c86b9822SJames Wright       PetscCheck(X_loc_len == dm_x_loc_len, PETSC_COMM_SELF, PETSC_ERR_LIB,
1054c86b9822SJames Wright                  "X_loc (%" PetscInt_FMT ") must match dm_x (%" PetscInt_FMT ") dimensions", X_loc_len, dm_x_loc_len);
1055c86b9822SJames Wright     }
1056c86b9822SJames Wright     PetscCheck(x_loc_len == dm_x_loc_len, PETSC_COMM_SELF, PETSC_ERR_LIB, "op (%" CeedSize_FMT ") must match dm_x (%" PetscInt_FMT ") dimensions",
1057c86b9822SJames Wright                x_loc_len, dm_x_loc_len);
1058c86b9822SJames Wright     PetscCheck(x_loc_len == ctx_x_loc_len, PETSC_COMM_SELF, PETSC_ERR_LIB, "x_loc (%" CeedSize_FMT ") must match op dimensions (%" CeedSize_FMT ")",
1059c86b9822SJames Wright                x_loc_len, ctx_x_loc_len);
1060c8564c30SJames Wright 
1061c8564c30SJames Wright     // -- Output
1062c8564c30SJames Wright     PetscCall(DMGetLocalVector(dm_y, &dm_Y_loc));
1063c8564c30SJames Wright     PetscCall(VecGetLocalSize(dm_Y_loc, &dm_y_loc_len));
1064c8564c30SJames Wright     PetscCall(DMRestoreLocalVector(dm_y, &dm_Y_loc));
106554831c5fSJames Wright     PetscCallCeed((*ctx)->ceed, CeedVectorGetLength((*ctx)->y_loc, &ctx_y_loc_len));
1066c86b9822SJames Wright     PetscCheck(ctx_y_loc_len == dm_y_loc_len, PETSC_COMM_SELF, PETSC_ERR_LIB, "op (%" CeedSize_FMT ") must match dm_y (%" PetscInt_FMT ") dimensions",
1067c86b9822SJames Wright                ctx_y_loc_len, dm_y_loc_len);
1068c8564c30SJames Wright 
1069c8564c30SJames Wright     // -- Transpose
1070c8564c30SJames Wright     if (Y_loc_transpose) {
1071c8564c30SJames Wright       PetscCall(VecGetLocalSize(Y_loc_transpose, &Y_loc_len));
1072c86b9822SJames Wright       PetscCheck(Y_loc_len == dm_y_loc_len, PETSC_COMM_SELF, PETSC_ERR_LIB,
1073c86b9822SJames Wright                  "Y_loc_transpose (%" PetscInt_FMT ") must match dm_y (%" PetscInt_FMT ") dimensions", Y_loc_len, dm_y_loc_len);
1074c8564c30SJames Wright     }
1075c8564c30SJames Wright   }
1076c8564c30SJames Wright   PetscFunctionReturn(PETSC_SUCCESS);
1077c8564c30SJames Wright }
1078c8564c30SJames Wright 
1079c8564c30SJames Wright /**
1080c8564c30SJames Wright   @brief Increment reference counter for `MATCEED` context.
1081c8564c30SJames Wright 
1082c8564c30SJames Wright   Not collective across MPI processes.
1083c8564c30SJames Wright 
1084c8564c30SJames Wright   @param[in,out]  ctx  Context data
1085c8564c30SJames Wright 
1086c8564c30SJames Wright   @return An error code: 0 - success, otherwise - failure
1087c8564c30SJames Wright **/
1088c8564c30SJames Wright PetscErrorCode MatCeedContextReference(MatCeedContext ctx) {
1089c8564c30SJames Wright   PetscFunctionBeginUser;
1090c8564c30SJames Wright   ctx->ref_count++;
1091c8564c30SJames Wright   PetscFunctionReturn(PETSC_SUCCESS);
1092c8564c30SJames Wright }
1093c8564c30SJames Wright 
1094c8564c30SJames Wright /**
1095c8564c30SJames Wright   @brief Copy reference for `MATCEED`.
1096c8564c30SJames Wright          Note: If `ctx_copy` is non-null, it is assumed to be a valid pointer to a `MatCeedContext`.
1097c8564c30SJames Wright 
1098c8564c30SJames Wright   Not collective across MPI processes.
1099c8564c30SJames Wright 
1100c8564c30SJames Wright   @param[in]   ctx       Context data
1101c8564c30SJames Wright   @param[out]  ctx_copy  Copy of pointer to context data
1102c8564c30SJames Wright 
1103c8564c30SJames Wright   @return An error code: 0 - success, otherwise - failure
1104c8564c30SJames Wright **/
1105c8564c30SJames Wright PetscErrorCode MatCeedContextReferenceCopy(MatCeedContext ctx, MatCeedContext *ctx_copy) {
1106c8564c30SJames Wright   PetscFunctionBeginUser;
1107c8564c30SJames Wright   PetscCall(MatCeedContextReference(ctx));
1108c8564c30SJames Wright   PetscCall(MatCeedContextDestroy(*ctx_copy));
1109c8564c30SJames Wright   *ctx_copy = ctx;
1110c8564c30SJames Wright   PetscFunctionReturn(PETSC_SUCCESS);
1111c8564c30SJames Wright }
1112c8564c30SJames Wright 
1113c8564c30SJames Wright /**
1114c8564c30SJames Wright   @brief Destroy context data for operator application.
1115c8564c30SJames Wright 
1116c8564c30SJames Wright   Collective across MPI processes.
1117c8564c30SJames Wright 
1118c8564c30SJames Wright   @param[in,out]  ctx  Context data for operator evaluation
1119c8564c30SJames Wright 
1120c8564c30SJames Wright   @return An error code: 0 - success, otherwise - failure
1121c8564c30SJames Wright **/
1122c8564c30SJames Wright PetscErrorCode MatCeedContextDestroy(MatCeedContext ctx) {
1123c8564c30SJames Wright   PetscFunctionBeginUser;
1124c8564c30SJames Wright   if (!ctx || --ctx->ref_count > 0) PetscFunctionReturn(PETSC_SUCCESS);
1125c8564c30SJames Wright 
1126c8564c30SJames Wright   // PETSc objects
1127c8564c30SJames Wright   PetscCall(DMDestroy(&ctx->dm_x));
1128c8564c30SJames Wright   PetscCall(DMDestroy(&ctx->dm_y));
1129c8564c30SJames Wright   PetscCall(VecDestroy(&ctx->X_loc));
1130c8564c30SJames Wright   PetscCall(VecDestroy(&ctx->Y_loc_transpose));
1131c8564c30SJames Wright   PetscCall(MatDestroy(&ctx->mat_assembled_full_internal));
1132c8564c30SJames Wright   PetscCall(MatDestroy(&ctx->mat_assembled_pbd_internal));
1133c8564c30SJames Wright   PetscCall(PetscFree(ctx->internal_mat_type));
1134c8564c30SJames Wright   PetscCall(PetscFree(ctx->mats_assembled_full));
1135c8564c30SJames Wright   PetscCall(PetscFree(ctx->mats_assembled_pbd));
1136c8564c30SJames Wright 
1137c8564c30SJames Wright   // libCEED objects
113854831c5fSJames Wright   PetscCallCeed(ctx->ceed, CeedVectorDestroy(&ctx->x_loc));
113954831c5fSJames Wright   PetscCallCeed(ctx->ceed, CeedVectorDestroy(&ctx->y_loc));
114054831c5fSJames Wright   PetscCallCeed(ctx->ceed, CeedVectorDestroy(&ctx->coo_values_full));
114154831c5fSJames Wright   PetscCallCeed(ctx->ceed, CeedVectorDestroy(&ctx->coo_values_pbd));
114254831c5fSJames Wright   PetscCallCeed(ctx->ceed, CeedOperatorDestroy(&ctx->op_mult));
114354831c5fSJames Wright   PetscCallCeed(ctx->ceed, CeedOperatorDestroy(&ctx->op_mult_transpose));
1144c8564c30SJames Wright   PetscCheck(CeedDestroy(&ctx->ceed) == CEED_ERROR_SUCCESS, PETSC_COMM_SELF, PETSC_ERR_LIB, "destroying libCEED context object failed");
1145c8564c30SJames Wright 
1146c8564c30SJames Wright   // Deallocate
1147c8564c30SJames Wright   ctx->is_destroyed = PETSC_TRUE;  // Flag as destroyed in case someone has stale ref
1148c8564c30SJames Wright   PetscCall(PetscFree(ctx));
1149c8564c30SJames Wright   PetscFunctionReturn(PETSC_SUCCESS);
1150c8564c30SJames Wright }
1151c8564c30SJames Wright 
1152c8564c30SJames Wright /**
1153c8564c30SJames Wright   @brief Compute the diagonal of an operator via libCEED.
1154c8564c30SJames Wright 
1155c8564c30SJames Wright   Collective across MPI processes.
1156c8564c30SJames Wright 
1157c8564c30SJames Wright   @param[in]   A  `MATCEED`
1158c8564c30SJames Wright   @param[out]  D  Vector holding operator diagonal
1159c8564c30SJames Wright 
1160c8564c30SJames Wright   @return An error code: 0 - success, otherwise - failure
1161c8564c30SJames Wright **/
1162c8564c30SJames Wright PetscErrorCode MatGetDiagonal_Ceed(Mat A, Vec D) {
1163c8564c30SJames Wright   PetscMemType   mem_type;
1164c8564c30SJames Wright   Vec            D_loc;
1165c8564c30SJames Wright   MatCeedContext ctx;
1166c8564c30SJames Wright 
1167c8564c30SJames Wright   PetscFunctionBeginUser;
1168c8564c30SJames Wright   PetscCall(MatShellGetContext(A, &ctx));
1169c8564c30SJames Wright 
1170c8564c30SJames Wright   // Place PETSc vector in libCEED vector
1171c8564c30SJames Wright   PetscCall(DMGetLocalVector(ctx->dm_x, &D_loc));
1172d0593705SJames Wright   PetscCall(VecPetscToCeed(D_loc, &mem_type, ctx->x_loc));
1173c8564c30SJames Wright 
1174c8564c30SJames Wright   // Compute Diagonal
117554831c5fSJames Wright   PetscCallCeed(ctx->ceed, CeedOperatorLinearAssembleDiagonal(ctx->op_mult, ctx->x_loc, CEED_REQUEST_IMMEDIATE));
1176c8564c30SJames Wright 
1177c8564c30SJames Wright   // Restore PETSc vector
1178d0593705SJames Wright   PetscCall(VecCeedToPetsc(ctx->x_loc, mem_type, D_loc));
1179c8564c30SJames Wright 
1180c8564c30SJames Wright   // Local-to-Global
1181c8564c30SJames Wright   PetscCall(VecZeroEntries(D));
1182c8564c30SJames Wright   PetscCall(DMLocalToGlobal(ctx->dm_x, D_loc, ADD_VALUES, D));
1183c8564c30SJames Wright   PetscCall(DMRestoreLocalVector(ctx->dm_x, &D_loc));
1184c8564c30SJames Wright   PetscFunctionReturn(PETSC_SUCCESS);
1185c8564c30SJames Wright }
1186c8564c30SJames Wright 
1187c8564c30SJames Wright /**
1188c8564c30SJames Wright   @brief Compute `A X = Y` for a `MATCEED`.
1189c8564c30SJames Wright 
1190c8564c30SJames Wright   Collective across MPI processes.
1191c8564c30SJames Wright 
1192c8564c30SJames Wright   @param[in]   A  `MATCEED`
1193c8564c30SJames Wright   @param[in]   X  Input PETSc vector
1194c8564c30SJames Wright   @param[out]  Y  Output PETSc vector
1195c8564c30SJames Wright 
1196c8564c30SJames Wright   @return An error code: 0 - success, otherwise - failure
1197c8564c30SJames Wright **/
1198c8564c30SJames Wright PetscErrorCode MatMult_Ceed(Mat A, Vec X, Vec Y) {
1199c8564c30SJames Wright   MatCeedContext ctx;
1200c8564c30SJames Wright 
1201c8564c30SJames Wright   PetscFunctionBeginUser;
1202c8564c30SJames Wright   PetscCall(MatShellGetContext(A, &ctx));
1203c8564c30SJames Wright   PetscCall(PetscLogEventBegin(ctx->log_event_mult, A, X, Y, 0));
1204c8564c30SJames Wright 
1205c8564c30SJames Wright   {
1206c8564c30SJames Wright     PetscMemType x_mem_type, y_mem_type;
1207c8564c30SJames Wright     Vec          X_loc = ctx->X_loc, Y_loc;
1208c8564c30SJames Wright 
1209c8564c30SJames Wright     // Get local vectors
1210c8564c30SJames Wright     if (!ctx->X_loc) PetscCall(DMGetLocalVector(ctx->dm_x, &X_loc));
1211c8564c30SJames Wright     PetscCall(DMGetLocalVector(ctx->dm_y, &Y_loc));
1212c8564c30SJames Wright 
1213c8564c30SJames Wright     // Global-to-local
1214c8564c30SJames Wright     PetscCall(DMGlobalToLocal(ctx->dm_x, X, INSERT_VALUES, X_loc));
1215c8564c30SJames Wright 
1216c8564c30SJames Wright     // Setup libCEED vectors
1217d0593705SJames Wright     PetscCall(VecReadPetscToCeed(X_loc, &x_mem_type, ctx->x_loc));
1218c8564c30SJames Wright     PetscCall(VecZeroEntries(Y_loc));
1219d0593705SJames Wright     PetscCall(VecPetscToCeed(Y_loc, &y_mem_type, ctx->y_loc));
1220c8564c30SJames Wright 
1221c8564c30SJames Wright     // Apply libCEED operator
1222c8564c30SJames Wright     PetscCall(PetscLogGpuTimeBegin());
122354831c5fSJames Wright     PetscCallCeed(ctx->ceed, CeedOperatorApplyAdd(ctx->op_mult, ctx->x_loc, ctx->y_loc, CEED_REQUEST_IMMEDIATE));
1224c8564c30SJames Wright     PetscCall(PetscLogGpuTimeEnd());
1225c8564c30SJames Wright 
1226c8564c30SJames Wright     // Restore PETSc vectors
1227d0593705SJames Wright     PetscCall(VecReadCeedToPetsc(ctx->x_loc, x_mem_type, X_loc));
1228d0593705SJames Wright     PetscCall(VecCeedToPetsc(ctx->y_loc, y_mem_type, Y_loc));
1229c8564c30SJames Wright 
1230c8564c30SJames Wright     // Local-to-global
1231c8564c30SJames Wright     PetscCall(VecZeroEntries(Y));
1232c8564c30SJames Wright     PetscCall(DMLocalToGlobal(ctx->dm_y, Y_loc, ADD_VALUES, Y));
1233c8564c30SJames Wright 
1234c8564c30SJames Wright     // Restore local vectors, as needed
1235c8564c30SJames Wright     if (!ctx->X_loc) PetscCall(DMRestoreLocalVector(ctx->dm_x, &X_loc));
1236c8564c30SJames Wright     PetscCall(DMRestoreLocalVector(ctx->dm_y, &Y_loc));
1237c8564c30SJames Wright   }
1238c8564c30SJames Wright 
1239c8564c30SJames Wright   // Log flops
1240c8564c30SJames Wright   if (PetscMemTypeDevice(ctx->mem_type)) PetscCall(PetscLogGpuFlops(ctx->flops_mult));
1241c8564c30SJames Wright   else PetscCall(PetscLogFlops(ctx->flops_mult));
1242c8564c30SJames Wright 
1243c8564c30SJames Wright   PetscCall(PetscLogEventEnd(ctx->log_event_mult, A, X, Y, 0));
1244c8564c30SJames Wright   PetscFunctionReturn(PETSC_SUCCESS);
1245c8564c30SJames Wright }
1246c8564c30SJames Wright 
1247c8564c30SJames Wright /**
1248c8564c30SJames Wright   @brief Compute `A^T Y = X` for a `MATCEED`.
1249c8564c30SJames Wright 
1250c8564c30SJames Wright   Collective across MPI processes.
1251c8564c30SJames Wright 
1252c8564c30SJames Wright   @param[in]   A  `MATCEED`
1253c8564c30SJames Wright   @param[in]   Y  Input PETSc vector
1254c8564c30SJames Wright   @param[out]  X  Output PETSc vector
1255c8564c30SJames Wright 
1256c8564c30SJames Wright   @return An error code: 0 - success, otherwise - failure
1257c8564c30SJames Wright **/
1258c8564c30SJames Wright PetscErrorCode MatMultTranspose_Ceed(Mat A, Vec Y, Vec X) {
1259c8564c30SJames Wright   MatCeedContext ctx;
1260c8564c30SJames Wright 
1261c8564c30SJames Wright   PetscFunctionBeginUser;
1262c8564c30SJames Wright   PetscCall(MatShellGetContext(A, &ctx));
1263c8564c30SJames Wright   PetscCall(PetscLogEventBegin(ctx->log_event_mult_transpose, A, Y, X, 0));
1264c8564c30SJames Wright 
1265c8564c30SJames Wright   {
1266c8564c30SJames Wright     PetscMemType x_mem_type, y_mem_type;
1267c8564c30SJames Wright     Vec          X_loc, Y_loc = ctx->Y_loc_transpose;
1268c8564c30SJames Wright 
1269c8564c30SJames Wright     // Get local vectors
1270c8564c30SJames Wright     if (!ctx->Y_loc_transpose) PetscCall(DMGetLocalVector(ctx->dm_y, &Y_loc));
1271c8564c30SJames Wright     PetscCall(DMGetLocalVector(ctx->dm_x, &X_loc));
1272c8564c30SJames Wright 
1273c8564c30SJames Wright     // Global-to-local
1274c8564c30SJames Wright     PetscCall(DMGlobalToLocal(ctx->dm_y, Y, INSERT_VALUES, Y_loc));
1275c8564c30SJames Wright 
1276c8564c30SJames Wright     // Setup libCEED vectors
1277d0593705SJames Wright     PetscCall(VecReadPetscToCeed(Y_loc, &y_mem_type, ctx->y_loc));
1278c8564c30SJames Wright     PetscCall(VecZeroEntries(X_loc));
1279d0593705SJames Wright     PetscCall(VecPetscToCeed(X_loc, &x_mem_type, ctx->x_loc));
1280c8564c30SJames Wright 
1281c8564c30SJames Wright     // Apply libCEED operator
1282c8564c30SJames Wright     PetscCall(PetscLogGpuTimeBegin());
128354831c5fSJames Wright     PetscCallCeed(ctx->ceed, CeedOperatorApplyAdd(ctx->op_mult_transpose, ctx->y_loc, ctx->x_loc, CEED_REQUEST_IMMEDIATE));
1284c8564c30SJames Wright     PetscCall(PetscLogGpuTimeEnd());
1285c8564c30SJames Wright 
1286c8564c30SJames Wright     // Restore PETSc vectors
1287d0593705SJames Wright     PetscCall(VecReadCeedToPetsc(ctx->y_loc, y_mem_type, Y_loc));
1288d0593705SJames Wright     PetscCall(VecCeedToPetsc(ctx->x_loc, x_mem_type, X_loc));
1289c8564c30SJames Wright 
1290c8564c30SJames Wright     // Local-to-global
1291c8564c30SJames Wright     PetscCall(VecZeroEntries(X));
1292c8564c30SJames Wright     PetscCall(DMLocalToGlobal(ctx->dm_x, X_loc, ADD_VALUES, X));
1293c8564c30SJames Wright 
1294c8564c30SJames Wright     // Restore local vectors, as needed
1295c8564c30SJames Wright     if (!ctx->Y_loc_transpose) PetscCall(DMRestoreLocalVector(ctx->dm_y, &Y_loc));
1296c8564c30SJames Wright     PetscCall(DMRestoreLocalVector(ctx->dm_x, &X_loc));
1297c8564c30SJames Wright   }
1298c8564c30SJames Wright 
1299c8564c30SJames Wright   // Log flops
1300c8564c30SJames Wright   if (PetscMemTypeDevice(ctx->mem_type)) PetscCall(PetscLogGpuFlops(ctx->flops_mult_transpose));
1301c8564c30SJames Wright   else PetscCall(PetscLogFlops(ctx->flops_mult_transpose));
1302c8564c30SJames Wright 
1303c8564c30SJames Wright   PetscCall(PetscLogEventEnd(ctx->log_event_mult_transpose, A, Y, X, 0));
1304c8564c30SJames Wright   PetscFunctionReturn(PETSC_SUCCESS);
1305c8564c30SJames Wright }
1306