1 // Copyright (c) 2017-2026, Lawrence Livermore National Security, LLC and other CEED contributors.
2 // All Rights Reserved. See the top-level LICENSE and NOTICE files for details.
3 //
4 // SPDX-License-Identifier: BSD-2-Clause
5 //
6 // This file is part of CEED: http://github.com/ceed
7
8 // libCEED + PETSc Example: CEED BPs
9 //
10 // This example demonstrates a simple usage of libCEED with PETSc to solve the CEED BP benchmark problems, see http://ceed.exascaleproject.org/bps.
11 //
12 // The code is intentionally "raw", using only low-level communication primitives.
13 //
14 // Build with:
15 //
16 // make bpsraw [PETSC_DIR=</path/to/petsc>] [CEED_DIR=</path/to/libceed>]
17 //
18 // Sample runs:
19 //
20 // ./bpsraw -problem bp1
21 // ./bpsraw -problem bp2
22 // ./bpsraw -problem bp3
23 // ./bpsraw -problem bp4
24 // ./bpsraw -problem bp5 -ceed /cpu/self
25 // ./bpsraw -problem bp6 -ceed /gpu/cuda
26 //
27 //TESTARGS -ceed {ceed_resource} -test -problem bp2 -degree 5 -q_extra 1 -ksp_max_it_clip 15,15
28
29 /// @file
30 /// CEED BPs example using PETSc
31 /// See bps.c for an implementation using DMPlex unstructured grids.
32 const char help[] = "Solve CEED BPs using PETSc\n";
33
34 #include <ceed.h>
35 #include <petscdm.h>
36 #include <petscksp.h>
37 #include <stdbool.h>
38 #include <string.h>
39
40 #include "qfunctions/bps/bp1.h"
41 #include "qfunctions/bps/bp2.h"
42 #include "qfunctions/bps/bp3.h"
43 #include "qfunctions/bps/bp4.h"
44 #include "qfunctions/bps/common.h"
45
MemTypeP2C(PetscMemType mem_type)46 static CeedMemType MemTypeP2C(PetscMemType mem_type) { return PetscMemTypeDevice(mem_type) ? CEED_MEM_DEVICE : CEED_MEM_HOST; }
47
// Factor size into three integers, stored in m[], that are as close to equal as
// possible; reverse controls whether factors are stored largest-first or last.
static void Split3(PetscInt size, PetscInt m[3], bool reverse) {
  PetscInt remaining = size;

  for (PetscInt d = 0; d < 3; d++) {
    // Start from the d-th root of what is left, then walk up to the next divisor
    PetscInt factor = (PetscInt)PetscCeilReal(PetscPowReal(remaining, 1. / (3 - d)));

    while (remaining % factor != 0) factor++;
    m[reverse ? 2 - d : d] = factor;
    remaining /= factor;
  }
}
56
// Return the largest of the three entries of a
static PetscInt Max3(const PetscInt a[3]) {
  PetscInt m = a[0];

  if (a[1] > m) m = a[1];
  if (a[2] > m) m = a[2];
  return m;
}
// Return the smallest of the three entries of a
static PetscInt Min3(const PetscInt a[3]) {
  PetscInt m = a[0];

  if (a[1] < m) m = a[1];
  if (a[2] < m) m = a[2];
  return m;
}
// Compute the number of nodes owned in each direction by the process at grid
// position i_rank in a p[0] x p[1] x p[2] process grid with mesh_elem elements
// of polynomial degree `degree` per process.
static void GlobalNodes(const PetscInt p[3], const PetscInt i_rank[3], PetscInt degree, const PetscInt mesh_elem[3], PetscInt m_nodes[3]) {
  for (int d = 0; d < 3; d++) {
    // The last process along each direction also owns the closing node layer
    const PetscInt owns_closing_layer = (i_rank[d] == p[d] - 1) ? 1 : 0;

    m_nodes[d] = degree * mesh_elem[d] + owns_closing_layer;
  }
}
// Return the global offset of the first node owned by the process at grid
// position i_rank, or -1 if i_rank is not inside the process grid.
static PetscInt GlobalStart(const PetscInt p[3], const PetscInt i_rank[3], PetscInt degree, const PetscInt mesh_elem[3]) {
  PetscInt start = 0;

  // Brute-force lexicographic scan of the process grid is easier to read
  for (PetscInt i = 0; i < p[0]; i++) {
    for (PetscInt j = 0; j < p[1]; j++) {
      for (PetscInt k = 0; k < p[2]; k++) {
        const PetscInt ijk_rank[] = {i, j, k};
        PetscInt       m_nodes[3];

        if (i == i_rank[0] && j == i_rank[1] && k == i_rank[2]) return start;
        GlobalNodes(p, ijk_rank, degree, mesh_elem, m_nodes);
        start += m_nodes[0] * m_nodes[1] * m_nodes[2];
      }
    }
  }
  return -1;
}
CreateRestriction(Ceed ceed,const PetscInt mesh_elem[3],CeedInt P,CeedInt num_comp,CeedElemRestriction * elem_restr)77 static PetscErrorCode CreateRestriction(Ceed ceed, const PetscInt mesh_elem[3], CeedInt P, CeedInt num_comp, CeedElemRestriction *elem_restr) {
78 const PetscInt num_elem = mesh_elem[0] * mesh_elem[1] * mesh_elem[2];
79 CeedInt m_nodes[3], *idx, *idx_p;
80
81 PetscFunctionBeginUser;
82 // Get indicies
83 for (int d = 0; d < 3; d++) m_nodes[d] = mesh_elem[d] * (P - 1) + 1;
84 idx_p = idx = malloc(num_elem * P * P * P * sizeof idx[0]);
85 for (CeedInt i = 0; i < mesh_elem[0]; i++) {
86 for (CeedInt j = 0; j < mesh_elem[1]; j++) {
87 for (CeedInt k = 0; k < mesh_elem[2]; k++, idx_p += P * P * P) {
88 for (CeedInt ii = 0; ii < P; ii++) {
89 for (CeedInt jj = 0; jj < P; jj++) {
90 for (CeedInt kk = 0; kk < P; kk++) {
91 if (0) { // This is the C-style (i,j,k) ordering that I prefer
92 idx_p[(ii * P + jj) * P + kk] = num_comp * (((i * (P - 1) + ii) * m_nodes[1] + (j * (P - 1) + jj)) * m_nodes[2] + (k * (P - 1) + kk));
93 } else { // (k,j,i) ordering for consistency with MFEM example
94 idx_p[ii + P * (jj + P * kk)] = num_comp * (((i * (P - 1) + ii) * m_nodes[1] + (j * (P - 1) + jj)) * m_nodes[2] + (k * (P - 1) + kk));
95 }
96 }
97 }
98 }
99 }
100 }
101 }
102
103 // Setup CEED restriction
104 CeedElemRestrictionCreate(ceed, num_elem, P * P * P, num_comp, 1, m_nodes[0] * m_nodes[1] * m_nodes[2] * num_comp, CEED_MEM_HOST, CEED_OWN_POINTER,
105 idx, elem_restr);
106 PetscFunctionReturn(PETSC_SUCCESS);
107 }
108
// Data for PETSc: context passed to the MatShell MatMult callbacks and to
// ComputeErrorMax; bundles the scatters, work vectors, and libCEED objects
// needed to apply the operator.
typedef struct OperatorApplyContext_ *OperatorApplyContext;
struct OperatorApplyContext_ {
  MPI_Comm     comm;            // Communicator for global reductions
  VecScatter   l_to_g;          // Scatter for all entries
  VecScatter   l_to_g_0;        // Skip Dirichlet values
  VecScatter   g_to_g_D;        // global-to-global; only Dirichlet values
  Vec          X_loc, Y_loc;    // Per-process local input/output work vectors
  CeedVector   x_ceed, y_ceed;  // libCEED views aliasing the local vectors' arrays
  CeedOperator op;              // libCEED operator applied in the MatMult callbacks
  CeedVector   q_data;          // Quadrature data referenced by op
  Ceed         ceed;            // libCEED context used to create work vectors
};
122
// BP Options: which CEED benchmark problem to solve (-problem)
typedef enum { CEED_BP1 = 0, CEED_BP2 = 1, CEED_BP3 = 2, CEED_BP4 = 3, CEED_BP5 = 4, CEED_BP6 = 5 } BPType;
// Option strings for PetscOptionsEnum(); the trailing "BPType", "CEED_BP", and 0 entries are the enum type name, option prefix, and terminator it requires
static const char *const bp_types[] = {"bp1", "bp2", "bp3", "bp4", "bp5", "bp6", "BPType", "CEED_BP", 0};
126
// BP specific data: per-problem configuration selecting the QFunctions,
// quadrature rule, and field sizes for each benchmark problem
typedef struct {
  CeedInt          num_comp_u, q_data_size, q_extra;                   // solution components, qdata size per qpt, default extra quadrature points
  CeedQFunctionUser setup_geo, setup_rhs, apply, error;                // QFunction callbacks
  const char      *setup_geo_loc, *setup_rhs_loc, *apply_loc, *error_loc;  // source locations of the QFunctions (for JIT backends)
  CeedEvalMode     in_mode, out_mode;                                  // evaluation modes for the active input/output (INTERP for mass, GRAD for diff)
  CeedQuadMode     q_mode;                                             // quadrature rule (Gauss, or Gauss-Lobatto for collocated BP5/BP6)
} BPData;
135
// Configuration table for the six benchmark problems, indexed by BPType.
// BP1/BP2 are mass-operator problems, BP3-BP6 are diffusion; the *3 variants
// use three solution components; BP5/BP6 use collocated Gauss-Lobatto quadrature.
BPData bp_options[6] = {
    [CEED_BP1] = {.num_comp_u = 1,
                  .q_data_size = 1,
                  .q_extra = 1,
                  .setup_geo = SetupMassGeo,
                  .setup_rhs = SetupMassRhs,
                  .apply = Mass,
                  .error = Error,
                  .setup_geo_loc = SetupMassGeo_loc,
                  .setup_rhs_loc = SetupMassRhs_loc,
                  .apply_loc = Mass_loc,
                  .error_loc = Error_loc,
                  .in_mode = CEED_EVAL_INTERP,
                  .out_mode = CEED_EVAL_INTERP,
                  .q_mode = CEED_GAUSS },
    [CEED_BP2] = {.num_comp_u = 3,
                  .q_data_size = 1,
                  .q_extra = 1,
                  .setup_geo = SetupMassGeo,
                  .setup_rhs = SetupMassRhs3,
                  .apply = Mass3,
                  .error = Error3,
                  .setup_geo_loc = SetupMassGeo_loc,
                  .setup_rhs_loc = SetupMassRhs3_loc,
                  .apply_loc = Mass3_loc,
                  .error_loc = Error3_loc,
                  .in_mode = CEED_EVAL_INTERP,
                  .out_mode = CEED_EVAL_INTERP,
                  .q_mode = CEED_GAUSS },
    [CEED_BP3] = {.num_comp_u = 1,
                  .q_data_size = 7,
                  .q_extra = 1,
                  .setup_geo = SetupDiffGeo,
                  .setup_rhs = SetupDiffRhs,
                  .apply = Diff,
                  .error = Error,
                  .setup_geo_loc = SetupDiffGeo_loc,
                  .setup_rhs_loc = SetupDiffRhs_loc,
                  .apply_loc = Diff_loc,
                  .error_loc = Error_loc,
                  .in_mode = CEED_EVAL_GRAD,
                  .out_mode = CEED_EVAL_GRAD,
                  .q_mode = CEED_GAUSS },
    [CEED_BP4] = {.num_comp_u = 3,
                  .q_data_size = 7,
                  .q_extra = 1,
                  .setup_geo = SetupDiffGeo,
                  .setup_rhs = SetupDiffRhs3,
                  .apply = Diff3,
                  .error = Error3,
                  .setup_geo_loc = SetupDiffGeo_loc,
                  .setup_rhs_loc = SetupDiffRhs3_loc,
                  .apply_loc = Diff3_loc,
                  .error_loc = Error3_loc,
                  .in_mode = CEED_EVAL_GRAD,
                  .out_mode = CEED_EVAL_GRAD,
                  .q_mode = CEED_GAUSS },
    [CEED_BP5] = {.num_comp_u = 1,
                  .q_data_size = 7,
                  .q_extra = 0,
                  .setup_geo = SetupDiffGeo,
                  .setup_rhs = SetupDiffRhs,
                  .apply = Diff,
                  .error = Error,
                  .setup_geo_loc = SetupDiffGeo_loc,
                  .setup_rhs_loc = SetupDiffRhs_loc,
                  .apply_loc = Diff_loc,
                  .error_loc = Error_loc,
                  .in_mode = CEED_EVAL_GRAD,
                  .out_mode = CEED_EVAL_GRAD,
                  .q_mode = CEED_GAUSS_LOBATTO},
    [CEED_BP6] = {.num_comp_u = 3,
                  .q_data_size = 7,
                  .q_extra = 0,
                  .setup_geo = SetupDiffGeo,
                  .setup_rhs = SetupDiffRhs3,
                  .apply = Diff3,
                  .error = Error3,
                  .setup_geo_loc = SetupDiffGeo_loc,
                  .setup_rhs_loc = SetupDiffRhs3_loc,
                  .apply_loc = Diff3_loc,
                  .error_loc = Error3_loc,
                  .in_mode = CEED_EVAL_GRAD,
                  .out_mode = CEED_EVAL_GRAD,
                  .q_mode = CEED_GAUSS_LOBATTO}
};
222
223 // This function uses libCEED to compute the action of the mass matrix
MatMult_Mass(Mat A,Vec X,Vec Y)224 static PetscErrorCode MatMult_Mass(Mat A, Vec X, Vec Y) {
225 OperatorApplyContext op_apply_ctx;
226 PetscScalar *x, *y;
227 PetscMemType x_mem_type, y_mem_type;
228
229 PetscFunctionBeginUser;
230 PetscCall(MatShellGetContext(A, &op_apply_ctx));
231
232 // Global-to-local
233 PetscCall(VecScatterBegin(op_apply_ctx->l_to_g, X, op_apply_ctx->X_loc, INSERT_VALUES, SCATTER_REVERSE));
234 PetscCall(VecScatterEnd(op_apply_ctx->l_to_g, X, op_apply_ctx->X_loc, INSERT_VALUES, SCATTER_REVERSE));
235
236 // Setup libCEED vectors
237 PetscCall(VecGetArrayReadAndMemType(op_apply_ctx->X_loc, (const PetscScalar **)&x, &x_mem_type));
238 PetscCall(VecGetArrayAndMemType(op_apply_ctx->Y_loc, &y, &y_mem_type));
239 CeedVectorSetArray(op_apply_ctx->x_ceed, MemTypeP2C(x_mem_type), CEED_USE_POINTER, x);
240 CeedVectorSetArray(op_apply_ctx->y_ceed, MemTypeP2C(y_mem_type), CEED_USE_POINTER, y);
241
242 // Apply libCEED operator
243 CeedOperatorApply(op_apply_ctx->op, op_apply_ctx->x_ceed, op_apply_ctx->y_ceed, CEED_REQUEST_IMMEDIATE);
244
245 // Restore PETSc vectors
246 CeedVectorTakeArray(op_apply_ctx->x_ceed, MemTypeP2C(x_mem_type), NULL);
247 CeedVectorTakeArray(op_apply_ctx->y_ceed, MemTypeP2C(y_mem_type), NULL);
248 PetscCall(VecRestoreArrayReadAndMemType(op_apply_ctx->X_loc, (const PetscScalar **)&x));
249 PetscCall(VecRestoreArrayAndMemType(op_apply_ctx->Y_loc, &y));
250
251 // Local-to-global
252 if (Y) {
253 PetscCall(VecZeroEntries(Y));
254 PetscCall(VecScatterBegin(op_apply_ctx->l_to_g, op_apply_ctx->Y_loc, Y, ADD_VALUES, SCATTER_FORWARD));
255 PetscCall(VecScatterEnd(op_apply_ctx->l_to_g, op_apply_ctx->Y_loc, Y, ADD_VALUES, SCATTER_FORWARD));
256 }
257 PetscFunctionReturn(PETSC_SUCCESS);
258 }
259
260 // This function uses libCEED to compute the action of the Laplacian with Dirichlet boundary conditions
MatMult_Diff(Mat A,Vec X,Vec Y)261 static PetscErrorCode MatMult_Diff(Mat A, Vec X, Vec Y) {
262 OperatorApplyContext op_apply_ctx;
263 PetscScalar *x, *y;
264 PetscMemType x_mem_type, y_mem_type;
265
266 PetscFunctionBeginUser;
267 PetscCall(MatShellGetContext(A, &op_apply_ctx));
268
269 // Global-to-local
270 PetscCall(VecScatterBegin(op_apply_ctx->l_to_g_0, X, op_apply_ctx->X_loc, INSERT_VALUES, SCATTER_REVERSE));
271 PetscCall(VecScatterEnd(op_apply_ctx->l_to_g_0, X, op_apply_ctx->X_loc, INSERT_VALUES, SCATTER_REVERSE));
272
273 // Setup libCEED vectors
274 PetscCall(VecGetArrayReadAndMemType(op_apply_ctx->X_loc, (const PetscScalar **)&x, &x_mem_type));
275 PetscCall(VecGetArrayAndMemType(op_apply_ctx->Y_loc, &y, &y_mem_type));
276 CeedVectorSetArray(op_apply_ctx->x_ceed, MemTypeP2C(x_mem_type), CEED_USE_POINTER, x);
277 CeedVectorSetArray(op_apply_ctx->y_ceed, MemTypeP2C(y_mem_type), CEED_USE_POINTER, y);
278
279 // Apply libCEED operator
280 CeedOperatorApply(op_apply_ctx->op, op_apply_ctx->x_ceed, op_apply_ctx->y_ceed, CEED_REQUEST_IMMEDIATE);
281
282 // Restore PETSc vectors
283 CeedVectorTakeArray(op_apply_ctx->x_ceed, MemTypeP2C(x_mem_type), NULL);
284 CeedVectorTakeArray(op_apply_ctx->y_ceed, MemTypeP2C(y_mem_type), NULL);
285 PetscCall(VecRestoreArrayReadAndMemType(op_apply_ctx->X_loc, (const PetscScalar **)&x));
286 PetscCall(VecRestoreArrayAndMemType(op_apply_ctx->Y_loc, &y));
287
288 // Local-to-global
289 PetscCall(VecZeroEntries(Y));
290 PetscCall(VecScatterBegin(op_apply_ctx->g_to_g_D, X, Y, INSERT_VALUES, SCATTER_FORWARD));
291 PetscCall(VecScatterEnd(op_apply_ctx->g_to_g_D, X, Y, INSERT_VALUES, SCATTER_FORWARD));
292 PetscCall(VecScatterBegin(op_apply_ctx->l_to_g_0, op_apply_ctx->Y_loc, Y, ADD_VALUES, SCATTER_FORWARD));
293 PetscCall(VecScatterEnd(op_apply_ctx->l_to_g_0, op_apply_ctx->Y_loc, Y, ADD_VALUES, SCATTER_FORWARD));
294 PetscFunctionReturn(PETSC_SUCCESS);
295 }
296
297 // This function calculates the error in the final solution
ComputeErrorMax(OperatorApplyContext op_apply_ctx,CeedOperator op_error,Vec X,CeedVector target,PetscReal * max_error)298 static PetscErrorCode ComputeErrorMax(OperatorApplyContext op_apply_ctx, CeedOperator op_error, Vec X, CeedVector target, PetscReal *max_error) {
299 PetscScalar *x;
300 PetscMemType mem_type;
301 CeedVector collocated_error;
302 CeedSize length;
303
304 PetscFunctionBeginUser;
305 CeedVectorGetLength(target, &length);
306 CeedVectorCreate(op_apply_ctx->ceed, length, &collocated_error);
307
308 // Global-to-local
309 PetscCall(VecScatterBegin(op_apply_ctx->l_to_g, X, op_apply_ctx->X_loc, INSERT_VALUES, SCATTER_REVERSE));
310 PetscCall(VecScatterEnd(op_apply_ctx->l_to_g, X, op_apply_ctx->X_loc, INSERT_VALUES, SCATTER_REVERSE));
311
312 // Setup libCEED vector
313 PetscCall(VecGetArrayReadAndMemType(op_apply_ctx->X_loc, (const PetscScalar **)&x, &mem_type));
314 CeedVectorSetArray(op_apply_ctx->x_ceed, MemTypeP2C(mem_type), CEED_USE_POINTER, x);
315
316 // Apply libCEED operator
317 CeedOperatorApply(op_error, op_apply_ctx->x_ceed, collocated_error, CEED_REQUEST_IMMEDIATE);
318
319 // Restore PETSc vector
320 CeedVectorTakeArray(op_apply_ctx->x_ceed, MemTypeP2C(mem_type), NULL);
321 PetscCall(VecRestoreArrayReadAndMemType(op_apply_ctx->X_loc, (const PetscScalar **)&x));
322
323 // Reduce max error
324 *max_error = 0;
325 const CeedScalar *e;
326 CeedVectorGetArrayRead(collocated_error, CEED_MEM_HOST, &e);
327 for (CeedInt i = 0; i < length; i++) {
328 *max_error = PetscMax(*max_error, PetscAbsScalar(e[i]));
329 }
330 CeedVectorRestoreArrayRead(collocated_error, &e);
331 PetscCall(MPI_Allreduce(MPI_IN_PLACE, max_error, 1, MPIU_REAL, MPIU_MAX, op_apply_ctx->comm));
332
333 // Cleanup
334 CeedVectorDestroy(&collocated_error);
335 PetscFunctionReturn(PETSC_SUCCESS);
336 }
337
main(int argc,char ** argv)338 int main(int argc, char **argv) {
339 MPI_Comm comm;
340 char ceed_resource[PETSC_MAX_PATH_LEN] = "/cpu/self";
341 double my_rt_start, my_rt, rt_min, rt_max;
342 PetscInt degree, q_extra, local_nodes, local_elem, mesh_elem[3], m_nodes[3], p[3], i_rank[3], l_nodes[3], l_size, num_comp_u = 1,
343 ksp_max_it_clip[2];
344 PetscScalar *r;
345 PetscBool test_mode, benchmark_mode, write_solution;
346 PetscMPIInt size, rank;
347 PetscLogStage solve_stage;
348 Vec X, X_loc, rhs, rhs_loc;
349 Mat mat;
350 KSP ksp;
351 VecScatter l_to_g, l_to_g_0, g_to_g_D;
352 PetscMemType mem_type;
353 OperatorApplyContext op_apply_ctx;
354 Ceed ceed;
355 CeedBasis basis_x, basis_u;
356 CeedElemRestriction elem_restr_x, elem_restr_u, elem_restr_u_i, elem_restr_qd_i;
357 CeedQFunction qf_setup_geo, qf_setup_rhs, qf_apply, qf_error;
358 CeedOperator op_setup_geo, op_setup_rhs, op_apply, op_error;
359 CeedVector x_coord, q_data, rhs_ceed, target;
360 CeedInt P, Q;
361 const CeedInt dim = 3, num_comp_x = 3;
362 BPType bp_choice;
363
364 PetscCall(PetscInitialize(&argc, &argv, NULL, help));
365 comm = PETSC_COMM_WORLD;
366
367 // Read command line options
368 PetscOptionsBegin(comm, NULL, "CEED BPs in PETSc", NULL);
369 bp_choice = CEED_BP1;
370 PetscCall(PetscOptionsEnum("-problem", "CEED benchmark problem to solve", NULL, bp_types, (PetscEnum)bp_choice, (PetscEnum *)&bp_choice, NULL));
371 num_comp_u = bp_options[bp_choice].num_comp_u;
372 test_mode = PETSC_FALSE;
373 PetscCall(PetscOptionsBool("-test", "Testing mode (do not print unless error is large)", NULL, test_mode, &test_mode, NULL));
374 benchmark_mode = PETSC_FALSE;
375 PetscCall(PetscOptionsBool("-benchmark", "Benchmarking mode (prints benchmark statistics)", NULL, benchmark_mode, &benchmark_mode, NULL));
376 write_solution = PETSC_FALSE;
377 PetscCall(PetscOptionsBool("-write_solution", "Write solution for visualization", NULL, write_solution, &write_solution, NULL));
378 degree = test_mode ? 3 : 1;
379 PetscCall(PetscOptionsInt("-degree", "Polynomial degree of tensor product basis", NULL, degree, °ree, NULL));
380 q_extra = bp_options[bp_choice].q_extra;
381 PetscCall(PetscOptionsInt("-q_extra", "Number of extra quadrature points", NULL, q_extra, &q_extra, NULL));
382 PetscCall(PetscOptionsString("-ceed", "CEED resource specifier", NULL, ceed_resource, ceed_resource, sizeof(ceed_resource), NULL));
383 local_nodes = 1000;
384 PetscCall(PetscOptionsInt("-local", "Target number of locally owned nodes per process", NULL, local_nodes, &local_nodes, NULL));
385 PetscInt two = 2;
386 ksp_max_it_clip[0] = 5;
387 ksp_max_it_clip[1] = 20;
388 PetscCall(PetscOptionsIntArray("-ksp_max_it_clip", "Min and max number of iterations to use during benchmarking", NULL, ksp_max_it_clip, &two,
389 NULL));
390 PetscOptionsEnd();
391 P = degree + 1;
392 Q = P + q_extra;
393
394 // Set up libCEED
395 CeedInit(ceed_resource, &ceed);
396 CeedMemType mem_type_backend;
397 CeedGetPreferredMemType(ceed, &mem_type_backend);
398
399 VecType default_vec_type = NULL, vec_type;
400 switch (mem_type_backend) {
401 case CEED_MEM_HOST:
402 default_vec_type = VECSTANDARD;
403 break;
404 case CEED_MEM_DEVICE: {
405 const char *resolved;
406
407 CeedGetResource(ceed, &resolved);
408 if (strstr(resolved, "/gpu/cuda")) default_vec_type = VECCUDA;
409 else if (strstr(resolved, "/gpu/hip")) default_vec_type = VECHIP;
410 else default_vec_type = VECSTANDARD;
411 }
412 }
413
414 // Determine size of process grid
415 PetscCall(MPI_Comm_size(comm, &size));
416 Split3(size, p, false);
417
418 // Find a nicely composite number of elements no less than local_nodes
419 for (local_elem = PetscMax(1, local_nodes / (degree * degree * degree));; local_elem++) {
420 Split3(local_elem, mesh_elem, true);
421 if (Max3(mesh_elem) / Min3(mesh_elem) <= 2) break;
422 }
423
424 // Find my location in the process grid
425 PetscCall(MPI_Comm_rank(comm, &rank));
426 for (int d = 0, rank_left = rank; d < dim; d++) {
427 const int pstride[3] = {p[1] * p[2], p[2], 1};
428 i_rank[d] = rank_left / pstride[d];
429 rank_left -= i_rank[d] * pstride[d];
430 }
431
432 GlobalNodes(p, i_rank, degree, mesh_elem, m_nodes);
433
434 // Setup global vector
435 PetscCall(VecCreate(comm, &X));
436 PetscCall(VecSetType(X, default_vec_type));
437 PetscCall(VecSetSizes(X, m_nodes[0] * m_nodes[1] * m_nodes[2] * num_comp_u, PETSC_DECIDE));
438 PetscCall(VecSetFromOptions(X));
439 PetscCall(VecSetUp(X));
440
441 // Print summary
442 PetscInt gsize;
443
444 PetscCall(VecGetSize(X, &gsize));
445 if (!test_mode) {
446 const char *used_resource;
447 CeedGetResource(ceed, &used_resource);
448
449 PetscCall(VecGetType(X, &vec_type));
450
451 PetscCall(PetscPrintf(comm,
452 "\n-- CEED Benchmark Problem %" CeedInt_FMT " -- libCEED + PETSc --\n"
453 " PETSc:\n"
454 " PETSc Vec Type : %s\n"
455 " libCEED:\n"
456 " libCEED Backend : %s\n"
457 " libCEED Backend MemType : %s\n"
458 " Mesh:\n"
459 " Solution Order (P) : %" CeedInt_FMT "\n"
460 " Quadrature Order (Q) : %" CeedInt_FMT "\n"
461 " Global nodes : %" PetscInt_FMT "\n"
462 " Process Decomposition : %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT "\n"
463 " Local Elements : %" PetscInt_FMT " = %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT "\n"
464 " Owned nodes : %" PetscInt_FMT " = %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT "\n"
465 " DoF per node : %" PetscInt_FMT "\n",
466 bp_choice + 1, vec_type, used_resource, CeedMemTypes[mem_type_backend], P, Q, gsize / num_comp_u, p[0], p[1], p[2],
467 local_elem, mesh_elem[0], mesh_elem[1], mesh_elem[2], m_nodes[0] * m_nodes[1] * m_nodes[2], m_nodes[0], m_nodes[1],
468 m_nodes[2], num_comp_u));
469 }
470
471 {
472 l_size = 1;
473 for (int d = 0; d < dim; d++) {
474 l_nodes[d] = mesh_elem[d] * degree + 1;
475 l_size *= l_nodes[d];
476 }
477 PetscCall(VecCreate(PETSC_COMM_SELF, &X_loc));
478 PetscCall(VecSetType(X_loc, default_vec_type));
479 PetscCall(VecSetSizes(X_loc, l_size * num_comp_u, PETSC_DECIDE));
480 PetscCall(VecSetFromOptions(X_loc));
481 PetscCall(VecSetUp(X_loc));
482
483 // Create local-to-global scatter
484 PetscInt *l_to_g_ind, *l_to_g_ind_0, *loc_ind, l_0_count;
485 IS l_to_g_is, l_to_g_is_0, loc_is;
486 PetscInt g_start[2][2][2], g_m_nodes[2][2][2][dim];
487
488 for (int i = 0; i < 2; i++) {
489 for (int j = 0; j < 2; j++) {
490 for (int k = 0; k < 2; k++) {
491 PetscInt ijk_rank[3] = {i_rank[0] + i, i_rank[1] + j, i_rank[2] + k};
492 g_start[i][j][k] = GlobalStart(p, ijk_rank, degree, mesh_elem);
493 GlobalNodes(p, ijk_rank, degree, mesh_elem, g_m_nodes[i][j][k]);
494 }
495 }
496 }
497
498 PetscCall(PetscMalloc1(l_size, &l_to_g_ind));
499 PetscCall(PetscMalloc1(l_size, &l_to_g_ind_0));
500 PetscCall(PetscMalloc1(l_size, &loc_ind));
501 l_0_count = 0;
502 for (PetscInt i = 0, ir, ii; ir = i >= m_nodes[0], ii = i - ir * m_nodes[0], i < l_nodes[0]; i++) {
503 for (PetscInt j = 0, jr, jj; jr = j >= m_nodes[1], jj = j - jr * m_nodes[1], j < l_nodes[1]; j++) {
504 for (PetscInt k = 0, kr, kk; kr = k >= m_nodes[2], kk = k - kr * m_nodes[2], k < l_nodes[2]; k++) {
505 PetscInt here = (i * l_nodes[1] + j) * l_nodes[2] + k;
506 l_to_g_ind[here] = g_start[ir][jr][kr] + (ii * g_m_nodes[ir][jr][kr][1] + jj) * g_m_nodes[ir][jr][kr][2] + kk;
507 if ((i_rank[0] == 0 && i == 0) || (i_rank[1] == 0 && j == 0) || (i_rank[2] == 0 && k == 0) ||
508 (i_rank[0] + 1 == p[0] && i + 1 == l_nodes[0]) || (i_rank[1] + 1 == p[1] && j + 1 == l_nodes[1]) ||
509 (i_rank[2] + 1 == p[2] && k + 1 == l_nodes[2])) {
510 continue;
511 }
512 l_to_g_ind_0[l_0_count] = l_to_g_ind[here];
513 loc_ind[l_0_count++] = here;
514 }
515 }
516 }
517 PetscCall(ISCreateBlock(comm, num_comp_u, l_size, l_to_g_ind, PETSC_OWN_POINTER, &l_to_g_is));
518 PetscCall(VecScatterCreate(X_loc, NULL, X, l_to_g_is, &l_to_g));
519 PetscCall(ISCreateBlock(comm, num_comp_u, l_0_count, l_to_g_ind_0, PETSC_OWN_POINTER, &l_to_g_is_0));
520 PetscCall(ISCreateBlock(comm, num_comp_u, l_0_count, loc_ind, PETSC_OWN_POINTER, &loc_is));
521 PetscCall(VecScatterCreate(X_loc, loc_is, X, l_to_g_is_0, &l_to_g_0));
522 {
523 // Create global-to-global scatter for Dirichlet values (everything not in l_to_g_is_0, which is the range of l_to_g_0)
524 PetscInt x_start, x_end, *ind_D, count_D = 0;
525 IS is_D;
526 const PetscScalar *x;
527 PetscCall(VecZeroEntries(X_loc));
528 PetscCall(VecSet(X, 1.0));
529 PetscCall(VecScatterBegin(l_to_g_0, X_loc, X, INSERT_VALUES, SCATTER_FORWARD));
530 PetscCall(VecScatterEnd(l_to_g_0, X_loc, X, INSERT_VALUES, SCATTER_FORWARD));
531 PetscCall(VecGetOwnershipRange(X, &x_start, &x_end));
532 PetscCall(PetscMalloc1(x_end - x_start, &ind_D));
533 PetscCall(VecGetArrayRead(X, &x));
534 for (PetscInt i = 0; i < x_end - x_start; i++) {
535 if (x[i] == 1.) ind_D[count_D++] = x_start + i;
536 }
537 PetscCall(VecRestoreArrayRead(X, &x));
538 PetscCall(ISCreateGeneral(comm, count_D, ind_D, PETSC_COPY_VALUES, &is_D));
539 PetscCall(PetscFree(ind_D));
540 PetscCall(VecScatterCreate(X, is_D, X, is_D, &g_to_g_D));
541 PetscCall(ISDestroy(&is_D));
542 }
543 PetscCall(ISDestroy(&l_to_g_is));
544 PetscCall(ISDestroy(&l_to_g_is_0));
545 PetscCall(ISDestroy(&loc_is));
546 }
547
548 // CEED bases
549 CeedBasisCreateTensorH1Lagrange(ceed, dim, num_comp_u, P, Q, bp_options[bp_choice].q_mode, &basis_u);
550 CeedBasisCreateTensorH1Lagrange(ceed, dim, num_comp_x, 2, Q, bp_options[bp_choice].q_mode, &basis_x);
551
552 // CEED restrictions
553 PetscCall(CreateRestriction(ceed, mesh_elem, P, num_comp_u, &elem_restr_u));
554 PetscCall(CreateRestriction(ceed, mesh_elem, 2, dim, &elem_restr_x));
555 CeedInt num_elem = mesh_elem[0] * mesh_elem[1] * mesh_elem[2];
556
557 CeedElemRestrictionCreateStrided(ceed, num_elem, Q * Q * Q, num_comp_u, num_comp_u * num_elem * Q * Q * Q, CEED_STRIDES_BACKEND, &elem_restr_u_i);
558 CeedElemRestrictionCreateStrided(ceed, num_elem, Q * Q * Q, bp_options[bp_choice].q_data_size,
559 bp_options[bp_choice].q_data_size * num_elem * Q * Q * Q, CEED_STRIDES_BACKEND, &elem_restr_qd_i);
560 {
561 CeedScalar *x_loc;
562 CeedInt shape[3] = {mesh_elem[0] + 1, mesh_elem[1] + 1, mesh_elem[2] + 1}, len = shape[0] * shape[1] * shape[2];
563
564 x_loc = malloc(len * num_comp_x * sizeof x_loc[0]);
565 for (CeedInt i = 0; i < shape[0]; i++) {
566 for (CeedInt j = 0; j < shape[1]; j++) {
567 for (CeedInt k = 0; k < shape[2]; k++) {
568 x_loc[dim * ((i * shape[1] + j) * shape[2] + k) + 0] = 1. * (i_rank[0] * mesh_elem[0] + i) / (p[0] * mesh_elem[0]);
569 x_loc[dim * ((i * shape[1] + j) * shape[2] + k) + 1] = 1. * (i_rank[1] * mesh_elem[1] + j) / (p[1] * mesh_elem[1]);
570 x_loc[dim * ((i * shape[1] + j) * shape[2] + k) + 2] = 1. * (i_rank[2] * mesh_elem[2] + k) / (p[2] * mesh_elem[2]);
571 }
572 }
573 }
574 CeedVectorCreate(ceed, len * num_comp_x, &x_coord);
575 CeedVectorSetArray(x_coord, CEED_MEM_HOST, CEED_OWN_POINTER, x_loc);
576 }
577
578 // Create the QFunction that builds the operator quadrature data
579 CeedQFunctionCreateInterior(ceed, 1, bp_options[bp_choice].setup_geo, bp_options[bp_choice].setup_geo_loc, &qf_setup_geo);
580 CeedQFunctionAddInput(qf_setup_geo, "x", num_comp_x, CEED_EVAL_INTERP);
581 CeedQFunctionAddInput(qf_setup_geo, "dx", num_comp_x * dim, CEED_EVAL_GRAD);
582 CeedQFunctionAddInput(qf_setup_geo, "weight", 1, CEED_EVAL_WEIGHT);
583 CeedQFunctionAddOutput(qf_setup_geo, "q_data", bp_options[bp_choice].q_data_size, CEED_EVAL_NONE);
584
585 // Create the QFunction that sets up the RHS and true solution
586 CeedQFunctionCreateInterior(ceed, 1, bp_options[bp_choice].setup_rhs, bp_options[bp_choice].setup_rhs_loc, &qf_setup_rhs);
587 CeedQFunctionAddInput(qf_setup_rhs, "x", num_comp_x, CEED_EVAL_INTERP);
588 CeedQFunctionAddInput(qf_setup_rhs, "q_data", bp_options[bp_choice].q_data_size, CEED_EVAL_NONE);
589 CeedQFunctionAddOutput(qf_setup_rhs, "true_soln", num_comp_u, CEED_EVAL_NONE);
590 CeedQFunctionAddOutput(qf_setup_rhs, "rhs", num_comp_u, CEED_EVAL_INTERP);
591
592 // Set up PDE operator
593 CeedQFunctionCreateInterior(ceed, 1, bp_options[bp_choice].apply, bp_options[bp_choice].apply_loc, &qf_apply);
594 // Add inputs and outputs
595 CeedInt in_scale = bp_options[bp_choice].in_mode == CEED_EVAL_GRAD ? 3 : 1;
596 CeedInt out_scale = bp_options[bp_choice].out_mode == CEED_EVAL_GRAD ? 3 : 1;
597 CeedQFunctionAddInput(qf_apply, "u", num_comp_u * in_scale, bp_options[bp_choice].in_mode);
598 CeedQFunctionAddInput(qf_apply, "q_data", bp_options[bp_choice].q_data_size, CEED_EVAL_NONE);
599 CeedQFunctionAddOutput(qf_apply, "v", num_comp_u * out_scale, bp_options[bp_choice].out_mode);
600
601 // Create the error qfunction
602 CeedQFunctionCreateInterior(ceed, 1, bp_options[bp_choice].error, bp_options[bp_choice].error_loc, &qf_error);
603 CeedQFunctionAddInput(qf_error, "u", num_comp_u, CEED_EVAL_INTERP);
604 CeedQFunctionAddInput(qf_error, "true_soln", num_comp_u, CEED_EVAL_NONE);
605 CeedQFunctionAddInput(qf_error, "qdata", bp_options[bp_choice].q_data_size, CEED_EVAL_NONE);
606 CeedQFunctionAddOutput(qf_error, "error", num_comp_u, CEED_EVAL_NONE);
607
608 // Create the persistent vectors that will be needed in setup
609 CeedInt num_qpts;
610 CeedBasisGetNumQuadraturePoints(basis_u, &num_qpts);
611 CeedVectorCreate(ceed, bp_options[bp_choice].q_data_size * num_elem * num_qpts, &q_data);
612 CeedVectorCreate(ceed, num_elem * num_qpts * num_comp_u, &target);
613 CeedVectorCreate(ceed, l_size * num_comp_u, &rhs_ceed);
614
615 // Create the operator that builds the quadrature data for the ceed operator
616 CeedOperatorCreate(ceed, qf_setup_geo, CEED_QFUNCTION_NONE, CEED_QFUNCTION_NONE, &op_setup_geo);
617 CeedOperatorSetField(op_setup_geo, "x", elem_restr_x, basis_x, CEED_VECTOR_ACTIVE);
618 CeedOperatorSetField(op_setup_geo, "dx", elem_restr_x, basis_x, CEED_VECTOR_ACTIVE);
619 CeedOperatorSetField(op_setup_geo, "weight", CEED_ELEMRESTRICTION_NONE, basis_x, CEED_VECTOR_NONE);
620 CeedOperatorSetField(op_setup_geo, "q_data", elem_restr_qd_i, CEED_BASIS_NONE, CEED_VECTOR_ACTIVE);
621
622 // Create the operator that builds the RHS and true solution
623 CeedOperatorCreate(ceed, qf_setup_rhs, CEED_QFUNCTION_NONE, CEED_QFUNCTION_NONE, &op_setup_rhs);
624 CeedOperatorSetField(op_setup_rhs, "x", elem_restr_x, basis_x, CEED_VECTOR_ACTIVE);
625 CeedOperatorSetField(op_setup_rhs, "q_data", elem_restr_qd_i, CEED_BASIS_NONE, q_data);
626 CeedOperatorSetField(op_setup_rhs, "true_soln", elem_restr_u_i, CEED_BASIS_NONE, target);
627 CeedOperatorSetField(op_setup_rhs, "rhs", elem_restr_u, basis_u, CEED_VECTOR_ACTIVE);
628
629 // Create the mass or diff operator
630 CeedOperatorCreate(ceed, qf_apply, CEED_QFUNCTION_NONE, CEED_QFUNCTION_NONE, &op_apply);
631 CeedOperatorSetField(op_apply, "u", elem_restr_u, basis_u, CEED_VECTOR_ACTIVE);
632 CeedOperatorSetField(op_apply, "q_data", elem_restr_qd_i, CEED_BASIS_NONE, q_data);
633 CeedOperatorSetField(op_apply, "v", elem_restr_u, basis_u, CEED_VECTOR_ACTIVE);
634
635 // Create the error operator
636 CeedOperatorCreate(ceed, qf_error, CEED_QFUNCTION_NONE, CEED_QFUNCTION_NONE, &op_error);
637 CeedOperatorSetField(op_error, "u", elem_restr_u, basis_u, CEED_VECTOR_ACTIVE);
638 CeedOperatorSetField(op_error, "true_soln", elem_restr_u_i, CEED_BASIS_NONE, target);
639 CeedOperatorSetField(op_error, "qdata", elem_restr_qd_i, CEED_BASIS_NONE, q_data);
640 CeedOperatorSetField(op_error, "error", elem_restr_u_i, CEED_BASIS_NONE, CEED_VECTOR_ACTIVE);
641
642 // Set up Mat
643 PetscCall(PetscMalloc1(1, &op_apply_ctx));
644 op_apply_ctx->comm = comm;
645 op_apply_ctx->l_to_g = l_to_g;
646 if (bp_choice != CEED_BP1 && bp_choice != CEED_BP2) {
647 op_apply_ctx->l_to_g_0 = l_to_g_0;
648 op_apply_ctx->g_to_g_D = g_to_g_D;
649 }
650 op_apply_ctx->X_loc = X_loc;
651 PetscCall(VecDuplicate(X_loc, &op_apply_ctx->Y_loc));
652 CeedVectorCreate(ceed, l_size * num_comp_u, &op_apply_ctx->x_ceed);
653 CeedVectorCreate(ceed, l_size * num_comp_u, &op_apply_ctx->y_ceed);
654 op_apply_ctx->op = op_apply;
655 op_apply_ctx->q_data = q_data;
656 op_apply_ctx->ceed = ceed;
657
658 PetscCall(MatCreateShell(comm, m_nodes[0] * m_nodes[1] * m_nodes[2] * num_comp_u, m_nodes[0] * m_nodes[1] * m_nodes[2] * num_comp_u, PETSC_DECIDE,
659 PETSC_DECIDE, op_apply_ctx, &mat));
660 if (bp_choice == CEED_BP1 || bp_choice == CEED_BP2) {
661 PetscCall(MatShellSetOperation(mat, MATOP_MULT, (void (*)(void))MatMult_Mass));
662 } else {
663 PetscCall(MatShellSetOperation(mat, MATOP_MULT, (void (*)(void))MatMult_Diff));
664 }
665 PetscCall(VecGetType(X, &vec_type));
666 PetscCall(MatShellSetVecType(mat, vec_type));
667
668 // Get RHS vector
669 PetscCall(VecDuplicate(X, &rhs));
670 PetscCall(VecDuplicate(X_loc, &rhs_loc));
671 PetscCall(VecZeroEntries(rhs_loc));
672 PetscCall(VecGetArrayAndMemType(rhs_loc, &r, &mem_type));
673 CeedVectorSetArray(rhs_ceed, MemTypeP2C(mem_type), CEED_USE_POINTER, r);
674
675 // Setup q_data, rhs, and target
676 CeedOperatorApply(op_setup_geo, x_coord, q_data, CEED_REQUEST_IMMEDIATE);
677 CeedOperatorApply(op_setup_rhs, x_coord, rhs_ceed, CEED_REQUEST_IMMEDIATE);
678 CeedVectorDestroy(&x_coord);
679
680 // Gather RHS
681 PetscCall(CeedVectorTakeArray(rhs_ceed, MemTypeP2C(mem_type), NULL));
682 PetscCall(VecRestoreArrayAndMemType(rhs_loc, &r));
683 PetscCall(VecZeroEntries(rhs));
684 PetscCall(VecScatterBegin(l_to_g, rhs_loc, rhs, ADD_VALUES, SCATTER_FORWARD));
685 PetscCall(VecScatterEnd(l_to_g, rhs_loc, rhs, ADD_VALUES, SCATTER_FORWARD));
686 CeedVectorDestroy(&rhs_ceed);
687
  // Configure the Krylov solver: CG with the natural norm
  PetscCall(KSPCreate(comm, &ksp));
  {
    PC pc;
    PetscCall(KSPGetPC(ksp, &pc));
    if (bp_choice == CEED_BP1 || bp_choice == CEED_BP2) {
      // Mass-matrix BPs: rowsum (lumped) Jacobi preconditioning
      PetscCall(PCSetType(pc, PCJACOBI));
      PetscCall(PCJacobiSetType(pc, PC_JACOBI_ROWSUM));
    } else {
      // Diffusion BPs: unpreconditioned
      PetscCall(PCSetType(pc, PCNONE));
    }
    PetscCall(KSPSetType(ksp, KSPCG));
    PetscCall(KSPSetNormType(ksp, KSP_NORM_NATURAL));
    PetscCall(KSPSetTolerances(ksp, 1e-10, PETSC_DEFAULT, PETSC_DEFAULT, PETSC_DEFAULT));
  }
  PetscCall(KSPSetOperators(ksp, mat, mat));
  // First run's performance log is not considered for benchmarking purposes
  // (single-iteration warm-up solve to exclude setup/JIT costs from timing)
  PetscCall(KSPSetTolerances(ksp, 1e-10, PETSC_DEFAULT, PETSC_DEFAULT, 1));
  my_rt_start = MPI_Wtime();
  PetscCall(KSPSolve(ksp, rhs, X));
  my_rt = MPI_Wtime() - my_rt_start;
  // NOTE(review): PetscCallMPI is the conventional wrapper for raw MPI calls
  // (MPI and PETSc error codes differ) — consider switching here
  PetscCall(MPI_Allreduce(MPI_IN_PLACE, &my_rt, 1, MPI_DOUBLE, MPI_MIN, comm));
  // Set maxits based on first iteration timing
  // (assumes ksp_max_it_clip = {budget for slow runs, budget for fast runs},
  // set via -ksp_max_it_clip — TODO confirm against option parsing above)
  if (my_rt > 0.02) {
    PetscCall(KSPSetTolerances(ksp, 1e-10, PETSC_DEFAULT, PETSC_DEFAULT, ksp_max_it_clip[0]));
  } else {
    PetscCall(KSPSetTolerances(ksp, 1e-10, PETSC_DEFAULT, PETSC_DEFAULT, ksp_max_it_clip[1]));
  }
  // Command-line options may still override any of the settings above
  PetscCall(KSPSetFromOptions(ksp));

  // Timed solve
  PetscCall(VecZeroEntries(X));
  PetscCall(PetscBarrier((PetscObject)ksp));

  // -- Performance logging
  PetscCall(PetscLogStageRegister("Solve Stage", &solve_stage));
  PetscCall(PetscLogStagePush(solve_stage));

  // -- Solve
  my_rt_start = MPI_Wtime();
  PetscCall(KSPSolve(ksp, rhs, X));
  my_rt = MPI_Wtime() - my_rt_start;

  // -- Performance logging
  PetscCall(PetscLogStagePop());
732
  // Output results
  {
    KSPType ksp_type;
    KSPConvergedReason reason;
    PetscReal rnorm;
    PetscInt its;
    PetscCall(KSPGetType(ksp, &ksp_type));
    PetscCall(KSPGetConvergedReason(ksp, &reason));
    PetscCall(KSPGetIterationNumber(ksp, &its));
    PetscCall(KSPGetResidualNorm(ksp, &rnorm));
    // In test mode stay quiet unless the solve diverged or failed to converge
    if (!test_mode || reason < 0 || rnorm > 1e-8) {
      PetscCall(PetscPrintf(comm,
                            " KSP:\n"
                            " KSP Type : %s\n"
                            " KSP Convergence : %s\n"
                            " Total KSP Iterations : %" PetscInt_FMT "\n"
                            " Final rnorm : %e\n",
                            ksp_type, KSPConvergedReasons[reason], its, (double)rnorm));
    }
    if (!test_mode) {
      PetscCall(PetscPrintf(comm, " Performance:\n"));
    }
    {
      PetscReal max_error;
      PetscCall(ComputeErrorMax(op_apply_ctx, op_error, X, target, &max_error));
      PetscReal tol = 5e-2;
      // In test mode only report when the pointwise error exceeds tolerance;
      // min/max solve times are reduced across ranks for the report
      // NOTE(review): PetscCallMPI is the conventional wrapper for raw MPI calls
      if (!test_mode || max_error > tol) {
        PetscCall(MPI_Allreduce(&my_rt, &rt_min, 1, MPI_DOUBLE, MPI_MIN, comm));
        PetscCall(MPI_Allreduce(&my_rt, &rt_max, 1, MPI_DOUBLE, MPI_MAX, comm));
        PetscCall(PetscPrintf(comm,
                              " Pointwise Error (max) : %e\n"
                              " CG Solve Time : %g (%g) sec\n",
                              (double)max_error, rt_max, rt_min));
      }
    }
    if (!test_mode) {
      // Safe use of rt_max/rt_min: when !test_mode the branch above always ran
      // the reductions, so both values are defined here
      PetscCall(PetscPrintf(comm, " DoFs/Sec in CG : %g (%g) million\n", 1e-6 * gsize * its / rt_max,
                            1e-6 * gsize * its / rt_min));
    }
  }
773
  // Optionally write the solution in VTK format for visualization
  if (write_solution) {
    PetscViewer vtk_viewer_soln;

    PetscCall(PetscViewerCreate(comm, &vtk_viewer_soln));
    PetscCall(PetscViewerSetType(vtk_viewer_soln, PETSCVIEWERVTK));
    PetscCall(PetscViewerFileSetName(vtk_viewer_soln, "solution.vtu"));
    PetscCall(VecView(X, vtk_viewer_soln));
    PetscCall(PetscViewerDestroy(&vtk_viewer_soln));
  }

  // Clean up PETSc objects
  PetscCall(VecDestroy(&rhs));
  PetscCall(VecDestroy(&rhs_loc));
  PetscCall(VecDestroy(&X));
  PetscCall(VecDestroy(&op_apply_ctx->X_loc));
  PetscCall(VecDestroy(&op_apply_ctx->Y_loc));
  PetscCall(VecScatterDestroy(&l_to_g));
  PetscCall(VecScatterDestroy(&l_to_g_0));
  PetscCall(VecScatterDestroy(&g_to_g_D));
  PetscCall(MatDestroy(&mat));
  PetscCall(KSPDestroy(&ksp));

  // Clean up libCEED objects; all Ceed sub-objects are destroyed before the
  // Ceed context itself
  CeedVectorDestroy(&op_apply_ctx->x_ceed);
  CeedVectorDestroy(&op_apply_ctx->y_ceed);
  CeedVectorDestroy(&op_apply_ctx->q_data);
  CeedVectorDestroy(&target);
  CeedElemRestrictionDestroy(&elem_restr_u);
  CeedElemRestrictionDestroy(&elem_restr_x);
  CeedElemRestrictionDestroy(&elem_restr_u_i);
  CeedElemRestrictionDestroy(&elem_restr_qd_i);
  CeedBasisDestroy(&basis_u);
  CeedBasisDestroy(&basis_x);
  CeedQFunctionDestroy(&qf_setup_geo);
  CeedQFunctionDestroy(&qf_setup_rhs);
  CeedQFunctionDestroy(&qf_apply);
  CeedQFunctionDestroy(&qf_error);
  CeedOperatorDestroy(&op_setup_geo);
  CeedOperatorDestroy(&op_setup_rhs);
  CeedOperatorDestroy(&op_apply);
  CeedOperatorDestroy(&op_error);
  CeedDestroy(&ceed);

  PetscCall(PetscFree(op_apply_ctx));
  return PetscFinalize();
}
818