1 // Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors. 2 // All Rights Reserved. See the top-level LICENSE and NOTICE files for details. 3 // 4 // SPDX-License-Identifier: BSD-2-Clause 5 // 6 // This file is part of CEED: http://github.com/ceed 7 8 // libCEED + PETSc Example: CEED BPs 9 // 10 // This example demonstrates a simple usage of libCEED with PETSc to solve the 11 // CEED BP benchmark problems, see http://ceed.exascaleproject.org/bps. 12 // 13 // The code is intentionally "raw", using only low-level communication 14 // primitives. 15 // 16 // Build with: 17 // 18 // make bpsraw [PETSC_DIR=</path/to/petsc>] [CEED_DIR=</path/to/libceed>] 19 // 20 // Sample runs: 21 // 22 // ./bpsraw -problem bp1 23 // ./bpsraw -problem bp2 24 // ./bpsraw -problem bp3 25 // ./bpsraw -problem bp4 26 // ./bpsraw -problem bp5 -ceed /cpu/self 27 // ./bpsraw -problem bp6 -ceed /gpu/cuda 28 // 29 //TESTARGS -ceed {ceed_resource} -test -problem bp2 -degree 5 -q_extra 1 -ksp_max_it_clip 15,15 30 31 /// @file 32 /// CEED BPs example using PETSc 33 /// See bps.c for an implementation using DMPlex unstructured grids. 34 const char help[] = "Solve CEED BPs using PETSc\n"; 35 36 #include <ceed.h> 37 #include <petscksp.h> 38 #include <petscsys.h> 39 #include <stdbool.h> 40 #include <string.h> 41 42 #include "qfunctions/bps/bp1.h" 43 #include "qfunctions/bps/bp2.h" 44 #include "qfunctions/bps/bp3.h" 45 #include "qfunctions/bps/bp4.h" 46 #include "qfunctions/bps/common.h" 47 48 #if PETSC_VERSION_LT(3, 12, 0) 49 #ifdef PETSC_HAVE_CUDA 50 #include <petsccuda.h> 51 // Note: With PETSc prior to version 3.12.0, providing the source path to 52 // include 'cublas_v2.h' will be needed to use 'petsccuda.h'. 53 #endif 54 #endif 55 56 static CeedMemType MemTypeP2C(PetscMemType mem_type) { return PetscMemTypeDevice(mem_type) ? CEED_MEM_DEVICE : CEED_MEM_HOST; } 57 58 static void Split3(PetscInt size, PetscInt m[3], bool reverse) { 59 for (PetscInt d = 0, size_left = size; d < 3; d++) { 60 PetscInt try = (PetscInt)PetscCeilReal(PetscPowReal(size_left, 1. / (3 - d))); 61 while (try * (size_left / try) != size_left) try++; 62 m[reverse ? 2 - d : d] = try; 63 size_left /= try; 64 } 65 } 66 67 static PetscInt Max3(const PetscInt a[3]) { return PetscMax(a[0], PetscMax(a[1], a[2])); } 68 static PetscInt Min3(const PetscInt a[3]) { return PetscMin(a[0], PetscMin(a[1], a[2])); } 69 static void GlobalNodes(const PetscInt p[3], const PetscInt i_rank[3], PetscInt degree, const PetscInt mesh_elem[3], PetscInt m_nodes[3]) { 70 for (int d = 0; d < 3; d++) m_nodes[d] = degree * mesh_elem[d] + (i_rank[d] == p[d] - 1); 71 } 72 static PetscInt GlobalStart(const PetscInt p[3], const PetscInt i_rank[3], PetscInt degree, const PetscInt mesh_elem[3]) { 73 PetscInt start = 0; 74 // Dumb brute-force is easier to read 75 for (PetscInt i = 0; i < p[0]; i++) { 76 for (PetscInt j = 0; j < p[1]; j++) { 77 for (PetscInt k = 0; k < p[2]; k++) { 78 PetscInt m_nodes[3], ijk_rank[] = {i, j, k}; 79 if (i == i_rank[0] && j == i_rank[1] && k == i_rank[2]) return start; 80 GlobalNodes(p, ijk_rank, degree, mesh_elem, m_nodes); 81 start += m_nodes[0] * m_nodes[1] * m_nodes[2]; 82 } 83 } 84 } 85 return -1; 86 } 87 static PetscErrorCode CreateRestriction(Ceed ceed, const CeedInt mesh_elem[3], CeedInt P, CeedInt num_comp, CeedElemRestriction *elem_restr) { 88 const PetscInt num_elem = mesh_elem[0] * mesh_elem[1] * mesh_elem[2]; 89 PetscInt m_nodes[3], *idx, *idx_p; 90 91 PetscFunctionBeginUser; 92 // Get indicies 93 for (int d = 0; d < 3; d++) m_nodes[d] = mesh_elem[d] * (P - 1) + 1; 94 idx_p = idx = malloc(num_elem * P * P * P * sizeof idx[0]); 95 for (CeedInt i = 0; i < mesh_elem[0]; i++) { 96 for (CeedInt j = 0; j < mesh_elem[1]; j++) { 97 for (CeedInt k = 0; k < mesh_elem[2]; k++, idx_p += P * P * P) { 98 for (CeedInt ii = 0; ii < P; ii++) { 99 for (CeedInt jj = 0; jj < P; jj++) { 100 for (CeedInt kk = 0; kk < P; kk++) { 101 if (0) { // This is the C-style (i,j,k) ordering that I prefer 102 idx_p[(ii * P + jj) * P + kk] = num_comp * (((i * (P - 1) + ii) * m_nodes[1] + (j * (P - 1) + jj)) * m_nodes[2] + (k * (P - 1) + kk)); 103 } else { // (k,j,i) ordering for consistency with MFEM example 104 idx_p[ii + P * (jj + P * kk)] = num_comp * (((i * (P - 1) + ii) * m_nodes[1] + (j * (P - 1) + jj)) * m_nodes[2] + (k * (P - 1) + kk)); 105 } 106 } 107 } 108 } 109 } 110 } 111 } 112 113 // Setup CEED restriction 114 CeedElemRestrictionCreate(ceed, num_elem, P * P * P, num_comp, 1, m_nodes[0] * m_nodes[1] * m_nodes[2] * num_comp, CEED_MEM_HOST, CEED_OWN_POINTER, 115 idx, elem_restr); 116 117 PetscFunctionReturn(0); 118 } 119 120 // Data for PETSc 121 typedef struct OperatorApplyContext_ *OperatorApplyContext; 122 struct OperatorApplyContext_ { 123 MPI_Comm comm; 124 VecScatter l_to_g; // Scatter for all entries 125 VecScatter l_to_g_0; // Skip Dirichlet values 126 VecScatter g_to_g_D; // global-to-global; only Dirichlet values 127 Vec X_loc, Y_loc; 128 CeedVector x_ceed, y_ceed; 129 CeedOperator op; 130 CeedVector q_data; 131 Ceed ceed; 132 }; 133 134 // BP Options 135 typedef enum { CEED_BP1 = 0, CEED_BP2 = 1, CEED_BP3 = 2, CEED_BP4 = 3, CEED_BP5 = 4, CEED_BP6 = 5 } BPType; 136 static const char *const bp_types[] = {"bp1", "bp2", "bp3", "bp4", "bp5", "bp6", "BPType", "CEED_BP", 0}; 137 138 // BP specific data 139 typedef struct { 140 CeedInt num_comp_u, q_data_size, q_extra; 141 CeedQFunctionUser setup_geo, setup_rhs, apply, error; 142 const char *setup_geo_loc, *setup_rhs_loc, *apply_loc, *error_loc; 143 CeedEvalMode in_mode, out_mode; 144 CeedQuadMode q_mode; 145 } BPData; 146 147 BPData bp_options[6] = { 148 [CEED_BP1] = {.num_comp_u = 1, 149 .q_data_size = 1, 150 .q_extra = 1, 151 .setup_geo = SetupMassGeo, 152 .setup_rhs = SetupMassRhs, 153 .apply = Mass, 154 .error = Error, 155 .setup_geo_loc = SetupMassGeo_loc, 156 .setup_rhs_loc = SetupMassRhs_loc, 157 .apply_loc = Mass_loc, 158 .error_loc = Error_loc, 159 .in_mode = CEED_EVAL_INTERP, 160 .out_mode = CEED_EVAL_INTERP, 161 .q_mode = CEED_GAUSS }, 162 [CEED_BP2] = {.num_comp_u = 3, 163 .q_data_size = 1, 164 .q_extra = 1, 165 .setup_geo = SetupMassGeo, 166 .setup_rhs = SetupMassRhs3, 167 .apply = Mass3, 168 .error = Error3, 169 .setup_geo_loc = SetupMassGeo_loc, 170 .setup_rhs_loc = SetupMassRhs3_loc, 171 .apply_loc = Mass3_loc, 172 .error_loc = Error3_loc, 173 .in_mode = CEED_EVAL_INTERP, 174 .out_mode = CEED_EVAL_INTERP, 175 .q_mode = CEED_GAUSS }, 176 [CEED_BP3] = {.num_comp_u = 1, 177 .q_data_size = 7, 178 .q_extra = 1, 179 .setup_geo = SetupDiffGeo, 180 .setup_rhs = SetupDiffRhs, 181 .apply = Diff, 182 .error = Error, 183 .setup_geo_loc = SetupDiffGeo_loc, 184 .setup_rhs_loc = SetupDiffRhs_loc, 185 .apply_loc = Diff_loc, 186 .error_loc = Error_loc, 187 .in_mode = CEED_EVAL_GRAD, 188 .out_mode = CEED_EVAL_GRAD, 189 .q_mode = CEED_GAUSS }, 190 [CEED_BP4] = {.num_comp_u = 3, 191 .q_data_size = 7, 192 .q_extra = 1, 193 .setup_geo = SetupDiffGeo, 194 .setup_rhs = SetupDiffRhs3, 195 .apply = Diff3, 196 .error = Error3, 197 .setup_geo_loc = SetupDiffGeo_loc, 198 .setup_rhs_loc = SetupDiffRhs3_loc, 199 .apply_loc = Diff3_loc, 200 .error_loc = Error3_loc, 201 .in_mode = CEED_EVAL_GRAD, 202 .out_mode = CEED_EVAL_GRAD, 203 .q_mode = CEED_GAUSS }, 204 [CEED_BP5] = {.num_comp_u = 1, 205 .q_data_size = 7, 206 .q_extra = 0, 207 .setup_geo = SetupDiffGeo, 208 .setup_rhs = SetupDiffRhs, 209 .apply = Diff, 210 .error = Error, 211 .setup_geo_loc = SetupDiffGeo_loc, 212 .setup_rhs_loc = SetupDiffRhs_loc, 213 .apply_loc = Diff_loc, 214 .error_loc = Error_loc, 215 .in_mode = CEED_EVAL_GRAD, 216 .out_mode = CEED_EVAL_GRAD, 217 .q_mode = CEED_GAUSS_LOBATTO}, 218 [CEED_BP6] = {.num_comp_u = 3, 219 .q_data_size = 7, 220 .q_extra = 0, 221 .setup_geo = SetupDiffGeo, 222 .setup_rhs = SetupDiffRhs3, 223 .apply = Diff3, 224 .error = Error3, 225 .setup_geo_loc = SetupDiffGeo_loc, 226 .setup_rhs_loc = SetupDiffRhs3_loc, 227 .apply_loc = Diff3_loc, 228 .error_loc = Error3_loc, 229 .in_mode = CEED_EVAL_GRAD, 230 .out_mode = CEED_EVAL_GRAD, 231 .q_mode = CEED_GAUSS_LOBATTO} 232 }; 233 234 // This function uses libCEED to compute the action of the mass matrix 235 static PetscErrorCode MatMult_Mass(Mat A, Vec X, Vec Y) { 236 OperatorApplyContext op_apply_ctx; 237 PetscScalar *x, *y; 238 PetscMemType x_mem_type, y_mem_type; 239 240 PetscFunctionBeginUser; 241 242 PetscCall(MatShellGetContext(A, &op_apply_ctx)); 243 244 // Global-to-local 245 PetscCall(VecScatterBegin(op_apply_ctx->l_to_g, X, op_apply_ctx->X_loc, INSERT_VALUES, SCATTER_REVERSE)); 246 PetscCall(VecScatterEnd(op_apply_ctx->l_to_g, X, op_apply_ctx->X_loc, INSERT_VALUES, SCATTER_REVERSE)); 247 248 // Setup libCEED vectors 249 PetscCall(VecGetArrayReadAndMemType(op_apply_ctx->X_loc, (const PetscScalar **)&x, &x_mem_type)); 250 PetscCall(VecGetArrayAndMemType(op_apply_ctx->Y_loc, &y, &y_mem_type)); 251 CeedVectorSetArray(op_apply_ctx->x_ceed, MemTypeP2C(x_mem_type), CEED_USE_POINTER, x); 252 CeedVectorSetArray(op_apply_ctx->y_ceed, MemTypeP2C(y_mem_type), CEED_USE_POINTER, y); 253 254 // Apply libCEED operator 255 CeedOperatorApply(op_apply_ctx->op, op_apply_ctx->x_ceed, op_apply_ctx->y_ceed, CEED_REQUEST_IMMEDIATE); 256 257 // Restore PETSc vectors 258 CeedVectorTakeArray(op_apply_ctx->x_ceed, MemTypeP2C(x_mem_type), NULL); 259 CeedVectorTakeArray(op_apply_ctx->y_ceed, MemTypeP2C(y_mem_type), NULL); 260 PetscCall(VecRestoreArrayReadAndMemType(op_apply_ctx->X_loc, (const PetscScalar **)&x)); 261 PetscCall(VecRestoreArrayAndMemType(op_apply_ctx->Y_loc, &y)); 262 263 // Local-to-global 264 if (Y) { 265 PetscCall(VecZeroEntries(Y)); 266 PetscCall(VecScatterBegin(op_apply_ctx->l_to_g, op_apply_ctx->Y_loc, Y, ADD_VALUES, SCATTER_FORWARD)); 267 PetscCall(VecScatterEnd(op_apply_ctx->l_to_g, op_apply_ctx->Y_loc, Y, ADD_VALUES, SCATTER_FORWARD)); 268 } 269 PetscFunctionReturn(0); 270 } 271 272 // This function uses libCEED to compute the action of the Laplacian with 273 // Dirichlet boundary conditions 274 static PetscErrorCode MatMult_Diff(Mat A, Vec X, Vec Y) { 275 OperatorApplyContext op_apply_ctx; 276 PetscScalar *x, *y; 277 PetscMemType x_mem_type, y_mem_type; 278 279 PetscFunctionBeginUser; 280 281 PetscCall(MatShellGetContext(A, &op_apply_ctx)); 282 283 // Global-to-local 284 PetscCall(VecScatterBegin(op_apply_ctx->l_to_g_0, X, op_apply_ctx->X_loc, INSERT_VALUES, SCATTER_REVERSE)); 285 PetscCall(VecScatterEnd(op_apply_ctx->l_to_g_0, X, op_apply_ctx->X_loc, INSERT_VALUES, SCATTER_REVERSE)); 286 287 // Setup libCEED vectors 288 PetscCall(VecGetArrayReadAndMemType(op_apply_ctx->X_loc, (const PetscScalar **)&x, &x_mem_type)); 289 PetscCall(VecGetArrayAndMemType(op_apply_ctx->Y_loc, &y, &y_mem_type)); 290 CeedVectorSetArray(op_apply_ctx->x_ceed, MemTypeP2C(x_mem_type), CEED_USE_POINTER, x); 291 CeedVectorSetArray(op_apply_ctx->y_ceed, MemTypeP2C(y_mem_type), CEED_USE_POINTER, y); 292 293 // Apply libCEED operator 294 CeedOperatorApply(op_apply_ctx->op, op_apply_ctx->x_ceed, op_apply_ctx->y_ceed, CEED_REQUEST_IMMEDIATE); 295 296 // Restore PETSc vectors 297 CeedVectorTakeArray(op_apply_ctx->x_ceed, MemTypeP2C(x_mem_type), NULL); 298 CeedVectorTakeArray(op_apply_ctx->y_ceed, MemTypeP2C(y_mem_type), NULL); 299 PetscCall(VecRestoreArrayReadAndMemType(op_apply_ctx->X_loc, (const PetscScalar **)&x)); 300 PetscCall(VecRestoreArrayAndMemType(op_apply_ctx->Y_loc, &y)); 301 302 // Local-to-global 303 PetscCall(VecZeroEntries(Y)); 304 PetscCall(VecScatterBegin(op_apply_ctx->g_to_g_D, X, Y, INSERT_VALUES, SCATTER_FORWARD)); 305 PetscCall(VecScatterEnd(op_apply_ctx->g_to_g_D, X, Y, INSERT_VALUES, SCATTER_FORWARD)); 306 PetscCall(VecScatterBegin(op_apply_ctx->l_to_g_0, op_apply_ctx->Y_loc, Y, ADD_VALUES, SCATTER_FORWARD)); 307 PetscCall(VecScatterEnd(op_apply_ctx->l_to_g_0, op_apply_ctx->Y_loc, Y, ADD_VALUES, SCATTER_FORWARD)); 308 309 PetscFunctionReturn(0); 310 } 311 312 // This function calculates the error in the final solution 313 static PetscErrorCode ComputeErrorMax(OperatorApplyContext op_apply_ctx, CeedOperator op_error, Vec X, CeedVector target, PetscReal *max_error) { 314 PetscScalar *x; 315 PetscMemType mem_type; 316 CeedVector collocated_error; 317 CeedSize length; 318 319 PetscFunctionBeginUser; 320 321 CeedVectorGetLength(target, &length); 322 CeedVectorCreate(op_apply_ctx->ceed, length, &collocated_error); 323 324 // Global-to-local 325 PetscCall(VecScatterBegin(op_apply_ctx->l_to_g, X, op_apply_ctx->X_loc, INSERT_VALUES, SCATTER_REVERSE)); 326 PetscCall(VecScatterEnd(op_apply_ctx->l_to_g, X, op_apply_ctx->X_loc, INSERT_VALUES, SCATTER_REVERSE)); 327 328 // Setup libCEED vector 329 PetscCall(VecGetArrayReadAndMemType(op_apply_ctx->X_loc, (const PetscScalar **)&x, &mem_type)); 330 CeedVectorSetArray(op_apply_ctx->x_ceed, MemTypeP2C(mem_type), CEED_USE_POINTER, x); 331 332 // Apply libCEED operator 333 CeedOperatorApply(op_error, op_apply_ctx->x_ceed, collocated_error, CEED_REQUEST_IMMEDIATE); 334 335 // Restore PETSc vector 336 CeedVectorTakeArray(op_apply_ctx->x_ceed, MemTypeP2C(mem_type), NULL); 337 PetscCall(VecRestoreArrayReadAndMemType(op_apply_ctx->X_loc, (const PetscScalar **)&x)); 338 339 // Reduce max error 340 *max_error = 0; 341 const CeedScalar *e; 342 CeedVectorGetArrayRead(collocated_error, CEED_MEM_HOST, &e); 343 for (CeedInt i = 0; i < length; i++) { 344 *max_error = PetscMax(*max_error, PetscAbsScalar(e[i])); 345 } 346 CeedVectorRestoreArrayRead(collocated_error, &e); 347 PetscCall(MPI_Allreduce(MPI_IN_PLACE, max_error, 1, MPIU_REAL, MPIU_MAX, op_apply_ctx->comm)); 348 349 // Cleanup 350 CeedVectorDestroy(&collocated_error); 351 352 PetscFunctionReturn(0); 353 } 354 355 int main(int argc, char **argv) { 356 MPI_Comm comm; 357 char ceed_resource[PETSC_MAX_PATH_LEN] = "/cpu/self"; 358 double my_rt_start, my_rt, rt_min, rt_max; 359 PetscInt degree, q_extra, local_nodes, local_elem, mesh_elem[3], m_nodes[3], p[3], i_rank[3], l_nodes[3], l_size, num_comp_u = 1, 360 ksp_max_it_clip[2]; 361 PetscScalar *r; 362 PetscBool test_mode, benchmark_mode, write_solution; 363 PetscMPIInt size, rank; 364 PetscLogStage solve_stage; 365 Vec X, X_loc, rhs, rhs_loc; 366 Mat mat; 367 KSP ksp; 368 VecScatter l_to_g, l_to_g_0, g_to_g_D; 369 PetscMemType mem_type; 370 OperatorApplyContext op_apply_ctx; 371 Ceed ceed; 372 CeedBasis basis_x, basis_u; 373 CeedElemRestriction elem_restr_x, elem_restr_u, elem_restr_u_i, elem_restr_qd_i; 374 CeedQFunction qf_setup_geo, qf_setup_rhs, qf_apply, qf_error; 375 CeedOperator op_setup_geo, op_setup_rhs, op_apply, op_error; 376 CeedVector x_coord, q_data, rhs_ceed, target; 377 CeedInt P, Q; 378 const CeedInt dim = 3, num_comp_x = 3; 379 BPType bp_choice; 380 381 PetscCall(PetscInitialize(&argc, &argv, NULL, help)); 382 comm = PETSC_COMM_WORLD; 383 384 // Read command line options 385 PetscOptionsBegin(comm, NULL, "CEED BPs in PETSc", NULL); 386 bp_choice = CEED_BP1; 387 PetscCall(PetscOptionsEnum("-problem", "CEED benchmark problem to solve", NULL, bp_types, (PetscEnum)bp_choice, (PetscEnum *)&bp_choice, NULL)); 388 num_comp_u = bp_options[bp_choice].num_comp_u; 389 test_mode = PETSC_FALSE; 390 PetscCall(PetscOptionsBool("-test", "Testing mode (do not print unless error is large)", NULL, test_mode, &test_mode, NULL)); 391 benchmark_mode = PETSC_FALSE; 392 PetscCall(PetscOptionsBool("-benchmark", "Benchmarking mode (prints benchmark statistics)", NULL, benchmark_mode, &benchmark_mode, NULL)); 393 write_solution = PETSC_FALSE; 394 PetscCall(PetscOptionsBool("-write_solution", "Write solution for visualization", NULL, write_solution, &write_solution, NULL)); 395 degree = test_mode ? 3 : 1; 396 PetscCall(PetscOptionsInt("-degree", "Polynomial degree of tensor product basis", NULL, degree, °ree, NULL)); 397 q_extra = bp_options[bp_choice].q_extra; 398 PetscCall(PetscOptionsInt("-q_extra", "Number of extra quadrature points", NULL, q_extra, &q_extra, NULL)); 399 PetscCall(PetscOptionsString("-ceed", "CEED resource specifier", NULL, ceed_resource, ceed_resource, sizeof(ceed_resource), NULL)); 400 local_nodes = 1000; 401 PetscCall(PetscOptionsInt("-local", "Target number of locally owned nodes per process", NULL, local_nodes, &local_nodes, NULL)); 402 PetscInt two = 2; 403 ksp_max_it_clip[0] = 5; 404 ksp_max_it_clip[1] = 20; 405 PetscCall( 406 PetscOptionsIntArray("-ksp_max_it_clip", "Min and max number of iterations to use during benchmarking", NULL, ksp_max_it_clip, &two, NULL)); 407 PetscOptionsEnd(); 408 P = degree + 1; 409 Q = P + q_extra; 410 411 // Set up libCEED 412 CeedInit(ceed_resource, &ceed); 413 CeedMemType mem_type_backend; 414 CeedGetPreferredMemType(ceed, &mem_type_backend); 415 416 VecType default_vec_type = NULL, vec_type; 417 switch (mem_type_backend) { 418 case CEED_MEM_HOST: 419 default_vec_type = VECSTANDARD; 420 break; 421 case CEED_MEM_DEVICE: { 422 const char *resolved; 423 CeedGetResource(ceed, &resolved); 424 if (strstr(resolved, "/gpu/cuda")) default_vec_type = VECCUDA; 425 else if (strstr(resolved, "/gpu/hip/occa")) default_vec_type = VECSTANDARD; // https://github.com/CEED/libCEED/issues/678 426 else if (strstr(resolved, "/gpu/hip")) default_vec_type = VECHIP; 427 else default_vec_type = VECSTANDARD; 428 } 429 } 430 431 // Determine size of process grid 432 PetscCall(MPI_Comm_size(comm, &size)); 433 Split3(size, p, false); 434 435 // Find a nicely composite number of elements no less than local_nodes 436 for (local_elem = PetscMax(1, local_nodes / (degree * degree * degree));; local_elem++) { 437 Split3(local_elem, mesh_elem, true); 438 if (Max3(mesh_elem) / Min3(mesh_elem) <= 2) break; 439 } 440 441 // Find my location in the process grid 442 PetscCall(MPI_Comm_rank(comm, &rank)); 443 for (int d = 0, rank_left = rank; d < dim; d++) { 444 const int pstride[3] = {p[1] * p[2], p[2], 1}; 445 i_rank[d] = rank_left / pstride[d]; 446 rank_left -= i_rank[d] * pstride[d]; 447 } 448 449 GlobalNodes(p, i_rank, degree, mesh_elem, m_nodes); 450 451 // Setup global vector 452 PetscCall(VecCreate(comm, &X)); 453 PetscCall(VecSetType(X, default_vec_type)); 454 PetscCall(VecSetSizes(X, m_nodes[0] * m_nodes[1] * m_nodes[2] * num_comp_u, PETSC_DECIDE)); 455 PetscCall(VecSetFromOptions(X)); 456 PetscCall(VecSetUp(X)); 457 458 // Set up libCEED 459 CeedInit(ceed_resource, &ceed); 460 461 // Print summary 462 CeedInt gsize; 463 PetscCall(VecGetSize(X, &gsize)); 464 if (!test_mode) { 465 const char *used_resource; 466 CeedGetResource(ceed, &used_resource); 467 468 PetscCall(VecGetType(X, &vec_type)); 469 470 PetscCall(PetscPrintf(comm, 471 "\n-- CEED Benchmark Problem %" CeedInt_FMT " -- libCEED + PETSc --\n" 472 " PETSc:\n" 473 " PETSc Vec Type : %s\n" 474 " libCEED:\n" 475 " libCEED Backend : %s\n" 476 " libCEED Backend MemType : %s\n" 477 " Mesh:\n" 478 " Solution Order (P) : %" CeedInt_FMT "\n" 479 " Quadrature Order (Q) : %" CeedInt_FMT "\n" 480 " Global nodes : %" PetscInt_FMT "\n" 481 " Process Decomposition : %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT "\n" 482 " Local Elements : %" PetscInt_FMT " = %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT "\n" 483 " Owned nodes : %" PetscInt_FMT " = %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT "\n" 484 " DoF per node : %" PetscInt_FMT "\n", 485 bp_choice + 1, vec_type, used_resource, CeedMemTypes[mem_type_backend], P, Q, gsize / num_comp_u, p[0], p[1], p[2], 486 local_elem, mesh_elem[0], mesh_elem[1], mesh_elem[2], m_nodes[0] * m_nodes[1] * m_nodes[2], m_nodes[0], m_nodes[1], 487 m_nodes[2], num_comp_u)); 488 } 489 490 { 491 l_size = 1; 492 for (int d = 0; d < dim; d++) { 493 l_nodes[d] = mesh_elem[d] * degree + 1; 494 l_size *= l_nodes[d]; 495 } 496 PetscCall(VecCreate(PETSC_COMM_SELF, &X_loc)); 497 PetscCall(VecSetType(X_loc, default_vec_type)); 498 PetscCall(VecSetSizes(X_loc, l_size * num_comp_u, PETSC_DECIDE)); 499 PetscCall(VecSetFromOptions(X_loc)); 500 PetscCall(VecSetUp(X_loc)); 501 502 // Create local-to-global scatter 503 PetscInt *l_to_g_ind, *l_to_g_ind_0, *loc_ind, l_0_count; 504 IS l_to_g_is, l_to_g_is_0, loc_is; 505 PetscInt g_start[2][2][2], g_m_nodes[2][2][2][dim]; 506 507 for (int i = 0; i < 2; i++) { 508 for (int j = 0; j < 2; j++) { 509 for (int k = 0; k < 2; k++) { 510 PetscInt ijk_rank[3] = {i_rank[0] + i, i_rank[1] + j, i_rank[2] + k}; 511 g_start[i][j][k] = GlobalStart(p, ijk_rank, degree, mesh_elem); 512 GlobalNodes(p, ijk_rank, degree, mesh_elem, g_m_nodes[i][j][k]); 513 } 514 } 515 } 516 517 PetscCall(PetscMalloc1(l_size, &l_to_g_ind)); 518 PetscCall(PetscMalloc1(l_size, &l_to_g_ind_0)); 519 PetscCall(PetscMalloc1(l_size, &loc_ind)); 520 l_0_count = 0; 521 for (PetscInt i = 0, ir, ii; ir = i >= m_nodes[0], ii = i - ir * m_nodes[0], i < l_nodes[0]; i++) { 522 for (PetscInt j = 0, jr, jj; jr = j >= m_nodes[1], jj = j - jr * m_nodes[1], j < l_nodes[1]; j++) { 523 for (PetscInt k = 0, kr, kk; kr = k >= m_nodes[2], kk = k - kr * m_nodes[2], k < l_nodes[2]; k++) { 524 PetscInt here = (i * l_nodes[1] + j) * l_nodes[2] + k; 525 l_to_g_ind[here] = g_start[ir][jr][kr] + (ii * g_m_nodes[ir][jr][kr][1] + jj) * g_m_nodes[ir][jr][kr][2] + kk; 526 if ((i_rank[0] == 0 && i == 0) || (i_rank[1] == 0 && j == 0) || (i_rank[2] == 0 && k == 0) || 527 (i_rank[0] + 1 == p[0] && i + 1 == l_nodes[0]) || (i_rank[1] + 1 == p[1] && j + 1 == l_nodes[1]) || 528 (i_rank[2] + 1 == p[2] && k + 1 == l_nodes[2])) 529 continue; 530 l_to_g_ind_0[l_0_count] = l_to_g_ind[here]; 531 loc_ind[l_0_count++] = here; 532 } 533 } 534 } 535 PetscCall(ISCreateBlock(comm, num_comp_u, l_size, l_to_g_ind, PETSC_OWN_POINTER, &l_to_g_is)); 536 PetscCall(VecScatterCreate(X_loc, NULL, X, l_to_g_is, &l_to_g)); 537 PetscCall(ISCreateBlock(comm, num_comp_u, l_0_count, l_to_g_ind_0, PETSC_OWN_POINTER, &l_to_g_is_0)); 538 PetscCall(ISCreateBlock(comm, num_comp_u, l_0_count, loc_ind, PETSC_OWN_POINTER, &loc_is)); 539 PetscCall(VecScatterCreate(X_loc, loc_is, X, l_to_g_is_0, &l_to_g_0)); 540 { 541 // Create global-to-global scatter for Dirichlet values (everything not in 542 // l_to_g_is_0, which is the range of l_to_g_0) 543 PetscInt x_start, x_end, *ind_D, count_D = 0; 544 IS is_D; 545 const PetscScalar *x; 546 PetscCall(VecZeroEntries(X_loc)); 547 PetscCall(VecSet(X, 1.0)); 548 PetscCall(VecScatterBegin(l_to_g_0, X_loc, X, INSERT_VALUES, SCATTER_FORWARD)); 549 PetscCall(VecScatterEnd(l_to_g_0, X_loc, X, INSERT_VALUES, SCATTER_FORWARD)); 550 PetscCall(VecGetOwnershipRange(X, &x_start, &x_end)); 551 PetscCall(PetscMalloc1(x_end - x_start, &ind_D)); 552 PetscCall(VecGetArrayRead(X, &x)); 553 for (PetscInt i = 0; i < x_end - x_start; i++) { 554 if (x[i] == 1.) ind_D[count_D++] = x_start + i; 555 } 556 PetscCall(VecRestoreArrayRead(X, &x)); 557 PetscCall(ISCreateGeneral(comm, count_D, ind_D, PETSC_COPY_VALUES, &is_D)); 558 PetscCall(PetscFree(ind_D)); 559 PetscCall(VecScatterCreate(X, is_D, X, is_D, &g_to_g_D)); 560 PetscCall(ISDestroy(&is_D)); 561 } 562 PetscCall(ISDestroy(&l_to_g_is)); 563 PetscCall(ISDestroy(&l_to_g_is_0)); 564 PetscCall(ISDestroy(&loc_is)); 565 } 566 567 // CEED bases 568 CeedBasisCreateTensorH1Lagrange(ceed, dim, num_comp_u, P, Q, bp_options[bp_choice].q_mode, &basis_u); 569 CeedBasisCreateTensorH1Lagrange(ceed, dim, num_comp_x, 2, Q, bp_options[bp_choice].q_mode, &basis_x); 570 571 // CEED restrictions 572 CreateRestriction(ceed, mesh_elem, P, num_comp_u, &elem_restr_u); 573 CreateRestriction(ceed, mesh_elem, 2, dim, &elem_restr_x); 574 CeedInt num_elem = mesh_elem[0] * mesh_elem[1] * mesh_elem[2]; 575 CeedElemRestrictionCreateStrided(ceed, num_elem, Q * Q * Q, num_comp_u, num_comp_u * num_elem * Q * Q * Q, CEED_STRIDES_BACKEND, &elem_restr_u_i); 576 CeedElemRestrictionCreateStrided(ceed, num_elem, Q * Q * Q, bp_options[bp_choice].q_data_size, 577 bp_options[bp_choice].q_data_size * num_elem * Q * Q * Q, CEED_STRIDES_BACKEND, &elem_restr_qd_i); 578 { 579 CeedScalar *x_loc; 580 CeedInt shape[3] = {mesh_elem[0] + 1, mesh_elem[1] + 1, mesh_elem[2] + 1}, len = shape[0] * shape[1] * shape[2]; 581 x_loc = malloc(len * num_comp_x * sizeof x_loc[0]); 582 for (CeedInt i = 0; i < shape[0]; i++) { 583 for (CeedInt j = 0; j < shape[1]; j++) { 584 for (CeedInt k = 0; k < shape[2]; k++) { 585 x_loc[dim * ((i * shape[1] + j) * shape[2] + k) + 0] = 1. * (i_rank[0] * mesh_elem[0] + i) / (p[0] * mesh_elem[0]); 586 x_loc[dim * ((i * shape[1] + j) * shape[2] + k) + 1] = 1. * (i_rank[1] * mesh_elem[1] + j) / (p[1] * mesh_elem[1]); 587 x_loc[dim * ((i * shape[1] + j) * shape[2] + k) + 2] = 1. * (i_rank[2] * mesh_elem[2] + k) / (p[2] * mesh_elem[2]); 588 } 589 } 590 } 591 CeedVectorCreate(ceed, len * num_comp_x, &x_coord); 592 CeedVectorSetArray(x_coord, CEED_MEM_HOST, CEED_OWN_POINTER, x_loc); 593 } 594 595 // Create the QFunction that builds the operator quadrature data 596 CeedQFunctionCreateInterior(ceed, 1, bp_options[bp_choice].setup_geo, bp_options[bp_choice].setup_geo_loc, &qf_setup_geo); 597 CeedQFunctionAddInput(qf_setup_geo, "x", num_comp_x, CEED_EVAL_INTERP); 598 CeedQFunctionAddInput(qf_setup_geo, "dx", num_comp_x * dim, CEED_EVAL_GRAD); 599 CeedQFunctionAddInput(qf_setup_geo, "weight", 1, CEED_EVAL_WEIGHT); 600 CeedQFunctionAddOutput(qf_setup_geo, "q_data", bp_options[bp_choice].q_data_size, CEED_EVAL_NONE); 601 602 // Create the QFunction that sets up the RHS and true solution 603 CeedQFunctionCreateInterior(ceed, 1, bp_options[bp_choice].setup_rhs, bp_options[bp_choice].setup_rhs_loc, &qf_setup_rhs); 604 CeedQFunctionAddInput(qf_setup_rhs, "x", num_comp_x, CEED_EVAL_INTERP); 605 CeedQFunctionAddInput(qf_setup_rhs, "q_data", bp_options[bp_choice].q_data_size, CEED_EVAL_NONE); 606 CeedQFunctionAddOutput(qf_setup_rhs, "true_soln", num_comp_u, CEED_EVAL_NONE); 607 CeedQFunctionAddOutput(qf_setup_rhs, "rhs", num_comp_u, CEED_EVAL_INTERP); 608 609 // Set up PDE operator 610 CeedQFunctionCreateInterior(ceed, 1, bp_options[bp_choice].apply, bp_options[bp_choice].apply_loc, &qf_apply); 611 // Add inputs and outputs 612 CeedInt in_scale = bp_options[bp_choice].in_mode == CEED_EVAL_GRAD ? 3 : 1; 613 CeedInt out_scale = bp_options[bp_choice].out_mode == CEED_EVAL_GRAD ? 3 : 1; 614 CeedQFunctionAddInput(qf_apply, "u", num_comp_u * in_scale, bp_options[bp_choice].in_mode); 615 CeedQFunctionAddInput(qf_apply, "q_data", bp_options[bp_choice].q_data_size, CEED_EVAL_NONE); 616 CeedQFunctionAddOutput(qf_apply, "v", num_comp_u * out_scale, bp_options[bp_choice].out_mode); 617 618 // Create the error qfunction 619 CeedQFunctionCreateInterior(ceed, 1, bp_options[bp_choice].error, bp_options[bp_choice].error_loc, &qf_error); 620 CeedQFunctionAddInput(qf_error, "u", num_comp_u, CEED_EVAL_INTERP); 621 CeedQFunctionAddInput(qf_error, "true_soln", num_comp_u, CEED_EVAL_NONE); 622 CeedQFunctionAddInput(qf_error, "qdata", bp_options[bp_choice].q_data_size, CEED_EVAL_NONE); 623 CeedQFunctionAddOutput(qf_error, "error", num_comp_u, CEED_EVAL_NONE); 624 625 // Create the persistent vectors that will be needed in setup 626 CeedInt num_qpts; 627 CeedBasisGetNumQuadraturePoints(basis_u, &num_qpts); 628 CeedVectorCreate(ceed, bp_options[bp_choice].q_data_size * num_elem * num_qpts, &q_data); 629 CeedVectorCreate(ceed, num_elem * num_qpts * num_comp_u, &target); 630 CeedVectorCreate(ceed, l_size * num_comp_u, &rhs_ceed); 631 632 // Create the operator that builds the quadrature data for the ceed operator 633 CeedOperatorCreate(ceed, qf_setup_geo, CEED_QFUNCTION_NONE, CEED_QFUNCTION_NONE, &op_setup_geo); 634 CeedOperatorSetField(op_setup_geo, "x", elem_restr_x, basis_x, CEED_VECTOR_ACTIVE); 635 CeedOperatorSetField(op_setup_geo, "dx", elem_restr_x, basis_x, CEED_VECTOR_ACTIVE); 636 CeedOperatorSetField(op_setup_geo, "weight", CEED_ELEMRESTRICTION_NONE, basis_x, CEED_VECTOR_NONE); 637 CeedOperatorSetField(op_setup_geo, "q_data", elem_restr_qd_i, CEED_BASIS_COLLOCATED, CEED_VECTOR_ACTIVE); 638 639 // Create the operator that builds the RHS and true solution 640 CeedOperatorCreate(ceed, qf_setup_rhs, CEED_QFUNCTION_NONE, CEED_QFUNCTION_NONE, &op_setup_rhs); 641 CeedOperatorSetField(op_setup_rhs, "x", elem_restr_x, basis_x, CEED_VECTOR_ACTIVE); 642 CeedOperatorSetField(op_setup_rhs, "q_data", elem_restr_qd_i, CEED_BASIS_COLLOCATED, q_data); 643 CeedOperatorSetField(op_setup_rhs, "true_soln", elem_restr_u_i, CEED_BASIS_COLLOCATED, target); 644 CeedOperatorSetField(op_setup_rhs, "rhs", elem_restr_u, basis_u, CEED_VECTOR_ACTIVE); 645 646 // Create the mass or diff operator 647 CeedOperatorCreate(ceed, qf_apply, CEED_QFUNCTION_NONE, CEED_QFUNCTION_NONE, &op_apply); 648 CeedOperatorSetField(op_apply, "u", elem_restr_u, basis_u, CEED_VECTOR_ACTIVE); 649 CeedOperatorSetField(op_apply, "q_data", elem_restr_qd_i, CEED_BASIS_COLLOCATED, q_data); 650 CeedOperatorSetField(op_apply, "v", elem_restr_u, basis_u, CEED_VECTOR_ACTIVE); 651 652 // Create the error operator 653 CeedOperatorCreate(ceed, qf_error, CEED_QFUNCTION_NONE, CEED_QFUNCTION_NONE, &op_error); 654 CeedOperatorSetField(op_error, "u", elem_restr_u, basis_u, CEED_VECTOR_ACTIVE); 655 CeedOperatorSetField(op_error, "true_soln", elem_restr_u_i, CEED_BASIS_COLLOCATED, target); 656 CeedOperatorSetField(op_error, "qdata", elem_restr_qd_i, CEED_BASIS_COLLOCATED, q_data); 657 CeedOperatorSetField(op_error, "error", elem_restr_u_i, CEED_BASIS_COLLOCATED, CEED_VECTOR_ACTIVE); 658 659 // Set up Mat 660 PetscCall(PetscMalloc1(1, &op_apply_ctx)); 661 op_apply_ctx->comm = comm; 662 op_apply_ctx->l_to_g = l_to_g; 663 if (bp_choice != CEED_BP1 && bp_choice != CEED_BP2) { 664 op_apply_ctx->l_to_g_0 = l_to_g_0; 665 op_apply_ctx->g_to_g_D = g_to_g_D; 666 } 667 op_apply_ctx->X_loc = X_loc; 668 PetscCall(VecDuplicate(X_loc, &op_apply_ctx->Y_loc)); 669 CeedVectorCreate(ceed, l_size * num_comp_u, &op_apply_ctx->x_ceed); 670 CeedVectorCreate(ceed, l_size * num_comp_u, &op_apply_ctx->y_ceed); 671 op_apply_ctx->op = op_apply; 672 op_apply_ctx->q_data = q_data; 673 op_apply_ctx->ceed = ceed; 674 675 PetscCall(MatCreateShell(comm, m_nodes[0] * m_nodes[1] * m_nodes[2] * num_comp_u, m_nodes[0] * m_nodes[1] * m_nodes[2] * num_comp_u, PETSC_DECIDE, 676 PETSC_DECIDE, op_apply_ctx, &mat)); 677 if (bp_choice == CEED_BP1 || bp_choice == CEED_BP2) { 678 PetscCall(MatShellSetOperation(mat, MATOP_MULT, (void (*)(void))MatMult_Mass)); 679 } else { 680 PetscCall(MatShellSetOperation(mat, MATOP_MULT, (void (*)(void))MatMult_Diff)); 681 } 682 PetscCall(VecGetType(X, &vec_type)); 683 PetscCall(MatShellSetVecType(mat, vec_type)); 684 685 // Get RHS vector 686 PetscCall(VecDuplicate(X, &rhs)); 687 PetscCall(VecDuplicate(X_loc, &rhs_loc)); 688 PetscCall(VecZeroEntries(rhs_loc)); 689 PetscCall(VecGetArrayAndMemType(rhs_loc, &r, &mem_type)); 690 CeedVectorSetArray(rhs_ceed, MemTypeP2C(mem_type), CEED_USE_POINTER, r); 691 692 // Setup q_data, rhs, and target 693 CeedOperatorApply(op_setup_geo, x_coord, q_data, CEED_REQUEST_IMMEDIATE); 694 CeedOperatorApply(op_setup_rhs, x_coord, rhs_ceed, CEED_REQUEST_IMMEDIATE); 695 CeedVectorDestroy(&x_coord); 696 697 // Gather RHS 698 PetscCall(CeedVectorTakeArray(rhs_ceed, MemTypeP2C(mem_type), NULL)); 699 PetscCall(VecRestoreArrayAndMemType(rhs_loc, &r)); 700 PetscCall(VecZeroEntries(rhs)); 701 PetscCall(VecScatterBegin(l_to_g, rhs_loc, rhs, ADD_VALUES, SCATTER_FORWARD)); 702 PetscCall(VecScatterEnd(l_to_g, rhs_loc, rhs, ADD_VALUES, SCATTER_FORWARD)); 703 CeedVectorDestroy(&rhs_ceed); 704 705 PetscCall(KSPCreate(comm, &ksp)); 706 { 707 PC pc; 708 PetscCall(KSPGetPC(ksp, &pc)); 709 if (bp_choice == CEED_BP1 || bp_choice == CEED_BP2) { 710 PetscCall(PCSetType(pc, PCJACOBI)); 711 PetscCall(PCJacobiSetType(pc, PC_JACOBI_ROWSUM)); 712 } else { 713 PetscCall(PCSetType(pc, PCNONE)); 714 } 715 PetscCall(KSPSetType(ksp, KSPCG)); 716 PetscCall(KSPSetNormType(ksp, KSP_NORM_NATURAL)); 717 PetscCall(KSPSetTolerances(ksp, 1e-10, PETSC_DEFAULT, PETSC_DEFAULT, PETSC_DEFAULT)); 718 } 719 PetscCall(KSPSetOperators(ksp, mat, mat)); 720 // First run's performance log is not considered for benchmarking purposes 721 PetscCall(KSPSetTolerances(ksp, 1e-10, PETSC_DEFAULT, PETSC_DEFAULT, 1)); 722 my_rt_start = MPI_Wtime(); 723 PetscCall(KSPSolve(ksp, rhs, X)); 724 my_rt = MPI_Wtime() - my_rt_start; 725 PetscCall(MPI_Allreduce(MPI_IN_PLACE, &my_rt, 1, MPI_DOUBLE, MPI_MIN, comm)); 726 // Set maxits based on first iteration timing 727 if (my_rt > 0.02) { 728 PetscCall(KSPSetTolerances(ksp, 1e-10, PETSC_DEFAULT, PETSC_DEFAULT, ksp_max_it_clip[0])); 729 } else { 730 PetscCall(KSPSetTolerances(ksp, 1e-10, PETSC_DEFAULT, PETSC_DEFAULT, ksp_max_it_clip[1])); 731 } 732 PetscCall(KSPSetFromOptions(ksp)); 733 734 // Timed solve 735 PetscCall(VecZeroEntries(X)); 736 PetscCall(PetscBarrier((PetscObject)ksp)); 737 738 // -- Performance logging 739 PetscCall(PetscLogStageRegister("Solve Stage", &solve_stage)); 740 PetscCall(PetscLogStagePush(solve_stage)); 741 742 // -- Solve 743 my_rt_start = MPI_Wtime(); 744 PetscCall(KSPSolve(ksp, rhs, X)); 745 my_rt = MPI_Wtime() - my_rt_start; 746 747 // -- Performance logging 748 PetscCall(PetscLogStagePop()); 749 750 // Output results 751 { 752 KSPType ksp_type; 753 KSPConvergedReason reason; 754 PetscReal rnorm; 755 PetscInt its; 756 PetscCall(KSPGetType(ksp, &ksp_type)); 757 PetscCall(KSPGetConvergedReason(ksp, &reason)); 758 PetscCall(KSPGetIterationNumber(ksp, &its)); 759 PetscCall(KSPGetResidualNorm(ksp, &rnorm)); 760 if (!test_mode || reason < 0 || rnorm > 1e-8) { 761 PetscCall(PetscPrintf(comm, 762 " KSP:\n" 763 " KSP Type : %s\n" 764 " KSP Convergence : %s\n" 765 " Total KSP Iterations : %" PetscInt_FMT "\n" 766 " Final rnorm : %e\n", 767 ksp_type, KSPConvergedReasons[reason], its, (double)rnorm)); 768 } 769 if (!test_mode) { 770 PetscCall(PetscPrintf(comm, " Performance:\n")); 771 } 772 { 773 PetscReal max_error; 774 PetscCall(ComputeErrorMax(op_apply_ctx, op_error, X, target, &max_error)); 775 PetscReal tol = 5e-2; 776 if (!test_mode || max_error > tol) { 777 PetscCall(MPI_Allreduce(&my_rt, &rt_min, 1, MPI_DOUBLE, MPI_MIN, comm)); 778 PetscCall(MPI_Allreduce(&my_rt, &rt_max, 1, MPI_DOUBLE, MPI_MAX, comm)); 779 PetscCall(PetscPrintf(comm, 780 " Pointwise Error (max) : %e\n" 781 " CG Solve Time : %g (%g) sec\n", 782 (double)max_error, rt_max, rt_min)); 783 } 784 } 785 if (!test_mode) { 786 PetscCall( 787 PetscPrintf(comm, " DoFs/Sec in CG : %g (%g) million\n", 1e-6 * gsize * its / rt_max, 1e-6 * gsize * its / rt_min)); 788 } 789 } 790 791 if (write_solution) { 792 PetscViewer vtk_viewer_soln; 793 794 PetscCall(PetscViewerCreate(comm, &vtk_viewer_soln)); 795 PetscCall(PetscViewerSetType(vtk_viewer_soln, PETSCVIEWERVTK)); 796 PetscCall(PetscViewerFileSetName(vtk_viewer_soln, "solution.vtu")); 797 PetscCall(VecView(X, vtk_viewer_soln)); 798 PetscCall(PetscViewerDestroy(&vtk_viewer_soln)); 799 } 800 801 PetscCall(VecDestroy(&rhs)); 802 PetscCall(VecDestroy(&rhs_loc)); 803 PetscCall(VecDestroy(&X)); 804 PetscCall(VecDestroy(&op_apply_ctx->X_loc)); 805 PetscCall(VecDestroy(&op_apply_ctx->Y_loc)); 806 PetscCall(VecScatterDestroy(&l_to_g)); 807 PetscCall(VecScatterDestroy(&l_to_g_0)); 808 PetscCall(VecScatterDestroy(&g_to_g_D)); 809 PetscCall(MatDestroy(&mat)); 810 PetscCall(KSPDestroy(&ksp)); 811 812 CeedVectorDestroy(&op_apply_ctx->x_ceed); 813 CeedVectorDestroy(&op_apply_ctx->y_ceed); 814 CeedVectorDestroy(&op_apply_ctx->q_data); 815 CeedVectorDestroy(&target); 816 CeedOperatorDestroy(&op_setup_geo); 817 CeedOperatorDestroy(&op_setup_rhs); 818 CeedOperatorDestroy(&op_apply); 819 CeedOperatorDestroy(&op_error); 820 CeedElemRestrictionDestroy(&elem_restr_u); 821 CeedElemRestrictionDestroy(&elem_restr_x); 822 CeedElemRestrictionDestroy(&elem_restr_u_i); 823 CeedElemRestrictionDestroy(&elem_restr_qd_i); 824 CeedQFunctionDestroy(&qf_setup_geo); 825 CeedQFunctionDestroy(&qf_setup_rhs); 826 CeedQFunctionDestroy(&qf_apply); 827 CeedQFunctionDestroy(&qf_error); 828 CeedBasisDestroy(&basis_u); 829 CeedBasisDestroy(&basis_x); 830 CeedDestroy(&ceed); 831 PetscCall(PetscFree(op_apply_ctx)); 832 return PetscFinalize(); 833 } 834