1*9ba83ac0SJeremy L Thompson // Copyright (c) 2017-2026, Lawrence Livermore National Security, LLC and other CEED contributors.
278f7fce3SZach Atkins // All Rights Reserved. See the top-level LICENSE and NOTICE files for details.
378f7fce3SZach Atkins //
478f7fce3SZach Atkins // SPDX-License-Identifier: BSD-2-Clause
578f7fce3SZach Atkins //
678f7fce3SZach Atkins // This file is part of CEED: http://github.com/ceed
778f7fce3SZach Atkins
878f7fce3SZach Atkins // libCEED + PETSc Example: CEED BPs
978f7fce3SZach Atkins //
1078f7fce3SZach Atkins // This example demonstrates a simple usage of libCEED with PETSc to solve the CEED BP benchmark problems, see http://ceed.exascaleproject.org/bps, on
1178f7fce3SZach Atkins // a particle swarm.
1278f7fce3SZach Atkins //
1378f7fce3SZach Atkins // The code uses higher level communication protocols in DMPlex and DMSwarm.
1478f7fce3SZach Atkins //
1578f7fce3SZach Atkins // Build with:
1678f7fce3SZach Atkins //
1778f7fce3SZach Atkins // make bpsswarm [PETSC_DIR=</path/to/petsc>] [CEED_DIR=</path/to/libceed>]
1878f7fce3SZach Atkins //
1978f7fce3SZach Atkins // Sample runs:
2078f7fce3SZach Atkins //
2178f7fce3SZach Atkins // bpssphere -problem bp1 -degree 3
2278f7fce3SZach Atkins // bpssphere -problem bp2 -degree 3
2378f7fce3SZach Atkins // bpssphere -problem bp3 -degree 3
2478f7fce3SZach Atkins //
2507a9c11fSZach Atkins //TESTARGS(name="BP2") -ceed {ceed_resource} -test -problem bp2 -dm_plex_dim 3 -dm_plex_box_faces 3,3,3 -dm_plex_simplex 0 -swarm uniform -points_per_cell 64
2607a9c11fSZach Atkins //TESTARGS(name="BP3") -ceed {ceed_resource} -test -problem bp3 -dm_plex_dim 3 -dm_plex_box_faces 4,4,4 -dm_plex_simplex 0 -swarm uniform -points_per_cell 64 -tolerance 3e-2
2707a9c11fSZach Atkins //TESTARGS(name="BP5") -ceed {ceed_resource} -test -problem bp5 -dm_plex_dim 3 -dm_plex_box_faces 3,3,3 -dm_plex_simplex 0 -swarm uniform -points_per_cell 64
2878f7fce3SZach Atkins
2978f7fce3SZach Atkins /// @file
3078f7fce3SZach Atkins /// CEED BPs example using PETSc with DMPlex
3178f7fce3SZach Atkins /// See bpsraw.c for a "raw" implementation using a structured grid and bps.c for an implementation using an unstructured grid.
3278f7fce3SZach Atkins static const char help[] = "Solve CEED BPs on a particle swarm using DMPlex and DMSwarm in PETSc\n";
3378f7fce3SZach Atkins const char DMSwarmPICField_u[] = "u";
3478f7fce3SZach Atkins
3578f7fce3SZach Atkins #include "bps.h"
3678f7fce3SZach Atkins
3778f7fce3SZach Atkins #include <ceed.h>
3878f7fce3SZach Atkins #include <petscdmplex.h>
3978f7fce3SZach Atkins #include <petscksp.h>
4078f7fce3SZach Atkins #include <stdbool.h>
4178f7fce3SZach Atkins #include <string.h>
4278f7fce3SZach Atkins
4378f7fce3SZach Atkins #include "include/bpsproblemdata.h"
4478f7fce3SZach Atkins #include "include/libceedsetup.h"
4578f7fce3SZach Atkins #include "include/matops.h"
4678f7fce3SZach Atkins #include "include/petscutils.h"
4778f7fce3SZach Atkins #include "include/petscversion.h"
4878f7fce3SZach Atkins #include "include/swarmutils.h"
4978f7fce3SZach Atkins
main(int argc,char ** argv)5078f7fce3SZach Atkins int main(int argc, char **argv) {
5178f7fce3SZach Atkins MPI_Comm comm;
5278f7fce3SZach Atkins char ceed_resource[PETSC_MAX_PATH_LEN] = "/cpu/self", filename[PETSC_MAX_PATH_LEN];
5378f7fce3SZach Atkins double my_rt_start, my_rt, rt_min, rt_max;
5407a9c11fSZach Atkins PetscScalar tolerance;
554d00b080SJeremy L Thompson PetscMPIInt comm_size;
564d00b080SJeremy L Thompson PetscInt degree, q_extra, l_size, g_size, dim = 3, num_comp_u = 1, xl_size, num_points = 1728, num_points_per_cell = 64;
5778f7fce3SZach Atkins PetscBool test_mode, benchmark_mode, read_mesh, write_solution, write_true_solution_swarm;
5878f7fce3SZach Atkins PetscLogStage solve_stage;
5978f7fce3SZach Atkins Vec X, X_loc, rhs;
6078f7fce3SZach Atkins Mat mat_O;
6178f7fce3SZach Atkins KSP ksp;
6278f7fce3SZach Atkins DM dm_mesh, dm_swarm;
6378f7fce3SZach Atkins OperatorApplyContext op_apply_ctx, op_error_ctx;
6478f7fce3SZach Atkins Ceed ceed;
6578f7fce3SZach Atkins CeedData ceed_data;
6678f7fce3SZach Atkins CeedOperator op_error;
6778f7fce3SZach Atkins BPType bp_choice;
688c03e814SJeremy L Thompson VecType vec_type = VECSTANDARD;
6978f7fce3SZach Atkins PointSwarmType point_swarm_type = SWARM_GAUSS;
7078f7fce3SZach Atkins PetscMPIInt ranks_per_node;
7178f7fce3SZach Atkins char hostname[PETSC_MAX_PATH_LEN];
7278f7fce3SZach Atkins
7378f7fce3SZach Atkins PetscCall(PetscInitialize(&argc, &argv, NULL, help));
7478f7fce3SZach Atkins comm = PETSC_COMM_WORLD;
7578f7fce3SZach Atkins PetscCall(MPI_Comm_size(comm, &comm_size));
7678f7fce3SZach Atkins #if defined(PETSC_HAVE_MPI_PROCESS_SHARED_MEMORY)
7778f7fce3SZach Atkins {
7878f7fce3SZach Atkins MPI_Comm splitcomm;
7978f7fce3SZach Atkins PetscCall(MPI_Comm_split_type(comm, MPI_COMM_TYPE_SHARED, 0, MPI_INFO_NULL, &splitcomm));
8078f7fce3SZach Atkins PetscCall(MPI_Comm_size(splitcomm, &ranks_per_node));
8178f7fce3SZach Atkins PetscCall(MPI_Comm_free(&splitcomm));
8278f7fce3SZach Atkins }
8378f7fce3SZach Atkins #else
8478f7fce3SZach Atkins ranks_per_node = -1; // Unknown
8578f7fce3SZach Atkins #endif
8678f7fce3SZach Atkins
8778f7fce3SZach Atkins // Read command line options
8878f7fce3SZach Atkins PetscOptionsBegin(comm, NULL, "CEED BPs in PETSc", NULL);
8978f7fce3SZach Atkins bp_choice = CEED_BP1;
9078f7fce3SZach Atkins PetscCall(PetscOptionsEnum("-problem", "CEED benchmark problem to solve", NULL, bp_types, (PetscEnum)bp_choice, (PetscEnum *)&bp_choice, NULL));
9178f7fce3SZach Atkins num_comp_u = bp_options[bp_choice].num_comp_u;
9278f7fce3SZach Atkins test_mode = PETSC_FALSE;
9378f7fce3SZach Atkins PetscCall(PetscOptionsBool("-test", "Testing mode (do not print unless error is large)", NULL, test_mode, &test_mode, NULL));
9478f7fce3SZach Atkins benchmark_mode = PETSC_FALSE;
9578f7fce3SZach Atkins PetscCall(PetscOptionsBool("-benchmark", "Benchmarking mode (prints benchmark statistics)", NULL, benchmark_mode, &benchmark_mode, NULL));
9678f7fce3SZach Atkins write_solution = PETSC_FALSE;
9778f7fce3SZach Atkins PetscCall(PetscOptionsBool("-write_solution", "Write solution for visualization", NULL, write_solution, &write_solution, NULL));
9878f7fce3SZach Atkins write_true_solution_swarm = PETSC_FALSE;
9978f7fce3SZach Atkins PetscCall(PetscOptionsBool("-write_true_solution_swarm", "Write true solution at swarm points for visualization", NULL, write_true_solution_swarm,
10078f7fce3SZach Atkins &write_true_solution_swarm, NULL));
10107a9c11fSZach Atkins degree = 2;
10278f7fce3SZach Atkins PetscCall(PetscOptionsInt("-degree", "Polynomial degree of tensor product basis", NULL, degree, °ree, NULL));
1033f49564bSZach Atkins q_extra = bp_options[bp_choice].q_extra;
10478f7fce3SZach Atkins PetscCall(PetscOptionsInt("-q_extra", "Number of extra quadrature points", NULL, q_extra, &q_extra, NULL));
10578f7fce3SZach Atkins PetscCall(PetscOptionsString("-ceed", "CEED resource specifier", NULL, ceed_resource, ceed_resource, sizeof(ceed_resource), NULL));
10678f7fce3SZach Atkins PetscCall(PetscGetHostName(hostname, sizeof hostname));
10778f7fce3SZach Atkins PetscCall(PetscOptionsString("-hostname", "Hostname for output", NULL, hostname, hostname, sizeof(hostname), NULL));
10878f7fce3SZach Atkins read_mesh = PETSC_FALSE;
10978f7fce3SZach Atkins PetscCall(PetscOptionsString("-mesh", "Read mesh from file", NULL, filename, filename, sizeof(filename), &read_mesh));
11007a9c11fSZach Atkins tolerance = 1e-2;
11107a9c11fSZach Atkins PetscCall(PetscOptionsScalar("-tolerance", "Tolerance for L2 error", NULL, tolerance, &tolerance, NULL));
11278f7fce3SZach Atkins PetscCall(PetscOptionsEnum("-swarm", "Swarm points distribution", NULL, point_swarm_types, (PetscEnum)point_swarm_type,
11378f7fce3SZach Atkins (PetscEnum *)&point_swarm_type, NULL));
11478f7fce3SZach Atkins {
11578f7fce3SZach Atkins PetscBool user_set_num_points_per_cell = PETSC_FALSE;
11678f7fce3SZach Atkins PetscInt num_cells_total = 1, tmp = dim;
11778f7fce3SZach Atkins PetscInt num_cells[] = {1, 1, 1};
11878f7fce3SZach Atkins
11978f7fce3SZach Atkins PetscCall(PetscOptionsInt("-points_per_cell", "Total number of swarm points in each cell", NULL, num_points_per_cell, &num_points_per_cell,
12078f7fce3SZach Atkins &user_set_num_points_per_cell));
12178f7fce3SZach Atkins PetscCall(PetscOptionsInt("-dm_plex_dim", "Background mesh dimension", NULL, dim, &dim, NULL));
12278f7fce3SZach Atkins PetscCall(PetscOptionsIntArray("-dm_plex_box_faces", "Number of cells", NULL, num_cells, &tmp, NULL));
12378f7fce3SZach Atkins
12478f7fce3SZach Atkins PetscCheck(tmp == dim, comm, PETSC_ERR_USER, "Number of values for -dm_plex_box_faces must match dimension");
12578f7fce3SZach Atkins
12678f7fce3SZach Atkins num_cells_total = num_cells[0] * num_cells[1] * num_cells[2];
12778f7fce3SZach Atkins PetscCheck(!user_set_num_points_per_cell || point_swarm_type != SWARM_SINUSOIDAL, comm, PETSC_ERR_USER,
12878f7fce3SZach Atkins "Cannot specify points per cell with sinusoidal points locations");
12978f7fce3SZach Atkins if (!user_set_num_points_per_cell) {
13078f7fce3SZach Atkins PetscCall(PetscOptionsInt("-points", "Total number of swarm points", NULL, num_points, &num_points, NULL));
13178f7fce3SZach Atkins num_points_per_cell = PetscCeilInt(num_points, num_cells_total);
13278f7fce3SZach Atkins }
13378f7fce3SZach Atkins if (point_swarm_type != SWARM_SINUSOIDAL) {
13478f7fce3SZach Atkins PetscInt num_points_per_cell_1d = round(cbrt(num_points_per_cell * 1.0));
13578f7fce3SZach Atkins
13678f7fce3SZach Atkins num_points_per_cell = 1;
13778f7fce3SZach Atkins for (PetscInt i = 0; i < dim; i++) num_points_per_cell *= num_points_per_cell_1d;
13878f7fce3SZach Atkins }
13978f7fce3SZach Atkins num_points = num_points_per_cell * num_cells_total;
14078f7fce3SZach Atkins }
14178f7fce3SZach Atkins {
14278f7fce3SZach Atkins PetscBool flg;
14378f7fce3SZach Atkins PetscInt p = ranks_per_node;
14478f7fce3SZach Atkins PetscCall(PetscOptionsInt("-p", "Number of MPI ranks per node", NULL, p, &p, &flg));
14578f7fce3SZach Atkins if (flg) ranks_per_node = p;
14678f7fce3SZach Atkins }
14778f7fce3SZach Atkins PetscOptionsEnd();
14878f7fce3SZach Atkins
1493719258aSZach Atkins // Set up libCEED
1503719258aSZach Atkins CeedInit(ceed_resource, &ceed);
1513719258aSZach Atkins CeedMemType mem_type_backend;
1523719258aSZach Atkins CeedGetPreferredMemType(ceed, &mem_type_backend);
1533719258aSZach Atkins
1543719258aSZach Atkins // Set background mesh vec_type
1553719258aSZach Atkins switch (mem_type_backend) {
1563719258aSZach Atkins case CEED_MEM_HOST:
1573719258aSZach Atkins vec_type = VECSTANDARD;
1583719258aSZach Atkins break;
1593719258aSZach Atkins case CEED_MEM_DEVICE: {
1603719258aSZach Atkins const char *resolved;
1613719258aSZach Atkins
1623719258aSZach Atkins CeedGetResource(ceed, &resolved);
1633719258aSZach Atkins if (strstr(resolved, "/gpu/cuda")) vec_type = VECCUDA;
1643719258aSZach Atkins else if (strstr(resolved, "/gpu/hip")) vec_type = VECHIP;
1653719258aSZach Atkins else vec_type = VECSTANDARD;
1663719258aSZach Atkins }
1673719258aSZach Atkins }
1683719258aSZach Atkins
16978f7fce3SZach Atkins // Setup DM
17078f7fce3SZach Atkins if (read_mesh) {
17178f7fce3SZach Atkins PetscCall(DMPlexCreateFromFile(comm, filename, NULL, PETSC_TRUE, &dm_mesh));
17278f7fce3SZach Atkins } else {
17378f7fce3SZach Atkins PetscCall(DMCreate(comm, &dm_mesh));
17478f7fce3SZach Atkins PetscCall(DMSetType(dm_mesh, DMPLEX));
17578f7fce3SZach Atkins PetscCall(DMSetFromOptions(dm_mesh));
17678f7fce3SZach Atkins
17778f7fce3SZach Atkins // -- Check for tensor product mesh
17878f7fce3SZach Atkins {
17978f7fce3SZach Atkins PetscBool is_simplex;
18078f7fce3SZach Atkins
18178f7fce3SZach Atkins PetscCall(DMPlexIsSimplex(dm_mesh, &is_simplex));
18278f7fce3SZach Atkins PetscCheck(!is_simplex, comm, PETSC_ERR_USER, "Only tensor-product background meshes supported");
18378f7fce3SZach Atkins }
18478f7fce3SZach Atkins }
1853719258aSZach Atkins PetscCall(DMSetVecType(dm_mesh, vec_type));
1863719258aSZach Atkins PetscCall(DMSetFromOptions(dm_mesh));
1873719258aSZach Atkins
18878f7fce3SZach Atkins PetscCall(DMGetDimension(dm_mesh, &dim));
18978f7fce3SZach Atkins PetscCall(SetupDMByDegree(dm_mesh, degree, q_extra, num_comp_u, dim, bp_options[bp_choice].enforce_bc));
19078f7fce3SZach Atkins
19178f7fce3SZach Atkins // View mesh
19278f7fce3SZach Atkins PetscCall(DMViewFromOptions(dm_mesh, NULL, "-dm_view"));
19378f7fce3SZach Atkins
19478f7fce3SZach Atkins // Create particle swarm
19578f7fce3SZach Atkins PetscCall(DMCreate(comm, &dm_swarm));
19678f7fce3SZach Atkins PetscCall(DMSetType(dm_swarm, DMSWARM));
19778f7fce3SZach Atkins PetscCall(DMSetDimension(dm_swarm, dim));
19878f7fce3SZach Atkins PetscCall(DMSwarmSetType(dm_swarm, DMSWARM_PIC));
19978f7fce3SZach Atkins PetscCall(DMSwarmSetCellDM(dm_swarm, dm_mesh));
20078f7fce3SZach Atkins
20178f7fce3SZach Atkins // -- Swarm field
20278f7fce3SZach Atkins PetscCall(DMSwarmRegisterPetscDatatypeField(dm_swarm, DMSwarmPICField_u, num_comp_u, PETSC_SCALAR));
20378f7fce3SZach Atkins PetscCall(DMSwarmFinalizeFieldRegister(dm_swarm));
20407a9c11fSZach Atkins {
20507a9c11fSZach Atkins PetscInt c_start, c_end, num_cells_local;
20607a9c11fSZach Atkins PetscCall(DMPlexGetHeightStratum(dm_mesh, 0, &c_start, &c_end));
20707a9c11fSZach Atkins num_cells_local = c_end - c_start;
20807a9c11fSZach Atkins PetscCall(DMSwarmSetLocalSizes(dm_swarm, num_cells_local * num_points_per_cell, 0));
20907a9c11fSZach Atkins }
21078f7fce3SZach Atkins PetscCall(DMSetFromOptions(dm_swarm));
21178f7fce3SZach Atkins
21278f7fce3SZach Atkins // -- Set swarm point locations
21378f7fce3SZach Atkins PetscCall(DMSwarmInitalizePointLocations(dm_swarm, point_swarm_type, num_points, num_points_per_cell));
21478f7fce3SZach Atkins PetscCall(DMSwarmVectorDefineField(dm_swarm, DMSwarmPICField_u));
21578f7fce3SZach Atkins
21678f7fce3SZach Atkins // -- Final particle swarm
21778f7fce3SZach Atkins PetscCall(PetscObjectSetName((PetscObject)dm_swarm, "Particle Swarm"));
21878f7fce3SZach Atkins PetscCall(DMViewFromOptions(dm_swarm, NULL, "-dm_swarm_view"));
21978f7fce3SZach Atkins
22078f7fce3SZach Atkins // Create vectors
22178f7fce3SZach Atkins PetscCall(DMCreateGlobalVector(dm_mesh, &X));
22278f7fce3SZach Atkins PetscCall(VecGetLocalSize(X, &l_size));
22378f7fce3SZach Atkins PetscCall(VecGetSize(X, &g_size));
22478f7fce3SZach Atkins PetscCall(DMCreateLocalVector(dm_mesh, &X_loc));
22578f7fce3SZach Atkins PetscCall(VecGetSize(X_loc, &xl_size));
22678f7fce3SZach Atkins PetscCall(VecDuplicate(X, &rhs));
22778f7fce3SZach Atkins
22878f7fce3SZach Atkins // Operator
22978f7fce3SZach Atkins PetscCall(PetscMalloc1(1, &op_apply_ctx));
23078f7fce3SZach Atkins PetscCall(PetscMalloc1(1, &op_error_ctx));
23178f7fce3SZach Atkins PetscCall(MatCreateShell(comm, l_size, l_size, g_size, g_size, op_apply_ctx, &mat_O));
23278f7fce3SZach Atkins PetscCall(MatSetDM(mat_O, dm_mesh));
23378f7fce3SZach Atkins PetscCall(MatShellSetOperation(mat_O, MATOP_MULT, (void (*)(void))MatMult_Ceed));
2349c674643SJeremy L Thompson PetscCall(MatShellSetOperation(mat_O, MATOP_GET_DIAGONAL, (void (*)(void))MatGetDiag));
23578f7fce3SZach Atkins
23678f7fce3SZach Atkins // Print summary
23778f7fce3SZach Atkins if (!test_mode) {
23878f7fce3SZach Atkins PetscInt P = degree + 1, Q = P + q_extra;
23978f7fce3SZach Atkins
24078f7fce3SZach Atkins const char *used_resource;
24178f7fce3SZach Atkins CeedGetResource(ceed, &used_resource);
24278f7fce3SZach Atkins
24378f7fce3SZach Atkins VecType vec_type;
24478f7fce3SZach Atkins PetscCall(VecGetType(X, &vec_type));
24578f7fce3SZach Atkins
24678f7fce3SZach Atkins PetscInt c_start, c_end, num_cells_local;
24778f7fce3SZach Atkins PetscCall(DMPlexGetHeightStratum(dm_mesh, 0, &c_start, &c_end));
24878f7fce3SZach Atkins num_cells_local = c_end - c_start;
24978f7fce3SZach Atkins DMPolytopeType cell_type;
25078f7fce3SZach Atkins PetscCall(DMPlexGetCellType(dm_mesh, c_start, &cell_type));
25178f7fce3SZach Atkins PetscMPIInt comm_size;
25278f7fce3SZach Atkins PetscCall(MPI_Comm_size(comm, &comm_size));
25378f7fce3SZach Atkins
25407a9c11fSZach Atkins PetscInt num_points_local, num_points_global;
25578f7fce3SZach Atkins PetscCall(DMSwarmGetLocalSize(dm_swarm, &num_points_local));
25607a9c11fSZach Atkins PetscCall(DMSwarmGetSize(dm_swarm, &num_points_global));
25778f7fce3SZach Atkins
25878f7fce3SZach Atkins PetscCall(PetscPrintf(comm,
25978f7fce3SZach Atkins "\n-- CEED Benchmark Problem %" CeedInt_FMT " -- libCEED + PETSc --\n"
26078f7fce3SZach Atkins " MPI:\n"
26178f7fce3SZach Atkins " Hostname : %s\n"
26278f7fce3SZach Atkins " Total ranks : %d\n"
26378f7fce3SZach Atkins " Ranks per compute node : %d\n"
26478f7fce3SZach Atkins " PETSc:\n"
26578f7fce3SZach Atkins " PETSc Vec Type : %s\n"
26678f7fce3SZach Atkins " libCEED:\n"
26778f7fce3SZach Atkins " libCEED Backend : %s\n"
26878f7fce3SZach Atkins " libCEED Backend MemType : %s\n"
26978f7fce3SZach Atkins " Mesh:\n"
2704d00b080SJeremy L Thompson " Solution Order (P) : %" PetscInt_FMT "\n"
2714d00b080SJeremy L Thompson " Quadrature Order (Q) : %" PetscInt_FMT "\n"
2724d00b080SJeremy L Thompson " Additional quadrature points (q_extra) : %" PetscInt_FMT "\n"
27378f7fce3SZach Atkins " Global nodes : %" PetscInt_FMT "\n"
27478f7fce3SZach Atkins " Local Elements : %" PetscInt_FMT "\n"
27578f7fce3SZach Atkins " Owned nodes : %" PetscInt_FMT "\n"
27678f7fce3SZach Atkins " DoF per node : %" PetscInt_FMT "\n"
27778f7fce3SZach Atkins " Swarm:\n"
27878f7fce3SZach Atkins " Global points : %" PetscInt_FMT "\n"
27978f7fce3SZach Atkins " Local points : %" PetscInt_FMT "\n"
28078f7fce3SZach Atkins " Avg points per cell : %" PetscInt_FMT "\n"
28178f7fce3SZach Atkins " Point distribution : %s\n",
28278f7fce3SZach Atkins bp_choice + 1, hostname, comm_size, ranks_per_node, vec_type, used_resource, CeedMemTypes[mem_type_backend], P, Q, q_extra,
28307a9c11fSZach Atkins g_size / num_comp_u, num_cells_local, l_size / num_comp_u, num_comp_u, num_points_global, num_points_local,
28478f7fce3SZach Atkins num_cells_local > 0 ? num_points_local / num_cells_local : 0, point_swarm_types[point_swarm_type]));
28578f7fce3SZach Atkins }
28678f7fce3SZach Atkins
28778f7fce3SZach Atkins // Setup libCEED's objects
28878f7fce3SZach Atkins Vec target;
28978f7fce3SZach Atkins
29078f7fce3SZach Atkins PetscCall(DMCreateLocalVector(dm_swarm, &target));
29178f7fce3SZach Atkins PetscCall(PetscMalloc1(1, &ceed_data));
29278f7fce3SZach Atkins PetscCall(SetupProblemSwarm(dm_swarm, ceed, bp_options[bp_choice], ceed_data, true, rhs, target));
29378f7fce3SZach Atkins PetscCall(SetupErrorOperator(dm_mesh, ceed, bp_options[bp_choice], dim, dim, num_comp_u, &op_error));
29478f7fce3SZach Atkins
29578f7fce3SZach Atkins // Set up apply operator context
29678f7fce3SZach Atkins PetscCall(SetupApplyOperatorCtx(comm, dm_mesh, ceed, ceed_data, X_loc, op_apply_ctx));
29778f7fce3SZach Atkins
29878f7fce3SZach Atkins // Setup solver
29978f7fce3SZach Atkins PetscCall(KSPCreate(comm, &ksp));
30078f7fce3SZach Atkins {
30178f7fce3SZach Atkins PC pc;
30278f7fce3SZach Atkins PetscCall(KSPGetPC(ksp, &pc));
30378f7fce3SZach Atkins if (bp_choice == CEED_BP1 || bp_choice == CEED_BP2) {
30478f7fce3SZach Atkins PetscCall(PCSetType(pc, PCJACOBI));
3059c674643SJeremy L Thompson PetscCall(PCJacobiSetType(pc, PC_JACOBI_DIAGONAL));
30678f7fce3SZach Atkins } else {
30778f7fce3SZach Atkins PetscCall(PCSetType(pc, PCNONE));
30878f7fce3SZach Atkins }
30978f7fce3SZach Atkins PetscCall(KSPSetType(ksp, KSPCG));
31078f7fce3SZach Atkins PetscCall(KSPSetNormType(ksp, KSP_NORM_NATURAL));
31178f7fce3SZach Atkins PetscCall(KSPSetTolerances(ksp, 1e-10, PETSC_DEFAULT, PETSC_DEFAULT, PETSC_DEFAULT));
31278f7fce3SZach Atkins }
31378f7fce3SZach Atkins PetscCall(KSPSetFromOptions(ksp));
31478f7fce3SZach Atkins PetscCall(KSPSetOperators(ksp, mat_O, mat_O));
31578f7fce3SZach Atkins
31678f7fce3SZach Atkins // First run, if benchmarking
31778f7fce3SZach Atkins if (benchmark_mode) {
31878f7fce3SZach Atkins PetscCall(KSPSetTolerances(ksp, 1e-10, PETSC_DEFAULT, PETSC_DEFAULT, 1));
31978f7fce3SZach Atkins my_rt_start = MPI_Wtime();
32078f7fce3SZach Atkins PetscCall(KSPSolve(ksp, rhs, X));
32178f7fce3SZach Atkins my_rt = MPI_Wtime() - my_rt_start;
32278f7fce3SZach Atkins PetscCall(MPI_Allreduce(MPI_IN_PLACE, &my_rt, 1, MPI_DOUBLE, MPI_MIN, comm));
32378f7fce3SZach Atkins // Set maxits based on first iteration timing
32478f7fce3SZach Atkins if (my_rt > 0.02) {
32578f7fce3SZach Atkins PetscCall(KSPSetTolerances(ksp, 1e-10, PETSC_DEFAULT, PETSC_DEFAULT, 5));
32678f7fce3SZach Atkins } else {
32778f7fce3SZach Atkins PetscCall(KSPSetTolerances(ksp, 1e-10, PETSC_DEFAULT, PETSC_DEFAULT, 20));
32878f7fce3SZach Atkins }
32978f7fce3SZach Atkins }
33078f7fce3SZach Atkins
33178f7fce3SZach Atkins // Timed solve
33278f7fce3SZach Atkins PetscCall(VecZeroEntries(X));
33378f7fce3SZach Atkins PetscCall(PetscBarrier((PetscObject)ksp));
33478f7fce3SZach Atkins
33578f7fce3SZach Atkins // -- Performance logging
33678f7fce3SZach Atkins PetscCall(PetscLogStageRegister("Solve Stage", &solve_stage));
33778f7fce3SZach Atkins PetscCall(PetscLogStagePush(solve_stage));
33878f7fce3SZach Atkins
33978f7fce3SZach Atkins // -- Solve
34078f7fce3SZach Atkins my_rt_start = MPI_Wtime();
34178f7fce3SZach Atkins PetscCall(KSPSolve(ksp, rhs, X));
34278f7fce3SZach Atkins my_rt = MPI_Wtime() - my_rt_start;
34378f7fce3SZach Atkins
34478f7fce3SZach Atkins // -- Performance logging
34578f7fce3SZach Atkins PetscCall(PetscLogStagePop());
34678f7fce3SZach Atkins
34778f7fce3SZach Atkins // Output results
34878f7fce3SZach Atkins {
34978f7fce3SZach Atkins KSPType ksp_type;
35078f7fce3SZach Atkins KSPConvergedReason reason;
35178f7fce3SZach Atkins PetscReal rnorm;
35278f7fce3SZach Atkins PetscInt its;
35378f7fce3SZach Atkins PetscCall(KSPGetType(ksp, &ksp_type));
35478f7fce3SZach Atkins PetscCall(KSPGetConvergedReason(ksp, &reason));
35578f7fce3SZach Atkins PetscCall(KSPGetIterationNumber(ksp, &its));
35678f7fce3SZach Atkins PetscCall(KSPGetResidualNorm(ksp, &rnorm));
35778f7fce3SZach Atkins if (!test_mode || reason < 0 || rnorm > 1e-8) {
35878f7fce3SZach Atkins PetscCall(PetscPrintf(comm,
35978f7fce3SZach Atkins " KSP:\n"
36078f7fce3SZach Atkins " KSP Type : %s\n"
36178f7fce3SZach Atkins " KSP Convergence : %s\n"
36278f7fce3SZach Atkins " Total KSP Iterations : %" PetscInt_FMT "\n"
36378f7fce3SZach Atkins " Final rnorm : %e\n",
36478f7fce3SZach Atkins ksp_type, KSPConvergedReasons[reason], its, (double)rnorm));
36578f7fce3SZach Atkins }
36678f7fce3SZach Atkins if (!test_mode) {
36778f7fce3SZach Atkins PetscCall(PetscPrintf(comm, " Performance:\n"));
36878f7fce3SZach Atkins }
36978f7fce3SZach Atkins
37078f7fce3SZach Atkins // View true solution at particles
37178f7fce3SZach Atkins if (write_true_solution_swarm) {
37278f7fce3SZach Atkins Vec u_swarm, u_swarm_old;
37378f7fce3SZach Atkins PetscCall(DMSwarmSortGetAccess(dm_swarm));
37478f7fce3SZach Atkins PetscCall(DMSwarmCreateLocalVectorFromField(dm_swarm, DMSwarmPICField_u, &u_swarm));
37578f7fce3SZach Atkins PetscCall(VecDuplicate(u_swarm, &u_swarm_old));
37678f7fce3SZach Atkins PetscCall(VecCopy(u_swarm, u_swarm_old));
37778f7fce3SZach Atkins PetscCall(VecCopy(target, u_swarm));
37878f7fce3SZach Atkins PetscCall(DMSwarmDestroyLocalVectorFromField(dm_swarm, DMSwarmPICField_u, &u_swarm));
37978f7fce3SZach Atkins PetscCall(DMSwarmSortRestoreAccess(dm_swarm));
38078f7fce3SZach Atkins
38178f7fce3SZach Atkins PetscCall(DMSwarmViewXDMF(dm_swarm, "swarm.xmf"));
38278f7fce3SZach Atkins
38378f7fce3SZach Atkins PetscCall(DMSwarmSortGetAccess(dm_swarm));
38478f7fce3SZach Atkins PetscCall(DMSwarmCreateLocalVectorFromField(dm_swarm, DMSwarmPICField_u, &u_swarm));
38578f7fce3SZach Atkins PetscCall(VecCopy(u_swarm_old, u_swarm));
38678f7fce3SZach Atkins PetscCall(DMSwarmDestroyLocalVectorFromField(dm_swarm, DMSwarmPICField_u, &u_swarm));
38778f7fce3SZach Atkins PetscCall(DMSwarmSortRestoreAccess(dm_swarm));
38878f7fce3SZach Atkins PetscCall(VecDestroy(&u_swarm_old));
38978f7fce3SZach Atkins }
39078f7fce3SZach Atkins
39178f7fce3SZach Atkins // View solution at mesh points
39278f7fce3SZach Atkins PetscCall(VecViewFromOptions(X, NULL, "-solution_view"));
39378f7fce3SZach Atkins
39478f7fce3SZach Atkins // Compute L2 Error
39578f7fce3SZach Atkins {
39678f7fce3SZach Atkins // Set up error operator context
39778f7fce3SZach Atkins PetscCall(SetupErrorOperatorCtx(comm, dm_mesh, ceed, ceed_data, X_loc, op_error, op_error_ctx));
39878f7fce3SZach Atkins PetscScalar l2_error;
39978f7fce3SZach Atkins PetscCall(ComputeL2Error(X, &l2_error, op_error_ctx));
40078f7fce3SZach Atkins
40107a9c11fSZach Atkins if (!test_mode || l2_error > tolerance) {
40278f7fce3SZach Atkins PetscCall(MPI_Allreduce(&my_rt, &rt_min, 1, MPI_DOUBLE, MPI_MIN, comm));
40378f7fce3SZach Atkins PetscCall(MPI_Allreduce(&my_rt, &rt_max, 1, MPI_DOUBLE, MPI_MAX, comm));
40478f7fce3SZach Atkins PetscCall(PetscPrintf(comm,
40578f7fce3SZach Atkins " L2 Error : %e\n"
40678f7fce3SZach Atkins " CG Solve Time : %g (%g) sec\n",
40778f7fce3SZach Atkins (double)l2_error, rt_max, rt_min));
40878f7fce3SZach Atkins }
40978f7fce3SZach Atkins }
41078f7fce3SZach Atkins if (benchmark_mode && (!test_mode)) {
41178f7fce3SZach Atkins PetscCall(PetscPrintf(comm, " DoFs/Sec in CG : %g (%g) million\n", 1e-6 * g_size * its / rt_max,
41278f7fce3SZach Atkins 1e-6 * g_size * its / rt_min));
41378f7fce3SZach Atkins }
41478f7fce3SZach Atkins }
41578f7fce3SZach Atkins
41678f7fce3SZach Atkins // Output solution
41778f7fce3SZach Atkins if (write_solution) {
41878f7fce3SZach Atkins PetscViewer vtk_viewer_soln;
41978f7fce3SZach Atkins
42078f7fce3SZach Atkins PetscCall(PetscViewerCreate(comm, &vtk_viewer_soln));
42178f7fce3SZach Atkins PetscCall(PetscViewerSetType(vtk_viewer_soln, PETSCVIEWERVTK));
42278f7fce3SZach Atkins PetscCall(PetscViewerFileSetName(vtk_viewer_soln, "solution.vtu"));
42378f7fce3SZach Atkins PetscCall(VecView(X, vtk_viewer_soln));
42478f7fce3SZach Atkins PetscCall(PetscViewerDestroy(&vtk_viewer_soln));
42578f7fce3SZach Atkins }
42678f7fce3SZach Atkins
42778f7fce3SZach Atkins // Cleanup
42878f7fce3SZach Atkins PetscCall(VecDestroy(&X));
42978f7fce3SZach Atkins PetscCall(VecDestroy(&X_loc));
43078f7fce3SZach Atkins PetscCall(VecDestroy(&op_apply_ctx->Y_loc));
43178f7fce3SZach Atkins PetscCall(VecDestroy(&op_error_ctx->Y_loc));
43278f7fce3SZach Atkins PetscCall(MatDestroy(&mat_O));
43378f7fce3SZach Atkins PetscCall(PetscFree(op_apply_ctx));
43478f7fce3SZach Atkins PetscCall(PetscFree(op_error_ctx));
43578f7fce3SZach Atkins PetscCall(CeedDataDestroy(0, ceed_data));
43678f7fce3SZach Atkins PetscCall(DMDestroy(&dm_mesh));
43778f7fce3SZach Atkins PetscCall(DMDestroy(&dm_swarm));
43878f7fce3SZach Atkins
43978f7fce3SZach Atkins PetscCall(VecDestroy(&rhs));
44078f7fce3SZach Atkins PetscCall(VecDestroy(&target));
44178f7fce3SZach Atkins PetscCall(KSPDestroy(&ksp));
44278f7fce3SZach Atkins CeedOperatorDestroy(&op_error);
44378f7fce3SZach Atkins CeedDestroy(&ceed);
44478f7fce3SZach Atkins return PetscFinalize();
44578f7fce3SZach Atkins }
446