xref: /libCEED/examples/petsc/bpsswarm.c (revision 8c03e814a8aedd48736bf8454f3df41e37fe2fcc)
15aed82e4SJeremy L Thompson // Copyright (c) 2017-2024, Lawrence Livermore National Security, LLC and other CEED contributors.
278f7fce3SZach Atkins // All Rights Reserved. See the top-level LICENSE and NOTICE files for details.
378f7fce3SZach Atkins //
478f7fce3SZach Atkins // SPDX-License-Identifier: BSD-2-Clause
578f7fce3SZach Atkins //
678f7fce3SZach Atkins // This file is part of CEED:  http://github.com/ceed
778f7fce3SZach Atkins 
878f7fce3SZach Atkins //                        libCEED + PETSc Example: CEED BPs
978f7fce3SZach Atkins //
1078f7fce3SZach Atkins // This example demonstrates a simple usage of libCEED with PETSc to solve the CEED BP benchmark problems, see http://ceed.exascaleproject.org/bps, on
1178f7fce3SZach Atkins // a particle swarm.
1278f7fce3SZach Atkins //
1378f7fce3SZach Atkins // The code uses higher level communication protocols in DMPlex and DMSwarm.
1478f7fce3SZach Atkins //
1578f7fce3SZach Atkins // Build with:
1678f7fce3SZach Atkins //
1778f7fce3SZach Atkins //     make bpsswarm [PETSC_DIR=</path/to/petsc>] [CEED_DIR=</path/to/libceed>]
1878f7fce3SZach Atkins //
1978f7fce3SZach Atkins // Sample runs:
2078f7fce3SZach Atkins //
2178f7fce3SZach Atkins //     bpssphere -problem bp1 -degree 3
2278f7fce3SZach Atkins //     bpssphere -problem bp2 -degree 3
2378f7fce3SZach Atkins //     bpssphere -problem bp3 -degree 3
2478f7fce3SZach Atkins //
2507a9c11fSZach Atkins //TESTARGS(name="BP2") -ceed {ceed_resource} -test -problem bp2 -dm_plex_dim 3 -dm_plex_box_faces 3,3,3 -dm_plex_simplex 0 -swarm uniform -points_per_cell 64
2607a9c11fSZach Atkins //TESTARGS(name="BP3") -ceed {ceed_resource} -test -problem bp3 -dm_plex_dim 3 -dm_plex_box_faces 4,4,4 -dm_plex_simplex 0 -swarm uniform -points_per_cell 64 -tolerance 3e-2
2707a9c11fSZach Atkins //TESTARGS(name="BP5") -ceed {ceed_resource} -test -problem bp5 -dm_plex_dim 3 -dm_plex_box_faces 3,3,3 -dm_plex_simplex 0 -swarm uniform -points_per_cell 64
2878f7fce3SZach Atkins 
2978f7fce3SZach Atkins /// @file
3078f7fce3SZach Atkins /// CEED BPs example using PETSc with DMPlex
3178f7fce3SZach Atkins /// See bpsraw.c for a "raw" implementation using a structured grid and bps.c for an implementation using an unstructured grid.
3278f7fce3SZach Atkins static const char help[]              = "Solve CEED BPs on a particle swarm using DMPlex and DMSwarm in PETSc\n";
3378f7fce3SZach Atkins const char        DMSwarmPICField_u[] = "u";
3478f7fce3SZach Atkins 
3578f7fce3SZach Atkins #include "bps.h"
3678f7fce3SZach Atkins 
3778f7fce3SZach Atkins #include <ceed.h>
3878f7fce3SZach Atkins #include <petscdmplex.h>
3978f7fce3SZach Atkins #include <petscksp.h>
4078f7fce3SZach Atkins #include <stdbool.h>
4178f7fce3SZach Atkins #include <string.h>
4278f7fce3SZach Atkins 
4378f7fce3SZach Atkins #include "include/bpsproblemdata.h"
4478f7fce3SZach Atkins #include "include/libceedsetup.h"
4578f7fce3SZach Atkins #include "include/matops.h"
4678f7fce3SZach Atkins #include "include/petscutils.h"
4778f7fce3SZach Atkins #include "include/petscversion.h"
4878f7fce3SZach Atkins #include "include/swarmutils.h"
4978f7fce3SZach Atkins 
5078f7fce3SZach Atkins int main(int argc, char **argv) {
5178f7fce3SZach Atkins   MPI_Comm             comm;
5278f7fce3SZach Atkins   char                 ceed_resource[PETSC_MAX_PATH_LEN] = "/cpu/self", filename[PETSC_MAX_PATH_LEN];
5378f7fce3SZach Atkins   double               my_rt_start, my_rt, rt_min, rt_max;
5407a9c11fSZach Atkins   PetscScalar          tolerance;
554d00b080SJeremy L Thompson   PetscMPIInt          comm_size;
564d00b080SJeremy L Thompson   PetscInt             degree, q_extra, l_size, g_size, dim = 3, num_comp_u = 1, xl_size, num_points = 1728, num_points_per_cell = 64;
5778f7fce3SZach Atkins   PetscBool            test_mode, benchmark_mode, read_mesh, write_solution, write_true_solution_swarm;
5878f7fce3SZach Atkins   PetscLogStage        solve_stage;
5978f7fce3SZach Atkins   Vec                  X, X_loc, rhs;
6078f7fce3SZach Atkins   Mat                  mat_O;
6178f7fce3SZach Atkins   KSP                  ksp;
6278f7fce3SZach Atkins   DM                   dm_mesh, dm_swarm;
6378f7fce3SZach Atkins   OperatorApplyContext op_apply_ctx, op_error_ctx;
6478f7fce3SZach Atkins   Ceed                 ceed;
6578f7fce3SZach Atkins   CeedData             ceed_data;
6678f7fce3SZach Atkins   CeedOperator         op_error;
6778f7fce3SZach Atkins   BPType               bp_choice;
68*8c03e814SJeremy L Thompson   VecType              vec_type         = VECSTANDARD;
6978f7fce3SZach Atkins   PointSwarmType       point_swarm_type = SWARM_GAUSS;
7078f7fce3SZach Atkins   PetscMPIInt          ranks_per_node;
7178f7fce3SZach Atkins   char                 hostname[PETSC_MAX_PATH_LEN];
7278f7fce3SZach Atkins 
7378f7fce3SZach Atkins   PetscCall(PetscInitialize(&argc, &argv, NULL, help));
7478f7fce3SZach Atkins   comm = PETSC_COMM_WORLD;
7578f7fce3SZach Atkins   PetscCall(MPI_Comm_size(comm, &comm_size));
7678f7fce3SZach Atkins #if defined(PETSC_HAVE_MPI_PROCESS_SHARED_MEMORY)
7778f7fce3SZach Atkins   {
7878f7fce3SZach Atkins     MPI_Comm splitcomm;
7978f7fce3SZach Atkins     PetscCall(MPI_Comm_split_type(comm, MPI_COMM_TYPE_SHARED, 0, MPI_INFO_NULL, &splitcomm));
8078f7fce3SZach Atkins     PetscCall(MPI_Comm_size(splitcomm, &ranks_per_node));
8178f7fce3SZach Atkins     PetscCall(MPI_Comm_free(&splitcomm));
8278f7fce3SZach Atkins   }
8378f7fce3SZach Atkins #else
8478f7fce3SZach Atkins   ranks_per_node = -1;  // Unknown
8578f7fce3SZach Atkins #endif
8678f7fce3SZach Atkins 
8778f7fce3SZach Atkins   // Read command line options
8878f7fce3SZach Atkins   PetscOptionsBegin(comm, NULL, "CEED BPs in PETSc", NULL);
8978f7fce3SZach Atkins   bp_choice = CEED_BP1;
9078f7fce3SZach Atkins   PetscCall(PetscOptionsEnum("-problem", "CEED benchmark problem to solve", NULL, bp_types, (PetscEnum)bp_choice, (PetscEnum *)&bp_choice, NULL));
9178f7fce3SZach Atkins   num_comp_u = bp_options[bp_choice].num_comp_u;
9278f7fce3SZach Atkins   test_mode  = PETSC_FALSE;
9378f7fce3SZach Atkins   PetscCall(PetscOptionsBool("-test", "Testing mode (do not print unless error is large)", NULL, test_mode, &test_mode, NULL));
9478f7fce3SZach Atkins   benchmark_mode = PETSC_FALSE;
9578f7fce3SZach Atkins   PetscCall(PetscOptionsBool("-benchmark", "Benchmarking mode (prints benchmark statistics)", NULL, benchmark_mode, &benchmark_mode, NULL));
9678f7fce3SZach Atkins   write_solution = PETSC_FALSE;
9778f7fce3SZach Atkins   PetscCall(PetscOptionsBool("-write_solution", "Write solution for visualization", NULL, write_solution, &write_solution, NULL));
9878f7fce3SZach Atkins   write_true_solution_swarm = PETSC_FALSE;
9978f7fce3SZach Atkins   PetscCall(PetscOptionsBool("-write_true_solution_swarm", "Write true solution at swarm points for visualization", NULL, write_true_solution_swarm,
10078f7fce3SZach Atkins                              &write_true_solution_swarm, NULL));
10107a9c11fSZach Atkins   degree = 2;
10278f7fce3SZach Atkins   PetscCall(PetscOptionsInt("-degree", "Polynomial degree of tensor product basis", NULL, degree, &degree, NULL));
10378f7fce3SZach Atkins   q_extra = bp_options[bp_choice].q_extra;
10478f7fce3SZach Atkins   PetscCall(PetscOptionsInt("-q_extra", "Number of extra quadrature points", NULL, q_extra, &q_extra, NULL));
10578f7fce3SZach Atkins   PetscCall(PetscOptionsString("-ceed", "CEED resource specifier", NULL, ceed_resource, ceed_resource, sizeof(ceed_resource), NULL));
10678f7fce3SZach Atkins   PetscCall(PetscGetHostName(hostname, sizeof hostname));
10778f7fce3SZach Atkins   PetscCall(PetscOptionsString("-hostname", "Hostname for output", NULL, hostname, hostname, sizeof(hostname), NULL));
10878f7fce3SZach Atkins   read_mesh = PETSC_FALSE;
10978f7fce3SZach Atkins   PetscCall(PetscOptionsString("-mesh", "Read mesh from file", NULL, filename, filename, sizeof(filename), &read_mesh));
11007a9c11fSZach Atkins   tolerance = 1e-2;
11107a9c11fSZach Atkins   PetscCall(PetscOptionsScalar("-tolerance", "Tolerance for L2 error", NULL, tolerance, &tolerance, NULL));
11278f7fce3SZach Atkins   PetscCall(PetscOptionsEnum("-swarm", "Swarm points distribution", NULL, point_swarm_types, (PetscEnum)point_swarm_type,
11378f7fce3SZach Atkins                              (PetscEnum *)&point_swarm_type, NULL));
11478f7fce3SZach Atkins   {
11578f7fce3SZach Atkins     PetscBool user_set_num_points_per_cell = PETSC_FALSE;
11678f7fce3SZach Atkins     PetscInt  num_cells_total = 1, tmp = dim;
11778f7fce3SZach Atkins     PetscInt  num_cells[] = {1, 1, 1};
11878f7fce3SZach Atkins 
11978f7fce3SZach Atkins     PetscCall(PetscOptionsInt("-points_per_cell", "Total number of swarm points in each cell", NULL, num_points_per_cell, &num_points_per_cell,
12078f7fce3SZach Atkins                               &user_set_num_points_per_cell));
12178f7fce3SZach Atkins     PetscCall(PetscOptionsInt("-dm_plex_dim", "Background mesh dimension", NULL, dim, &dim, NULL));
12278f7fce3SZach Atkins     PetscCall(PetscOptionsIntArray("-dm_plex_box_faces", "Number of cells", NULL, num_cells, &tmp, NULL));
12378f7fce3SZach Atkins 
12478f7fce3SZach Atkins     PetscCheck(tmp == dim, comm, PETSC_ERR_USER, "Number of values for -dm_plex_box_faces must match dimension");
12578f7fce3SZach Atkins 
12678f7fce3SZach Atkins     num_cells_total = num_cells[0] * num_cells[1] * num_cells[2];
12778f7fce3SZach Atkins     PetscCheck(!user_set_num_points_per_cell || point_swarm_type != SWARM_SINUSOIDAL, comm, PETSC_ERR_USER,
12878f7fce3SZach Atkins                "Cannot specify points per cell with sinusoidal points locations");
12978f7fce3SZach Atkins     if (!user_set_num_points_per_cell) {
13078f7fce3SZach Atkins       PetscCall(PetscOptionsInt("-points", "Total number of swarm points", NULL, num_points, &num_points, NULL));
13178f7fce3SZach Atkins       num_points_per_cell = PetscCeilInt(num_points, num_cells_total);
13278f7fce3SZach Atkins     }
13378f7fce3SZach Atkins     if (point_swarm_type != SWARM_SINUSOIDAL) {
13478f7fce3SZach Atkins       PetscInt num_points_per_cell_1d = round(cbrt(num_points_per_cell * 1.0));
13578f7fce3SZach Atkins 
13678f7fce3SZach Atkins       num_points_per_cell = 1;
13778f7fce3SZach Atkins       for (PetscInt i = 0; i < dim; i++) num_points_per_cell *= num_points_per_cell_1d;
13878f7fce3SZach Atkins     }
13978f7fce3SZach Atkins     num_points = num_points_per_cell * num_cells_total;
14078f7fce3SZach Atkins   }
14178f7fce3SZach Atkins   {
14278f7fce3SZach Atkins     PetscBool flg;
14378f7fce3SZach Atkins     PetscInt  p = ranks_per_node;
14478f7fce3SZach Atkins     PetscCall(PetscOptionsInt("-p", "Number of MPI ranks per node", NULL, p, &p, &flg));
14578f7fce3SZach Atkins     if (flg) ranks_per_node = p;
14678f7fce3SZach Atkins   }
14778f7fce3SZach Atkins   PetscOptionsEnd();
14878f7fce3SZach Atkins 
14978f7fce3SZach Atkins   // Setup DM
15078f7fce3SZach Atkins   if (read_mesh) {
15178f7fce3SZach Atkins     PetscCall(DMPlexCreateFromFile(comm, filename, NULL, PETSC_TRUE, &dm_mesh));
15278f7fce3SZach Atkins   } else {
15378f7fce3SZach Atkins     PetscCall(DMCreate(comm, &dm_mesh));
15478f7fce3SZach Atkins     PetscCall(DMSetType(dm_mesh, DMPLEX));
15578f7fce3SZach Atkins     PetscCall(DMSetFromOptions(dm_mesh));
15678f7fce3SZach Atkins 
15778f7fce3SZach Atkins     // -- Check for tensor product mesh
15878f7fce3SZach Atkins     {
15978f7fce3SZach Atkins       PetscBool is_simplex;
16078f7fce3SZach Atkins 
16178f7fce3SZach Atkins       PetscCall(DMPlexIsSimplex(dm_mesh, &is_simplex));
16278f7fce3SZach Atkins       PetscCheck(!is_simplex, comm, PETSC_ERR_USER, "Only tensor-product background meshes supported");
16378f7fce3SZach Atkins     }
16478f7fce3SZach Atkins   }
16578f7fce3SZach Atkins   PetscCall(DMGetDimension(dm_mesh, &dim));
16678f7fce3SZach Atkins   PetscCall(SetupDMByDegree(dm_mesh, degree, q_extra, num_comp_u, dim, bp_options[bp_choice].enforce_bc));
16778f7fce3SZach Atkins 
16878f7fce3SZach Atkins   // View mesh
16978f7fce3SZach Atkins   PetscCall(DMSetOptionsPrefix(dm_mesh, "final_"));
17078f7fce3SZach Atkins   PetscCall(DMViewFromOptions(dm_mesh, NULL, "-dm_view"));
17178f7fce3SZach Atkins 
17278f7fce3SZach Atkins   // Create particle swarm
17378f7fce3SZach Atkins   PetscCall(DMCreate(comm, &dm_swarm));
17478f7fce3SZach Atkins   PetscCall(DMSetType(dm_swarm, DMSWARM));
17578f7fce3SZach Atkins   PetscCall(DMSetDimension(dm_swarm, dim));
17678f7fce3SZach Atkins   PetscCall(DMSwarmSetType(dm_swarm, DMSWARM_PIC));
17778f7fce3SZach Atkins   PetscCall(DMSwarmSetCellDM(dm_swarm, dm_mesh));
17878f7fce3SZach Atkins 
17978f7fce3SZach Atkins   // -- Swarm field
18078f7fce3SZach Atkins   PetscCall(DMSwarmRegisterPetscDatatypeField(dm_swarm, DMSwarmPICField_u, num_comp_u, PETSC_SCALAR));
18178f7fce3SZach Atkins   PetscCall(DMSwarmFinalizeFieldRegister(dm_swarm));
18207a9c11fSZach Atkins   {
18307a9c11fSZach Atkins     PetscInt c_start, c_end, num_cells_local;
18407a9c11fSZach Atkins     PetscCall(DMPlexGetHeightStratum(dm_mesh, 0, &c_start, &c_end));
18507a9c11fSZach Atkins     num_cells_local = c_end - c_start;
18607a9c11fSZach Atkins     PetscCall(DMSwarmSetLocalSizes(dm_swarm, num_cells_local * num_points_per_cell, 0));
18707a9c11fSZach Atkins   }
18878f7fce3SZach Atkins   PetscCall(DMSetFromOptions(dm_swarm));
18978f7fce3SZach Atkins 
19078f7fce3SZach Atkins   // -- Set swarm point locations
19178f7fce3SZach Atkins   PetscCall(DMSwarmInitalizePointLocations(dm_swarm, point_swarm_type, num_points, num_points_per_cell));
19278f7fce3SZach Atkins   PetscCall(DMSwarmVectorDefineField(dm_swarm, DMSwarmPICField_u));
19378f7fce3SZach Atkins 
19478f7fce3SZach Atkins   // -- Final particle swarm
19578f7fce3SZach Atkins   PetscCall(PetscObjectSetName((PetscObject)dm_swarm, "Particle Swarm"));
19678f7fce3SZach Atkins   PetscCall(DMViewFromOptions(dm_swarm, NULL, "-dm_swarm_view"));
19778f7fce3SZach Atkins 
198*8c03e814SJeremy L Thompson   // Set up libCEED
199*8c03e814SJeremy L Thompson   CeedInit(ceed_resource, &ceed);
200*8c03e814SJeremy L Thompson   CeedMemType mem_type_backend;
201*8c03e814SJeremy L Thompson   CeedGetPreferredMemType(ceed, &mem_type_backend);
202*8c03e814SJeremy L Thompson 
203*8c03e814SJeremy L Thompson   // Set background mesh vec_type
204*8c03e814SJeremy L Thompson   switch (mem_type_backend) {
205*8c03e814SJeremy L Thompson     case CEED_MEM_HOST:
206*8c03e814SJeremy L Thompson       vec_type = VECSTANDARD;
207*8c03e814SJeremy L Thompson       break;
208*8c03e814SJeremy L Thompson     case CEED_MEM_DEVICE: {
209*8c03e814SJeremy L Thompson       const char *resolved;
210*8c03e814SJeremy L Thompson 
211*8c03e814SJeremy L Thompson       CeedGetResource(ceed, &resolved);
212*8c03e814SJeremy L Thompson       if (strstr(resolved, "/gpu/cuda")) vec_type = VECCUDA;
213*8c03e814SJeremy L Thompson       else if (strstr(resolved, "/gpu/hip/occa")) vec_type = VECSTANDARD;  // https://github.com/CEED/libCEED/issues/678
214*8c03e814SJeremy L Thompson       else if (strstr(resolved, "/gpu/hip")) vec_type = VECHIP;
215*8c03e814SJeremy L Thompson       else vec_type = VECSTANDARD;
216*8c03e814SJeremy L Thompson     }
217*8c03e814SJeremy L Thompson   }
218*8c03e814SJeremy L Thompson   PetscCall(DMSetVecType(dm_mesh, vec_type));
219*8c03e814SJeremy L Thompson 
22078f7fce3SZach Atkins   // Create vectors
22178f7fce3SZach Atkins   PetscCall(DMCreateGlobalVector(dm_mesh, &X));
22278f7fce3SZach Atkins   PetscCall(VecGetLocalSize(X, &l_size));
22378f7fce3SZach Atkins   PetscCall(VecGetSize(X, &g_size));
22478f7fce3SZach Atkins   PetscCall(DMCreateLocalVector(dm_mesh, &X_loc));
22578f7fce3SZach Atkins   PetscCall(VecGetSize(X_loc, &xl_size));
22678f7fce3SZach Atkins   PetscCall(VecDuplicate(X, &rhs));
22778f7fce3SZach Atkins 
22878f7fce3SZach Atkins   // Operator
22978f7fce3SZach Atkins   PetscCall(PetscMalloc1(1, &op_apply_ctx));
23078f7fce3SZach Atkins   PetscCall(PetscMalloc1(1, &op_error_ctx));
23178f7fce3SZach Atkins   PetscCall(MatCreateShell(comm, l_size, l_size, g_size, g_size, op_apply_ctx, &mat_O));
23278f7fce3SZach Atkins   PetscCall(MatSetDM(mat_O, dm_mesh));
23378f7fce3SZach Atkins   PetscCall(MatShellSetOperation(mat_O, MATOP_MULT, (void (*)(void))MatMult_Ceed));
2349c674643SJeremy L Thompson   PetscCall(MatShellSetOperation(mat_O, MATOP_GET_DIAGONAL, (void (*)(void))MatGetDiag));
23578f7fce3SZach Atkins 
23678f7fce3SZach Atkins   // Print summary
23778f7fce3SZach Atkins   if (!test_mode) {
23878f7fce3SZach Atkins     PetscInt P = degree + 1, Q = P + q_extra;
23978f7fce3SZach Atkins 
24078f7fce3SZach Atkins     const char *used_resource;
24178f7fce3SZach Atkins     CeedGetResource(ceed, &used_resource);
24278f7fce3SZach Atkins 
24378f7fce3SZach Atkins     VecType vec_type;
24478f7fce3SZach Atkins     PetscCall(VecGetType(X, &vec_type));
24578f7fce3SZach Atkins 
24678f7fce3SZach Atkins     PetscInt c_start, c_end, num_cells_local;
24778f7fce3SZach Atkins     PetscCall(DMPlexGetHeightStratum(dm_mesh, 0, &c_start, &c_end));
24878f7fce3SZach Atkins     num_cells_local = c_end - c_start;
24978f7fce3SZach Atkins     DMPolytopeType cell_type;
25078f7fce3SZach Atkins     PetscCall(DMPlexGetCellType(dm_mesh, c_start, &cell_type));
25178f7fce3SZach Atkins     PetscMPIInt comm_size;
25278f7fce3SZach Atkins     PetscCall(MPI_Comm_size(comm, &comm_size));
25378f7fce3SZach Atkins 
25407a9c11fSZach Atkins     PetscInt num_points_local, num_points_global;
25578f7fce3SZach Atkins     PetscCall(DMSwarmGetLocalSize(dm_swarm, &num_points_local));
25607a9c11fSZach Atkins     PetscCall(DMSwarmGetSize(dm_swarm, &num_points_global));
25778f7fce3SZach Atkins 
25878f7fce3SZach Atkins     PetscCall(PetscPrintf(comm,
25978f7fce3SZach Atkins                           "\n-- CEED Benchmark Problem %" CeedInt_FMT " -- libCEED + PETSc --\n"
26078f7fce3SZach Atkins                           "  MPI:\n"
26178f7fce3SZach Atkins                           "    Hostname                                : %s\n"
26278f7fce3SZach Atkins                           "    Total ranks                             : %d\n"
26378f7fce3SZach Atkins                           "    Ranks per compute node                  : %d\n"
26478f7fce3SZach Atkins                           "  PETSc:\n"
26578f7fce3SZach Atkins                           "    PETSc Vec Type                          : %s\n"
26678f7fce3SZach Atkins                           "  libCEED:\n"
26778f7fce3SZach Atkins                           "    libCEED Backend                         : %s\n"
26878f7fce3SZach Atkins                           "    libCEED Backend MemType                 : %s\n"
26978f7fce3SZach Atkins                           "  Mesh:\n"
2704d00b080SJeremy L Thompson                           "    Solution Order (P)                      : %" PetscInt_FMT "\n"
2714d00b080SJeremy L Thompson                           "    Quadrature  Order (Q)                   : %" PetscInt_FMT "\n"
2724d00b080SJeremy L Thompson                           "    Additional quadrature points (q_extra)  : %" PetscInt_FMT "\n"
27378f7fce3SZach Atkins                           "    Global nodes                            : %" PetscInt_FMT "\n"
27478f7fce3SZach Atkins                           "    Local Elements                          : %" PetscInt_FMT "\n"
27578f7fce3SZach Atkins                           "    Owned nodes                             : %" PetscInt_FMT "\n"
27678f7fce3SZach Atkins                           "    DoF per node                            : %" PetscInt_FMT "\n"
27778f7fce3SZach Atkins                           "  Swarm:\n"
27878f7fce3SZach Atkins                           "    Global points                           : %" PetscInt_FMT "\n"
27978f7fce3SZach Atkins                           "    Local points                            : %" PetscInt_FMT "\n"
28078f7fce3SZach Atkins                           "    Avg points per cell                     : %" PetscInt_FMT "\n"
28178f7fce3SZach Atkins                           "    Point distribution                      : %s\n",
28278f7fce3SZach Atkins                           bp_choice + 1, hostname, comm_size, ranks_per_node, vec_type, used_resource, CeedMemTypes[mem_type_backend], P, Q, q_extra,
28307a9c11fSZach Atkins                           g_size / num_comp_u, num_cells_local, l_size / num_comp_u, num_comp_u, num_points_global, num_points_local,
28478f7fce3SZach Atkins                           num_cells_local > 0 ? num_points_local / num_cells_local : 0, point_swarm_types[point_swarm_type]));
28578f7fce3SZach Atkins   }
28678f7fce3SZach Atkins 
28778f7fce3SZach Atkins   // Setup libCEED's objects
28878f7fce3SZach Atkins   Vec target;
28978f7fce3SZach Atkins 
29078f7fce3SZach Atkins   PetscCall(DMCreateLocalVector(dm_swarm, &target));
29178f7fce3SZach Atkins   PetscCall(PetscMalloc1(1, &ceed_data));
29278f7fce3SZach Atkins   PetscCall(SetupProblemSwarm(dm_swarm, ceed, bp_options[bp_choice], ceed_data, true, rhs, target));
29378f7fce3SZach Atkins   PetscCall(SetupErrorOperator(dm_mesh, ceed, bp_options[bp_choice], dim, dim, num_comp_u, &op_error));
29478f7fce3SZach Atkins 
29578f7fce3SZach Atkins   // Set up apply operator context
29678f7fce3SZach Atkins   PetscCall(SetupApplyOperatorCtx(comm, dm_mesh, ceed, ceed_data, X_loc, op_apply_ctx));
29778f7fce3SZach Atkins 
29878f7fce3SZach Atkins   // Setup solver
29978f7fce3SZach Atkins   PetscCall(KSPCreate(comm, &ksp));
30078f7fce3SZach Atkins   {
30178f7fce3SZach Atkins     PC pc;
30278f7fce3SZach Atkins     PetscCall(KSPGetPC(ksp, &pc));
30378f7fce3SZach Atkins     if (bp_choice == CEED_BP1 || bp_choice == CEED_BP2) {
30478f7fce3SZach Atkins       PetscCall(PCSetType(pc, PCJACOBI));
3059c674643SJeremy L Thompson       PetscCall(PCJacobiSetType(pc, PC_JACOBI_DIAGONAL));
30678f7fce3SZach Atkins     } else {
30778f7fce3SZach Atkins       PetscCall(PCSetType(pc, PCNONE));
30878f7fce3SZach Atkins     }
30978f7fce3SZach Atkins     PetscCall(KSPSetType(ksp, KSPCG));
31078f7fce3SZach Atkins     PetscCall(KSPSetNormType(ksp, KSP_NORM_NATURAL));
31178f7fce3SZach Atkins     PetscCall(KSPSetTolerances(ksp, 1e-10, PETSC_DEFAULT, PETSC_DEFAULT, PETSC_DEFAULT));
31278f7fce3SZach Atkins   }
31378f7fce3SZach Atkins   PetscCall(KSPSetFromOptions(ksp));
31478f7fce3SZach Atkins   PetscCall(KSPSetOperators(ksp, mat_O, mat_O));
31578f7fce3SZach Atkins 
31678f7fce3SZach Atkins   // First run, if benchmarking
31778f7fce3SZach Atkins   if (benchmark_mode) {
31878f7fce3SZach Atkins     PetscCall(KSPSetTolerances(ksp, 1e-10, PETSC_DEFAULT, PETSC_DEFAULT, 1));
31978f7fce3SZach Atkins     my_rt_start = MPI_Wtime();
32078f7fce3SZach Atkins     PetscCall(KSPSolve(ksp, rhs, X));
32178f7fce3SZach Atkins     my_rt = MPI_Wtime() - my_rt_start;
32278f7fce3SZach Atkins     PetscCall(MPI_Allreduce(MPI_IN_PLACE, &my_rt, 1, MPI_DOUBLE, MPI_MIN, comm));
32378f7fce3SZach Atkins     // Set maxits based on first iteration timing
32478f7fce3SZach Atkins     if (my_rt > 0.02) {
32578f7fce3SZach Atkins       PetscCall(KSPSetTolerances(ksp, 1e-10, PETSC_DEFAULT, PETSC_DEFAULT, 5));
32678f7fce3SZach Atkins     } else {
32778f7fce3SZach Atkins       PetscCall(KSPSetTolerances(ksp, 1e-10, PETSC_DEFAULT, PETSC_DEFAULT, 20));
32878f7fce3SZach Atkins     }
32978f7fce3SZach Atkins   }
33078f7fce3SZach Atkins 
33178f7fce3SZach Atkins   // Timed solve
33278f7fce3SZach Atkins   PetscCall(VecZeroEntries(X));
33378f7fce3SZach Atkins   PetscCall(PetscBarrier((PetscObject)ksp));
33478f7fce3SZach Atkins 
33578f7fce3SZach Atkins   // -- Performance logging
33678f7fce3SZach Atkins   PetscCall(PetscLogStageRegister("Solve Stage", &solve_stage));
33778f7fce3SZach Atkins   PetscCall(PetscLogStagePush(solve_stage));
33878f7fce3SZach Atkins 
33978f7fce3SZach Atkins   // -- Solve
34078f7fce3SZach Atkins   my_rt_start = MPI_Wtime();
34178f7fce3SZach Atkins   PetscCall(KSPSolve(ksp, rhs, X));
34278f7fce3SZach Atkins   my_rt = MPI_Wtime() - my_rt_start;
34378f7fce3SZach Atkins 
34478f7fce3SZach Atkins   // -- Performance logging
34578f7fce3SZach Atkins   PetscCall(PetscLogStagePop());
34678f7fce3SZach Atkins 
34778f7fce3SZach Atkins   // Output results
34878f7fce3SZach Atkins   {
34978f7fce3SZach Atkins     KSPType            ksp_type;
35078f7fce3SZach Atkins     KSPConvergedReason reason;
35178f7fce3SZach Atkins     PetscReal          rnorm;
35278f7fce3SZach Atkins     PetscInt           its;
35378f7fce3SZach Atkins     PetscCall(KSPGetType(ksp, &ksp_type));
35478f7fce3SZach Atkins     PetscCall(KSPGetConvergedReason(ksp, &reason));
35578f7fce3SZach Atkins     PetscCall(KSPGetIterationNumber(ksp, &its));
35678f7fce3SZach Atkins     PetscCall(KSPGetResidualNorm(ksp, &rnorm));
35778f7fce3SZach Atkins     if (!test_mode || reason < 0 || rnorm > 1e-8) {
35878f7fce3SZach Atkins       PetscCall(PetscPrintf(comm,
35978f7fce3SZach Atkins                             "  KSP:\n"
36078f7fce3SZach Atkins                             "    KSP Type                                : %s\n"
36178f7fce3SZach Atkins                             "    KSP Convergence                         : %s\n"
36278f7fce3SZach Atkins                             "    Total KSP Iterations                    : %" PetscInt_FMT "\n"
36378f7fce3SZach Atkins                             "    Final rnorm                             : %e\n",
36478f7fce3SZach Atkins                             ksp_type, KSPConvergedReasons[reason], its, (double)rnorm));
36578f7fce3SZach Atkins     }
36678f7fce3SZach Atkins     if (!test_mode) {
36778f7fce3SZach Atkins       PetscCall(PetscPrintf(comm, "  Performance:\n"));
36878f7fce3SZach Atkins     }
36978f7fce3SZach Atkins 
37078f7fce3SZach Atkins     // View true solution at particles
37178f7fce3SZach Atkins     if (write_true_solution_swarm) {
37278f7fce3SZach Atkins       Vec u_swarm, u_swarm_old;
37378f7fce3SZach Atkins       PetscCall(DMSwarmSortGetAccess(dm_swarm));
37478f7fce3SZach Atkins       PetscCall(DMSwarmCreateLocalVectorFromField(dm_swarm, DMSwarmPICField_u, &u_swarm));
37578f7fce3SZach Atkins       PetscCall(VecDuplicate(u_swarm, &u_swarm_old));
37678f7fce3SZach Atkins       PetscCall(VecCopy(u_swarm, u_swarm_old));
37778f7fce3SZach Atkins       PetscCall(VecCopy(target, u_swarm));
37878f7fce3SZach Atkins       PetscCall(DMSwarmDestroyLocalVectorFromField(dm_swarm, DMSwarmPICField_u, &u_swarm));
37978f7fce3SZach Atkins       PetscCall(DMSwarmSortRestoreAccess(dm_swarm));
38078f7fce3SZach Atkins 
38178f7fce3SZach Atkins       PetscCall(DMSwarmViewXDMF(dm_swarm, "swarm.xmf"));
38278f7fce3SZach Atkins 
38378f7fce3SZach Atkins       PetscCall(DMSwarmSortGetAccess(dm_swarm));
38478f7fce3SZach Atkins       PetscCall(DMSwarmCreateLocalVectorFromField(dm_swarm, DMSwarmPICField_u, &u_swarm));
38578f7fce3SZach Atkins       PetscCall(VecCopy(u_swarm_old, u_swarm));
38678f7fce3SZach Atkins       PetscCall(DMSwarmDestroyLocalVectorFromField(dm_swarm, DMSwarmPICField_u, &u_swarm));
38778f7fce3SZach Atkins       PetscCall(DMSwarmSortRestoreAccess(dm_swarm));
38878f7fce3SZach Atkins       PetscCall(VecDestroy(&u_swarm_old));
38978f7fce3SZach Atkins     }
39078f7fce3SZach Atkins 
39178f7fce3SZach Atkins     // View solution at mesh points
39278f7fce3SZach Atkins     PetscCall(VecViewFromOptions(X, NULL, "-solution_view"));
39378f7fce3SZach Atkins 
39478f7fce3SZach Atkins     // Compute L2 Error
39578f7fce3SZach Atkins     {
39678f7fce3SZach Atkins       // Set up error operator context
39778f7fce3SZach Atkins       PetscCall(SetupErrorOperatorCtx(comm, dm_mesh, ceed, ceed_data, X_loc, op_error, op_error_ctx));
39878f7fce3SZach Atkins       PetscScalar l2_error;
39978f7fce3SZach Atkins       PetscCall(ComputeL2Error(X, &l2_error, op_error_ctx));
40078f7fce3SZach Atkins 
40107a9c11fSZach Atkins       if (!test_mode || l2_error > tolerance) {
40278f7fce3SZach Atkins         PetscCall(MPI_Allreduce(&my_rt, &rt_min, 1, MPI_DOUBLE, MPI_MIN, comm));
40378f7fce3SZach Atkins         PetscCall(MPI_Allreduce(&my_rt, &rt_max, 1, MPI_DOUBLE, MPI_MAX, comm));
40478f7fce3SZach Atkins         PetscCall(PetscPrintf(comm,
40578f7fce3SZach Atkins                               "    L2 Error                                : %e\n"
40678f7fce3SZach Atkins                               "    CG Solve Time                           : %g (%g) sec\n",
40778f7fce3SZach Atkins                               (double)l2_error, rt_max, rt_min));
40878f7fce3SZach Atkins       }
40978f7fce3SZach Atkins     }
41078f7fce3SZach Atkins     if (benchmark_mode && (!test_mode)) {
41178f7fce3SZach Atkins       PetscCall(PetscPrintf(comm, "    DoFs/Sec in CG                            : %g (%g) million\n", 1e-6 * g_size * its / rt_max,
41278f7fce3SZach Atkins                             1e-6 * g_size * its / rt_min));
41378f7fce3SZach Atkins     }
41478f7fce3SZach Atkins   }
41578f7fce3SZach Atkins 
41678f7fce3SZach Atkins   // Output solution
41778f7fce3SZach Atkins   if (write_solution) {
41878f7fce3SZach Atkins     PetscViewer vtk_viewer_soln;
41978f7fce3SZach Atkins 
42078f7fce3SZach Atkins     PetscCall(PetscViewerCreate(comm, &vtk_viewer_soln));
42178f7fce3SZach Atkins     PetscCall(PetscViewerSetType(vtk_viewer_soln, PETSCVIEWERVTK));
42278f7fce3SZach Atkins     PetscCall(PetscViewerFileSetName(vtk_viewer_soln, "solution.vtu"));
42378f7fce3SZach Atkins     PetscCall(VecView(X, vtk_viewer_soln));
42478f7fce3SZach Atkins     PetscCall(PetscViewerDestroy(&vtk_viewer_soln));
42578f7fce3SZach Atkins   }
42678f7fce3SZach Atkins 
42778f7fce3SZach Atkins   // Cleanup
42878f7fce3SZach Atkins   PetscCall(VecDestroy(&X));
42978f7fce3SZach Atkins   PetscCall(VecDestroy(&X_loc));
43078f7fce3SZach Atkins   PetscCall(VecDestroy(&op_apply_ctx->Y_loc));
43178f7fce3SZach Atkins   PetscCall(VecDestroy(&op_error_ctx->Y_loc));
43278f7fce3SZach Atkins   PetscCall(MatDestroy(&mat_O));
43378f7fce3SZach Atkins   PetscCall(PetscFree(op_apply_ctx));
43478f7fce3SZach Atkins   PetscCall(PetscFree(op_error_ctx));
43578f7fce3SZach Atkins   PetscCall(CeedDataDestroy(0, ceed_data));
43678f7fce3SZach Atkins   PetscCall(DMDestroy(&dm_mesh));
43778f7fce3SZach Atkins   PetscCall(DMDestroy(&dm_swarm));
43878f7fce3SZach Atkins 
43978f7fce3SZach Atkins   PetscCall(VecDestroy(&rhs));
44078f7fce3SZach Atkins   PetscCall(VecDestroy(&target));
44178f7fce3SZach Atkins   PetscCall(KSPDestroy(&ksp));
44278f7fce3SZach Atkins   CeedOperatorDestroy(&op_error);
44378f7fce3SZach Atkins   CeedDestroy(&ceed);
44478f7fce3SZach Atkins   return PetscFinalize();
44578f7fce3SZach Atkins }
446