15aed82e4SJeremy L Thompson // Copyright (c) 2017-2024, Lawrence Livermore National Security, LLC and other CEED contributors. 278f7fce3SZach Atkins // All Rights Reserved. See the top-level LICENSE and NOTICE files for details. 378f7fce3SZach Atkins // 478f7fce3SZach Atkins // SPDX-License-Identifier: BSD-2-Clause 578f7fce3SZach Atkins // 678f7fce3SZach Atkins // This file is part of CEED: http://github.com/ceed 778f7fce3SZach Atkins 878f7fce3SZach Atkins // libCEED + PETSc Example: CEED BPs 978f7fce3SZach Atkins // 1078f7fce3SZach Atkins // This example demonstrates a simple usage of libCEED with PETSc to solve the CEED BP benchmark problems, see http://ceed.exascaleproject.org/bps, on 1178f7fce3SZach Atkins // a particle swarm. 1278f7fce3SZach Atkins // 1378f7fce3SZach Atkins // The code uses higher level communication protocols in DMPlex and DMSwarm. 1478f7fce3SZach Atkins // 1578f7fce3SZach Atkins // Build with: 1678f7fce3SZach Atkins // 1778f7fce3SZach Atkins // make bpsswarm [PETSC_DIR=</path/to/petsc>] [CEED_DIR=</path/to/libceed>] 1878f7fce3SZach Atkins // 1978f7fce3SZach Atkins // Sample runs: 2078f7fce3SZach Atkins // 2178f7fce3SZach Atkins // bpssphere -problem bp1 -degree 3 2278f7fce3SZach Atkins // bpssphere -problem bp2 -degree 3 2378f7fce3SZach Atkins // bpssphere -problem bp3 -degree 3 2478f7fce3SZach Atkins // 2507a9c11fSZach Atkins //TESTARGS(name="BP2") -ceed {ceed_resource} -test -problem bp2 -dm_plex_dim 3 -dm_plex_box_faces 3,3,3 -dm_plex_simplex 0 -swarm uniform -points_per_cell 64 2607a9c11fSZach Atkins //TESTARGS(name="BP3") -ceed {ceed_resource} -test -problem bp3 -dm_plex_dim 3 -dm_plex_box_faces 4,4,4 -dm_plex_simplex 0 -swarm uniform -points_per_cell 64 -tolerance 3e-2 2707a9c11fSZach Atkins //TESTARGS(name="BP5") -ceed {ceed_resource} -test -problem bp5 -dm_plex_dim 3 -dm_plex_box_faces 3,3,3 -dm_plex_simplex 0 -swarm uniform -points_per_cell 64 2878f7fce3SZach Atkins 2978f7fce3SZach Atkins /// @file 3078f7fce3SZach Atkins /// CEED BPs example using PETSc with DMPlex 3178f7fce3SZach Atkins /// See bpsraw.c for a "raw" implementation using a structured grid and bps.c for an implementation using an unstructured grid. 3278f7fce3SZach Atkins static const char help[] = "Solve CEED BPs on a particle swarm using DMPlex and DMSwarm in PETSc\n"; 3378f7fce3SZach Atkins const char DMSwarmPICField_u[] = "u"; 3478f7fce3SZach Atkins 3578f7fce3SZach Atkins #include "bps.h" 3678f7fce3SZach Atkins 3778f7fce3SZach Atkins #include <ceed.h> 3878f7fce3SZach Atkins #include <petscdmplex.h> 3978f7fce3SZach Atkins #include <petscksp.h> 4078f7fce3SZach Atkins #include <stdbool.h> 4178f7fce3SZach Atkins #include <string.h> 4278f7fce3SZach Atkins 4378f7fce3SZach Atkins #include "include/bpsproblemdata.h" 4478f7fce3SZach Atkins #include "include/libceedsetup.h" 4578f7fce3SZach Atkins #include "include/matops.h" 4678f7fce3SZach Atkins #include "include/petscutils.h" 4778f7fce3SZach Atkins #include "include/petscversion.h" 4878f7fce3SZach Atkins #include "include/swarmutils.h" 4978f7fce3SZach Atkins 5078f7fce3SZach Atkins int main(int argc, char **argv) { 5178f7fce3SZach Atkins MPI_Comm comm; 5278f7fce3SZach Atkins char ceed_resource[PETSC_MAX_PATH_LEN] = "/cpu/self", filename[PETSC_MAX_PATH_LEN]; 5378f7fce3SZach Atkins double my_rt_start, my_rt, rt_min, rt_max; 5407a9c11fSZach Atkins PetscScalar tolerance; 554d00b080SJeremy L Thompson PetscMPIInt comm_size; 564d00b080SJeremy L Thompson PetscInt degree, q_extra, l_size, g_size, dim = 3, num_comp_u = 1, xl_size, num_points = 1728, num_points_per_cell = 64; 5778f7fce3SZach Atkins PetscBool test_mode, benchmark_mode, read_mesh, write_solution, write_true_solution_swarm; 5878f7fce3SZach Atkins PetscLogStage solve_stage; 5978f7fce3SZach Atkins Vec X, X_loc, rhs; 6078f7fce3SZach Atkins Mat mat_O; 6178f7fce3SZach Atkins KSP ksp; 6278f7fce3SZach Atkins DM dm_mesh, dm_swarm; 6378f7fce3SZach Atkins OperatorApplyContext op_apply_ctx, op_error_ctx; 6478f7fce3SZach Atkins Ceed ceed; 6578f7fce3SZach Atkins CeedData ceed_data; 6678f7fce3SZach Atkins CeedOperator op_error; 6778f7fce3SZach Atkins BPType bp_choice; 68*8c03e814SJeremy L Thompson VecType vec_type = VECSTANDARD; 6978f7fce3SZach Atkins PointSwarmType point_swarm_type = SWARM_GAUSS; 7078f7fce3SZach Atkins PetscMPIInt ranks_per_node; 7178f7fce3SZach Atkins char hostname[PETSC_MAX_PATH_LEN]; 7278f7fce3SZach Atkins 7378f7fce3SZach Atkins PetscCall(PetscInitialize(&argc, &argv, NULL, help)); 7478f7fce3SZach Atkins comm = PETSC_COMM_WORLD; 7578f7fce3SZach Atkins PetscCall(MPI_Comm_size(comm, &comm_size)); 7678f7fce3SZach Atkins #if defined(PETSC_HAVE_MPI_PROCESS_SHARED_MEMORY) 7778f7fce3SZach Atkins { 7878f7fce3SZach Atkins MPI_Comm splitcomm; 7978f7fce3SZach Atkins PetscCall(MPI_Comm_split_type(comm, MPI_COMM_TYPE_SHARED, 0, MPI_INFO_NULL, &splitcomm)); 8078f7fce3SZach Atkins PetscCall(MPI_Comm_size(splitcomm, &ranks_per_node)); 8178f7fce3SZach Atkins PetscCall(MPI_Comm_free(&splitcomm)); 8278f7fce3SZach Atkins } 8378f7fce3SZach Atkins #else 8478f7fce3SZach Atkins ranks_per_node = -1; // Unknown 8578f7fce3SZach Atkins #endif 8678f7fce3SZach Atkins 8778f7fce3SZach Atkins // Read command line options 8878f7fce3SZach Atkins PetscOptionsBegin(comm, NULL, "CEED BPs in PETSc", NULL); 8978f7fce3SZach Atkins bp_choice = CEED_BP1; 9078f7fce3SZach Atkins PetscCall(PetscOptionsEnum("-problem", "CEED benchmark problem to solve", NULL, bp_types, (PetscEnum)bp_choice, (PetscEnum *)&bp_choice, NULL)); 9178f7fce3SZach Atkins num_comp_u = bp_options[bp_choice].num_comp_u; 9278f7fce3SZach Atkins test_mode = PETSC_FALSE; 9378f7fce3SZach Atkins PetscCall(PetscOptionsBool("-test", "Testing mode (do not print unless error is large)", NULL, test_mode, &test_mode, NULL)); 9478f7fce3SZach Atkins benchmark_mode = PETSC_FALSE; 9578f7fce3SZach Atkins PetscCall(PetscOptionsBool("-benchmark", "Benchmarking mode (prints benchmark statistics)", NULL, benchmark_mode, &benchmark_mode, NULL)); 9678f7fce3SZach Atkins write_solution = PETSC_FALSE; 9778f7fce3SZach Atkins PetscCall(PetscOptionsBool("-write_solution", "Write solution for visualization", NULL, write_solution, &write_solution, NULL)); 9878f7fce3SZach Atkins write_true_solution_swarm = PETSC_FALSE; 9978f7fce3SZach Atkins PetscCall(PetscOptionsBool("-write_true_solution_swarm", "Write true solution at swarm points for visualization", NULL, write_true_solution_swarm, 10078f7fce3SZach Atkins &write_true_solution_swarm, NULL)); 10107a9c11fSZach Atkins degree = 2; 10278f7fce3SZach Atkins PetscCall(PetscOptionsInt("-degree", "Polynomial degree of tensor product basis", NULL, degree, °ree, NULL)); 10378f7fce3SZach Atkins q_extra = bp_options[bp_choice].q_extra; 10478f7fce3SZach Atkins PetscCall(PetscOptionsInt("-q_extra", "Number of extra quadrature points", NULL, q_extra, &q_extra, NULL)); 10578f7fce3SZach Atkins PetscCall(PetscOptionsString("-ceed", "CEED resource specifier", NULL, ceed_resource, ceed_resource, sizeof(ceed_resource), NULL)); 10678f7fce3SZach Atkins PetscCall(PetscGetHostName(hostname, sizeof hostname)); 10778f7fce3SZach Atkins PetscCall(PetscOptionsString("-hostname", "Hostname for output", NULL, hostname, hostname, sizeof(hostname), NULL)); 10878f7fce3SZach Atkins read_mesh = PETSC_FALSE; 10978f7fce3SZach Atkins PetscCall(PetscOptionsString("-mesh", "Read mesh from file", NULL, filename, filename, sizeof(filename), &read_mesh)); 11007a9c11fSZach Atkins tolerance = 1e-2; 11107a9c11fSZach Atkins PetscCall(PetscOptionsScalar("-tolerance", "Tolerance for L2 error", NULL, tolerance, &tolerance, NULL)); 11278f7fce3SZach Atkins PetscCall(PetscOptionsEnum("-swarm", "Swarm points distribution", NULL, point_swarm_types, (PetscEnum)point_swarm_type, 11378f7fce3SZach Atkins (PetscEnum *)&point_swarm_type, NULL)); 11478f7fce3SZach Atkins { 11578f7fce3SZach Atkins PetscBool user_set_num_points_per_cell = PETSC_FALSE; 11678f7fce3SZach Atkins PetscInt num_cells_total = 1, tmp = dim; 11778f7fce3SZach Atkins PetscInt num_cells[] = {1, 1, 1}; 11878f7fce3SZach Atkins 11978f7fce3SZach Atkins PetscCall(PetscOptionsInt("-points_per_cell", "Total number of swarm points in each cell", NULL, num_points_per_cell, &num_points_per_cell, 12078f7fce3SZach Atkins &user_set_num_points_per_cell)); 12178f7fce3SZach Atkins PetscCall(PetscOptionsInt("-dm_plex_dim", "Background mesh dimension", NULL, dim, &dim, NULL)); 12278f7fce3SZach Atkins PetscCall(PetscOptionsIntArray("-dm_plex_box_faces", "Number of cells", NULL, num_cells, &tmp, NULL)); 12378f7fce3SZach Atkins 12478f7fce3SZach Atkins PetscCheck(tmp == dim, comm, PETSC_ERR_USER, "Number of values for -dm_plex_box_faces must match dimension"); 12578f7fce3SZach Atkins 12678f7fce3SZach Atkins num_cells_total = num_cells[0] * num_cells[1] * num_cells[2]; 12778f7fce3SZach Atkins PetscCheck(!user_set_num_points_per_cell || point_swarm_type != SWARM_SINUSOIDAL, comm, PETSC_ERR_USER, 12878f7fce3SZach Atkins "Cannot specify points per cell with sinusoidal points locations"); 12978f7fce3SZach Atkins if (!user_set_num_points_per_cell) { 13078f7fce3SZach Atkins PetscCall(PetscOptionsInt("-points", "Total number of swarm points", NULL, num_points, &num_points, NULL)); 13178f7fce3SZach Atkins num_points_per_cell = PetscCeilInt(num_points, num_cells_total); 13278f7fce3SZach Atkins } 13378f7fce3SZach Atkins if (point_swarm_type != SWARM_SINUSOIDAL) { 13478f7fce3SZach Atkins PetscInt num_points_per_cell_1d = round(cbrt(num_points_per_cell * 1.0)); 13578f7fce3SZach Atkins 13678f7fce3SZach Atkins num_points_per_cell = 1; 13778f7fce3SZach Atkins for (PetscInt i = 0; i < dim; i++) num_points_per_cell *= num_points_per_cell_1d; 13878f7fce3SZach Atkins } 13978f7fce3SZach Atkins num_points = num_points_per_cell * num_cells_total; 14078f7fce3SZach Atkins } 14178f7fce3SZach Atkins { 14278f7fce3SZach Atkins PetscBool flg; 14378f7fce3SZach Atkins PetscInt p = ranks_per_node; 14478f7fce3SZach Atkins PetscCall(PetscOptionsInt("-p", "Number of MPI ranks per node", NULL, p, &p, &flg)); 14578f7fce3SZach Atkins if (flg) ranks_per_node = p; 14678f7fce3SZach Atkins } 14778f7fce3SZach Atkins PetscOptionsEnd(); 14878f7fce3SZach Atkins 14978f7fce3SZach Atkins // Setup DM 15078f7fce3SZach Atkins if (read_mesh) { 15178f7fce3SZach Atkins PetscCall(DMPlexCreateFromFile(comm, filename, NULL, PETSC_TRUE, &dm_mesh)); 15278f7fce3SZach Atkins } else { 15378f7fce3SZach Atkins PetscCall(DMCreate(comm, &dm_mesh)); 15478f7fce3SZach Atkins PetscCall(DMSetType(dm_mesh, DMPLEX)); 15578f7fce3SZach Atkins PetscCall(DMSetFromOptions(dm_mesh)); 15678f7fce3SZach Atkins 15778f7fce3SZach Atkins // -- Check for tensor product mesh 15878f7fce3SZach Atkins { 15978f7fce3SZach Atkins PetscBool is_simplex; 16078f7fce3SZach Atkins 16178f7fce3SZach Atkins PetscCall(DMPlexIsSimplex(dm_mesh, &is_simplex)); 16278f7fce3SZach Atkins PetscCheck(!is_simplex, comm, PETSC_ERR_USER, "Only tensor-product background meshes supported"); 16378f7fce3SZach Atkins } 16478f7fce3SZach Atkins } 16578f7fce3SZach Atkins PetscCall(DMGetDimension(dm_mesh, &dim)); 16678f7fce3SZach Atkins PetscCall(SetupDMByDegree(dm_mesh, degree, q_extra, num_comp_u, dim, bp_options[bp_choice].enforce_bc)); 16778f7fce3SZach Atkins 16878f7fce3SZach Atkins // View mesh 16978f7fce3SZach Atkins PetscCall(DMSetOptionsPrefix(dm_mesh, "final_")); 17078f7fce3SZach Atkins PetscCall(DMViewFromOptions(dm_mesh, NULL, "-dm_view")); 17178f7fce3SZach Atkins 17278f7fce3SZach Atkins // Create particle swarm 17378f7fce3SZach Atkins PetscCall(DMCreate(comm, &dm_swarm)); 17478f7fce3SZach Atkins PetscCall(DMSetType(dm_swarm, DMSWARM)); 17578f7fce3SZach Atkins PetscCall(DMSetDimension(dm_swarm, dim)); 17678f7fce3SZach Atkins PetscCall(DMSwarmSetType(dm_swarm, DMSWARM_PIC)); 17778f7fce3SZach Atkins PetscCall(DMSwarmSetCellDM(dm_swarm, dm_mesh)); 17878f7fce3SZach Atkins 17978f7fce3SZach Atkins // -- Swarm field 18078f7fce3SZach Atkins PetscCall(DMSwarmRegisterPetscDatatypeField(dm_swarm, DMSwarmPICField_u, num_comp_u, PETSC_SCALAR)); 18178f7fce3SZach Atkins PetscCall(DMSwarmFinalizeFieldRegister(dm_swarm)); 18207a9c11fSZach Atkins { 18307a9c11fSZach Atkins PetscInt c_start, c_end, num_cells_local; 18407a9c11fSZach Atkins PetscCall(DMPlexGetHeightStratum(dm_mesh, 0, &c_start, &c_end)); 18507a9c11fSZach Atkins num_cells_local = c_end - c_start; 18607a9c11fSZach Atkins PetscCall(DMSwarmSetLocalSizes(dm_swarm, num_cells_local * num_points_per_cell, 0)); 18707a9c11fSZach Atkins } 18878f7fce3SZach Atkins PetscCall(DMSetFromOptions(dm_swarm)); 18978f7fce3SZach Atkins 19078f7fce3SZach Atkins // -- Set swarm point locations 19178f7fce3SZach Atkins PetscCall(DMSwarmInitalizePointLocations(dm_swarm, point_swarm_type, num_points, num_points_per_cell)); 19278f7fce3SZach Atkins PetscCall(DMSwarmVectorDefineField(dm_swarm, DMSwarmPICField_u)); 19378f7fce3SZach Atkins 19478f7fce3SZach Atkins // -- Final particle swarm 19578f7fce3SZach Atkins PetscCall(PetscObjectSetName((PetscObject)dm_swarm, "Particle Swarm")); 19678f7fce3SZach Atkins PetscCall(DMViewFromOptions(dm_swarm, NULL, "-dm_swarm_view")); 19778f7fce3SZach Atkins 198*8c03e814SJeremy L Thompson // Set up libCEED 199*8c03e814SJeremy L Thompson CeedInit(ceed_resource, &ceed); 200*8c03e814SJeremy L Thompson CeedMemType mem_type_backend; 201*8c03e814SJeremy L Thompson CeedGetPreferredMemType(ceed, &mem_type_backend); 202*8c03e814SJeremy L Thompson 203*8c03e814SJeremy L Thompson // Set background mesh vec_type 204*8c03e814SJeremy L Thompson switch (mem_type_backend) { 205*8c03e814SJeremy L Thompson case CEED_MEM_HOST: 206*8c03e814SJeremy L Thompson vec_type = VECSTANDARD; 207*8c03e814SJeremy L Thompson break; 208*8c03e814SJeremy L Thompson case CEED_MEM_DEVICE: { 209*8c03e814SJeremy L Thompson const char *resolved; 210*8c03e814SJeremy L Thompson 211*8c03e814SJeremy L Thompson CeedGetResource(ceed, &resolved); 212*8c03e814SJeremy L Thompson if (strstr(resolved, "/gpu/cuda")) vec_type = VECCUDA; 213*8c03e814SJeremy L Thompson else if (strstr(resolved, "/gpu/hip/occa")) vec_type = VECSTANDARD; // https://github.com/CEED/libCEED/issues/678 214*8c03e814SJeremy L Thompson else if (strstr(resolved, "/gpu/hip")) vec_type = VECHIP; 215*8c03e814SJeremy L Thompson else vec_type = VECSTANDARD; 216*8c03e814SJeremy L Thompson } 217*8c03e814SJeremy L Thompson } 218*8c03e814SJeremy L Thompson PetscCall(DMSetVecType(dm_mesh, vec_type)); 219*8c03e814SJeremy L Thompson 22078f7fce3SZach Atkins // Create vectors 22178f7fce3SZach Atkins PetscCall(DMCreateGlobalVector(dm_mesh, &X)); 22278f7fce3SZach Atkins PetscCall(VecGetLocalSize(X, &l_size)); 22378f7fce3SZach Atkins PetscCall(VecGetSize(X, &g_size)); 22478f7fce3SZach Atkins PetscCall(DMCreateLocalVector(dm_mesh, &X_loc)); 22578f7fce3SZach Atkins PetscCall(VecGetSize(X_loc, &xl_size)); 22678f7fce3SZach Atkins PetscCall(VecDuplicate(X, &rhs)); 22778f7fce3SZach Atkins 22878f7fce3SZach Atkins // Operator 22978f7fce3SZach Atkins PetscCall(PetscMalloc1(1, &op_apply_ctx)); 23078f7fce3SZach Atkins PetscCall(PetscMalloc1(1, &op_error_ctx)); 23178f7fce3SZach Atkins PetscCall(MatCreateShell(comm, l_size, l_size, g_size, g_size, op_apply_ctx, &mat_O)); 23278f7fce3SZach Atkins PetscCall(MatSetDM(mat_O, dm_mesh)); 23378f7fce3SZach Atkins PetscCall(MatShellSetOperation(mat_O, MATOP_MULT, (void (*)(void))MatMult_Ceed)); 2349c674643SJeremy L Thompson PetscCall(MatShellSetOperation(mat_O, MATOP_GET_DIAGONAL, (void (*)(void))MatGetDiag)); 23578f7fce3SZach Atkins 23678f7fce3SZach Atkins // Print summary 23778f7fce3SZach Atkins if (!test_mode) { 23878f7fce3SZach Atkins PetscInt P = degree + 1, Q = P + q_extra; 23978f7fce3SZach Atkins 24078f7fce3SZach Atkins const char *used_resource; 24178f7fce3SZach Atkins CeedGetResource(ceed, &used_resource); 24278f7fce3SZach Atkins 24378f7fce3SZach Atkins VecType vec_type; 24478f7fce3SZach Atkins PetscCall(VecGetType(X, &vec_type)); 24578f7fce3SZach Atkins 24678f7fce3SZach Atkins PetscInt c_start, c_end, num_cells_local; 24778f7fce3SZach Atkins PetscCall(DMPlexGetHeightStratum(dm_mesh, 0, &c_start, &c_end)); 24878f7fce3SZach Atkins num_cells_local = c_end - c_start; 24978f7fce3SZach Atkins DMPolytopeType cell_type; 25078f7fce3SZach Atkins PetscCall(DMPlexGetCellType(dm_mesh, c_start, &cell_type)); 25178f7fce3SZach Atkins PetscMPIInt comm_size; 25278f7fce3SZach Atkins PetscCall(MPI_Comm_size(comm, &comm_size)); 25378f7fce3SZach Atkins 25407a9c11fSZach Atkins PetscInt num_points_local, num_points_global; 25578f7fce3SZach Atkins PetscCall(DMSwarmGetLocalSize(dm_swarm, &num_points_local)); 25607a9c11fSZach Atkins PetscCall(DMSwarmGetSize(dm_swarm, &num_points_global)); 25778f7fce3SZach Atkins 25878f7fce3SZach Atkins PetscCall(PetscPrintf(comm, 25978f7fce3SZach Atkins "\n-- CEED Benchmark Problem %" CeedInt_FMT " -- libCEED + PETSc --\n" 26078f7fce3SZach Atkins " MPI:\n" 26178f7fce3SZach Atkins " Hostname : %s\n" 26278f7fce3SZach Atkins " Total ranks : %d\n" 26378f7fce3SZach Atkins " Ranks per compute node : %d\n" 26478f7fce3SZach Atkins " PETSc:\n" 26578f7fce3SZach Atkins " PETSc Vec Type : %s\n" 26678f7fce3SZach Atkins " libCEED:\n" 26778f7fce3SZach Atkins " libCEED Backend : %s\n" 26878f7fce3SZach Atkins " libCEED Backend MemType : %s\n" 26978f7fce3SZach Atkins " Mesh:\n" 2704d00b080SJeremy L Thompson " Solution Order (P) : %" PetscInt_FMT "\n" 2714d00b080SJeremy L Thompson " Quadrature Order (Q) : %" PetscInt_FMT "\n" 2724d00b080SJeremy L Thompson " Additional quadrature points (q_extra) : %" PetscInt_FMT "\n" 27378f7fce3SZach Atkins " Global nodes : %" PetscInt_FMT "\n" 27478f7fce3SZach Atkins " Local Elements : %" PetscInt_FMT "\n" 27578f7fce3SZach Atkins " Owned nodes : %" PetscInt_FMT "\n" 27678f7fce3SZach Atkins " DoF per node : %" PetscInt_FMT "\n" 27778f7fce3SZach Atkins " Swarm:\n" 27878f7fce3SZach Atkins " Global points : %" PetscInt_FMT "\n" 27978f7fce3SZach Atkins " Local points : %" PetscInt_FMT "\n" 28078f7fce3SZach Atkins " Avg points per cell : %" PetscInt_FMT "\n" 28178f7fce3SZach Atkins " Point distribution : %s\n", 28278f7fce3SZach Atkins bp_choice + 1, hostname, comm_size, ranks_per_node, vec_type, used_resource, CeedMemTypes[mem_type_backend], P, Q, q_extra, 28307a9c11fSZach Atkins g_size / num_comp_u, num_cells_local, l_size / num_comp_u, num_comp_u, num_points_global, num_points_local, 28478f7fce3SZach Atkins num_cells_local > 0 ? num_points_local / num_cells_local : 0, point_swarm_types[point_swarm_type])); 28578f7fce3SZach Atkins } 28678f7fce3SZach Atkins 28778f7fce3SZach Atkins // Setup libCEED's objects 28878f7fce3SZach Atkins Vec target; 28978f7fce3SZach Atkins 29078f7fce3SZach Atkins PetscCall(DMCreateLocalVector(dm_swarm, &target)); 29178f7fce3SZach Atkins PetscCall(PetscMalloc1(1, &ceed_data)); 29278f7fce3SZach Atkins PetscCall(SetupProblemSwarm(dm_swarm, ceed, bp_options[bp_choice], ceed_data, true, rhs, target)); 29378f7fce3SZach Atkins PetscCall(SetupErrorOperator(dm_mesh, ceed, bp_options[bp_choice], dim, dim, num_comp_u, &op_error)); 29478f7fce3SZach Atkins 29578f7fce3SZach Atkins // Set up apply operator context 29678f7fce3SZach Atkins PetscCall(SetupApplyOperatorCtx(comm, dm_mesh, ceed, ceed_data, X_loc, op_apply_ctx)); 29778f7fce3SZach Atkins 29878f7fce3SZach Atkins // Setup solver 29978f7fce3SZach Atkins PetscCall(KSPCreate(comm, &ksp)); 30078f7fce3SZach Atkins { 30178f7fce3SZach Atkins PC pc; 30278f7fce3SZach Atkins PetscCall(KSPGetPC(ksp, &pc)); 30378f7fce3SZach Atkins if (bp_choice == CEED_BP1 || bp_choice == CEED_BP2) { 30478f7fce3SZach Atkins PetscCall(PCSetType(pc, PCJACOBI)); 3059c674643SJeremy L Thompson PetscCall(PCJacobiSetType(pc, PC_JACOBI_DIAGONAL)); 30678f7fce3SZach Atkins } else { 30778f7fce3SZach Atkins PetscCall(PCSetType(pc, PCNONE)); 30878f7fce3SZach Atkins } 30978f7fce3SZach Atkins PetscCall(KSPSetType(ksp, KSPCG)); 31078f7fce3SZach Atkins PetscCall(KSPSetNormType(ksp, KSP_NORM_NATURAL)); 31178f7fce3SZach Atkins PetscCall(KSPSetTolerances(ksp, 1e-10, PETSC_DEFAULT, PETSC_DEFAULT, PETSC_DEFAULT)); 31278f7fce3SZach Atkins } 31378f7fce3SZach Atkins PetscCall(KSPSetFromOptions(ksp)); 31478f7fce3SZach Atkins PetscCall(KSPSetOperators(ksp, mat_O, mat_O)); 31578f7fce3SZach Atkins 31678f7fce3SZach Atkins // First run, if benchmarking 31778f7fce3SZach Atkins if (benchmark_mode) { 31878f7fce3SZach Atkins PetscCall(KSPSetTolerances(ksp, 1e-10, PETSC_DEFAULT, PETSC_DEFAULT, 1)); 31978f7fce3SZach Atkins my_rt_start = MPI_Wtime(); 32078f7fce3SZach Atkins PetscCall(KSPSolve(ksp, rhs, X)); 32178f7fce3SZach Atkins my_rt = MPI_Wtime() - my_rt_start; 32278f7fce3SZach Atkins PetscCall(MPI_Allreduce(MPI_IN_PLACE, &my_rt, 1, MPI_DOUBLE, MPI_MIN, comm)); 32378f7fce3SZach Atkins // Set maxits based on first iteration timing 32478f7fce3SZach Atkins if (my_rt > 0.02) { 32578f7fce3SZach Atkins PetscCall(KSPSetTolerances(ksp, 1e-10, PETSC_DEFAULT, PETSC_DEFAULT, 5)); 32678f7fce3SZach Atkins } else { 32778f7fce3SZach Atkins PetscCall(KSPSetTolerances(ksp, 1e-10, PETSC_DEFAULT, PETSC_DEFAULT, 20)); 32878f7fce3SZach Atkins } 32978f7fce3SZach Atkins } 33078f7fce3SZach Atkins 33178f7fce3SZach Atkins // Timed solve 33278f7fce3SZach Atkins PetscCall(VecZeroEntries(X)); 33378f7fce3SZach Atkins PetscCall(PetscBarrier((PetscObject)ksp)); 33478f7fce3SZach Atkins 33578f7fce3SZach Atkins // -- Performance logging 33678f7fce3SZach Atkins PetscCall(PetscLogStageRegister("Solve Stage", &solve_stage)); 33778f7fce3SZach Atkins PetscCall(PetscLogStagePush(solve_stage)); 33878f7fce3SZach Atkins 33978f7fce3SZach Atkins // -- Solve 34078f7fce3SZach Atkins my_rt_start = MPI_Wtime(); 34178f7fce3SZach Atkins PetscCall(KSPSolve(ksp, rhs, X)); 34278f7fce3SZach Atkins my_rt = MPI_Wtime() - my_rt_start; 34378f7fce3SZach Atkins 34478f7fce3SZach Atkins // -- Performance logging 34578f7fce3SZach Atkins PetscCall(PetscLogStagePop()); 34678f7fce3SZach Atkins 34778f7fce3SZach Atkins // Output results 34878f7fce3SZach Atkins { 34978f7fce3SZach Atkins KSPType ksp_type; 35078f7fce3SZach Atkins KSPConvergedReason reason; 35178f7fce3SZach Atkins PetscReal rnorm; 35278f7fce3SZach Atkins PetscInt its; 35378f7fce3SZach Atkins PetscCall(KSPGetType(ksp, &ksp_type)); 35478f7fce3SZach Atkins PetscCall(KSPGetConvergedReason(ksp, &reason)); 35578f7fce3SZach Atkins PetscCall(KSPGetIterationNumber(ksp, &its)); 35678f7fce3SZach Atkins PetscCall(KSPGetResidualNorm(ksp, &rnorm)); 35778f7fce3SZach Atkins if (!test_mode || reason < 0 || rnorm > 1e-8) { 35878f7fce3SZach Atkins PetscCall(PetscPrintf(comm, 35978f7fce3SZach Atkins " KSP:\n" 36078f7fce3SZach Atkins " KSP Type : %s\n" 36178f7fce3SZach Atkins " KSP Convergence : %s\n" 36278f7fce3SZach Atkins " Total KSP Iterations : %" PetscInt_FMT "\n" 36378f7fce3SZach Atkins " Final rnorm : %e\n", 36478f7fce3SZach Atkins ksp_type, KSPConvergedReasons[reason], its, (double)rnorm)); 36578f7fce3SZach Atkins } 36678f7fce3SZach Atkins if (!test_mode) { 36778f7fce3SZach Atkins PetscCall(PetscPrintf(comm, " Performance:\n")); 36878f7fce3SZach Atkins } 36978f7fce3SZach Atkins 37078f7fce3SZach Atkins // View true solution at particles 37178f7fce3SZach Atkins if (write_true_solution_swarm) { 37278f7fce3SZach Atkins Vec u_swarm, u_swarm_old; 37378f7fce3SZach Atkins PetscCall(DMSwarmSortGetAccess(dm_swarm)); 37478f7fce3SZach Atkins PetscCall(DMSwarmCreateLocalVectorFromField(dm_swarm, DMSwarmPICField_u, &u_swarm)); 37578f7fce3SZach Atkins PetscCall(VecDuplicate(u_swarm, &u_swarm_old)); 37678f7fce3SZach Atkins PetscCall(VecCopy(u_swarm, u_swarm_old)); 37778f7fce3SZach Atkins PetscCall(VecCopy(target, u_swarm)); 37878f7fce3SZach Atkins PetscCall(DMSwarmDestroyLocalVectorFromField(dm_swarm, DMSwarmPICField_u, &u_swarm)); 37978f7fce3SZach Atkins PetscCall(DMSwarmSortRestoreAccess(dm_swarm)); 38078f7fce3SZach Atkins 38178f7fce3SZach Atkins PetscCall(DMSwarmViewXDMF(dm_swarm, "swarm.xmf")); 38278f7fce3SZach Atkins 38378f7fce3SZach Atkins PetscCall(DMSwarmSortGetAccess(dm_swarm)); 38478f7fce3SZach Atkins PetscCall(DMSwarmCreateLocalVectorFromField(dm_swarm, DMSwarmPICField_u, &u_swarm)); 38578f7fce3SZach Atkins PetscCall(VecCopy(u_swarm_old, u_swarm)); 38678f7fce3SZach Atkins PetscCall(DMSwarmDestroyLocalVectorFromField(dm_swarm, DMSwarmPICField_u, &u_swarm)); 38778f7fce3SZach Atkins PetscCall(DMSwarmSortRestoreAccess(dm_swarm)); 38878f7fce3SZach Atkins PetscCall(VecDestroy(&u_swarm_old)); 38978f7fce3SZach Atkins } 39078f7fce3SZach Atkins 39178f7fce3SZach Atkins // View solution at mesh points 39278f7fce3SZach Atkins PetscCall(VecViewFromOptions(X, NULL, "-solution_view")); 39378f7fce3SZach Atkins 39478f7fce3SZach Atkins // Compute L2 Error 39578f7fce3SZach Atkins { 39678f7fce3SZach Atkins // Set up error operator context 39778f7fce3SZach Atkins PetscCall(SetupErrorOperatorCtx(comm, dm_mesh, ceed, ceed_data, X_loc, op_error, op_error_ctx)); 39878f7fce3SZach Atkins PetscScalar l2_error; 39978f7fce3SZach Atkins PetscCall(ComputeL2Error(X, &l2_error, op_error_ctx)); 40078f7fce3SZach Atkins 40107a9c11fSZach Atkins if (!test_mode || l2_error > tolerance) { 40278f7fce3SZach Atkins PetscCall(MPI_Allreduce(&my_rt, &rt_min, 1, MPI_DOUBLE, MPI_MIN, comm)); 40378f7fce3SZach Atkins PetscCall(MPI_Allreduce(&my_rt, &rt_max, 1, MPI_DOUBLE, MPI_MAX, comm)); 40478f7fce3SZach Atkins PetscCall(PetscPrintf(comm, 40578f7fce3SZach Atkins " L2 Error : %e\n" 40678f7fce3SZach Atkins " CG Solve Time : %g (%g) sec\n", 40778f7fce3SZach Atkins (double)l2_error, rt_max, rt_min)); 40878f7fce3SZach Atkins } 40978f7fce3SZach Atkins } 41078f7fce3SZach Atkins if (benchmark_mode && (!test_mode)) { 41178f7fce3SZach Atkins PetscCall(PetscPrintf(comm, " DoFs/Sec in CG : %g (%g) million\n", 1e-6 * g_size * its / rt_max, 41278f7fce3SZach Atkins 1e-6 * g_size * its / rt_min)); 41378f7fce3SZach Atkins } 41478f7fce3SZach Atkins } 41578f7fce3SZach Atkins 41678f7fce3SZach Atkins // Output solution 41778f7fce3SZach Atkins if (write_solution) { 41878f7fce3SZach Atkins PetscViewer vtk_viewer_soln; 41978f7fce3SZach Atkins 42078f7fce3SZach Atkins PetscCall(PetscViewerCreate(comm, &vtk_viewer_soln)); 42178f7fce3SZach Atkins PetscCall(PetscViewerSetType(vtk_viewer_soln, PETSCVIEWERVTK)); 42278f7fce3SZach Atkins PetscCall(PetscViewerFileSetName(vtk_viewer_soln, "solution.vtu")); 42378f7fce3SZach Atkins PetscCall(VecView(X, vtk_viewer_soln)); 42478f7fce3SZach Atkins PetscCall(PetscViewerDestroy(&vtk_viewer_soln)); 42578f7fce3SZach Atkins } 42678f7fce3SZach Atkins 42778f7fce3SZach Atkins // Cleanup 42878f7fce3SZach Atkins PetscCall(VecDestroy(&X)); 42978f7fce3SZach Atkins PetscCall(VecDestroy(&X_loc)); 43078f7fce3SZach Atkins PetscCall(VecDestroy(&op_apply_ctx->Y_loc)); 43178f7fce3SZach Atkins PetscCall(VecDestroy(&op_error_ctx->Y_loc)); 43278f7fce3SZach Atkins PetscCall(MatDestroy(&mat_O)); 43378f7fce3SZach Atkins PetscCall(PetscFree(op_apply_ctx)); 43478f7fce3SZach Atkins PetscCall(PetscFree(op_error_ctx)); 43578f7fce3SZach Atkins PetscCall(CeedDataDestroy(0, ceed_data)); 43678f7fce3SZach Atkins PetscCall(DMDestroy(&dm_mesh)); 43778f7fce3SZach Atkins PetscCall(DMDestroy(&dm_swarm)); 43878f7fce3SZach Atkins 43978f7fce3SZach Atkins PetscCall(VecDestroy(&rhs)); 44078f7fce3SZach Atkins PetscCall(VecDestroy(&target)); 44178f7fce3SZach Atkins PetscCall(KSPDestroy(&ksp)); 44278f7fce3SZach Atkins CeedOperatorDestroy(&op_error); 44378f7fce3SZach Atkins CeedDestroy(&ceed); 44478f7fce3SZach Atkins return PetscFinalize(); 44578f7fce3SZach Atkins } 446