1 /* 2 This file creates an MPI parallel KSP from a sequential PC that lives on MPI rank 0. 3 It is intended to allow using PETSc MPI parallel linear solvers from non-MPI codes. 4 5 That program may use OpenMP to compute the right hand side and matrix for the linear system 6 7 The code uses MPI_COMM_WORLD below but maybe it should be PETSC_COMM_WORLD 8 9 The resulting KSP and PC can only be controlled via the options database, though some common commands 10 could be passed through the server. 11 12 */ 13 #include <petsc/private/pcimpl.h> 14 #include <petsc/private/kspimpl.h> 15 #include <petsc.h> 16 17 #define PC_MPI_MAX_RANKS 256 18 #define PC_MPI_COMM_WORLD MPI_COMM_WORLD 19 20 typedef struct { 21 KSP ksps[PC_MPI_MAX_RANKS]; /* The addresses of the MPI parallel KSP on each rank, NULL when not on a rank. */ 22 PetscMPIInt sendcount[PC_MPI_MAX_RANKS], displ[PC_MPI_MAX_RANKS]; /* For scatter/gather of rhs/solution */ 23 PetscMPIInt NZ[PC_MPI_MAX_RANKS], NZdispl[PC_MPI_MAX_RANKS]; /* For scatter of nonzero values in matrix (and nonzero column indices initially */ 24 PetscInt mincntperrank; /* minimum number of desired nonzeros per active rank in MPI parallel KSP solve */ 25 PetscBool alwaysuseserver; /* for debugging use the server infrastructure even if only one MPI rank is used for the solve */ 26 } PC_MPI; 27 28 typedef enum { 29 PCMPI_EXIT, /* exit the PC server loop, means the controlling sequential program is done */ 30 PCMPI_CREATE, 31 PCMPI_SET_MAT, /* set original matrix (or one with different nonzero pattern) */ 32 PCMPI_UPDATE_MAT_VALUES, /* update current matrix with new nonzero values */ 33 PCMPI_SOLVE, 34 PCMPI_VIEW, 35 PCMPI_DESTROY /* destroy a KSP that is no longer needed */ 36 } PCMPICommand; 37 38 static MPI_Comm PCMPIComms[PC_MPI_MAX_RANKS]; 39 static PetscBool PCMPICommSet = PETSC_FALSE; 40 static PetscInt PCMPISolveCounts[PC_MPI_MAX_RANKS], PCMPIKSPCounts[PC_MPI_MAX_RANKS], PCMPIMatCounts[PC_MPI_MAX_RANKS], PCMPISolveCountsSeq = 0, PCMPIKSPCountsSeq = 0; 41 42 static PetscErrorCode PCMPICommsCreate(void) { 43 MPI_Comm comm = PC_MPI_COMM_WORLD; 44 PetscMPIInt size, rank, i; 45 46 PetscFunctionBegin; 47 PetscCallMPI(MPI_Comm_size(comm, &size)); 48 PetscCheck(size <= PC_MPI_MAX_RANKS, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for using more than PC_MPI_MAX_RANKS MPI ranks in an MPI linear solver server solve"); 49 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 50 /* comm for size 1 is useful only for debugging */ 51 for (i = 0; i < size; i++) { 52 PetscMPIInt color = rank < i + 1 ? 0 : MPI_UNDEFINED; 53 PetscCallMPI(MPI_Comm_split(comm, color, 0, &PCMPIComms[i])); 54 PCMPISolveCounts[i] = 0; 55 PCMPIKSPCounts[i] = 0; 56 } 57 PCMPICommSet = PETSC_TRUE; 58 PetscFunctionReturn(0); 59 } 60 61 PetscErrorCode PCMPICommsDestroy(void) { 62 MPI_Comm comm = PC_MPI_COMM_WORLD; 63 PetscMPIInt size, rank, i; 64 65 PetscFunctionBegin; 66 if (!PCMPICommSet) PetscFunctionReturn(0); 67 PetscCallMPI(MPI_Comm_size(comm, &size)); 68 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 69 for (i = 0; i < size; i++) { 70 if (PCMPIComms[i] != MPI_COMM_NULL) PetscCallMPI(MPI_Comm_free(&PCMPIComms[i])); 71 } 72 PCMPICommSet = PETSC_FALSE; 73 PetscFunctionReturn(0); 74 } 75 76 static PetscErrorCode PCMPICreate(PC pc) { 77 PC_MPI *km = pc ? (PC_MPI *)pc->data : NULL; 78 MPI_Comm comm = PC_MPI_COMM_WORLD; 79 KSP ksp; 80 PetscInt N[2], mincntperrank = 0; 81 PetscMPIInt size; 82 Mat sA; 83 char *prefix; 84 PetscMPIInt len = 0; 85 86 PetscFunctionBegin; 87 if (!PCMPICommSet) PetscCall(PCMPICommsCreate()); 88 PetscCallMPI(MPI_Comm_size(comm, &size)); 89 if (pc) { 90 if (size == 1) PetscCall(PetscPrintf(PETSC_COMM_SELF, "Warning: Running KSP type of MPI on a one rank MPI run, this will be less efficient then not using this type\n")); 91 PetscCall(PCGetOperators(pc, &sA, &sA)); 92 PetscCall(MatGetSize(sA, &N[0], &N[1])); 93 } 94 PetscCallMPI(MPI_Bcast(N, 2, MPIU_INT, 0, comm)); 95 96 /* choose a suitable sized MPI_Comm for the problem to be solved on */ 97 if (km) mincntperrank = km->mincntperrank; 98 PetscCallMPI(MPI_Bcast(&mincntperrank, 1, MPI_INT, 0, comm)); 99 comm = PCMPIComms[PetscMin(size, PetscMax(1, N[0] / mincntperrank)) - 1]; 100 if (comm == MPI_COMM_NULL) { 101 ksp = NULL; 102 PetscFunctionReturn(0); 103 } 104 PetscCall(KSPCreate(comm, &ksp)); 105 PetscCallMPI(MPI_Gather(&ksp, 1, MPI_AINT, pc ? km->ksps : NULL, 1, MPI_AINT, 0, comm)); 106 if (pc) { 107 size_t slen; 108 109 PetscCallMPI(MPI_Comm_size(comm, &size)); 110 PCMPIKSPCounts[size - 1]++; 111 PetscCall(PCGetOptionsPrefix(pc, (const char **)&prefix)); 112 PetscCall(PetscStrlen(prefix, &slen)); 113 len = (PetscMPIInt)slen; 114 } 115 PetscCallMPI(MPI_Bcast(&len, 1, MPI_INT, 0, comm)); 116 if (len) { 117 if (!pc) PetscCall(PetscMalloc1(len + 1, &prefix)); 118 PetscCallMPI(MPI_Bcast(prefix, len + 1, MPI_CHAR, 0, comm)); 119 PetscCall(KSPSetOptionsPrefix(ksp, prefix)); 120 } 121 PetscCall(KSPAppendOptionsPrefix(ksp, "mpi_")); 122 PetscFunctionReturn(0); 123 } 124 125 static PetscErrorCode PCMPISetMat(PC pc) { 126 PC_MPI *km = pc ? (PC_MPI *)pc->data : NULL; 127 Mat A; 128 PetscInt N[2], n, *ia, *ja, j, bs; 129 Mat sA; 130 MPI_Comm comm = PC_MPI_COMM_WORLD; 131 KSP ksp; 132 PetscLayout layout; 133 const PetscInt *IA = NULL, *JA = NULL; 134 const PetscInt *range; 135 PetscMPIInt *NZ = NULL, sendcounti[PC_MPI_MAX_RANKS], displi[PC_MPI_MAX_RANKS], *NZdispl = NULL, nz, size, i; 136 PetscScalar *a; 137 const PetscScalar *sa = NULL; 138 139 PetscFunctionBegin; 140 PetscCallMPI(MPI_Scatter(pc ? km->ksps : NULL, 1, MPI_AINT, &ksp, 1, MPI_AINT, 0, comm)); 141 if (!ksp) PetscFunctionReturn(0); 142 PetscCall(PetscObjectGetComm((PetscObject)ksp, &comm)); 143 if (pc) { 144 PetscCallMPI(MPI_Comm_size(comm, &size)); 145 PCMPIMatCounts[size - 1]++; 146 PetscCall(PCGetOperators(pc, &sA, &sA)); 147 PetscCall(MatGetSize(sA, &N[0], &N[1])); 148 PetscCall(MatGetBlockSize(sA, &bs)); 149 /* need to broadcast symmetry flags etc if set */ 150 } 151 PetscCallMPI(MPI_Bcast(N, 2, MPIU_INT, 0, comm)); 152 PetscCallMPI(MPI_Bcast(&bs, 1, MPIU_INT, 0, comm)); 153 154 /* determine ownership ranges of matrix */ 155 PetscCall(PetscLayoutCreate(comm, &layout)); 156 PetscCall(PetscLayoutSetBlockSize(layout, bs)); 157 PetscCall(PetscLayoutSetSize(layout, N[0])); 158 PetscCall(PetscLayoutSetUp(layout)); 159 PetscCall(PetscLayoutGetLocalSize(layout, &n)); 160 161 /* copy over the matrix nonzero structure and values */ 162 if (pc) { 163 NZ = km->NZ; 164 NZdispl = km->NZdispl; 165 PetscCall(PetscLayoutGetRanges(layout, &range)); 166 PetscCall(MatGetRowIJ(sA, 0, PETSC_FALSE, PETSC_FALSE, NULL, &IA, &JA, NULL)); 167 for (i = 0; i < size; i++) { 168 sendcounti[i] = (PetscMPIInt)(1 + range[i + 1] - range[i]); 169 NZ[i] = (PetscMPIInt)(IA[range[i + 1]] - IA[range[i]]); 170 } 171 displi[0] = 0; 172 NZdispl[0] = 0; 173 for (j = 1; j < size; j++) { 174 displi[j] = displi[j - 1] + sendcounti[j - 1] - 1; 175 NZdispl[j] = NZdispl[j - 1] + NZ[j - 1]; 176 } 177 PetscCall(MatSeqAIJGetArrayRead(sA, &sa)); 178 } 179 PetscCall(PetscLayoutDestroy(&layout)); 180 PetscCallMPI(MPI_Scatter(NZ, 1, MPI_INT, &nz, 1, MPI_INT, 0, comm)); 181 182 PetscCall(PetscMalloc3(n + 1, &ia, nz, &ja, nz, &a)); 183 PetscCallMPI(MPI_Scatterv(IA, sendcounti, displi, MPIU_INT, ia, n + 1, MPIU_INT, 0, comm)); 184 PetscCallMPI(MPI_Scatterv(JA, NZ, NZdispl, MPIU_INT, ja, nz, MPIU_INT, 0, comm)); 185 PetscCallMPI(MPI_Scatterv(sa, NZ, NZdispl, MPIU_SCALAR, a, nz, MPIU_SCALAR, 0, comm)); 186 187 if (pc) { 188 PetscCall(MatSeqAIJRestoreArrayRead(sA, &sa)); 189 PetscCall(MatRestoreRowIJ(sA, 0, PETSC_FALSE, PETSC_FALSE, NULL, &IA, &JA, NULL)); 190 } 191 192 for (j = 1; j < n + 1; j++) ia[j] -= ia[0]; 193 ia[0] = 0; 194 PetscCall(MatCreateMPIAIJWithArrays(comm, n, n, N[0], N[0], ia, ja, a, &A)); 195 PetscCall(MatSetBlockSize(A, bs)); 196 PetscCall(MatSetOptionsPrefix(A, "mpi_")); 197 198 PetscCall(PetscFree3(ia, ja, a)); 199 PetscCall(KSPSetOperators(ksp, A, A)); 200 if (!ksp->vec_sol) PetscCall(MatCreateVecs(A, &ksp->vec_sol, &ksp->vec_rhs)); 201 if (pc) { /* needed for scatterv/gatherv of rhs and solution */ 202 const PetscInt *range; 203 204 PetscCall(VecGetOwnershipRanges(ksp->vec_sol, &range)); 205 for (i = 0; i < size; i++) { 206 km->sendcount[i] = (PetscMPIInt)(range[i + 1] - range[i]); 207 km->displ[i] = (PetscMPIInt)range[i]; 208 } 209 } 210 PetscCall(MatDestroy(&A)); 211 PetscCall(KSPSetFromOptions(ksp)); 212 PetscFunctionReturn(0); 213 } 214 215 static PetscErrorCode PCMPIUpdateMatValues(PC pc) { 216 PC_MPI *km = pc ? (PC_MPI *)pc->data : NULL; 217 KSP ksp; 218 Mat sA, A; 219 MPI_Comm comm = PC_MPI_COMM_WORLD; 220 PetscScalar *a; 221 PetscCount nz; 222 const PetscScalar *sa = NULL; 223 PetscMPIInt size; 224 225 PetscFunctionBegin; 226 if (pc) { 227 PetscCall(PCGetOperators(pc, &sA, &sA)); 228 PetscCall(MatSeqAIJGetArrayRead(sA, &sa)); 229 } 230 PetscCallMPI(MPI_Scatter(pc ? km->ksps : NULL, 1, MPI_AINT, &ksp, 1, MPI_AINT, 0, comm)); 231 if (!ksp) PetscFunctionReturn(0); 232 PetscCall(PetscObjectGetComm((PetscObject)ksp, &comm)); 233 PetscCallMPI(MPI_Comm_size(comm, &size)); 234 PCMPIMatCounts[size - 1]++; 235 PetscCall(KSPGetOperators(ksp, NULL, &A)); 236 PetscCall(MatMPIAIJGetNumberNonzeros(A, &nz)); 237 PetscCall(PetscMalloc1(nz, &a)); 238 PetscCallMPI(MPI_Scatterv(sa, pc ? km->NZ : NULL, pc ? km->NZdispl : NULL, MPIU_SCALAR, a, nz, MPIU_SCALAR, 0, comm)); 239 if (pc) PetscCall(MatSeqAIJRestoreArrayRead(sA, &sa)); 240 PetscCall(MatUpdateMPIAIJWithArray(A, a)); 241 PetscCall(PetscFree(a)); 242 PetscFunctionReturn(0); 243 } 244 245 static PetscErrorCode PCMPISolve(PC pc, Vec B, Vec X) { 246 PC_MPI *km = pc ? (PC_MPI *)pc->data : NULL; 247 KSP ksp; 248 MPI_Comm comm = PC_MPI_COMM_WORLD; 249 const PetscScalar *sb = NULL, *x; 250 PetscScalar *b, *sx = NULL; 251 PetscInt n; 252 253 PetscFunctionBegin; 254 PetscCallMPI(MPI_Scatter(pc ? km->ksps : &ksp, 1, MPI_AINT, &ksp, 1, MPI_AINT, 0, comm)); 255 if (!ksp) PetscFunctionReturn(0); 256 PetscCall(PetscObjectGetComm((PetscObject)ksp, &comm)); 257 258 /* TODO: optimize code to not require building counts/displ everytime */ 259 260 /* scatterv rhs */ 261 if (pc) { 262 PetscMPIInt size; 263 264 PetscCallMPI(MPI_Comm_size(comm, &size)); 265 PCMPISolveCounts[size - 1]++; 266 PetscCall(VecGetArrayRead(B, &sb)); 267 } 268 PetscCall(VecGetLocalSize(ksp->vec_rhs, &n)); 269 PetscCall(VecGetArray(ksp->vec_rhs, &b)); 270 PetscCallMPI(MPI_Scatterv(sb, pc ? km->sendcount : NULL, pc ? km->displ : NULL, MPIU_SCALAR, b, n, MPIU_SCALAR, 0, comm)); 271 PetscCall(VecRestoreArray(ksp->vec_rhs, &b)); 272 if (pc) PetscCall(VecRestoreArrayRead(B, &sb)); 273 274 PetscCall(KSPSolve(ksp, NULL, NULL)); 275 276 /* gather solution */ 277 PetscCall(VecGetArrayRead(ksp->vec_sol, &x)); 278 if (pc) PetscCall(VecGetArray(X, &sx)); 279 PetscCallMPI(MPI_Gatherv(x, n, MPIU_SCALAR, sx, pc ? km->sendcount : NULL, pc ? km->displ : NULL, MPIU_SCALAR, 0, comm)); 280 if (pc) PetscCall(VecRestoreArray(X, &sx)); 281 PetscCall(VecRestoreArrayRead(ksp->vec_sol, &x)); 282 PetscFunctionReturn(0); 283 } 284 285 static PetscErrorCode PCMPIDestroy(PC pc) { 286 PC_MPI *km = pc ? (PC_MPI *)pc->data : NULL; 287 KSP ksp; 288 MPI_Comm comm = PC_MPI_COMM_WORLD; 289 290 PetscFunctionBegin; 291 PetscCallMPI(MPI_Scatter(pc ? km->ksps : NULL, 1, MPI_AINT, &ksp, 1, MPI_AINT, 0, comm)); 292 if (!ksp) PetscFunctionReturn(0); 293 PetscCall(KSPDestroy(&ksp)); 294 PetscFunctionReturn(0); 295 } 296 297 /*@C 298 PCMPIServerBegin - starts a server that runs on the rank != 0 MPI processes waiting to process requests for 299 parallel `KSP` solves and management of parallel `KSP` objects. 300 301 Logically collective on all MPI ranks except 0 302 303 Options Database Keys: 304 + -mpi_linear_solver_server - causes the PETSc program to start in MPI linear solver server mode where only the first MPI rank runs user code 305 - -mpi_linear_solver_server_view - displays information about the linear systems solved by the MPI linear solver server 306 307 Note: 308 This is normally started automatically in `PetscInitialize()` when the option is provided 309 310 Developer Notes: 311 When called on rank zero this sets `PETSC_COMM_WORLD` to `PETSC_COMM_SELF` to allow a main program 312 written with `PETSC_COMM_WORLD` to run correctly on the single rank while all the ranks 313 (that would normally be sharing `PETSC_COMM_WORLD`) to run the solver server. 314 315 Can this be integrated into the `PetscDevice` abstraction that is currently being developed? 316 317 Level: developer 318 319 .seealso: `PCMPIServerEnd()`, `PCMPI` 320 @*/ 321 PetscErrorCode PCMPIServerBegin(void) { 322 PetscMPIInt rank; 323 324 PetscFunctionBegin; 325 PetscCall(PetscInfo(NULL, "Starting MPI Linear Solver Server")); 326 if (PetscDefined(USE_SINGLE_LIBRARY)) { 327 PetscCall(VecInitializePackage()); 328 PetscCall(MatInitializePackage()); 329 PetscCall(DMInitializePackage()); 330 PetscCall(PCInitializePackage()); 331 PetscCall(KSPInitializePackage()); 332 PetscCall(SNESInitializePackage()); 333 PetscCall(TSInitializePackage()); 334 PetscCall(TaoInitializePackage()); 335 } 336 337 PetscCallMPI(MPI_Comm_rank(PC_MPI_COMM_WORLD, &rank)); 338 if (rank == 0) { 339 PETSC_COMM_WORLD = PETSC_COMM_SELF; 340 PetscFunctionReturn(0); 341 } 342 343 while (PETSC_TRUE) { 344 PCMPICommand request = PCMPI_CREATE; 345 PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, PC_MPI_COMM_WORLD)); 346 switch (request) { 347 case PCMPI_CREATE: PetscCall(PCMPICreate(NULL)); break; 348 case PCMPI_SET_MAT: PetscCall(PCMPISetMat(NULL)); break; 349 case PCMPI_UPDATE_MAT_VALUES: PetscCall(PCMPIUpdateMatValues(NULL)); break; 350 case PCMPI_VIEW: 351 // PetscCall(PCMPIView(NULL)); 352 break; 353 case PCMPI_SOLVE: PetscCall(PCMPISolve(NULL, NULL, NULL)); break; 354 case PCMPI_DESTROY: PetscCall(PCMPIDestroy(NULL)); break; 355 case PCMPI_EXIT: 356 PetscCall(PetscFinalize()); 357 exit(0); /* not sure if this is a good idea, but cannot return because it will run users main program */ 358 break; 359 default: break; 360 } 361 } 362 PetscFunctionReturn(0); 363 } 364 365 /*@C 366 PCMPIServerEnd - ends a server that runs on the rank != 0 MPI processes waiting to process requests for 367 parallel KSP solves and management of parallel `KSP` objects. 368 369 Logically collective on all MPI ranks except 0 370 371 Note: 372 This is normally ended automatically in `PetscFinalize()` when the option is provided 373 374 Level: developer 375 376 .seealso: `PCMPIServerBegin()`, `PCMPI` 377 @*/ 378 PetscErrorCode PCMPIServerEnd(void) { 379 PCMPICommand request = PCMPI_EXIT; 380 381 PetscFunctionBegin; 382 if (PetscGlobalRank == 0) { 383 PetscViewer viewer = NULL; 384 PetscViewerFormat format; 385 386 PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, PC_MPI_COMM_WORLD)); 387 PETSC_COMM_WORLD = MPI_COMM_WORLD; /* could use PC_MPI_COMM_WORLD */ 388 PetscOptionsBegin(PETSC_COMM_SELF, NULL, "MPI linear solver server options", NULL); 389 PetscCall(PetscOptionsViewer("-mpi_linear_solver_server_view", "View information about system solved with the server", "PCMPI", &viewer, &format, NULL)); 390 PetscOptionsEnd(); 391 if (viewer) { 392 PetscBool isascii; 393 394 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii)); 395 if (isascii) { 396 PetscMPIInt size; 397 PetscInt i, ksp = 0, mat = 0, solve = 0; 398 399 PetscCallMPI(MPI_Comm_size(PETSC_COMM_WORLD, &size)); 400 for (i = 0; i < size; i++) { 401 ksp += PCMPIKSPCounts[i]; 402 mat += PCMPIMatCounts[i]; 403 solve += PCMPISolveCounts[i]; 404 } 405 PetscCall(PetscViewerASCIIPrintf(viewer, "MPI linear solver server:\n")); 406 PetscCall(PetscViewerASCIIPrintf(viewer, " Parallel KSPs %" PetscInt_FMT " Mats %" PetscInt_FMT " Solves %" PetscInt_FMT "\n", ksp, mat, solve)); 407 PetscCall(PetscViewerASCIIPrintf(viewer, " Sequential KSPs %" PetscInt_FMT " Solves %" PetscInt_FMT "\n", PCMPIKSPCountsSeq, PCMPISolveCountsSeq)); 408 } 409 PetscCall(PetscViewerDestroy(&viewer)); 410 } 411 } 412 PetscCall(PCMPICommsDestroy()); 413 PetscFunctionReturn(0); 414 } 415 416 /* 417 This version is used in the trivial case when the MPI parallel solver server is running on just the original MPI rank 0 418 because, for example, the problem is small. This version is more efficient because it does not require copying any data 419 */ 420 static PetscErrorCode PCSetUp_Seq(PC pc) { 421 PC_MPI *km = (PC_MPI *)pc->data; 422 Mat sA; 423 const char *prefix; 424 425 PetscFunctionBegin; 426 PetscCall(PCGetOperators(pc, NULL, &sA)); 427 PetscCall(PCGetOptionsPrefix(pc, &prefix)); 428 PetscCall(KSPCreate(PETSC_COMM_SELF, &km->ksps[0])); 429 PetscCall(KSPSetOptionsPrefix(km->ksps[0], prefix)); 430 PetscCall(KSPAppendOptionsPrefix(km->ksps[0], "mpi_")); 431 PetscCall(KSPSetOperators(km->ksps[0], sA, sA)); 432 PetscCall(KSPSetFromOptions(km->ksps[0])); 433 PetscCall(KSPSetUp(km->ksps[0])); 434 PetscInfo((PetscObject)pc, "MPI parallel linear solver system is being solved directly on rank 0 due to its small size\n"); 435 PCMPIKSPCountsSeq++; 436 PetscFunctionReturn(0); 437 } 438 439 static PetscErrorCode PCApply_Seq(PC pc, Vec b, Vec x) { 440 PC_MPI *km = (PC_MPI *)pc->data; 441 442 PetscFunctionBegin; 443 PetscCall(KSPSolve(km->ksps[0], b, x)); 444 PCMPISolveCountsSeq++; 445 PetscFunctionReturn(0); 446 } 447 448 static PetscErrorCode PCView_Seq(PC pc, PetscViewer viewer) { 449 PC_MPI *km = (PC_MPI *)pc->data; 450 const char *prefix; 451 452 PetscFunctionBegin; 453 PetscCall(PetscViewerASCIIPrintf(viewer, "Running MPI linear solver server directly on rank 0 due to its small size\n")); 454 PetscCall(PetscViewerASCIIPrintf(viewer, "Desired minimum number of nonzeros per rank for MPI parallel solve %d\n", (int)km->mincntperrank)); 455 PetscCall(PCGetOptionsPrefix(pc, &prefix)); 456 if (prefix) PetscCall(PetscViewerASCIIPrintf(viewer, "*** Use -%smpi_ksp_view to see the MPI KSP parameters***\n", prefix)); 457 else PetscCall(PetscViewerASCIIPrintf(viewer, "*** Use -mpi_ksp_view to see the MPI KSP parameters***\n")); 458 PetscFunctionReturn(0); 459 } 460 461 static PetscErrorCode PCDestroy_Seq(PC pc) { 462 PC_MPI *km = (PC_MPI *)pc->data; 463 464 PetscFunctionBegin; 465 PetscCall(KSPDestroy(&km->ksps[0])); 466 PetscCall(PetscFree(pc->data)); 467 PetscFunctionReturn(0); 468 } 469 470 /* 471 PCSetUp_MPI - Trigger the creation of the MPI parallel PC and copy parts of the matrix and 472 right hand side to the parallel PC 473 */ 474 static PetscErrorCode PCSetUp_MPI(PC pc) { 475 PC_MPI *km = (PC_MPI *)pc->data; 476 PetscMPIInt rank, size; 477 PCMPICommand request; 478 PetscBool newmatrix = PETSC_FALSE; 479 480 PetscFunctionBegin; 481 PetscCallMPI(MPI_Comm_rank(MPI_COMM_WORLD, &rank)); 482 PetscCheck(rank == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "PCMPI can only be used from 0th rank of MPI_COMM_WORLD. Perhaps a missing -mpi_linear_solver_server?"); 483 PetscCallMPI(MPI_Comm_size(MPI_COMM_WORLD, &size)); 484 485 if (!pc->setupcalled) { 486 if (!km->alwaysuseserver) { 487 PetscInt n; 488 Mat sA; 489 /* short circuit for small systems */ 490 PetscCall(PCGetOperators(pc, &sA, &sA)); 491 PetscCall(MatGetSize(sA, &n, NULL)); 492 if (n < 2 * km->mincntperrank - 1 || size == 1) { 493 pc->ops->setup = NULL; 494 pc->ops->apply = PCApply_Seq; 495 pc->ops->destroy = PCDestroy_Seq; 496 pc->ops->view = PCView_Seq; 497 PetscCall(PCSetUp_Seq(pc)); 498 PetscFunctionReturn(0); 499 } 500 } 501 502 request = PCMPI_CREATE; 503 PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, MPI_COMM_WORLD)); 504 PetscCall(PCMPICreate(pc)); 505 newmatrix = PETSC_TRUE; 506 } 507 if (pc->flag == DIFFERENT_NONZERO_PATTERN) newmatrix = PETSC_TRUE; 508 509 if (newmatrix) { 510 PetscInfo((PetscObject)pc, "New matrix or matrix has changed nonzero structure\n"); 511 request = PCMPI_SET_MAT; 512 PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, MPI_COMM_WORLD)); 513 PetscCall(PCMPISetMat(pc)); 514 } else { 515 PetscInfo((PetscObject)pc, "Matrix has only changed nozero values\n"); 516 request = PCMPI_UPDATE_MAT_VALUES; 517 PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, MPI_COMM_WORLD)); 518 PetscCall(PCMPIUpdateMatValues(pc)); 519 } 520 PetscFunctionReturn(0); 521 } 522 523 static PetscErrorCode PCApply_MPI(PC pc, Vec b, Vec x) { 524 PCMPICommand request = PCMPI_SOLVE; 525 526 PetscFunctionBegin; 527 PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, MPI_COMM_WORLD)); 528 PetscCall(PCMPISolve(pc, b, x)); 529 PetscFunctionReturn(0); 530 } 531 532 PetscErrorCode PCDestroy_MPI(PC pc) { 533 PCMPICommand request = PCMPI_DESTROY; 534 535 PetscFunctionBegin; 536 PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, MPI_COMM_WORLD)); 537 PetscCall(PCMPIDestroy(pc)); 538 PetscCall(PetscFree(pc->data)); 539 PetscFunctionReturn(0); 540 } 541 542 /* 543 PCView_MPI - Cannot call view on the MPI parallel KSP because other ranks do not have access to the viewer 544 */ 545 PetscErrorCode PCView_MPI(PC pc, PetscViewer viewer) { 546 PC_MPI *km = (PC_MPI *)pc->data; 547 MPI_Comm comm; 548 PetscMPIInt size; 549 const char *prefix; 550 551 PetscFunctionBegin; 552 PetscCall(PetscObjectGetComm((PetscObject)km->ksps[0], &comm)); 553 PetscCallMPI(MPI_Comm_size(comm, &size)); 554 PetscCall(PetscViewerASCIIPrintf(viewer, "Size of MPI communicator used for MPI parallel KSP solve %d\n", size)); 555 PetscCall(PetscViewerASCIIPrintf(viewer, "Desired minimum number of nonzeros per rank for MPI parallel solve %d\n", (int)km->mincntperrank)); 556 PetscCall(PCGetOptionsPrefix(pc, &prefix)); 557 if (prefix) PetscCall(PetscViewerASCIIPrintf(viewer, "*** Use -%smpi_ksp_view to see the MPI KSP parameters***\n", prefix)); 558 else PetscCall(PetscViewerASCIIPrintf(viewer, "*** Use -mpi_ksp_view to see the MPI KSP parameters***\n")); 559 PetscFunctionReturn(0); 560 } 561 562 PetscErrorCode PCSetFromOptions_MPI(PC pc, PetscOptionItems *PetscOptionsObject) { 563 PC_MPI *km = (PC_MPI *)pc->data; 564 565 PetscFunctionBegin; 566 PetscOptionsHeadBegin(PetscOptionsObject, "MPI linear solver server options"); 567 PetscCall(PetscOptionsInt("-pc_mpi_minimum_count_per_rank", "Desired minimum number of nonzeros per rank", "None", km->mincntperrank, &km->mincntperrank, NULL)); 568 PetscCall(PetscOptionsBool("-pc_mpi_always_use_server", "Use the server even if only one rank is used for the solve (for debugging)", "None", km->alwaysuseserver, &km->alwaysuseserver, NULL)); 569 PetscOptionsHeadEnd(); 570 PetscFunctionReturn(0); 571 } 572 573 /*MC 574 PCMPI - Calls an MPI parallel `KSP` to solve a linear system from user code running on one process 575 576 Level: beginner 577 578 Options Database Keys: 579 + -mpi_linear_solver_server - causes the PETSc program to start in MPI linear solver server mode where only the first MPI rank runs user code 580 . -mpi_linear_solver_server_view - displays information about the linear systems solved by the MPI linear solver server 581 . -pc_mpi_minimum_count_per_rank - sets the minimum size of the linear system per MPI rank that the solver will strive for 582 - -pc_mpi_always_use_server - use the server solver code even if the particular system is only solved on the process, this option is only for debugging and testing purposes 583 584 Notes: 585 The options database prefix for the MPI parallel `KSP` and `PC` is -mpi_ 586 587 The MPI linear solver server will not support scaling user code to utilize extremely large numbers of MPI ranks but should give reasonable speedup for 588 potentially 4 to 8 MPI ranks depending on the linear system being solved, solver algorithm, and the hardware. 589 590 It can be particularly useful for user OpenMP code or potentially user GPU code. 591 592 When the program is running with a single MPI rank then this directly uses the provided matrix and right hand side (still in a `KSP` with the options prefix of -mpi) 593 and does not need to distribute the matrix and vector to the various MPI ranks; thus it incurs no extra overhead over just using the `KSP` directly. 594 595 .seealso: `KSPCreate()`, `KSPSetType()`, `KSPType`, `KSP`, `PC`, `PCMPIServerBegin()`, `PCMPIServerEnd()` 596 M*/ 597 PETSC_EXTERN PetscErrorCode PCCreate_MPI(PC pc) { 598 PC_MPI *km; 599 600 PetscFunctionBegin; 601 PetscCall(PetscNew(&km)); 602 pc->data = (void *)km; 603 604 km->mincntperrank = 10000; 605 606 pc->ops->setup = PCSetUp_MPI; 607 pc->ops->apply = PCApply_MPI; 608 pc->ops->destroy = PCDestroy_MPI; 609 pc->ops->view = PCView_MPI; 610 pc->ops->setfromoptions = PCSetFromOptions_MPI; 611 PetscFunctionReturn(0); 612 } 613