/*
     This file creates an MPI parallel KSP from a sequential PC that lives on MPI rank 0.
   It is intended to allow using PETSc MPI parallel linear solvers from non-MPI codes.

   That program may use OpenMP to compute the right hand side and matrix for the linear system

   The code uses MPI_COMM_WORLD below but maybe it should be PETSC_COMM_WORLD

   The resulting KSP and PC can only be controlled via the options database, though some common commands
   could be passed through the server.

*/
#include <petsc/private/pcimpl.h>
#include <petsc/private/kspimpl.h>

/* Hard upper bound on the number of MPI ranks the solver server supports; enforced in PCMPICommsCreate() */
#define PC_MPI_MAX_RANKS  256
/* The communicator shared by the controlling (rank 0) process and the server ranks */
#define PC_MPI_COMM_WORLD MPI_COMM_WORLD

/* Private context of the PCMPI preconditioner; a valid pc (and hence this struct) exists only on rank 0,
   the server ranks call the PCMPIXXX() routines below with pc == NULL */
typedef struct {
  KSP         ksps[PC_MPI_MAX_RANKS];                               /* The addresses of the MPI parallel KSP on each rank, NULL when not on a rank. */
  PetscMPIInt sendcount[PC_MPI_MAX_RANKS], displ[PC_MPI_MAX_RANKS]; /* For scatter/gather of rhs/solution */
  PetscMPIInt NZ[PC_MPI_MAX_RANKS], NZdispl[PC_MPI_MAX_RANKS];      /* For scatter of nonzero values in matrix (and nonzero column indices initially */
  PetscInt    mincntperrank;                                        /* minimum number of desired nonzeros per active rank in MPI parallel KSP solve */
  PetscBool   alwaysuseserver;                                      /* for debugging use the server infrastructure even if only one MPI rank is used for the solve */
} PC_MPI;

/* Commands broadcast from rank 0 to the server ranks blocked in the loop inside PCMPIServerBegin() */
typedef enum {
  PCMPI_EXIT, /* exit the PC server loop, means the controlling sequential program is done */
  PCMPI_CREATE,
  PCMPI_SET_MAT,           /* set original matrix (or one with different nonzero pattern) */
  PCMPI_UPDATE_MAT_VALUES, /* update current matrix with new nonzero values */
  PCMPI_SOLVE,
  PCMPI_VIEW,
  PCMPI_DESTROY /* destroy a KSP that is no longer needed */
} PCMPICommand;

/* PCMPIComms[i] contains MPI ranks 0..i of PC_MPI_COMM_WORLD (MPI_COMM_NULL on the excluded ranks); built lazily by PCMPICommsCreate() */
static MPI_Comm  PCMPIComms[PC_MPI_MAX_RANKS];
static PetscBool PCMPICommSet = PETSC_FALSE;
/* Usage statistics, indexed by the size of the communicator used for the operation; displayed by -mpi_linear_solver_server_view */
static PetscInt PCMPISolveCounts[PC_MPI_MAX_RANKS], PCMPIKSPCounts[PC_MPI_MAX_RANKS], PCMPIMatCounts[PC_MPI_MAX_RANKS], PCMPISolveCountsSeq = 0, PCMPIKSPCountsSeq = 0;

static
PetscErrorCode PCMPICommsCreate(void) {
  MPI_Comm    comm = PC_MPI_COMM_WORLD;
  PetscMPIInt size, rank, i;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCheck(size <= PC_MPI_MAX_RANKS, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for using more than PC_MPI_MAX_RANKS MPI ranks in an MPI linear solver server solve");
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  /* comm for size 1 is useful only for debugging */
  /* PCMPIComms[i] contains ranks 0..i: color is 0 for participating ranks and MPI_UNDEFINED for the rest, which receive MPI_COMM_NULL */
  for (i = 0; i < size; i++) {
    PetscMPIInt color = rank < i + 1 ? 0 : MPI_UNDEFINED;
    PetscCallMPI(MPI_Comm_split(comm, color, 0, &PCMPIComms[i]));
    PCMPISolveCounts[i] = 0;
    PCMPIKSPCounts[i]   = 0;
  }
  PCMPICommSet = PETSC_TRUE;
  PetscFunctionReturn(0);
}

/* Frees every subcommunicator created by PCMPICommsCreate(); collective on PC_MPI_COMM_WORLD, no-op if none were created */
PetscErrorCode PCMPICommsDestroy(void) {
  MPI_Comm    comm = PC_MPI_COMM_WORLD;
  PetscMPIInt size, rank, i;

  PetscFunctionBegin;
  if (!PCMPICommSet) PetscFunctionReturn(0);
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  for (i = 0; i < size; i++) {
    if (PCMPIComms[i] != MPI_COMM_NULL) PetscCallMPI(MPI_Comm_free(&PCMPIComms[i]));
  }
  PCMPICommSet = PETSC_FALSE;
  PetscFunctionReturn(0);
}

/*
  PCMPICreate - collectively creates the MPI parallel KSP on a subcommunicator sized for the problem.
  Called with a non-NULL pc only on rank 0; the server ranks pass NULL and receive the sizes,
  minimum count, and options prefix via broadcasts from rank 0.
*/
static PetscErrorCode PCMPICreate(PC pc) {
  PC_MPI *km = pc ?
(PC_MPI *)pc->data : NULL; 77 MPI_Comm comm = PC_MPI_COMM_WORLD; 78 KSP ksp; 79 PetscInt N[2], mincntperrank = 0; 80 PetscMPIInt size; 81 Mat sA; 82 char *prefix; 83 PetscMPIInt len = 0; 84 85 PetscFunctionBegin; 86 if (!PCMPICommSet) PetscCall(PCMPICommsCreate()); 87 PetscCallMPI(MPI_Comm_size(comm, &size)); 88 if (pc) { 89 if (size == 1) PetscCall(PetscPrintf(PETSC_COMM_SELF, "Warning: Running KSP type of MPI on a one rank MPI run, this will be less efficient then not using this type\n")); 90 PetscCall(PCGetOperators(pc, &sA, &sA)); 91 PetscCall(MatGetSize(sA, &N[0], &N[1])); 92 } 93 PetscCallMPI(MPI_Bcast(N, 2, MPIU_INT, 0, comm)); 94 95 /* choose a suitable sized MPI_Comm for the problem to be solved on */ 96 if (km) mincntperrank = km->mincntperrank; 97 PetscCallMPI(MPI_Bcast(&mincntperrank, 1, MPI_INT, 0, comm)); 98 comm = PCMPIComms[PetscMin(size, PetscMax(1, N[0] / mincntperrank)) - 1]; 99 if (comm == MPI_COMM_NULL) { 100 ksp = NULL; 101 PetscFunctionReturn(0); 102 } 103 PetscCall(KSPCreate(comm, &ksp)); 104 PetscCallMPI(MPI_Gather(&ksp, 1, MPI_AINT, pc ? km->ksps : NULL, 1, MPI_AINT, 0, comm)); 105 if (pc) { 106 size_t slen; 107 108 PetscCallMPI(MPI_Comm_size(comm, &size)); 109 PCMPIKSPCounts[size]++; 110 PetscCall(PCGetOptionsPrefix(pc, (const char **)&prefix)); 111 PetscCall(PetscStrlen(prefix, &slen)); 112 len = (PetscMPIInt)slen; 113 } 114 PetscCallMPI(MPI_Bcast(&len, 1, MPI_INT, 0, comm)); 115 if (len) { 116 if (!pc) { PetscCall(PetscMalloc1(len + 1, &prefix)); } 117 PetscCallMPI(MPI_Bcast(prefix, len + 1, MPI_CHAR, 0, comm)); 118 PetscCall(KSPSetOptionsPrefix(ksp, prefix)); 119 } 120 PetscCall(KSPAppendOptionsPrefix(ksp, "mpi_")); 121 PetscFunctionReturn(0); 122 } 123 124 static PetscErrorCode PCMPISetMat(PC pc) { 125 PC_MPI *km = pc ? 
(PC_MPI *)pc->data : NULL;
  Mat                A;
  PetscInt           N[2], n, *ia, *ja, j;
  Mat                sA;
  MPI_Comm           comm = PC_MPI_COMM_WORLD;
  KSP                ksp;
  PetscLayout        layout;
  const PetscInt    *IA = NULL, *JA = NULL;
  const PetscInt    *range;
  PetscMPIInt       *NZ = NULL, sendcounti[PC_MPI_MAX_RANKS], displi[PC_MPI_MAX_RANKS], *NZdispl = NULL, nz, size, i;
  PetscScalar       *a;
  const PetscScalar *sa = NULL;

  PetscFunctionBegin;
  /* every rank learns which KSP (possibly NULL, meaning it does not participate) this command applies to */
  PetscCallMPI(MPI_Scatter(pc ? km->ksps : NULL, 1, MPI_AINT, &ksp, 1, MPI_AINT, 0, comm));
  if (!ksp) PetscFunctionReturn(0);
  PetscCall(PetscObjectGetComm((PetscObject)ksp, &comm)); /* from here on all collectives use the subcommunicator of the parallel KSP */
  if (pc) {
    PetscCallMPI(MPI_Comm_size(comm, &size));
    PCMPIMatCounts[size]++;
    PetscCall(PCGetOperators(pc, &sA, &sA));
    PetscCall(MatGetSize(sA, &N[0], &N[1]));
  }
  PetscCallMPI(MPI_Bcast(N, 2, MPIU_INT, 0, comm));

  /* determine ownership ranges of matrix */
  PetscCall(PetscLayoutCreate(comm, &layout));
  PetscCall(PetscLayoutSetBlockSize(layout, 1));
  PetscCall(PetscLayoutSetSize(layout, N[0]));
  PetscCall(PetscLayoutSetUp(layout));
  PetscCall(PetscLayoutGetLocalSize(layout, &n));

  /* copy over the matrix nonzero structure and values */
  if (pc) {
    NZ      = km->NZ;
    NZdispl = km->NZdispl;
    PetscCall(PetscLayoutGetRanges(layout, &range));
    PetscCall(MatGetRowIJ(sA, 0, PETSC_FALSE, PETSC_FALSE, NULL, &IA, &JA, NULL));
    for (i = 0; i < size; i++) {
      sendcounti[i] = (PetscMPIInt)(1 + range[i + 1] - range[i]); /* +1: a rank receives its rows plus the trailing CSR row pointer */
      NZ[i]         = (PetscMPIInt)(IA[range[i + 1]] - IA[range[i]]);
    }
    displi[0]  = 0;
    NZdispl[0] = 0;
    for (j = 1; j < size; j++) {
      displi[j]  = displi[j - 1] + sendcounti[j - 1] - 1; /* -1: consecutive row-pointer slices overlap by one entry */
      NZdispl[j] = NZdispl[j - 1] + NZ[j - 1];
    }
    PetscCall(MatSeqAIJGetArrayRead(sA, &sa));
  }
  PetscCall(PetscLayoutDestroy(&layout));
  /* tell each rank how many nonzeros it will receive */
  PetscCallMPI(MPI_Scatter(NZ, 1, MPI_INT, &nz, 1, MPI_INT, 0, comm));

  PetscCall(PetscMalloc3(n + 1, &ia, nz, &ja, nz, &a));
PetscCallMPI(MPI_Scatterv(IA, sendcounti, displi, MPIU_INT, ia, n + 1, MPIU_INT, 0, comm));
  PetscCallMPI(MPI_Scatterv(JA, NZ, NZdispl, MPIU_INT, ja, nz, MPIU_INT, 0, comm));
  PetscCallMPI(MPI_Scatterv(sa, NZ, NZdispl, MPIU_SCALAR, a, nz, MPIU_SCALAR, 0, comm));

  if (pc) {
    PetscCall(MatSeqAIJRestoreArrayRead(sA, &sa));
    PetscCall(MatRestoreRowIJ(sA, 0, PETSC_FALSE, PETSC_FALSE, NULL, &IA, &JA, NULL));
  }

  /* shift the received row pointers so each rank's slice starts at zero */
  for (j = 1; j < n + 1; j++) { ia[j] -= ia[0]; }
  ia[0] = 0;
  PetscCall(MatCreateMPIAIJWithArrays(comm, n, n, N[0], N[0], ia, ja, a, &A));
  PetscCall(MatSetOptionsPrefix(A, "mpi_"));

  PetscCall(PetscFree3(ia, ja, a)); /* MatCreateMPIAIJWithArrays() copies the data, so the buffers can go */
  PetscCall(KSPSetOperators(ksp, A, A));
  /* the server reuses ksp->vec_rhs/ksp->vec_sol across solves; create them on first use */
  if (!ksp->vec_sol) PetscCall(MatCreateVecs(A, &ksp->vec_sol, &ksp->vec_rhs));
  if (pc) { /* needed for scatterv/gatherv of rhs and solution */
    const PetscInt *range;

    PetscCall(VecGetOwnershipRanges(ksp->vec_sol, &range));
    for (i = 0; i < size; i++) {
      km->sendcount[i] = (PetscMPIInt)(range[i + 1] - range[i]);
      km->displ[i]     = (PetscMPIInt)range[i];
    }
  }
  PetscCall(MatDestroy(&A)); /* the KSP holds its own reference */
  PetscCall(KSPSetFromOptions(ksp));
  PetscFunctionReturn(0);
}

/*
  PCMPIUpdateMatValues - replaces the numerical values of the parallel matrix with new values
  from the sequential matrix on rank 0; the nonzero pattern must be unchanged (set via PCMPISetMat()).
*/
static PetscErrorCode PCMPIUpdateMatValues(PC pc) {
  PC_MPI            *km = pc ? (PC_MPI *)pc->data : NULL;
  KSP                ksp;
  Mat                sA, A;
  MPI_Comm           comm = PC_MPI_COMM_WORLD;
  PetscScalar       *a;
  PetscCount         nz;
  const PetscScalar *sa = NULL;

  PetscFunctionBegin;
  if (pc) {
    PetscMPIInt size;

    PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)pc), &size));
    PCMPIMatCounts[size]++;
    PetscCall(PCGetOperators(pc, &sA, &sA));
    PetscCall(MatSeqAIJGetArrayRead(sA, &sa));
  }
  PetscCallMPI(MPI_Scatter(pc ?
km->ksps : NULL, 1, MPI_AINT, &ksp, 1, MPI_AINT, 0, comm));
  if (!ksp) PetscFunctionReturn(0); /* this rank is not part of the subcommunicator for this system */
  PetscCall(PetscObjectGetComm((PetscObject)ksp, &comm));
  PetscCall(KSPGetOperators(ksp, NULL, &A));
  PetscCall(MatMPIAIJGetNumberNonzeros(A, &nz));
  PetscCall(PetscMalloc1(nz, &a));
  /* the counts/displacements built in PCMPISetMat() are reused; only significant on rank 0 */
  PetscCallMPI(MPI_Scatterv(sa, pc ? km->NZ : NULL, pc ? km->NZdispl : NULL, MPIU_SCALAR, a, nz, MPIU_SCALAR, 0, comm));
  if (pc) PetscCall(MatSeqAIJRestoreArrayRead(sA, &sa));
  PetscCall(MatUpdateMPIAIJWithArray(A, a));
  PetscCall(PetscFree(a));
  PetscFunctionReturn(0);
}

/*
  PCMPISolve - scatters the right-hand side from rank 0, solves with the MPI parallel KSP,
  and gathers the solution back. B and X are non-NULL only on rank 0.
*/
static PetscErrorCode PCMPISolve(PC pc, Vec B, Vec X) {
  PC_MPI            *km = pc ? (PC_MPI *)pc->data : NULL;
  KSP                ksp;
  MPI_Comm           comm = PC_MPI_COMM_WORLD;
  const PetscScalar *sb = NULL, *x;
  PetscScalar       *b, *sx = NULL;
  PetscInt           n;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Scatter(pc ? km->ksps : &ksp, 1, MPI_AINT, &ksp, 1, MPI_AINT, 0, comm));
  if (!ksp) PetscFunctionReturn(0);
  PetscCall(PetscObjectGetComm((PetscObject)ksp, &comm));

  /* TODO: optimize code to not require building counts/displ everytime */

  /* scatterv rhs */
  if (pc) {
    PetscMPIInt size;

    PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)pc), &size));
    PCMPISolveCounts[size]++; /* NOTE(review): indexed by the PC's comm size, while PCMPISetMat() indexes by the subcomm size — confirm intended */
    PetscCall(VecGetArrayRead(B, &sb));
  }
  PetscCall(VecGetLocalSize(ksp->vec_rhs, &n));
  PetscCall(VecGetArray(ksp->vec_rhs, &b));
  PetscCallMPI(MPI_Scatterv(sb, pc ? km->sendcount : NULL, pc ? km->displ : NULL, MPIU_SCALAR, b, n, MPIU_SCALAR, 0, comm));
  PetscCall(VecRestoreArray(ksp->vec_rhs, &b));
  if (pc) PetscCall(VecRestoreArrayRead(B, &sb));

  /* rhs/solution live directly in ksp->vec_rhs/ksp->vec_sol (created in PCMPISetMat()), so pass NULLs */
  PetscCall(KSPSolve(ksp, NULL, NULL));

  /* gather solution */
  PetscCall(VecGetArrayRead(ksp->vec_sol, &x));
  if (pc) PetscCall(VecGetArray(X, &sx));
  PetscCallMPI(MPI_Gatherv(x, n, MPIU_SCALAR, sx, pc ? km->sendcount : NULL, pc ?
km->displ : NULL, MPIU_SCALAR, 0, comm));
  if (pc) PetscCall(VecRestoreArray(X, &sx));
  PetscCall(VecRestoreArrayRead(ksp->vec_sol, &x));
  PetscFunctionReturn(0);
}

/*
  PCMPIDestroy - destroys the MPI parallel KSP on every rank of its subcommunicator
*/
static PetscErrorCode PCMPIDestroy(PC pc) {
  PC_MPI  *km = pc ? (PC_MPI *)pc->data : NULL;
  KSP      ksp;
  MPI_Comm comm = PC_MPI_COMM_WORLD;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Scatter(pc ? km->ksps : NULL, 1, MPI_AINT, &ksp, 1, MPI_AINT, 0, comm));
  if (!ksp) PetscFunctionReturn(0);
  PetscCall(KSPDestroy(&ksp));
  PetscFunctionReturn(0);
}

/*@C
     PCMPIServerBegin - starts a server that runs on the rank != 0 MPI processes waiting to process requests for
     parallel KSP solves and management of parallel KSP objects.

   Logically collective on all MPI ranks except 0

   Options Database:
+   -mpi_linear_solver_server - causes the PETSc program to start in MPI linear solver server mode where only the first MPI rank runs user code
-   -mpi_linear_solver_server_view - displays information about the linear systems solved by the MPI linear solver server

   Notes:
    This is normally started automatically in `PetscInitialize()` when the option is provided

   Developer Notes:
    When called on rank zero this sets PETSC_COMM_WORLD to PETSC_COMM_SELF to allow a main program
    written with PETSC_COMM_WORLD to run correctly on the single rank while all the ranks
    (that would normally be sharing PETSC_COMM_WORLD) to run the solver server.

    Can this be integrated into the PetscDevice abstraction that is currently being developed?
   Level: developer

.seealso: PCMPIServerEnd(), PCMPI
@*/
PetscErrorCode PCMPIServerBegin(void) {
  PetscMPIInt rank;

  PetscFunctionBegin;
  PetscCall(PetscInfo(NULL, "Starting MPI Linear Solver Server"));
  PetscCallMPI(MPI_Comm_rank(PC_MPI_COMM_WORLD, &rank));
  if (rank == 0) {
    /* the controlling user program keeps running only on rank 0, with a self communicator as its "world" */
    PETSC_COMM_WORLD = PETSC_COMM_SELF;
    PetscFunctionReturn(0);
  }

  /* all other ranks block here processing commands broadcast from rank 0 until PCMPI_EXIT arrives;
     each MPI_Bcast below matches a broadcast issued by the PCXXX_MPI() routines (or PCMPIServerEnd()) on rank 0 */
  while (PETSC_TRUE) {
    PCMPICommand request = PCMPI_CREATE;
    PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, PC_MPI_COMM_WORLD));
    switch (request) {
    case PCMPI_CREATE: PetscCall(PCMPICreate(NULL)); break;
    case PCMPI_SET_MAT: PetscCall(PCMPISetMat(NULL)); break;
    case PCMPI_UPDATE_MAT_VALUES: PetscCall(PCMPIUpdateMatValues(NULL)); break;
    case PCMPI_VIEW:
      // PetscCall(PCMPIView(NULL));
      break;
    case PCMPI_SOLVE: PetscCall(PCMPISolve(NULL, NULL, NULL)); break;
    case PCMPI_DESTROY: PetscCall(PCMPIDestroy(NULL)); break;
    case PCMPI_EXIT:
      PetscCall(PetscFinalize());
      exit(0); /* not sure if this is a good idea, but cannot return because it will run users main program */
      break;
    default: break;
    }
  }
  PetscFunctionReturn(0);
}

/*@C
     PCMPIServerEnd - ends a server that runs on the rank != 0 MPI processes waiting to process requests for
     parallel KSP solves and management of parallel KSP objects.
353 354 Logically collective on all MPI ranks except 0 355 356 Notes: 357 This is normally ended automatically in `PetscFinalize()` when the option is provided 358 359 Level: developer 360 361 .seealso: PCMPIServerBegin(), PCMPI 362 @*/ 363 PetscErrorCode PCMPIServerEnd(void) { 364 PCMPICommand request = PCMPI_EXIT; 365 366 PetscFunctionBegin; 367 if (PetscGlobalRank == 0) { 368 PetscViewer viewer = NULL; 369 PetscViewerFormat format; 370 371 PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, PC_MPI_COMM_WORLD)); 372 PETSC_COMM_WORLD = MPI_COMM_WORLD; /* could use PC_MPI_COMM_WORLD */ 373 PetscOptionsBegin(PETSC_COMM_SELF, NULL, "MPI linear solver server options", NULL); 374 PetscCall(PetscOptionsViewer("-mpi_linear_solver_server_view", "View information about system solved with the server", "PCMPI", &viewer, &format, NULL)); 375 PetscOptionsEnd(); 376 if (viewer) { 377 PetscBool isascii; 378 379 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii)); 380 if (isascii) { 381 PetscMPIInt size; 382 PetscInt i, ksp = 0, mat = 0, solve = 0; 383 384 PetscCallMPI(MPI_Comm_size(PETSC_COMM_WORLD, &size)); 385 for (i = 1; i < size; i++) { 386 ksp += PCMPIKSPCounts[i]; 387 mat += PCMPIMatCounts[i]; 388 solve += PCMPISolveCounts[i]; 389 } 390 PetscCall(PetscViewerASCIIPrintf(viewer, "MPI linear solver server:\n")); 391 PetscCall(PetscViewerASCIIPrintf(viewer, " Parallel KSPs %" PetscInt_FMT " Mats %" PetscInt_FMT " Solves %" PetscInt_FMT "\n", ksp, mat, solve)); 392 PetscCall(PetscViewerASCIIPrintf(viewer, " Sequential KSPs %" PetscInt_FMT " Solves %" PetscInt_FMT "\n", PCMPIKSPCountsSeq, PCMPISolveCountsSeq)); 393 } 394 PetscCall(PetscViewerDestroy(&viewer)); 395 } 396 } 397 PetscCall(PCMPICommsDestroy()); 398 PetscFunctionReturn(0); 399 } 400 401 /* 402 This version is used in the trivial case when the MPI parallel solver server is running on just the original MPI rank 0 403 because, for example, the problem is small. 
This version is more efficient because it does not require copying any data 404 */ 405 static PetscErrorCode PCSetUp_Seq(PC pc) { 406 PC_MPI *km = (PC_MPI *)pc->data; 407 Mat sA; 408 const char *prefix; 409 410 PetscFunctionBegin; 411 PetscCall(PCGetOperators(pc, NULL, &sA)); 412 PetscCall(PCGetOptionsPrefix(pc, &prefix)); 413 PetscCall(KSPCreate(PETSC_COMM_SELF, &km->ksps[0])); 414 PetscCall(KSPSetOptionsPrefix(km->ksps[0], prefix)); 415 PetscCall(KSPAppendOptionsPrefix(km->ksps[0], "mpi_")); 416 PetscCall(KSPSetOperators(km->ksps[0], sA, sA)); 417 PetscCall(KSPSetFromOptions(km->ksps[0])); 418 PetscCall(KSPSetUp(km->ksps[0])); 419 PetscInfo((PetscObject)pc, "MPI parallel linear solver system is being solved directly on rank 0 due to its small size\n"); 420 PCMPIKSPCountsSeq++; 421 PetscFunctionReturn(0); 422 } 423 424 static PetscErrorCode PCApply_Seq(PC pc, Vec b, Vec x) { 425 PC_MPI *km = (PC_MPI *)pc->data; 426 427 PetscFunctionBegin; 428 PetscCall(KSPSolve(km->ksps[0], b, x)); 429 PCMPISolveCountsSeq++; 430 PetscFunctionReturn(0); 431 } 432 433 static PetscErrorCode PCView_Seq(PC pc, PetscViewer viewer) { 434 PC_MPI *km = (PC_MPI *)pc->data; 435 const char *prefix; 436 437 PetscFunctionBegin; 438 PetscCall(PetscViewerASCIIPrintf(viewer, "Running MPI linear solver server directly on rank 0 due to its small size\n")); 439 PetscCall(PetscViewerASCIIPrintf(viewer, "Desired minimum number of nonzeros per rank for MPI parallel solve %d\n", (int)km->mincntperrank)); 440 PetscCall(PCGetOptionsPrefix(pc, &prefix)); 441 if (prefix) PetscCall(PetscViewerASCIIPrintf(viewer, "*** Use -%smpi_ksp_view to see the MPI KSP parameters***\n", prefix)); 442 else PetscCall(PetscViewerASCIIPrintf(viewer, "*** Use -mpi_ksp_view to see the MPI KSP parameters***\n")); 443 PetscFunctionReturn(0); 444 } 445 446 static PetscErrorCode PCDestroy_Seq(PC pc) { 447 PC_MPI *km = (PC_MPI *)pc->data; 448 449 PetscFunctionBegin; 450 PetscCall(KSPDestroy(&km->ksps[0])); 451 
PetscCall(PetscFree(pc->data)); 452 PetscFunctionReturn(0); 453 } 454 455 /* 456 PCSetUp_MPI - Trigger the creation of the MPI parallel PC and copy parts of the matrix and 457 right hand side to the parallel PC 458 */ 459 static PetscErrorCode PCSetUp_MPI(PC pc) { 460 PC_MPI *km = (PC_MPI *)pc->data; 461 PetscMPIInt rank, size; 462 PCMPICommand request; 463 PetscBool newmatrix = PETSC_FALSE; 464 465 PetscFunctionBegin; 466 PetscCallMPI(MPI_Comm_rank(MPI_COMM_WORLD, &rank)); 467 PetscCheck(rank == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "PCMPI can only be used from 0th rank of MPI_COMM_WORLD. Perhaps a missing -mpi_linear_solver_server?"); 468 PetscCallMPI(MPI_Comm_size(MPI_COMM_WORLD, &size)); 469 470 if (!pc->setupcalled) { 471 if (!km->alwaysuseserver) { 472 PetscInt n; 473 Mat sA; 474 /* short circuit for small systems */ 475 PetscCall(PCGetOperators(pc, &sA, &sA)); 476 PetscCall(MatGetSize(sA, &n, NULL)); 477 if (n < 2 * km->mincntperrank - 1 || size == 1) { 478 pc->ops->setup = NULL; 479 pc->ops->apply = PCApply_Seq; 480 pc->ops->destroy = PCDestroy_Seq; 481 pc->ops->view = PCView_Seq; 482 PetscCall(PCSetUp_Seq(pc)); 483 PetscFunctionReturn(0); 484 } 485 } 486 487 request = PCMPI_CREATE; 488 PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, MPI_COMM_WORLD)); 489 PetscCall(PCMPICreate(pc)); 490 newmatrix = PETSC_TRUE; 491 } 492 if (pc->flag == DIFFERENT_NONZERO_PATTERN) newmatrix = PETSC_TRUE; 493 494 if (newmatrix) { 495 PetscInfo((PetscObject)pc, "New matrix or matrix has changed nonzero structure\n"); 496 request = PCMPI_SET_MAT; 497 PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, MPI_COMM_WORLD)); 498 PetscCall(PCMPISetMat(pc)); 499 } else { 500 PetscInfo((PetscObject)pc, "Matrix has only changed nozero values\n"); 501 request = PCMPI_UPDATE_MAT_VALUES; 502 PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, MPI_COMM_WORLD)); 503 PetscCall(PCMPIUpdateMatValues(pc)); 504 } 505 PetscFunctionReturn(0); 506 } 507 508 static PetscErrorCode PCApply_MPI(PC pc, 
Vec b, Vec x) {
  PCMPICommand request = PCMPI_SOLVE;

  PetscFunctionBegin;
  /* wake the server ranks blocked in PCMPIServerBegin(), then join them in the collective solve */
  PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, MPI_COMM_WORLD));
  PetscCall(PCMPISolve(pc, b, x));
  PetscFunctionReturn(0);
}

/* Destroy: tell the server ranks to destroy their parallel KSP, then free the rank-0 context */
PetscErrorCode PCDestroy_MPI(PC pc) {
  PCMPICommand request = PCMPI_DESTROY;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, MPI_COMM_WORLD));
  PetscCall(PCMPIDestroy(pc));
  PetscCall(PetscFree(pc->data));
  PetscFunctionReturn(0);
}

/*
   PCView_MPI - Cannot call view on the MPI parallel KSP because other ranks do not have access to the viewer
*/
PetscErrorCode PCView_MPI(PC pc, PetscViewer viewer) {
  PC_MPI     *km = (PC_MPI *)pc->data;
  MPI_Comm    comm;
  PetscMPIInt size;
  const char *prefix;

  PetscFunctionBegin;
  /* NOTE(review): km->ksps[0] is only assigned during setup; viewing before PCSetUp() would dereference NULL — confirm callers */
  PetscCall(PetscObjectGetComm((PetscObject)km->ksps[0], &comm));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCall(PetscViewerASCIIPrintf(viewer, "Size of MPI communicator used for MPI parallel KSP solve %d\n", size));
  PetscCall(PetscViewerASCIIPrintf(viewer, "Desired minimum number of nonzeros per rank for MPI parallel solve %d\n", (int)km->mincntperrank));
  PetscCall(PCGetOptionsPrefix(pc, &prefix));
  if (prefix) PetscCall(PetscViewerASCIIPrintf(viewer, "*** Use -%smpi_ksp_view to see the MPI KSP parameters***\n", prefix));
  else PetscCall(PetscViewerASCIIPrintf(viewer, "*** Use -mpi_ksp_view to see the MPI KSP parameters***\n"));
  PetscFunctionReturn(0);
}

/* Process the -pc_mpi_ options database options on rank 0 */
PetscErrorCode PCSetFromOptions_MPI(PC pc, PetscOptionItems *PetscOptionsObject) {
  PC_MPI *km = (PC_MPI *)pc->data;

  PetscFunctionBegin;
  PetscOptionsHeadBegin(PetscOptionsObject, "MPI linear solver server options");
  PetscCall(PetscOptionsInt("-pc_mpi_minimum_count_per_rank", "Desired minimum number of nonzeros per rank", "None", km->mincntperrank, &km->mincntperrank, NULL));
PetscCall(PetscOptionsBool("-pc_mpi_always_use_server", "Use the server even if only one rank is used for the solve (for debugging)", "None", km->alwaysuseserver, &km->alwaysuseserver, NULL));
  PetscOptionsHeadEnd();
  PetscFunctionReturn(0);
}

/*MC
     PCMPI - Calls an MPI parallel KSP to solve a linear system from user code running on one process

   Level: beginner

   Options Database:
+  -mpi_linear_solver_server - causes the PETSc program to start in MPI linear solver server mode where only the first MPI rank runs user code
.  -mpi_linear_solver_server_view - displays information about the linear systems solved by the MPI linear solver server
.  -pc_mpi_minimum_count_per_rank - sets the minimum size of the linear system per MPI rank that the solver will strive for
-  -pc_mpi_always_use_server - use the server solver code even if the particular system is only solved on the process, this option is only for debugging and testing purposes

   Notes:
   The options database prefix for the MPI parallel KSP and PC is -mpi_

   The MPI linear solver server will not support scaling user code to utilize extremely large numbers of MPI ranks but should give reasonable speedup for
   potentially 4 to 8 MPI ranks depending on the linear system being solved, solver algorithm, and the hardware.

   It can be particularly useful for user OpenMP code or potentially user GPU code.

   When the program is running with a single MPI rank then this directly uses the provided matrix and right hand side (still in a KSP with the options prefix of -mpi_)
   and does not need to distribute the matrix and vector to the various MPI ranks; thus it incurs no extra overhead over just using the KSP directly.
579 580 .seealso: `KSPCreate()`, `KSPSetType()`, `KSPType`, `KSP`, `PC`, `PCMPIServerBegin()`, `PCMPIServerEnd()` 581 582 M*/ 583 PETSC_EXTERN PetscErrorCode PCCreate_MPI(PC pc) { 584 PC_MPI *km; 585 586 PetscFunctionBegin; 587 PetscCall(PetscNewLog(pc, &km)); 588 pc->data = (void *)km; 589 590 km->mincntperrank = 10000; 591 592 pc->ops->setup = PCSetUp_MPI; 593 pc->ops->apply = PCApply_MPI; 594 pc->ops->destroy = PCDestroy_MPI; 595 pc->ops->view = PCView_MPI; 596 pc->ops->setfromoptions = PCSetFromOptions_MPI; 597 PetscFunctionReturn(0); 598 } 599