1 /* 2 This file creates an MPI parallel KSP from a sequential PC that lives on MPI rank 0. 3 It is intended to allow using PETSc MPI parallel linear solvers from non-MPI codes. 4 5 That program may use OpenMP to compute the right hand side and matrix for the linear system 6 7 The code uses MPI_COMM_WORLD below but maybe it should be PETSC_COMM_WORLD 8 9 The resulting KSP and PC can only be controlled via the options database, though some common commands 10 could be passed through the server. 11 12 */ 13 #include <petsc/private/pcimpl.h> 14 #include <petsc/private/kspimpl.h> 15 #include <petsc.h> 16 17 #define PC_MPI_MAX_RANKS 256 18 #define PC_MPI_COMM_WORLD MPI_COMM_WORLD 19 20 typedef struct { 21 KSP ksps[PC_MPI_MAX_RANKS]; /* The addresses of the MPI parallel KSP on each rank, NULL when not on a rank. */ 22 PetscMPIInt sendcount[PC_MPI_MAX_RANKS], displ[PC_MPI_MAX_RANKS]; /* For scatter/gather of rhs/solution */ 23 PetscMPIInt NZ[PC_MPI_MAX_RANKS], NZdispl[PC_MPI_MAX_RANKS]; /* For scatter of nonzero values in matrix (and nonzero column indices initially */ 24 PetscInt mincntperrank; /* minimum number of desired nonzeros per active rank in MPI parallel KSP solve */ 25 PetscBool alwaysuseserver; /* for debugging use the server infrastructure even if only one MPI rank is used for the solve */ 26 } PC_MPI; 27 28 typedef enum { 29 PCMPI_EXIT, /* exit the PC server loop, means the controlling sequential program is done */ 30 PCMPI_CREATE, 31 PCMPI_SET_MAT, /* set original matrix (or one with different nonzero pattern) */ 32 PCMPI_UPDATE_MAT_VALUES, /* update current matrix with new nonzero values */ 33 PCMPI_SOLVE, 34 PCMPI_VIEW, 35 PCMPI_DESTROY /* destroy a KSP that is no longer needed */ 36 } PCMPICommand; 37 38 static MPI_Comm PCMPIComms[PC_MPI_MAX_RANKS]; 39 static PetscBool PCMPICommSet = PETSC_FALSE; 40 static PetscInt PCMPISolveCounts[PC_MPI_MAX_RANKS], PCMPIKSPCounts[PC_MPI_MAX_RANKS], PCMPIMatCounts[PC_MPI_MAX_RANKS], PCMPISolveCountsSeq = 0, PCMPIKSPCountsSeq = 0; 41 static PetscInt PCMPIIterations[PC_MPI_MAX_RANKS], PCMPISizes[PC_MPI_MAX_RANKS], PCMPIIterationsSeq = 0, PCMPISizesSeq = 0; 42 43 static PetscErrorCode PCMPICommsCreate(void) 44 { 45 MPI_Comm comm = PC_MPI_COMM_WORLD; 46 PetscMPIInt size, rank, i; 47 48 PetscFunctionBegin; 49 PetscCallMPI(MPI_Comm_size(comm, &size)); 50 PetscCheck(size <= PC_MPI_MAX_RANKS, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for using more than PC_MPI_MAX_RANKS MPI ranks in an MPI linear solver server solve"); 51 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 52 /* comm for size 1 is useful only for debugging */ 53 for (i = 0; i < size; i++) { 54 PetscMPIInt color = rank < i + 1 ? 0 : MPI_UNDEFINED; 55 PetscCallMPI(MPI_Comm_split(comm, color, 0, &PCMPIComms[i])); 56 PCMPISolveCounts[i] = 0; 57 PCMPIKSPCounts[i] = 0; 58 PCMPIIterations[i] = 0; 59 PCMPISizes[i] = 0; 60 } 61 PCMPICommSet = PETSC_TRUE; 62 PetscFunctionReturn(PETSC_SUCCESS); 63 } 64 65 PetscErrorCode PCMPICommsDestroy(void) 66 { 67 MPI_Comm comm = PC_MPI_COMM_WORLD; 68 PetscMPIInt size, rank, i; 69 70 PetscFunctionBegin; 71 if (!PCMPICommSet) PetscFunctionReturn(PETSC_SUCCESS); 72 PetscCallMPI(MPI_Comm_size(comm, &size)); 73 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 74 for (i = 0; i < size; i++) { 75 if (PCMPIComms[i] != MPI_COMM_NULL) PetscCallMPI(MPI_Comm_free(&PCMPIComms[i])); 76 } 77 PCMPICommSet = PETSC_FALSE; 78 PetscFunctionReturn(PETSC_SUCCESS); 79 } 80 81 static PetscErrorCode PCMPICreate(PC pc) 82 { 83 PC_MPI *km = pc ? (PC_MPI *)pc->data : NULL; 84 MPI_Comm comm = PC_MPI_COMM_WORLD; 85 KSP ksp; 86 PetscInt N[2], mincntperrank = 0; 87 PetscMPIInt size; 88 Mat sA; 89 char *cprefix = NULL; 90 PetscMPIInt len = 0; 91 92 PetscFunctionBegin; 93 if (!PCMPICommSet) PetscCall(PCMPICommsCreate()); 94 PetscCallMPI(MPI_Comm_size(comm, &size)); 95 if (pc) { 96 if (size == 1) PetscCall(PetscPrintf(PETSC_COMM_SELF, "Warning: Running KSP type of MPI on a one rank MPI run, this will be less efficient then not using this type\n")); 97 PetscCall(PCGetOperators(pc, &sA, &sA)); 98 PetscCall(MatGetSize(sA, &N[0], &N[1])); 99 } 100 PetscCallMPI(MPI_Bcast(N, 2, MPIU_INT, 0, comm)); 101 102 /* choose a suitable sized MPI_Comm for the problem to be solved on */ 103 if (km) mincntperrank = km->mincntperrank; 104 PetscCallMPI(MPI_Bcast(&mincntperrank, 1, MPI_INT, 0, comm)); 105 comm = PCMPIComms[PetscMin(size, PetscMax(1, N[0] / mincntperrank)) - 1]; 106 if (comm == MPI_COMM_NULL) { 107 ksp = NULL; 108 PetscFunctionReturn(PETSC_SUCCESS); 109 } 110 PetscCall(PetscLogStagePush(PCMPIStage)); 111 PetscCall(KSPCreate(comm, &ksp)); 112 PetscCall(KSPSetNestLevel(ksp, 1)); 113 PetscCall(PetscObjectSetTabLevel((PetscObject)ksp, 1)); 114 PetscCall(PetscLogStagePop()); 115 PetscCallMPI(MPI_Gather(&ksp, 1, MPI_AINT, pc ? km->ksps : NULL, 1, MPI_AINT, 0, comm)); 116 if (pc) { 117 size_t slen; 118 const char *prefix = NULL; 119 char *found = NULL; 120 121 PetscCallMPI(MPI_Comm_size(comm, &size)); 122 PCMPIKSPCounts[size - 1]++; 123 /* Created KSP gets prefix of PC minus the mpi_linear_solver_server_ portion */ 124 PetscCall(PCGetOptionsPrefix(pc, &prefix)); 125 PetscCheck(prefix, PETSC_COMM_SELF, PETSC_ERR_PLIB, "PCMPI missing required prefix"); 126 PetscCall(PetscStrallocpy(prefix, &cprefix)); 127 PetscCall(PetscStrstr(cprefix, "mpi_linear_solver_server_", &found)); 128 PetscCheck(found, PETSC_COMM_SELF, PETSC_ERR_PLIB, "PCMPI missing mpi_linear_solver_server_ portion of prefix"); 129 *found = 0; 130 PetscCall(PetscStrlen(cprefix, &slen)); 131 len = (PetscMPIInt)slen; 132 } 133 PetscCallMPI(MPI_Bcast(&len, 1, MPI_INT, 0, comm)); 134 if (len) { 135 if (!pc) PetscCall(PetscMalloc1(len + 1, &cprefix)); 136 PetscCallMPI(MPI_Bcast(cprefix, len + 1, MPI_CHAR, 0, comm)); 137 PetscCall(KSPSetOptionsPrefix(ksp, cprefix)); 138 } 139 PetscCall(PetscFree(cprefix)); 140 PetscFunctionReturn(PETSC_SUCCESS); 141 } 142 143 static PetscErrorCode PCMPISetMat(PC pc) 144 { 145 PC_MPI *km = pc ? (PC_MPI *)pc->data : NULL; 146 Mat A; 147 PetscInt m, n, *ia, *ja, j, bs; 148 Mat sA; 149 MPI_Comm comm = PC_MPI_COMM_WORLD; 150 KSP ksp; 151 PetscLayout layout; 152 const PetscInt *IA = NULL, *JA = NULL; 153 const PetscInt *range; 154 PetscMPIInt *NZ = NULL, sendcounti[PC_MPI_MAX_RANKS], displi[PC_MPI_MAX_RANKS], *NZdispl = NULL, nz, size, i; 155 PetscScalar *a; 156 const PetscScalar *sa = NULL; 157 PetscInt matproperties[8] = {0, 0, 0, 0, 0, 0, 0, 0}; 158 char *cprefix; 159 160 PetscFunctionBegin; 161 PetscCallMPI(MPI_Scatter(pc ? km->ksps : NULL, 1, MPI_AINT, &ksp, 1, MPI_AINT, 0, comm)); 162 if (!ksp) PetscFunctionReturn(PETSC_SUCCESS); 163 PetscCall(PetscObjectGetComm((PetscObject)ksp, &comm)); 164 if (pc) { 165 PetscBool isset, issymmetric, ishermitian, isspd, isstructurallysymmetric; 166 const char *prefix; 167 size_t clen; 168 169 PetscCallMPI(MPI_Comm_size(comm, &size)); 170 PCMPIMatCounts[size - 1]++; 171 PetscCall(PCGetOperators(pc, &sA, &sA)); 172 PetscCall(MatGetSize(sA, &matproperties[0], &matproperties[1])); 173 PetscCall(MatGetBlockSize(sA, &bs)); 174 matproperties[2] = bs; 175 PetscCall(MatIsSymmetricKnown(sA, &isset, &issymmetric)); 176 matproperties[3] = !isset ? 0 : (issymmetric ? 1 : 2); 177 PetscCall(MatIsHermitianKnown(sA, &isset, &ishermitian)); 178 matproperties[4] = !isset ? 0 : (ishermitian ? 1 : 2); 179 PetscCall(MatIsSPDKnown(sA, &isset, &isspd)); 180 matproperties[5] = !isset ? 0 : (isspd ? 1 : 2); 181 PetscCall(MatIsStructurallySymmetricKnown(sA, &isset, &isstructurallysymmetric)); 182 matproperties[6] = !isset ? 0 : (isstructurallysymmetric ? 1 : 2); 183 /* Created Mat gets prefix of input Mat PLUS the mpi_linear_solver_server_ portion */ 184 PetscCall(MatGetOptionsPrefix(sA, &prefix)); 185 PetscCall(PetscStrallocpy(prefix, &cprefix)); 186 PetscCall(PetscStrlen(cprefix, &clen)); 187 matproperties[7] = (PetscInt)clen; 188 } 189 PetscCallMPI(MPI_Bcast(matproperties, PETSC_STATIC_ARRAY_LENGTH(matproperties), MPIU_INT, 0, comm)); 190 191 /* determine ownership ranges of matrix columns */ 192 PetscCall(PetscLayoutCreate(comm, &layout)); 193 PetscCall(PetscLayoutSetBlockSize(layout, matproperties[2])); 194 PetscCall(PetscLayoutSetSize(layout, matproperties[1])); 195 PetscCall(PetscLayoutSetUp(layout)); 196 PetscCall(PetscLayoutGetLocalSize(layout, &n)); 197 PetscCall(PetscLayoutDestroy(&layout)); 198 199 /* determine ownership ranges of matrix rows */ 200 PetscCall(PetscLayoutCreate(comm, &layout)); 201 PetscCall(PetscLayoutSetBlockSize(layout, matproperties[2])); 202 PetscCall(PetscLayoutSetSize(layout, matproperties[0])); 203 PetscCall(PetscLayoutSetUp(layout)); 204 PetscCall(PetscLayoutGetLocalSize(layout, &m)); 205 206 /* copy over the matrix nonzero structure and values */ 207 if (pc) { 208 NZ = km->NZ; 209 NZdispl = km->NZdispl; 210 PetscCall(PetscLayoutGetRanges(layout, &range)); 211 PetscCall(MatGetRowIJ(sA, 0, PETSC_FALSE, PETSC_FALSE, NULL, &IA, &JA, NULL)); 212 for (i = 0; i < size; i++) { 213 sendcounti[i] = (PetscMPIInt)(1 + range[i + 1] - range[i]); 214 NZ[i] = (PetscMPIInt)(IA[range[i + 1]] - IA[range[i]]); 215 } 216 displi[0] = 0; 217 NZdispl[0] = 0; 218 for (j = 1; j < size; j++) { 219 displi[j] = displi[j - 1] + sendcounti[j - 1] - 1; 220 NZdispl[j] = NZdispl[j - 1] + NZ[j - 1]; 221 } 222 PetscCall(MatSeqAIJGetArrayRead(sA, &sa)); 223 } 224 PetscCall(PetscLayoutDestroy(&layout)); 225 PetscCallMPI(MPI_Scatter(NZ, 1, MPI_INT, &nz, 1, MPI_INT, 0, comm)); 226 227 PetscCall(PetscMalloc3(n + 1, &ia, nz, &ja, nz, &a)); 228 PetscCallMPI(MPI_Scatterv(IA, sendcounti, displi, MPIU_INT, ia, n + 1, MPIU_INT, 0, comm)); 229 PetscCallMPI(MPI_Scatterv(JA, NZ, NZdispl, MPIU_INT, ja, nz, MPIU_INT, 0, comm)); 230 PetscCallMPI(MPI_Scatterv(sa, NZ, NZdispl, MPIU_SCALAR, a, nz, MPIU_SCALAR, 0, comm)); 231 232 if (pc) { 233 PetscCall(MatSeqAIJRestoreArrayRead(sA, &sa)); 234 PetscCall(MatRestoreRowIJ(sA, 0, PETSC_FALSE, PETSC_FALSE, NULL, &IA, &JA, NULL)); 235 } 236 237 for (j = 1; j < n + 1; j++) ia[j] -= ia[0]; 238 ia[0] = 0; 239 PetscCall(PetscLogStagePush(PCMPIStage)); 240 PetscCall(MatCreate(comm, &A)); 241 if (matproperties[7] > 0) { 242 if (!pc) PetscCall(PetscMalloc1(matproperties[7] + 1, &cprefix)); 243 PetscCallMPI(MPI_Bcast(cprefix, matproperties[7] + 1, MPI_CHAR, 0, comm)); 244 PetscCall(MatSetOptionsPrefix(A, cprefix)); 245 PetscCall(PetscFree(cprefix)); 246 } 247 PetscCall(MatAppendOptionsPrefix(A, "mpi_linear_solver_server_")); 248 PetscCall(MatSetSizes(A, m, n, matproperties[0], matproperties[1])); 249 PetscCall(MatSetType(A, MATMPIAIJ)); 250 PetscCall(MatMPIAIJSetPreallocationCSR(A, ia, ja, a)); 251 PetscCall(MatSetBlockSize(A, matproperties[2])); 252 253 if (matproperties[3]) PetscCall(MatSetOption(A, MAT_SYMMETRIC, matproperties[3] == 1 ? PETSC_TRUE : PETSC_FALSE)); 254 if (matproperties[4]) PetscCall(MatSetOption(A, MAT_HERMITIAN, matproperties[4] == 1 ? PETSC_TRUE : PETSC_FALSE)); 255 if (matproperties[5]) PetscCall(MatSetOption(A, MAT_SPD, matproperties[5] == 1 ? PETSC_TRUE : PETSC_FALSE)); 256 if (matproperties[6]) PetscCall(MatSetOption(A, MAT_STRUCTURALLY_SYMMETRIC, matproperties[6] == 1 ? PETSC_TRUE : PETSC_FALSE)); 257 258 PetscCall(PetscFree3(ia, ja, a)); 259 PetscCall(KSPSetOperators(ksp, A, A)); 260 if (!ksp->vec_sol) PetscCall(MatCreateVecs(A, &ksp->vec_sol, &ksp->vec_rhs)); 261 PetscCall(PetscLogStagePop()); 262 if (pc) { /* needed for scatterv/gatherv of rhs and solution */ 263 const PetscInt *range; 264 265 PetscCall(VecGetOwnershipRanges(ksp->vec_sol, &range)); 266 for (i = 0; i < size; i++) { 267 km->sendcount[i] = (PetscMPIInt)(range[i + 1] - range[i]); 268 km->displ[i] = (PetscMPIInt)range[i]; 269 } 270 } 271 PetscCall(MatDestroy(&A)); 272 PetscCall(KSPSetFromOptions(ksp)); 273 PetscFunctionReturn(PETSC_SUCCESS); 274 } 275 276 static PetscErrorCode PCMPIUpdateMatValues(PC pc) 277 { 278 PC_MPI *km = pc ? (PC_MPI *)pc->data : NULL; 279 KSP ksp; 280 Mat sA, A; 281 MPI_Comm comm = PC_MPI_COMM_WORLD; 282 PetscScalar *a; 283 PetscCount nz; 284 const PetscScalar *sa = NULL; 285 PetscMPIInt size; 286 PetscInt matproperties[4] = {0, 0, 0, 0}; 287 288 PetscFunctionBegin; 289 if (pc) { 290 PetscCall(PCGetOperators(pc, &sA, &sA)); 291 PetscCall(MatSeqAIJGetArrayRead(sA, &sa)); 292 } 293 PetscCallMPI(MPI_Scatter(pc ? km->ksps : NULL, 1, MPI_AINT, &ksp, 1, MPI_AINT, 0, comm)); 294 if (!ksp) PetscFunctionReturn(PETSC_SUCCESS); 295 PetscCall(PetscObjectGetComm((PetscObject)ksp, &comm)); 296 PetscCallMPI(MPI_Comm_size(comm, &size)); 297 PCMPIMatCounts[size - 1]++; 298 PetscCall(KSPGetOperators(ksp, NULL, &A)); 299 PetscCall(MatMPIAIJGetNumberNonzeros(A, &nz)); 300 PetscCall(PetscMalloc1(nz, &a)); 301 PetscCallMPI(MPI_Scatterv(sa, pc ? km->NZ : NULL, pc ? km->NZdispl : NULL, MPIU_SCALAR, a, nz, MPIU_SCALAR, 0, comm)); 302 if (pc) { 303 PetscBool isset, issymmetric, ishermitian, isspd, isstructurallysymmetric; 304 305 PetscCall(MatSeqAIJRestoreArrayRead(sA, &sa)); 306 307 PetscCall(MatIsSymmetricKnown(sA, &isset, &issymmetric)); 308 matproperties[0] = !isset ? 0 : (issymmetric ? 1 : 2); 309 PetscCall(MatIsHermitianKnown(sA, &isset, &ishermitian)); 310 matproperties[1] = !isset ? 0 : (ishermitian ? 1 : 2); 311 PetscCall(MatIsSPDKnown(sA, &isset, &isspd)); 312 matproperties[2] = !isset ? 0 : (isspd ? 1 : 2); 313 PetscCall(MatIsStructurallySymmetricKnown(sA, &isset, &isstructurallysymmetric)); 314 matproperties[3] = !isset ? 0 : (isstructurallysymmetric ? 1 : 2); 315 } 316 PetscCall(MatUpdateMPIAIJWithArray(A, a)); 317 PetscCall(PetscFree(a)); 318 PetscCallMPI(MPI_Bcast(matproperties, 4, MPIU_INT, 0, comm)); 319 /* if any of these properties was previously set and is now not set this will result in incorrect properties in A since there is no way to unset a property */ 320 if (matproperties[0]) PetscCall(MatSetOption(A, MAT_SYMMETRIC, matproperties[0] == 1 ? PETSC_TRUE : PETSC_FALSE)); 321 if (matproperties[1]) PetscCall(MatSetOption(A, MAT_HERMITIAN, matproperties[1] == 1 ? PETSC_TRUE : PETSC_FALSE)); 322 if (matproperties[2]) PetscCall(MatSetOption(A, MAT_SPD, matproperties[2] == 1 ? PETSC_TRUE : PETSC_FALSE)); 323 if (matproperties[3]) PetscCall(MatSetOption(A, MAT_STRUCTURALLY_SYMMETRIC, matproperties[3] == 1 ? PETSC_TRUE : PETSC_FALSE)); 324 PetscFunctionReturn(PETSC_SUCCESS); 325 } 326 327 static PetscErrorCode PCMPISolve(PC pc, Vec B, Vec X) 328 { 329 PC_MPI *km = pc ? (PC_MPI *)pc->data : NULL; 330 KSP ksp; 331 MPI_Comm comm = PC_MPI_COMM_WORLD; 332 const PetscScalar *sb = NULL, *x; 333 PetscScalar *b, *sx = NULL; 334 PetscInt its, n; 335 PetscMPIInt size; 336 337 PetscFunctionBegin; 338 PetscCallMPI(MPI_Scatter(pc ? km->ksps : &ksp, 1, MPI_AINT, &ksp, 1, MPI_AINT, 0, comm)); 339 if (!ksp) PetscFunctionReturn(PETSC_SUCCESS); 340 PetscCall(PetscObjectGetComm((PetscObject)ksp, &comm)); 341 342 /* TODO: optimize code to not require building counts/displ every time */ 343 344 /* scatterv rhs */ 345 PetscCallMPI(MPI_Comm_size(comm, &size)); 346 if (pc) { 347 PetscInt N; 348 349 PCMPISolveCounts[size - 1]++; 350 PetscCall(MatGetSize(pc->pmat, &N, NULL)); 351 PCMPISizes[size - 1] += N; 352 PetscCall(VecGetArrayRead(B, &sb)); 353 } 354 PetscCall(VecGetLocalSize(ksp->vec_rhs, &n)); 355 PetscCall(VecGetArray(ksp->vec_rhs, &b)); 356 PetscCallMPI(MPI_Scatterv(sb, pc ? km->sendcount : NULL, pc ? km->displ : NULL, MPIU_SCALAR, b, n, MPIU_SCALAR, 0, comm)); 357 PetscCall(VecRestoreArray(ksp->vec_rhs, &b)); 358 if (pc) PetscCall(VecRestoreArrayRead(B, &sb)); 359 360 PetscCall(PetscLogStagePush(PCMPIStage)); 361 PetscCall(KSPSolve(ksp, NULL, NULL)); 362 PetscCall(PetscLogStagePop()); 363 PetscCall(KSPGetIterationNumber(ksp, &its)); 364 PCMPIIterations[size - 1] += its; 365 366 /* gather solution */ 367 PetscCall(VecGetArrayRead(ksp->vec_sol, &x)); 368 if (pc) PetscCall(VecGetArray(X, &sx)); 369 PetscCallMPI(MPI_Gatherv(x, n, MPIU_SCALAR, sx, pc ? km->sendcount : NULL, pc ? km->displ : NULL, MPIU_SCALAR, 0, comm)); 370 if (pc) PetscCall(VecRestoreArray(X, &sx)); 371 PetscCall(VecRestoreArrayRead(ksp->vec_sol, &x)); 372 PetscFunctionReturn(PETSC_SUCCESS); 373 } 374 375 static PetscErrorCode PCMPIDestroy(PC pc) 376 { 377 PC_MPI *km = pc ? (PC_MPI *)pc->data : NULL; 378 KSP ksp; 379 MPI_Comm comm = PC_MPI_COMM_WORLD; 380 381 PetscFunctionBegin; 382 PetscCallMPI(MPI_Scatter(pc ? km->ksps : NULL, 1, MPI_AINT, &ksp, 1, MPI_AINT, 0, comm)); 383 if (!ksp) PetscFunctionReturn(PETSC_SUCCESS); 384 PetscCall(KSPDestroy(&ksp)); 385 PetscFunctionReturn(PETSC_SUCCESS); 386 } 387 388 PetscBool PCMPIServerActive = PETSC_FALSE; 389 390 /*@C 391 PCMPIServerBegin - starts a server that runs on the `rank != 0` MPI processes waiting to process requests for 392 parallel `KSP` solves and management of parallel `KSP` objects. 393 394 Logically Collective on all MPI processes except rank 0 395 396 Options Database Keys: 397 + -mpi_linear_solver_server - causes the PETSc program to start in MPI linear solver server mode where only the first MPI rank runs user code 398 - -mpi_linear_solver_server_view - displays information about all the linear systems solved by the MPI linear solver server at the conclusion of the program 399 400 Level: developer 401 402 Note: 403 This is normally started automatically in `PetscInitialize()` when the option is provided 404 405 See `PCMPI` for information on using the solver with a `KSP` object 406 407 Developer Notes: 408 When called on MPI rank 0 this sets `PETSC_COMM_WORLD` to `PETSC_COMM_SELF` to allow a main program 409 written with `PETSC_COMM_WORLD` to run correctly on the single rank while all the ranks 410 (that would normally be sharing `PETSC_COMM_WORLD`) to run the solver server. 411 412 Can this be integrated into the `PetscDevice` abstraction that is currently being developed? 413 414 Conceivably `PCREDISTRIBUTE` could be organized in a similar manner to simplify its usage 415 416 This could be implemented directly at the `KSP` level instead of using the `PCMPI` wrapper object 417 418 The code could be extended to allow an MPI + OpenMP application to use the linear solver server concept across all shared-memory 419 nodes with a single MPI process per node for the user application but multiple MPI processes per node for the linear solver. 420 421 The concept could also be extended for users's callbacks for `SNES`, `TS`, and `Tao` where the `SNESSolve()` for example, runs on 422 all MPI processes but the user callback only runs on one MPI process per node. 423 424 PETSc could also be extended with an MPI-less API that provides access to PETSc's solvers without any reference to MPI, essentially remove 425 the `MPI_Comm` argument from PETSc calls. 426 427 .seealso: [](sec_pcmpi), `PCMPIServerEnd()`, `PCMPI`, `KSPCheckPCMPI()` 428 @*/ 429 PetscErrorCode PCMPIServerBegin(void) 430 { 431 PetscMPIInt rank; 432 433 PetscFunctionBegin; 434 PetscCall(PetscInfo(NULL, "Starting MPI Linear Solver Server\n")); 435 if (PetscDefined(USE_SINGLE_LIBRARY)) { 436 PetscCall(VecInitializePackage()); 437 PetscCall(MatInitializePackage()); 438 PetscCall(DMInitializePackage()); 439 PetscCall(PCInitializePackage()); 440 PetscCall(KSPInitializePackage()); 441 PetscCall(SNESInitializePackage()); 442 PetscCall(TSInitializePackage()); 443 PetscCall(TaoInitializePackage()); 444 } 445 446 PetscCallMPI(MPI_Comm_rank(PC_MPI_COMM_WORLD, &rank)); 447 if (rank == 0) { 448 PETSC_COMM_WORLD = PETSC_COMM_SELF; 449 PCMPIServerActive = PETSC_TRUE; 450 PetscFunctionReturn(PETSC_SUCCESS); 451 } 452 453 while (PETSC_TRUE) { 454 PCMPICommand request = PCMPI_CREATE; 455 PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, PC_MPI_COMM_WORLD)); 456 switch (request) { 457 case PCMPI_CREATE: 458 PetscCall(PCMPICreate(NULL)); 459 break; 460 case PCMPI_SET_MAT: 461 PetscCall(PCMPISetMat(NULL)); 462 break; 463 case PCMPI_UPDATE_MAT_VALUES: 464 PetscCall(PCMPIUpdateMatValues(NULL)); 465 break; 466 case PCMPI_VIEW: 467 // PetscCall(PCMPIView(NULL)); 468 break; 469 case PCMPI_SOLVE: 470 PetscCall(PCMPISolve(NULL, NULL, NULL)); 471 break; 472 case PCMPI_DESTROY: 473 PetscCall(PCMPIDestroy(NULL)); 474 break; 475 case PCMPI_EXIT: 476 PetscCall(PetscFinalize()); 477 exit(0); /* not sure if this is a good idea, but cannot return because it will run users main program */ 478 break; 479 default: 480 break; 481 } 482 } 483 PetscFunctionReturn(PETSC_SUCCESS); 484 } 485 486 /*@C 487 PCMPIServerEnd - ends a server that runs on the rank != 0 MPI processes waiting to process requests for 488 parallel KSP solves and management of parallel `KSP` objects. 489 490 Logically Collective on all MPI ranks except 0 491 492 Level: developer 493 494 Note: 495 This is normally ended automatically in `PetscFinalize()` when the option is provided 496 497 .seealso: [](sec_pcmpi), `PCMPIServerBegin()`, `PCMPI`, `KSPCheckPCMPI()` 498 @*/ 499 PetscErrorCode PCMPIServerEnd(void) 500 { 501 PCMPICommand request = PCMPI_EXIT; 502 503 PetscFunctionBegin; 504 if (PetscGlobalRank == 0) { 505 PetscViewer viewer = NULL; 506 PetscViewerFormat format; 507 508 PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, PC_MPI_COMM_WORLD)); 509 PETSC_COMM_WORLD = MPI_COMM_WORLD; /* could use PC_MPI_COMM_WORLD */ 510 PetscOptionsBegin(PETSC_COMM_SELF, NULL, "MPI linear solver server options", NULL); 511 PetscCall(PetscOptionsViewer("-mpi_linear_solver_server_view", "View information about system solved with the server", "PCMPI", &viewer, &format, NULL)); 512 PetscOptionsEnd(); 513 if (viewer) { 514 PetscBool isascii; 515 516 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii)); 517 if (isascii) { 518 PetscMPIInt size; 519 PetscMPIInt i; 520 521 PetscCallMPI(MPI_Comm_size(PETSC_COMM_WORLD, &size)); 522 PetscCall(PetscViewerASCIIPrintf(viewer, "MPI linear solver server statistics:\n")); 523 PetscCall(PetscViewerASCIIPrintf(viewer, " Ranks KSPSolve()s Mats KSPs Avg. Size Avg. Its\n")); 524 if (PCMPIKSPCountsSeq) { 525 PetscCall(PetscViewerASCIIPrintf(viewer, " Sequential %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT "\n", PCMPISolveCountsSeq, PCMPIKSPCountsSeq, PCMPISizesSeq / PCMPISolveCountsSeq, PCMPIIterationsSeq / PCMPISolveCountsSeq)); 526 } 527 for (i = 0; i < size; i++) { 528 if (PCMPIKSPCounts[i]) { 529 PetscCall(PetscViewerASCIIPrintf(viewer, " %d %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT "\n", i + 1, PCMPISolveCounts[i], PCMPIMatCounts[i], PCMPIKSPCounts[i], PCMPISizes[i] / PCMPISolveCounts[i], PCMPIIterations[i] / PCMPISolveCounts[i])); 530 } 531 } 532 } 533 PetscCall(PetscViewerDestroy(&viewer)); 534 } 535 } 536 PetscCall(PCMPICommsDestroy()); 537 PCMPIServerActive = PETSC_FALSE; 538 PetscFunctionReturn(PETSC_SUCCESS); 539 } 540 541 /* 542 This version is used in the trivial case when the MPI parallel solver server is running on just the original MPI rank 0 543 because, for example, the problem is small. This version is more efficient because it does not require copying any data 544 */ 545 static PetscErrorCode PCSetUp_Seq(PC pc) 546 { 547 PC_MPI *km = (PC_MPI *)pc->data; 548 Mat sA; 549 const char *prefix; 550 char *found = NULL, *cprefix; 551 552 PetscFunctionBegin; 553 PetscCall(PCGetOperators(pc, NULL, &sA)); 554 PetscCall(PCGetOptionsPrefix(pc, &prefix)); 555 PetscCall(KSPCreate(PETSC_COMM_SELF, &km->ksps[0])); 556 PetscCall(KSPSetNestLevel(km->ksps[0], 1)); 557 PetscCall(PetscObjectSetTabLevel((PetscObject)km->ksps[0], 1)); 558 559 /* Created KSP gets prefix of PC minus the mpi_linear_solver_server_ portion */ 560 PetscCall(PCGetOptionsPrefix(pc, &prefix)); 561 PetscCheck(prefix, PETSC_COMM_SELF, PETSC_ERR_PLIB, "PCMPI missing required prefix"); 562 PetscCall(PetscStrallocpy(prefix, &cprefix)); 563 PetscCall(PetscStrstr(cprefix, "mpi_linear_solver_server_", &found)); 564 PetscCheck(found, PETSC_COMM_SELF, PETSC_ERR_PLIB, "PCMPI missing mpi_linear_solver_server_ portion of prefix"); 565 *found = 0; 566 PetscCall(KSPSetOptionsPrefix(km->ksps[0], cprefix)); 567 PetscCall(PetscFree(cprefix)); 568 569 PetscCall(KSPSetOperators(km->ksps[0], sA, sA)); 570 PetscCall(KSPSetFromOptions(km->ksps[0])); 571 PetscCall(KSPSetUp(km->ksps[0])); 572 PetscCall(PetscInfo((PetscObject)pc, "MPI parallel linear solver system is being solved directly on rank 0 due to its small size\n")); 573 PCMPIKSPCountsSeq++; 574 PetscFunctionReturn(PETSC_SUCCESS); 575 } 576 577 static PetscErrorCode PCApply_Seq(PC pc, Vec b, Vec x) 578 { 579 PC_MPI *km = (PC_MPI *)pc->data; 580 PetscInt its, n; 581 Mat A; 582 583 PetscFunctionBegin; 584 PetscCall(KSPSolve(km->ksps[0], b, x)); 585 PetscCall(KSPGetIterationNumber(km->ksps[0], &its)); 586 PCMPISolveCountsSeq++; 587 PCMPIIterationsSeq += its; 588 PetscCall(KSPGetOperators(km->ksps[0], NULL, &A)); 589 PetscCall(MatGetSize(A, &n, NULL)); 590 PCMPISizesSeq += n; 591 PetscFunctionReturn(PETSC_SUCCESS); 592 } 593 594 static PetscErrorCode PCView_Seq(PC pc, PetscViewer viewer) 595 { 596 PC_MPI *km = (PC_MPI *)pc->data; 597 598 PetscFunctionBegin; 599 PetscCall(PetscViewerASCIIPrintf(viewer, "Running MPI linear solver server directly on rank 0 due to its small size\n")); 600 PetscCall(PetscViewerASCIIPrintf(viewer, "Desired minimum number of nonzeros per rank for MPI parallel solve %d\n", (int)km->mincntperrank)); 601 PetscCall(PetscViewerASCIIPrintf(viewer, "*** Use -mpi_linear_solver_server_view to statistics on all the solves ***\n")); 602 PetscFunctionReturn(PETSC_SUCCESS); 603 } 604 605 static PetscErrorCode PCDestroy_Seq(PC pc) 606 { 607 PC_MPI *km = (PC_MPI *)pc->data; 608 609 PetscFunctionBegin; 610 PetscCall(KSPDestroy(&km->ksps[0])); 611 PetscCall(PetscFree(pc->data)); 612 PetscFunctionReturn(PETSC_SUCCESS); 613 } 614 615 /* 616 PCSetUp_MPI - Trigger the creation of the MPI parallel PC and copy parts of the matrix and 617 right hand side to the parallel PC 618 */ 619 static PetscErrorCode PCSetUp_MPI(PC pc) 620 { 621 PC_MPI *km = (PC_MPI *)pc->data; 622 PetscMPIInt rank, size; 623 PCMPICommand request; 624 PetscBool newmatrix = PETSC_FALSE; 625 626 PetscFunctionBegin; 627 PetscCallMPI(MPI_Comm_rank(MPI_COMM_WORLD, &rank)); 628 PetscCheck(rank == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "PCMPI can only be used from 0th rank of MPI_COMM_WORLD. Perhaps a missing -mpi_linear_solver_server?"); 629 PetscCallMPI(MPI_Comm_size(MPI_COMM_WORLD, &size)); 630 631 if (!pc->setupcalled) { 632 if (!km->alwaysuseserver) { 633 PetscInt n; 634 Mat sA; 635 /* short circuit for small systems */ 636 PetscCall(PCGetOperators(pc, &sA, &sA)); 637 PetscCall(MatGetSize(sA, &n, NULL)); 638 if (n < 2 * km->mincntperrank - 1 || size == 1) { 639 pc->ops->setup = NULL; 640 pc->ops->apply = PCApply_Seq; 641 pc->ops->destroy = PCDestroy_Seq; 642 pc->ops->view = PCView_Seq; 643 PetscCall(PCSetUp_Seq(pc)); 644 PetscFunctionReturn(PETSC_SUCCESS); 645 } 646 } 647 648 request = PCMPI_CREATE; 649 PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, MPI_COMM_WORLD)); 650 PetscCall(PCMPICreate(pc)); 651 newmatrix = PETSC_TRUE; 652 } 653 if (pc->flag == DIFFERENT_NONZERO_PATTERN) newmatrix = PETSC_TRUE; 654 655 if (newmatrix) { 656 PetscCall(PetscInfo((PetscObject)pc, "New matrix or matrix has changed nonzero structure\n")); 657 request = PCMPI_SET_MAT; 658 PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, MPI_COMM_WORLD)); 659 PetscCall(PCMPISetMat(pc)); 660 } else { 661 PetscCall(PetscInfo((PetscObject)pc, "Matrix has only changed nonzero values\n")); 662 request = PCMPI_UPDATE_MAT_VALUES; 663 PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, MPI_COMM_WORLD)); 664 PetscCall(PCMPIUpdateMatValues(pc)); 665 } 666 PetscFunctionReturn(PETSC_SUCCESS); 667 } 668 669 static PetscErrorCode PCApply_MPI(PC pc, Vec b, Vec x) 670 { 671 PCMPICommand request = PCMPI_SOLVE; 672 673 PetscFunctionBegin; 674 PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, MPI_COMM_WORLD)); 675 PetscCall(PCMPISolve(pc, b, x)); 676 PetscFunctionReturn(PETSC_SUCCESS); 677 } 678 679 static PetscErrorCode PCDestroy_MPI(PC pc) 680 { 681 PCMPICommand request = PCMPI_DESTROY; 682 683 PetscFunctionBegin; 684 PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, MPI_COMM_WORLD)); 685 PetscCall(PCMPIDestroy(pc)); 686 PetscCall(PetscFree(pc->data)); 687 PetscFunctionReturn(PETSC_SUCCESS); 688 } 689 690 /* 691 PCView_MPI - Cannot call view on the MPI parallel KSP because other ranks do not have access to the viewer 692 */ 693 static PetscErrorCode PCView_MPI(PC pc, PetscViewer viewer) 694 { 695 PC_MPI *km = (PC_MPI *)pc->data; 696 MPI_Comm comm; 697 PetscMPIInt size; 698 699 PetscFunctionBegin; 700 PetscCall(PetscObjectGetComm((PetscObject)km->ksps[0], &comm)); 701 PetscCallMPI(MPI_Comm_size(comm, &size)); 702 PetscCall(PetscViewerASCIIPrintf(viewer, "Size of MPI communicator used for MPI parallel KSP solve %d\n", size)); 703 PetscCall(PetscViewerASCIIPrintf(viewer, "Desired minimum number of nonzeros per rank for MPI parallel solve %d\n", (int)km->mincntperrank)); 704 PetscCall(PetscViewerASCIIPrintf(viewer, "*** Use -mpi_linear_solver_server_view to statistics on all the solves ***\n")); 705 PetscFunctionReturn(PETSC_SUCCESS); 706 } 707 708 static PetscErrorCode PCSetFromOptions_MPI(PC pc, PetscOptionItems *PetscOptionsObject) 709 { 710 PC_MPI *km = (PC_MPI *)pc->data; 711 712 PetscFunctionBegin; 713 PetscOptionsHeadBegin(PetscOptionsObject, "MPI linear solver server options"); 714 PetscCall(PetscOptionsInt("-minimum_count_per_rank", "Desired minimum number of nonzeros per rank", "None", km->mincntperrank, &km->mincntperrank, NULL)); 715 PetscCall(PetscOptionsBool("-always_use_server", "Use the server even if only one rank is used for the solve (for debugging)", "None", km->alwaysuseserver, &km->alwaysuseserver, NULL)); 716 PetscOptionsHeadEnd(); 717 PetscFunctionReturn(PETSC_SUCCESS); 718 } 719 720 /*MC 721 PCMPI - Calls an MPI parallel `KSP` to solve a linear system from user code running on one process 722 723 Options Database Keys for the Server: 724 + -mpi_linear_solver_server - causes the PETSc program to start in MPI linear solver server mode where only the first MPI rank runs user code 725 - -mpi_linear_solver_server_view - displays information about all the linear systems solved by the MPI linear solver server 726 727 Options Database Keys for a specific `KSP` object 728 + -[any_ksp_prefix]_mpi_linear_solver_server_minimum_count_per_rank - sets the minimum size of the linear system per MPI rank that the solver will strive for 729 - -[any_ksp_prefix]_mpi_linear_solver_server_always_use_server - use the server solver code even if the particular system is only solved on the process (for debugging and testing purposes) 730 731 Level: developer 732 733 Notes: 734 The options database prefix for the actual solver is any prefix provided before use to the origjnal `KSP` with `KSPSetOptionsPrefix()`, mostly commonly no prefix is used. 735 736 It can be particularly useful for user OpenMP code or potentially user GPU code. 737 738 When the program is running with a single MPI process then it directly uses the provided matrix and right hand side 739 and does not need to distribute the matrix and vector to the various MPI processes; thus it incurs no extra overhead over just using the `KSP` directly. 740 741 The solver options for actual solving `KSP` and `PC` must be controlled via the options database, calls to set options directly on the user level `KSP` and `PC` have no effect 742 because they are not the actual solver objects. 743 744 When `-log_view` is used with this solver the events within the parallel solve are logging in their own stage. Some of the logging in the other 745 stages will be confusing since the event times are only recorded on the 0th MPI rank, thus the percent of time in the events will be misleading. 746 747 Developer Note: 748 This `PCType` is never directly selected by the user, it is set when the option `-mpi_linear_solver_server` is used and the `PC` is at the outer most nesting of 749 a `KSP`. The outer most `KSP` object is automatically set to `KSPPREONLY` and thus is not directly visible to the user. 750 751 .seealso: [](sec_pcmpi), `KSPCreate()`, `KSPSetType()`, `KSPType`, `KSP`, `PC`, `PCMPIServerBegin()`, `PCMPIServerEnd()`, `KSPCheckPCMPI()` 752 M*/ 753 PETSC_EXTERN PetscErrorCode PCCreate_MPI(PC pc) 754 { 755 PC_MPI *km; 756 char *found = NULL; 757 758 PetscFunctionBegin; 759 PetscCall(PetscStrstr(((PetscObject)pc)->prefix, "mpi_linear_solver_server_", &found)); 760 PetscCheck(found, PETSC_COMM_SELF, PETSC_ERR_PLIB, "PCMPI object prefix does not have mpi_linear_solver_server_"); 761 762 /* material from PCSetType() */ 763 PetscTryTypeMethod(pc, destroy); 764 pc->ops->destroy = NULL; 765 pc->data = NULL; 766 767 PetscCall(PetscFunctionListDestroy(&((PetscObject)pc)->qlist)); 768 PetscCall(PetscMemzero(pc->ops, sizeof(struct _PCOps))); 769 pc->modifysubmatrices = NULL; 770 pc->modifysubmatricesP = NULL; 771 pc->setupcalled = 0; 772 773 PetscCall(PetscNew(&km)); 774 pc->data = (void *)km; 775 776 km->mincntperrank = 10000; 777 778 pc->ops->setup = PCSetUp_MPI; 779 pc->ops->apply = PCApply_MPI; 780 pc->ops->destroy = PCDestroy_MPI; 781 pc->ops->view = PCView_MPI; 782 pc->ops->setfromoptions = PCSetFromOptions_MPI; 783 PetscCall(PetscObjectChangeTypeName((PetscObject)pc, PCMPI)); 784 PetscFunctionReturn(PETSC_SUCCESS); 785 } 786