1 /* 2 This file creates an MPI parallel KSP from a sequential PC that lives on MPI rank 0. 3 It is intended to allow using PETSc MPI parallel linear solvers from non-MPI codes. 4 5 That program may use OpenMP to compute the right hand side and matrix for the linear system 6 7 The code uses MPI_COMM_WORLD below but maybe it should be PETSC_COMM_WORLD 8 9 The resulting KSP and PC can only be controlled via the options database, though some common commands 10 could be passed through the server. 11 12 */ 13 #include <petsc/private/pcimpl.h> 14 #include <petsc/private/kspimpl.h> 15 #include <petsc.h> 16 17 #define PC_MPI_MAX_RANKS 256 18 #define PC_MPI_COMM_WORLD MPI_COMM_WORLD 19 20 typedef struct { 21 KSP ksps[PC_MPI_MAX_RANKS]; /* The addresses of the MPI parallel KSP on each rank, NULL when not on a rank. */ 22 PetscMPIInt sendcount[PC_MPI_MAX_RANKS], displ[PC_MPI_MAX_RANKS]; /* For scatter/gather of rhs/solution */ 23 PetscMPIInt NZ[PC_MPI_MAX_RANKS], NZdispl[PC_MPI_MAX_RANKS]; /* For scatter of nonzero values in matrix (and nonzero column indices initially */ 24 PetscInt mincntperrank; /* minimum number of desired nonzeros per active rank in MPI parallel KSP solve */ 25 PetscBool alwaysuseserver; /* for debugging use the server infrastructure even if only one MPI rank is used for the solve */ 26 } PC_MPI; 27 28 typedef enum { 29 PCMPI_EXIT, /* exit the PC server loop, means the controlling sequential program is done */ 30 PCMPI_CREATE, 31 PCMPI_SET_MAT, /* set original matrix (or one with different nonzero pattern) */ 32 PCMPI_UPDATE_MAT_VALUES, /* update current matrix with new nonzero values */ 33 PCMPI_SOLVE, 34 PCMPI_VIEW, 35 PCMPI_DESTROY /* destroy a KSP that is no longer needed */ 36 } PCMPICommand; 37 38 static MPI_Comm PCMPIComms[PC_MPI_MAX_RANKS]; 39 static PetscBool PCMPICommSet = PETSC_FALSE; 40 static PetscInt PCMPISolveCounts[PC_MPI_MAX_RANKS], PCMPIKSPCounts[PC_MPI_MAX_RANKS], PCMPIMatCounts[PC_MPI_MAX_RANKS], PCMPISolveCountsSeq = 0, PCMPIKSPCountsSeq = 0; 41 static PetscInt PCMPIIterations[PC_MPI_MAX_RANKS], PCMPISizes[PC_MPI_MAX_RANKS], PCMPIIterationsSeq = 0, PCMPISizesSeq = 0; 42 43 static PetscErrorCode PCMPICommsCreate(void) 44 { 45 MPI_Comm comm = PC_MPI_COMM_WORLD; 46 PetscMPIInt size, rank, i; 47 48 PetscFunctionBegin; 49 PetscCallMPI(MPI_Comm_size(comm, &size)); 50 PetscCheck(size <= PC_MPI_MAX_RANKS, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for using more than PC_MPI_MAX_RANKS MPI ranks in an MPI linear solver server solve"); 51 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 52 /* comm for size 1 is useful only for debugging */ 53 for (i = 0; i < size; i++) { 54 PetscMPIInt color = rank < i + 1 ? 0 : MPI_UNDEFINED; 55 PetscCallMPI(MPI_Comm_split(comm, color, 0, &PCMPIComms[i])); 56 PCMPISolveCounts[i] = 0; 57 PCMPIKSPCounts[i] = 0; 58 PCMPIIterations[i] = 0; 59 PCMPISizes[i] = 0; 60 } 61 PCMPICommSet = PETSC_TRUE; 62 PetscFunctionReturn(PETSC_SUCCESS); 63 } 64 65 PetscErrorCode PCMPICommsDestroy(void) 66 { 67 MPI_Comm comm = PC_MPI_COMM_WORLD; 68 PetscMPIInt size, rank, i; 69 70 PetscFunctionBegin; 71 if (!PCMPICommSet) PetscFunctionReturn(PETSC_SUCCESS); 72 PetscCallMPI(MPI_Comm_size(comm, &size)); 73 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 74 for (i = 0; i < size; i++) { 75 if (PCMPIComms[i] != MPI_COMM_NULL) PetscCallMPI(MPI_Comm_free(&PCMPIComms[i])); 76 } 77 PCMPICommSet = PETSC_FALSE; 78 PetscFunctionReturn(PETSC_SUCCESS); 79 } 80 81 static PetscErrorCode PCMPICreate(PC pc) 82 { 83 PC_MPI *km = pc ? (PC_MPI *)pc->data : NULL; 84 MPI_Comm comm = PC_MPI_COMM_WORLD; 85 KSP ksp; 86 PetscInt N[2], mincntperrank = 0; 87 PetscMPIInt size; 88 Mat sA; 89 char *cprefix = NULL; 90 PetscMPIInt len = 0; 91 92 PetscFunctionBegin; 93 if (!PCMPICommSet) PetscCall(PCMPICommsCreate()); 94 PetscCallMPI(MPI_Comm_size(comm, &size)); 95 if (pc) { 96 if (size == 1) PetscCall(PetscPrintf(PETSC_COMM_SELF, "Warning: Running KSP type of MPI on a one rank MPI run, this will be less efficient then not using this type\n")); 97 PetscCall(PCGetOperators(pc, &sA, &sA)); 98 PetscCall(MatGetSize(sA, &N[0], &N[1])); 99 } 100 PetscCallMPI(MPI_Bcast(N, 2, MPIU_INT, 0, comm)); 101 102 /* choose a suitable sized MPI_Comm for the problem to be solved on */ 103 if (km) mincntperrank = km->mincntperrank; 104 PetscCallMPI(MPI_Bcast(&mincntperrank, 1, MPI_INT, 0, comm)); 105 comm = PCMPIComms[PetscMin(size, PetscMax(1, N[0] / mincntperrank)) - 1]; 106 if (comm == MPI_COMM_NULL) { 107 ksp = NULL; 108 PetscFunctionReturn(PETSC_SUCCESS); 109 } 110 PetscCall(PetscLogStagePush(PCMPIStage)); 111 PetscCall(KSPCreate(comm, &ksp)); 112 PetscCall(KSPSetNestLevel(ksp, 1)); 113 PetscCall(PetscObjectSetTabLevel((PetscObject)ksp, 1)); 114 PetscCall(PetscLogStagePop()); 115 PetscCallMPI(MPI_Gather(&ksp, 1, MPI_AINT, pc ? km->ksps : NULL, 1, MPI_AINT, 0, comm)); 116 if (pc) { 117 size_t slen; 118 const char *prefix = NULL; 119 char *found = NULL; 120 121 PetscCallMPI(MPI_Comm_size(comm, &size)); 122 PCMPIKSPCounts[size - 1]++; 123 /* Created KSP gets prefix of PC minus the mpi_linear_solver_server_ portion */ 124 PetscCall(PCGetOptionsPrefix(pc, &prefix)); 125 PetscCheck(prefix, PETSC_COMM_SELF, PETSC_ERR_PLIB, "PCMPI missing required prefix"); 126 PetscCall(PetscStrallocpy(prefix, &cprefix)); 127 PetscCall(PetscStrstr(cprefix, "mpi_linear_solver_server_", &found)); 128 PetscCheck(found, PETSC_COMM_SELF, PETSC_ERR_PLIB, "PCMPI missing mpi_linear_solver_server_ portion of prefix"); 129 *found = 0; 130 PetscCall(PetscStrlen(cprefix, &slen)); 131 len = (PetscMPIInt)slen; 132 } 133 PetscCallMPI(MPI_Bcast(&len, 1, MPI_INT, 0, comm)); 134 if (len) { 135 if (!pc) PetscCall(PetscMalloc1(len + 1, &cprefix)); 136 PetscCallMPI(MPI_Bcast(cprefix, len + 1, MPI_CHAR, 0, comm)); 137 PetscCall(KSPSetOptionsPrefix(ksp, cprefix)); 138 } 139 PetscCall(PetscFree(cprefix)); 140 PetscFunctionReturn(PETSC_SUCCESS); 141 } 142 143 static PetscErrorCode PCMPISetMat(PC pc) 144 { 145 PC_MPI *km = pc ? (PC_MPI *)pc->data : NULL; 146 Mat A; 147 PetscInt m, n, *ia, *ja, j, bs; 148 Mat sA; 149 MPI_Comm comm = PC_MPI_COMM_WORLD; 150 KSP ksp; 151 PetscLayout layout; 152 const PetscInt *IA = NULL, *JA = NULL; 153 const PetscInt *range; 154 PetscMPIInt *NZ = NULL, sendcounti[PC_MPI_MAX_RANKS], displi[PC_MPI_MAX_RANKS], *NZdispl = NULL, nz, size, i; 155 PetscScalar *a; 156 const PetscScalar *sa = NULL; 157 PetscInt matproperties[8] = {0, 0, 0, 0, 0, 0, 0, 0}; 158 char *cprefix; 159 160 PetscFunctionBegin; 161 PetscCallMPI(MPI_Scatter(pc ? km->ksps : NULL, 1, MPI_AINT, &ksp, 1, MPI_AINT, 0, comm)); 162 if (!ksp) PetscFunctionReturn(PETSC_SUCCESS); 163 PetscCall(PetscObjectGetComm((PetscObject)ksp, &comm)); 164 if (pc) { 165 PetscBool isset, issymmetric, ishermitian, isspd, isstructurallysymmetric; 166 const char *prefix; 167 size_t clen; 168 169 PetscCallMPI(MPI_Comm_size(comm, &size)); 170 PCMPIMatCounts[size - 1]++; 171 PetscCall(PCGetOperators(pc, &sA, &sA)); 172 PetscCall(MatGetSize(sA, &matproperties[0], &matproperties[1])); 173 PetscCall(MatGetBlockSize(sA, &bs)); 174 matproperties[2] = bs; 175 PetscCall(MatIsSymmetricKnown(sA, &isset, &issymmetric)); 176 matproperties[3] = !isset ? 0 : (issymmetric ? 1 : 2); 177 PetscCall(MatIsHermitianKnown(sA, &isset, &ishermitian)); 178 matproperties[4] = !isset ? 0 : (ishermitian ? 1 : 2); 179 PetscCall(MatIsSPDKnown(sA, &isset, &isspd)); 180 matproperties[5] = !isset ? 0 : (isspd ? 1 : 2); 181 PetscCall(MatIsStructurallySymmetricKnown(sA, &isset, &isstructurallysymmetric)); 182 matproperties[6] = !isset ? 0 : (isstructurallysymmetric ? 1 : 2); 183 /* Created Mat gets prefix of input Mat PLUS the mpi_linear_solver_server_ portion */ 184 PetscCall(MatGetOptionsPrefix(sA, &prefix)); 185 PetscCall(PetscStrallocpy(prefix, &cprefix)); 186 PetscCall(PetscStrlen(cprefix, &clen)); 187 matproperties[7] = (PetscInt)clen; 188 } 189 PetscCallMPI(MPI_Bcast(matproperties, PETSC_STATIC_ARRAY_LENGTH(matproperties), MPIU_INT, 0, comm)); 190 191 /* determine ownership ranges of matrix columns */ 192 PetscCall(PetscLayoutCreate(comm, &layout)); 193 PetscCall(PetscLayoutSetBlockSize(layout, matproperties[2])); 194 PetscCall(PetscLayoutSetSize(layout, matproperties[1])); 195 PetscCall(PetscLayoutSetUp(layout)); 196 PetscCall(PetscLayoutGetLocalSize(layout, &n)); 197 PetscCall(PetscLayoutDestroy(&layout)); 198 199 /* determine ownership ranges of matrix rows */ 200 PetscCall(PetscLayoutCreate(comm, &layout)); 201 PetscCall(PetscLayoutSetBlockSize(layout, matproperties[2])); 202 PetscCall(PetscLayoutSetSize(layout, matproperties[0])); 203 PetscCall(PetscLayoutSetUp(layout)); 204 PetscCall(PetscLayoutGetLocalSize(layout, &m)); 205 206 /* copy over the matrix nonzero structure and values */ 207 if (pc) { 208 NZ = km->NZ; 209 NZdispl = km->NZdispl; 210 PetscCall(PetscLayoutGetRanges(layout, &range)); 211 PetscCall(MatGetRowIJ(sA, 0, PETSC_FALSE, PETSC_FALSE, NULL, &IA, &JA, NULL)); 212 for (i = 0; i < size; i++) { 213 sendcounti[i] = (PetscMPIInt)(1 + range[i + 1] - range[i]); 214 NZ[i] = (PetscMPIInt)(IA[range[i + 1]] - IA[range[i]]); 215 } 216 displi[0] = 0; 217 NZdispl[0] = 0; 218 for (j = 1; j < size; j++) { 219 displi[j] = displi[j - 1] + sendcounti[j - 1] - 1; 220 NZdispl[j] = NZdispl[j - 1] + NZ[j - 1]; 221 } 222 PetscCall(MatSeqAIJGetArrayRead(sA, &sa)); 223 } 224 PetscCall(PetscLayoutDestroy(&layout)); 225 PetscCallMPI(MPI_Scatter(NZ, 1, MPI_INT, &nz, 1, MPI_INT, 0, comm)); 226 227 PetscCall(PetscMalloc3(n + 1, &ia, nz, &ja, nz, &a)); 228 PetscCallMPI(MPI_Scatterv(IA, sendcounti, displi, MPIU_INT, ia, n + 1, MPIU_INT, 0, comm)); 229 PetscCallMPI(MPI_Scatterv(JA, NZ, NZdispl, MPIU_INT, ja, nz, MPIU_INT, 0, comm)); 230 PetscCallMPI(MPI_Scatterv(sa, NZ, NZdispl, MPIU_SCALAR, a, nz, MPIU_SCALAR, 0, comm)); 231 232 if (pc) { 233 PetscCall(MatSeqAIJRestoreArrayRead(sA, &sa)); 234 PetscCall(MatRestoreRowIJ(sA, 0, PETSC_FALSE, PETSC_FALSE, NULL, &IA, &JA, NULL)); 235 } 236 237 for (j = 1; j < n + 1; j++) ia[j] -= ia[0]; 238 ia[0] = 0; 239 PetscCall(PetscLogStagePush(PCMPIStage)); 240 PetscCall(MatCreate(comm, &A)); 241 if (matproperties[7] > 0) { 242 if (!pc) PetscCall(PetscMalloc1(matproperties[7] + 1, &cprefix)); 243 PetscCallMPI(MPI_Bcast(cprefix, matproperties[7] + 1, MPI_CHAR, 0, comm)); 244 PetscCall(MatSetOptionsPrefix(A, cprefix)); 245 PetscCall(PetscFree(cprefix)); 246 } 247 PetscCall(MatAppendOptionsPrefix(A, "mpi_linear_solver_server_")); 248 PetscCall(MatSetSizes(A, m, n, matproperties[0], matproperties[1])); 249 PetscCall(MatSetType(A, MATMPIAIJ)); 250 PetscCall(MatMPIAIJSetPreallocationCSR(A, ia, ja, a)); 251 PetscCall(MatSetBlockSize(A, matproperties[2])); 252 253 if (matproperties[3]) PetscCall(MatSetOption(A, MAT_SYMMETRIC, matproperties[3] == 1 ? PETSC_TRUE : PETSC_FALSE)); 254 if (matproperties[4]) PetscCall(MatSetOption(A, MAT_HERMITIAN, matproperties[4] == 1 ? PETSC_TRUE : PETSC_FALSE)); 255 if (matproperties[5]) PetscCall(MatSetOption(A, MAT_SPD, matproperties[5] == 1 ? PETSC_TRUE : PETSC_FALSE)); 256 if (matproperties[6]) PetscCall(MatSetOption(A, MAT_STRUCTURALLY_SYMMETRIC, matproperties[6] == 1 ? PETSC_TRUE : PETSC_FALSE)); 257 258 PetscCall(PetscFree3(ia, ja, a)); 259 PetscCall(KSPSetOperators(ksp, A, A)); 260 if (!ksp->vec_sol) PetscCall(MatCreateVecs(A, &ksp->vec_sol, &ksp->vec_rhs)); 261 PetscCall(PetscLogStagePop()); 262 if (pc) { /* needed for scatterv/gatherv of rhs and solution */ 263 const PetscInt *range; 264 265 PetscCall(VecGetOwnershipRanges(ksp->vec_sol, &range)); 266 for (i = 0; i < size; i++) { 267 km->sendcount[i] = (PetscMPIInt)(range[i + 1] - range[i]); 268 km->displ[i] = (PetscMPIInt)range[i]; 269 } 270 } 271 PetscCall(MatDestroy(&A)); 272 PetscCall(KSPSetFromOptions(ksp)); 273 PetscFunctionReturn(PETSC_SUCCESS); 274 } 275 276 static PetscErrorCode PCMPIUpdateMatValues(PC pc) 277 { 278 PC_MPI *km = pc ? (PC_MPI *)pc->data : NULL; 279 KSP ksp; 280 Mat sA, A; 281 MPI_Comm comm = PC_MPI_COMM_WORLD; 282 PetscScalar *a; 283 PetscCount nz; 284 const PetscScalar *sa = NULL; 285 PetscMPIInt size; 286 PetscInt matproperties[4] = {0, 0, 0, 0}; 287 288 PetscFunctionBegin; 289 if (pc) { 290 PetscCall(PCGetOperators(pc, &sA, &sA)); 291 PetscCall(MatSeqAIJGetArrayRead(sA, &sa)); 292 } 293 PetscCallMPI(MPI_Scatter(pc ? km->ksps : NULL, 1, MPI_AINT, &ksp, 1, MPI_AINT, 0, comm)); 294 if (!ksp) PetscFunctionReturn(PETSC_SUCCESS); 295 PetscCall(PetscObjectGetComm((PetscObject)ksp, &comm)); 296 PetscCallMPI(MPI_Comm_size(comm, &size)); 297 PCMPIMatCounts[size - 1]++; 298 PetscCall(KSPGetOperators(ksp, NULL, &A)); 299 PetscCall(MatMPIAIJGetNumberNonzeros(A, &nz)); 300 PetscCall(PetscMalloc1(nz, &a)); 301 PetscCallMPI(MPI_Scatterv(sa, pc ? km->NZ : NULL, pc ? km->NZdispl : NULL, MPIU_SCALAR, a, nz, MPIU_SCALAR, 0, comm)); 302 if (pc) { 303 PetscBool isset, issymmetric, ishermitian, isspd, isstructurallysymmetric; 304 305 PetscCall(MatSeqAIJRestoreArrayRead(sA, &sa)); 306 307 PetscCall(MatIsSymmetricKnown(sA, &isset, &issymmetric)); 308 matproperties[0] = !isset ? 0 : (issymmetric ? 1 : 2); 309 PetscCall(MatIsHermitianKnown(sA, &isset, &ishermitian)); 310 matproperties[1] = !isset ? 0 : (ishermitian ? 1 : 2); 311 PetscCall(MatIsSPDKnown(sA, &isset, &isspd)); 312 matproperties[2] = !isset ? 0 : (isspd ? 1 : 2); 313 PetscCall(MatIsStructurallySymmetricKnown(sA, &isset, &isstructurallysymmetric)); 314 matproperties[3] = !isset ? 0 : (isstructurallysymmetric ? 1 : 2); 315 } 316 PetscCall(MatUpdateMPIAIJWithArray(A, a)); 317 PetscCall(PetscFree(a)); 318 PetscCallMPI(MPI_Bcast(matproperties, 4, MPIU_INT, 0, comm)); 319 /* if any of these properties was previously set and is now not set this will result in incorrect properties in A since there is no way to unset a property */ 320 if (matproperties[0]) PetscCall(MatSetOption(A, MAT_SYMMETRIC, matproperties[0] == 1 ? PETSC_TRUE : PETSC_FALSE)); 321 if (matproperties[1]) PetscCall(MatSetOption(A, MAT_HERMITIAN, matproperties[1] == 1 ? PETSC_TRUE : PETSC_FALSE)); 322 if (matproperties[2]) PetscCall(MatSetOption(A, MAT_SPD, matproperties[2] == 1 ? PETSC_TRUE : PETSC_FALSE)); 323 if (matproperties[3]) PetscCall(MatSetOption(A, MAT_STRUCTURALLY_SYMMETRIC, matproperties[3] == 1 ? PETSC_TRUE : PETSC_FALSE)); 324 PetscFunctionReturn(PETSC_SUCCESS); 325 } 326 327 static PetscErrorCode PCMPISolve(PC pc, Vec B, Vec X) 328 { 329 PC_MPI *km = pc ? (PC_MPI *)pc->data : NULL; 330 KSP ksp; 331 MPI_Comm comm = PC_MPI_COMM_WORLD; 332 const PetscScalar *sb = NULL, *x; 333 PetscScalar *b, *sx = NULL; 334 PetscInt its, n; 335 PetscMPIInt size; 336 337 PetscFunctionBegin; 338 PetscCallMPI(MPI_Scatter(pc ? km->ksps : &ksp, 1, MPI_AINT, &ksp, 1, MPI_AINT, 0, comm)); 339 if (!ksp) PetscFunctionReturn(PETSC_SUCCESS); 340 PetscCall(PetscObjectGetComm((PetscObject)ksp, &comm)); 341 342 /* TODO: optimize code to not require building counts/displ every time */ 343 344 /* scatterv rhs */ 345 PetscCallMPI(MPI_Comm_size(comm, &size)); 346 if (pc) { 347 PetscInt N; 348 349 PCMPISolveCounts[size - 1]++; 350 PetscCall(MatGetSize(pc->pmat, &N, NULL)); 351 PCMPISizes[size - 1] += N; 352 PetscCall(VecGetArrayRead(B, &sb)); 353 } 354 PetscCall(VecGetLocalSize(ksp->vec_rhs, &n)); 355 PetscCall(VecGetArray(ksp->vec_rhs, &b)); 356 PetscCallMPI(MPI_Scatterv(sb, pc ? km->sendcount : NULL, pc ? km->displ : NULL, MPIU_SCALAR, b, n, MPIU_SCALAR, 0, comm)); 357 PetscCall(VecRestoreArray(ksp->vec_rhs, &b)); 358 if (pc) PetscCall(VecRestoreArrayRead(B, &sb)); 359 360 PetscCall(PetscLogStagePush(PCMPIStage)); 361 PetscCall(KSPSolve(ksp, NULL, NULL)); 362 PetscCall(PetscLogStagePop()); 363 PetscCall(KSPGetIterationNumber(ksp, &its)); 364 PCMPIIterations[size - 1] += its; 365 366 /* gather solution */ 367 PetscCall(VecGetArrayRead(ksp->vec_sol, &x)); 368 if (pc) PetscCall(VecGetArray(X, &sx)); 369 PetscCallMPI(MPI_Gatherv(x, n, MPIU_SCALAR, sx, pc ? km->sendcount : NULL, pc ? km->displ : NULL, MPIU_SCALAR, 0, comm)); 370 if (pc) PetscCall(VecRestoreArray(X, &sx)); 371 PetscCall(VecRestoreArrayRead(ksp->vec_sol, &x)); 372 PetscFunctionReturn(PETSC_SUCCESS); 373 } 374 375 static PetscErrorCode PCMPIDestroy(PC pc) 376 { 377 PC_MPI *km = pc ? (PC_MPI *)pc->data : NULL; 378 KSP ksp; 379 MPI_Comm comm = PC_MPI_COMM_WORLD; 380 381 PetscFunctionBegin; 382 PetscCallMPI(MPI_Scatter(pc ? km->ksps : NULL, 1, MPI_AINT, &ksp, 1, MPI_AINT, 0, comm)); 383 if (!ksp) PetscFunctionReturn(PETSC_SUCCESS); 384 PetscCall(PetscLogStagePush(PCMPIStage)); 385 PetscCall(KSPDestroy(&ksp)); 386 PetscCall(PetscLogStagePop()); 387 PetscFunctionReturn(PETSC_SUCCESS); 388 } 389 390 PetscBool PCMPIServerActive = PETSC_FALSE; 391 392 /*@C 393 PCMPIServerBegin - starts a server that runs on the `rank != 0` MPI processes waiting to process requests for 394 parallel `KSP` solves and management of parallel `KSP` objects. 395 396 Logically Collective on all MPI processes except rank 0 397 398 Options Database Keys: 399 + -mpi_linear_solver_server - causes the PETSc program to start in MPI linear solver server mode where only the first MPI rank runs user code 400 - -mpi_linear_solver_server_view - displays information about all the linear systems solved by the MPI linear solver server at the conclusion of the program 401 402 Level: developer 403 404 Note: 405 This is normally started automatically in `PetscInitialize()` when the option is provided 406 407 See `PCMPI` for information on using the solver with a `KSP` object 408 409 Developer Notes: 410 When called on MPI rank 0 this sets `PETSC_COMM_WORLD` to `PETSC_COMM_SELF` to allow a main program 411 written with `PETSC_COMM_WORLD` to run correctly on the single rank while all the ranks 412 (that would normally be sharing `PETSC_COMM_WORLD`) to run the solver server. 413 414 Can this be integrated into the `PetscDevice` abstraction that is currently being developed? 415 416 Conceivably `PCREDISTRIBUTE` could be organized in a similar manner to simplify its usage 417 418 This could be implemented directly at the `KSP` level instead of using the `PCMPI` wrapper object 419 420 The code could be extended to allow an MPI + OpenMP application to use the linear solver server concept across all shared-memory 421 nodes with a single MPI process per node for the user application but multiple MPI processes per node for the linear solver. 422 423 The concept could also be extended for users's callbacks for `SNES`, `TS`, and `Tao` where the `SNESSolve()` for example, runs on 424 all MPI processes but the user callback only runs on one MPI process per node. 425 426 PETSc could also be extended with an MPI-less API that provides access to PETSc's solvers without any reference to MPI, essentially remove 427 the `MPI_Comm` argument from PETSc calls. 428 429 .seealso: [](sec_pcmpi), `PCMPIServerEnd()`, `PCMPI`, `KSPCheckPCMPI()` 430 @*/ 431 PetscErrorCode PCMPIServerBegin(void) 432 { 433 PetscMPIInt rank; 434 435 PetscFunctionBegin; 436 PetscCall(PetscInfo(NULL, "Starting MPI Linear Solver Server\n")); 437 if (PetscDefined(USE_SINGLE_LIBRARY)) { 438 PetscCall(VecInitializePackage()); 439 PetscCall(MatInitializePackage()); 440 PetscCall(DMInitializePackage()); 441 PetscCall(PCInitializePackage()); 442 PetscCall(KSPInitializePackage()); 443 PetscCall(SNESInitializePackage()); 444 PetscCall(TSInitializePackage()); 445 PetscCall(TaoInitializePackage()); 446 } 447 PetscCall(PetscLogStageRegister("PCMPI", &PCMPIStage)); 448 449 PetscCallMPI(MPI_Comm_rank(PC_MPI_COMM_WORLD, &rank)); 450 if (rank == 0) { 451 PETSC_COMM_WORLD = PETSC_COMM_SELF; 452 PCMPIServerActive = PETSC_TRUE; 453 PetscFunctionReturn(PETSC_SUCCESS); 454 } 455 456 while (PETSC_TRUE) { 457 PCMPICommand request = PCMPI_CREATE; 458 PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, PC_MPI_COMM_WORLD)); 459 switch (request) { 460 case PCMPI_CREATE: 461 PetscCall(PCMPICreate(NULL)); 462 break; 463 case PCMPI_SET_MAT: 464 PetscCall(PCMPISetMat(NULL)); 465 break; 466 case PCMPI_UPDATE_MAT_VALUES: 467 PetscCall(PCMPIUpdateMatValues(NULL)); 468 break; 469 case PCMPI_VIEW: 470 // PetscCall(PCMPIView(NULL)); 471 break; 472 case PCMPI_SOLVE: 473 PetscCall(PCMPISolve(NULL, NULL, NULL)); 474 break; 475 case PCMPI_DESTROY: 476 PetscCall(PCMPIDestroy(NULL)); 477 break; 478 case PCMPI_EXIT: 479 PetscCall(PetscFinalize()); 480 exit(0); /* not sure if this is a good idea, but cannot return because it will run users main program */ 481 break; 482 default: 483 break; 484 } 485 } 486 PetscFunctionReturn(PETSC_SUCCESS); 487 } 488 489 /*@C 490 PCMPIServerEnd - ends a server that runs on the rank != 0 MPI processes waiting to process requests for 491 parallel KSP solves and management of parallel `KSP` objects. 492 493 Logically Collective on all MPI ranks except 0 494 495 Level: developer 496 497 Note: 498 This is normally ended automatically in `PetscFinalize()` when the option is provided 499 500 .seealso: [](sec_pcmpi), `PCMPIServerBegin()`, `PCMPI`, `KSPCheckPCMPI()` 501 @*/ 502 PetscErrorCode PCMPIServerEnd(void) 503 { 504 PCMPICommand request = PCMPI_EXIT; 505 506 PetscFunctionBegin; 507 if (PetscGlobalRank == 0) { 508 PetscViewer viewer = NULL; 509 PetscViewerFormat format; 510 511 PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, PC_MPI_COMM_WORLD)); 512 PETSC_COMM_WORLD = MPI_COMM_WORLD; /* could use PC_MPI_COMM_WORLD */ 513 PetscOptionsBegin(PETSC_COMM_SELF, NULL, "MPI linear solver server options", NULL); 514 PetscCall(PetscOptionsViewer("-mpi_linear_solver_server_view", "View information about system solved with the server", "PCMPI", &viewer, &format, NULL)); 515 PetscOptionsEnd(); 516 if (viewer) { 517 PetscBool isascii; 518 519 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii)); 520 if (isascii) { 521 PetscMPIInt size; 522 PetscMPIInt i; 523 524 PetscCallMPI(MPI_Comm_size(PETSC_COMM_WORLD, &size)); 525 PetscCall(PetscViewerASCIIPrintf(viewer, "MPI linear solver server statistics:\n")); 526 PetscCall(PetscViewerASCIIPrintf(viewer, " Ranks KSPSolve()s Mats KSPs Avg. Size Avg. Its\n")); 527 if (PCMPIKSPCountsSeq) { 528 PetscCall(PetscViewerASCIIPrintf(viewer, " Sequential %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT "\n", PCMPISolveCountsSeq, PCMPIKSPCountsSeq, PCMPISizesSeq / PCMPISolveCountsSeq, PCMPIIterationsSeq / PCMPISolveCountsSeq)); 529 } 530 for (i = 0; i < size; i++) { 531 if (PCMPIKSPCounts[i]) { 532 PetscCall(PetscViewerASCIIPrintf(viewer, " %d %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT "\n", i + 1, PCMPISolveCounts[i], PCMPIMatCounts[i], PCMPIKSPCounts[i], PCMPISizes[i] / PCMPISolveCounts[i], PCMPIIterations[i] / PCMPISolveCounts[i])); 533 } 534 } 535 } 536 PetscCall(PetscOptionsRestoreViewer(&viewer)); 537 } 538 } 539 PetscCall(PCMPICommsDestroy()); 540 PCMPIServerActive = PETSC_FALSE; 541 PetscFunctionReturn(PETSC_SUCCESS); 542 } 543 544 /* 545 This version is used in the trivial case when the MPI parallel solver server is running on just the original MPI rank 0 546 because, for example, the problem is small. This version is more efficient because it does not require copying any data 547 */ 548 static PetscErrorCode PCSetUp_Seq(PC pc) 549 { 550 PC_MPI *km = (PC_MPI *)pc->data; 551 Mat sA; 552 const char *prefix; 553 char *found = NULL, *cprefix; 554 555 PetscFunctionBegin; 556 PetscCall(PCGetOperators(pc, NULL, &sA)); 557 PetscCall(PCGetOptionsPrefix(pc, &prefix)); 558 PetscCall(KSPCreate(PETSC_COMM_SELF, &km->ksps[0])); 559 PetscCall(KSPSetNestLevel(km->ksps[0], 1)); 560 PetscCall(PetscObjectSetTabLevel((PetscObject)km->ksps[0], 1)); 561 562 /* Created KSP gets prefix of PC minus the mpi_linear_solver_server_ portion */ 563 PetscCall(PCGetOptionsPrefix(pc, &prefix)); 564 PetscCheck(prefix, PETSC_COMM_SELF, PETSC_ERR_PLIB, "PCMPI missing required prefix"); 565 PetscCall(PetscStrallocpy(prefix, &cprefix)); 566 PetscCall(PetscStrstr(cprefix, "mpi_linear_solver_server_", &found)); 567 PetscCheck(found, PETSC_COMM_SELF, PETSC_ERR_PLIB, "PCMPI missing mpi_linear_solver_server_ portion of prefix"); 568 *found = 0; 569 PetscCall(KSPSetOptionsPrefix(km->ksps[0], cprefix)); 570 PetscCall(PetscFree(cprefix)); 571 572 PetscCall(KSPSetOperators(km->ksps[0], sA, sA)); 573 PetscCall(KSPSetFromOptions(km->ksps[0])); 574 PetscCall(KSPSetUp(km->ksps[0])); 575 PetscCall(PetscInfo((PetscObject)pc, "MPI parallel linear solver system is being solved directly on rank 0 due to its small size\n")); 576 PCMPIKSPCountsSeq++; 577 PetscFunctionReturn(PETSC_SUCCESS); 578 } 579 580 static PetscErrorCode PCApply_Seq(PC pc, Vec b, Vec x) 581 { 582 PC_MPI *km = (PC_MPI *)pc->data; 583 PetscInt its, n; 584 Mat A; 585 586 PetscFunctionBegin; 587 PetscCall(KSPSolve(km->ksps[0], b, x)); 588 PetscCall(KSPGetIterationNumber(km->ksps[0], &its)); 589 PCMPISolveCountsSeq++; 590 PCMPIIterationsSeq += its; 591 PetscCall(KSPGetOperators(km->ksps[0], NULL, &A)); 592 PetscCall(MatGetSize(A, &n, NULL)); 593 PCMPISizesSeq += n; 594 PetscFunctionReturn(PETSC_SUCCESS); 595 } 596 597 static PetscErrorCode PCView_Seq(PC pc, PetscViewer viewer) 598 { 599 PC_MPI *km = (PC_MPI *)pc->data; 600 601 PetscFunctionBegin; 602 PetscCall(PetscViewerASCIIPrintf(viewer, "Running MPI linear solver server directly on rank 0 due to its small size\n")); 603 PetscCall(PetscViewerASCIIPrintf(viewer, "Desired minimum number of nonzeros per rank for MPI parallel solve %d\n", (int)km->mincntperrank)); 604 PetscCall(PetscViewerASCIIPrintf(viewer, "*** Use -mpi_linear_solver_server_view to statistics on all the solves ***\n")); 605 PetscFunctionReturn(PETSC_SUCCESS); 606 } 607 608 static PetscErrorCode PCDestroy_Seq(PC pc) 609 { 610 PC_MPI *km = (PC_MPI *)pc->data; 611 612 PetscFunctionBegin; 613 PetscCall(KSPDestroy(&km->ksps[0])); 614 PetscCall(PetscFree(pc->data)); 615 PetscFunctionReturn(PETSC_SUCCESS); 616 } 617 618 /* 619 PCSetUp_MPI - Trigger the creation of the MPI parallel PC and copy parts of the matrix and 620 right hand side to the parallel PC 621 */ 622 static PetscErrorCode PCSetUp_MPI(PC pc) 623 { 624 PC_MPI *km = (PC_MPI *)pc->data; 625 PetscMPIInt rank, size; 626 PCMPICommand request; 627 PetscBool newmatrix = PETSC_FALSE; 628 629 PetscFunctionBegin; 630 PetscCallMPI(MPI_Comm_rank(MPI_COMM_WORLD, &rank)); 631 PetscCheck(rank == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "PCMPI can only be used from 0th rank of MPI_COMM_WORLD. Perhaps a missing -mpi_linear_solver_server?"); 632 PetscCallMPI(MPI_Comm_size(MPI_COMM_WORLD, &size)); 633 634 if (!pc->setupcalled) { 635 if (!km->alwaysuseserver) { 636 PetscInt n; 637 Mat sA; 638 /* short circuit for small systems */ 639 PetscCall(PCGetOperators(pc, &sA, &sA)); 640 PetscCall(MatGetSize(sA, &n, NULL)); 641 if (n < 2 * km->mincntperrank - 1 || size == 1) { 642 pc->ops->setup = NULL; 643 pc->ops->apply = PCApply_Seq; 644 pc->ops->destroy = PCDestroy_Seq; 645 pc->ops->view = PCView_Seq; 646 PetscCall(PCSetUp_Seq(pc)); 647 PetscFunctionReturn(PETSC_SUCCESS); 648 } 649 } 650 651 request = PCMPI_CREATE; 652 PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, MPI_COMM_WORLD)); 653 PetscCall(PCMPICreate(pc)); 654 newmatrix = PETSC_TRUE; 655 } 656 if (pc->flag == DIFFERENT_NONZERO_PATTERN) newmatrix = PETSC_TRUE; 657 658 if (newmatrix) { 659 PetscCall(PetscInfo((PetscObject)pc, "New matrix or matrix has changed nonzero structure\n")); 660 request = PCMPI_SET_MAT; 661 PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, MPI_COMM_WORLD)); 662 PetscCall(PCMPISetMat(pc)); 663 } else { 664 PetscCall(PetscInfo((PetscObject)pc, "Matrix has only changed nonzero values\n")); 665 request = PCMPI_UPDATE_MAT_VALUES; 666 PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, MPI_COMM_WORLD)); 667 PetscCall(PCMPIUpdateMatValues(pc)); 668 } 669 PetscFunctionReturn(PETSC_SUCCESS); 670 } 671 672 static PetscErrorCode PCApply_MPI(PC pc, Vec b, Vec x) 673 { 674 PCMPICommand request = PCMPI_SOLVE; 675 676 PetscFunctionBegin; 677 PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, MPI_COMM_WORLD)); 678 PetscCall(PCMPISolve(pc, b, x)); 679 PetscFunctionReturn(PETSC_SUCCESS); 680 } 681 682 static PetscErrorCode PCDestroy_MPI(PC pc) 683 { 684 PCMPICommand request = PCMPI_DESTROY; 685 686 PetscFunctionBegin; 687 PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, MPI_COMM_WORLD)); 688 PetscCall(PCMPIDestroy(pc)); 689 PetscCall(PetscFree(pc->data)); 690 PetscFunctionReturn(PETSC_SUCCESS); 691 } 692 693 /* 694 PCView_MPI - Cannot call view on the MPI parallel KSP because other ranks do not have access to the viewer 695 */ 696 static PetscErrorCode PCView_MPI(PC pc, PetscViewer viewer) 697 { 698 PC_MPI *km = (PC_MPI *)pc->data; 699 MPI_Comm comm; 700 PetscMPIInt size; 701 702 PetscFunctionBegin; 703 PetscCall(PetscObjectGetComm((PetscObject)km->ksps[0], &comm)); 704 PetscCallMPI(MPI_Comm_size(comm, &size)); 705 PetscCall(PetscViewerASCIIPrintf(viewer, "Size of MPI communicator used for MPI parallel KSP solve %d\n", size)); 706 PetscCall(PetscViewerASCIIPrintf(viewer, "Desired minimum number of nonzeros per rank for MPI parallel solve %d\n", (int)km->mincntperrank)); 707 PetscCall(PetscViewerASCIIPrintf(viewer, "*** Use -mpi_linear_solver_server_view to statistics on all the solves ***\n")); 708 PetscFunctionReturn(PETSC_SUCCESS); 709 } 710 711 static PetscErrorCode PCSetFromOptions_MPI(PC pc, PetscOptionItems *PetscOptionsObject) 712 { 713 PC_MPI *km = (PC_MPI *)pc->data; 714 715 PetscFunctionBegin; 716 PetscOptionsHeadBegin(PetscOptionsObject, "MPI linear solver server options"); 717 PetscCall(PetscOptionsInt("-minimum_count_per_rank", "Desired minimum number of nonzeros per rank", "None", km->mincntperrank, &km->mincntperrank, NULL)); 718 PetscCall(PetscOptionsBool("-always_use_server", "Use the server even if only one rank is used for the solve (for debugging)", "None", km->alwaysuseserver, &km->alwaysuseserver, NULL)); 719 PetscOptionsHeadEnd(); 720 PetscFunctionReturn(PETSC_SUCCESS); 721 } 722 723 /*MC 724 PCMPI - Calls an MPI parallel `KSP` to solve a linear system from user code running on one process 725 726 Options Database Keys for the Server: 727 + -mpi_linear_solver_server - causes the PETSc program to start in MPI linear solver server mode where only the first MPI rank runs user code 728 - -mpi_linear_solver_server_view - displays information about all the linear systems solved by the MPI linear solver server 729 730 Options Database Keys for a specific `KSP` object 731 + -[any_ksp_prefix]_mpi_linear_solver_server_minimum_count_per_rank - sets the minimum size of the linear system per MPI rank that the solver will strive for 732 - -[any_ksp_prefix]_mpi_linear_solver_server_always_use_server - use the server solver code even if the particular system is only solved on the process (for debugging and testing purposes) 733 734 Level: developer 735 736 Notes: 737 The options database prefix for the actual solver is any prefix provided before use to the original `KSP` with `KSPSetOptionsPrefix()`, mostly commonly no prefix is used. 738 739 It can be particularly useful for user OpenMP code or potentially user GPU code. 740 741 When the program is running with a single MPI process then it directly uses the provided matrix and right hand side 742 and does not need to distribute the matrix and vector to the various MPI processes; thus it incurs no extra overhead over just using the `KSP` directly. 743 744 The solver options for actual solving `KSP` and `PC` must be controlled via the options database, calls to set options directly on the user level `KSP` and `PC` have no effect 745 because they are not the actual solver objects. 746 747 When `-log_view` is used with this solver the events within the parallel solve are logging in their own stage. Some of the logging in the other 748 stages will be confusing since the event times are only recorded on the 0th MPI rank, thus the percent of time in the events will be misleading. 749 750 Developer Note: 751 This `PCType` is never directly selected by the user, it is set when the option `-mpi_linear_solver_server` is used and the `PC` is at the outer most nesting of 752 a `KSP`. The outer most `KSP` object is automatically set to `KSPPREONLY` and thus is not directly visible to the user. 753 754 .seealso: [](sec_pcmpi), `KSPCreate()`, `KSPSetType()`, `KSPType`, `KSP`, `PC`, `PCMPIServerBegin()`, `PCMPIServerEnd()`, `KSPCheckPCMPI()` 755 M*/ 756 PETSC_EXTERN PetscErrorCode PCCreate_MPI(PC pc) 757 { 758 PC_MPI *km; 759 char *found = NULL; 760 761 PetscFunctionBegin; 762 PetscCall(PetscStrstr(((PetscObject)pc)->prefix, "mpi_linear_solver_server_", &found)); 763 PetscCheck(found, PETSC_COMM_SELF, PETSC_ERR_PLIB, "PCMPI object prefix does not have mpi_linear_solver_server_"); 764 765 /* material from PCSetType() */ 766 PetscTryTypeMethod(pc, destroy); 767 pc->ops->destroy = NULL; 768 pc->data = NULL; 769 770 PetscCall(PetscFunctionListDestroy(&((PetscObject)pc)->qlist)); 771 PetscCall(PetscMemzero(pc->ops, sizeof(struct _PCOps))); 772 pc->modifysubmatrices = NULL; 773 pc->modifysubmatricesP = NULL; 774 pc->setupcalled = 0; 775 776 PetscCall(PetscNew(&km)); 777 pc->data = (void *)km; 778 779 km->mincntperrank = 10000; 780 781 pc->ops->setup = PCSetUp_MPI; 782 pc->ops->apply = PCApply_MPI; 783 pc->ops->destroy = PCDestroy_MPI; 784 pc->ops->view = PCView_MPI; 785 pc->ops->setfromoptions = PCSetFromOptions_MPI; 786 PetscCall(PetscObjectChangeTypeName((PetscObject)pc, PCMPI)); 787 PetscFunctionReturn(PETSC_SUCCESS); 788 } 789