1 /* 2 This file creates an MPI parallel KSP from a sequential PC that lives on MPI rank 0. 3 It is intended to allow using PETSc MPI parallel linear solvers from non-MPI codes. 4 5 That program may use OpenMP to compute the right-hand side and matrix for the linear system 6 7 The code uses MPI_COMM_WORLD below but maybe it should be PETSC_COMM_WORLD 8 9 The resulting KSP and PC can only be controlled via the options database, though some common commands 10 could be passed through the server. 11 12 */ 13 #include <petsc/private/pcimpl.h> 14 #include <petsc/private/kspimpl.h> 15 #include <petscts.h> 16 #include <petsctao.h> 17 18 #define PC_MPI_MAX_RANKS 256 19 #define PC_MPI_COMM_WORLD MPI_COMM_WORLD 20 21 typedef struct { 22 KSP ksps[PC_MPI_MAX_RANKS]; /* The addresses of the MPI parallel KSP on each rank, NULL when not on a rank. */ 23 PetscMPIInt sendcount[PC_MPI_MAX_RANKS], displ[PC_MPI_MAX_RANKS]; /* For scatter/gather of rhs/solution */ 24 PetscMPIInt NZ[PC_MPI_MAX_RANKS], NZdispl[PC_MPI_MAX_RANKS]; /* For scatter of nonzero values in matrix (and nonzero column indices initially */ 25 PetscInt mincntperrank; /* minimum number of desired nonzeros per active rank in MPI parallel KSP solve */ 26 PetscBool alwaysuseserver; /* for debugging use the server infrastructure even if only one MPI rank is used for the solve */ 27 } PC_MPI; 28 29 typedef enum { 30 PCMPI_EXIT, /* exit the PC server loop, means the controlling sequential program is done */ 31 PCMPI_CREATE, 32 PCMPI_SET_MAT, /* set original matrix (or one with different nonzero pattern) */ 33 PCMPI_UPDATE_MAT_VALUES, /* update current matrix with new nonzero values */ 34 PCMPI_SOLVE, 35 PCMPI_VIEW, 36 PCMPI_DESTROY /* destroy a KSP that is no longer needed */ 37 } PCMPICommand; 38 39 static MPI_Comm PCMPIComms[PC_MPI_MAX_RANKS]; 40 static PetscBool PCMPICommSet = PETSC_FALSE; 41 static PetscInt PCMPISolveCounts[PC_MPI_MAX_RANKS], PCMPIKSPCounts[PC_MPI_MAX_RANKS], PCMPIMatCounts[PC_MPI_MAX_RANKS], PCMPISolveCountsSeq = 0, PCMPIKSPCountsSeq = 0; 42 static PetscInt PCMPIIterations[PC_MPI_MAX_RANKS], PCMPISizes[PC_MPI_MAX_RANKS], PCMPIIterationsSeq = 0, PCMPISizesSeq = 0; 43 44 static PetscErrorCode PCMPICommsCreate(void) 45 { 46 MPI_Comm comm = PC_MPI_COMM_WORLD; 47 PetscMPIInt size, rank, i; 48 49 PetscFunctionBegin; 50 PetscCallMPI(MPI_Comm_size(comm, &size)); 51 PetscCheck(size <= PC_MPI_MAX_RANKS, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for using more than PC_MPI_MAX_RANKS MPI ranks in an MPI linear solver server solve"); 52 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 53 /* comm for size 1 is useful only for debugging */ 54 for (i = 0; i < size; i++) { 55 PetscMPIInt color = rank < i + 1 ? 0 : MPI_UNDEFINED; 56 PetscCallMPI(MPI_Comm_split(comm, color, 0, &PCMPIComms[i])); 57 PCMPISolveCounts[i] = 0; 58 PCMPIKSPCounts[i] = 0; 59 PCMPIIterations[i] = 0; 60 PCMPISizes[i] = 0; 61 } 62 PCMPICommSet = PETSC_TRUE; 63 PetscFunctionReturn(PETSC_SUCCESS); 64 } 65 66 PetscErrorCode PCMPICommsDestroy(void) 67 { 68 MPI_Comm comm = PC_MPI_COMM_WORLD; 69 PetscMPIInt size, rank, i; 70 71 PetscFunctionBegin; 72 if (!PCMPICommSet) PetscFunctionReturn(PETSC_SUCCESS); 73 PetscCallMPI(MPI_Comm_size(comm, &size)); 74 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 75 for (i = 0; i < size; i++) { 76 if (PCMPIComms[i] != MPI_COMM_NULL) PetscCallMPI(MPI_Comm_free(&PCMPIComms[i])); 77 } 78 PCMPICommSet = PETSC_FALSE; 79 PetscFunctionReturn(PETSC_SUCCESS); 80 } 81 82 static PetscErrorCode PCMPICreate(PC pc) 83 { 84 PC_MPI *km = pc ? (PC_MPI *)pc->data : NULL; 85 MPI_Comm comm = PC_MPI_COMM_WORLD; 86 KSP ksp; 87 PetscInt N[2], mincntperrank = 0; 88 PetscMPIInt size; 89 Mat sA; 90 char *cprefix = NULL; 91 PetscMPIInt len = 0; 92 93 PetscFunctionBegin; 94 if (!PCMPICommSet) PetscCall(PCMPICommsCreate()); 95 PetscCallMPI(MPI_Comm_size(comm, &size)); 96 if (pc) { 97 if (size == 1) PetscCall(PetscPrintf(PETSC_COMM_SELF, "Warning: Running KSP type of MPI on a one rank MPI run, this will be less efficient then not using this type\n")); 98 PetscCall(PCGetOperators(pc, &sA, &sA)); 99 PetscCall(MatGetSize(sA, &N[0], &N[1])); 100 } 101 PetscCallMPI(MPI_Bcast(N, 2, MPIU_INT, 0, comm)); 102 103 /* choose a suitable sized MPI_Comm for the problem to be solved on */ 104 if (km) mincntperrank = km->mincntperrank; 105 PetscCallMPI(MPI_Bcast(&mincntperrank, 1, MPI_INT, 0, comm)); 106 comm = PCMPIComms[PetscMin(size, PetscMax(1, N[0] / mincntperrank)) - 1]; 107 if (comm == MPI_COMM_NULL) { 108 ksp = NULL; 109 PetscFunctionReturn(PETSC_SUCCESS); 110 } 111 PetscCall(PetscLogStagePush(PCMPIStage)); 112 PetscCall(KSPCreate(comm, &ksp)); 113 PetscCall(KSPSetNestLevel(ksp, 1)); 114 PetscCall(PetscObjectSetTabLevel((PetscObject)ksp, 1)); 115 PetscCall(PetscLogStagePop()); 116 PetscCallMPI(MPI_Gather(&ksp, 1, MPI_AINT, pc ? km->ksps : NULL, 1, MPI_AINT, 0, comm)); 117 if (pc) { 118 size_t slen; 119 const char *prefix = NULL; 120 char *found = NULL; 121 122 PetscCallMPI(MPI_Comm_size(comm, &size)); 123 PCMPIKSPCounts[size - 1]++; 124 /* Created KSP gets prefix of PC minus the mpi_linear_solver_server_ portion */ 125 PetscCall(PCGetOptionsPrefix(pc, &prefix)); 126 PetscCheck(prefix, PETSC_COMM_SELF, PETSC_ERR_PLIB, "PCMPI missing required prefix"); 127 PetscCall(PetscStrallocpy(prefix, &cprefix)); 128 PetscCall(PetscStrstr(cprefix, "mpi_linear_solver_server_", &found)); 129 PetscCheck(found, PETSC_COMM_SELF, PETSC_ERR_PLIB, "PCMPI missing mpi_linear_solver_server_ portion of prefix"); 130 *found = 0; 131 PetscCall(PetscStrlen(cprefix, &slen)); 132 len = (PetscMPIInt)slen; 133 } 134 PetscCallMPI(MPI_Bcast(&len, 1, MPI_INT, 0, comm)); 135 if (len) { 136 if (!pc) PetscCall(PetscMalloc1(len + 1, &cprefix)); 137 PetscCallMPI(MPI_Bcast(cprefix, len + 1, MPI_CHAR, 0, comm)); 138 PetscCall(KSPSetOptionsPrefix(ksp, cprefix)); 139 } 140 PetscCall(PetscFree(cprefix)); 141 PetscFunctionReturn(PETSC_SUCCESS); 142 } 143 144 static PetscErrorCode PCMPISetMat(PC pc) 145 { 146 PC_MPI *km = pc ? (PC_MPI *)pc->data : NULL; 147 Mat A; 148 PetscInt m, n, *ia, *ja, j, bs; 149 Mat sA; 150 MPI_Comm comm = PC_MPI_COMM_WORLD; 151 KSP ksp; 152 PetscLayout layout; 153 const PetscInt *IA = NULL, *JA = NULL; 154 const PetscInt *range; 155 PetscMPIInt *NZ = NULL, sendcounti[PC_MPI_MAX_RANKS], displi[PC_MPI_MAX_RANKS], *NZdispl = NULL, nz, size, i; 156 PetscScalar *a; 157 const PetscScalar *sa = NULL; 158 PetscInt matproperties[8] = {0, 0, 0, 0, 0, 0, 0, 0}; 159 char *cprefix; 160 161 PetscFunctionBegin; 162 PetscCallMPI(MPI_Scatter(pc ? km->ksps : NULL, 1, MPI_AINT, &ksp, 1, MPI_AINT, 0, comm)); 163 if (!ksp) PetscFunctionReturn(PETSC_SUCCESS); 164 PetscCall(PetscObjectGetComm((PetscObject)ksp, &comm)); 165 if (pc) { 166 PetscBool isset, issymmetric, ishermitian, isspd, isstructurallysymmetric; 167 const char *prefix; 168 size_t clen; 169 170 PetscCallMPI(MPI_Comm_size(comm, &size)); 171 PCMPIMatCounts[size - 1]++; 172 PetscCall(PCGetOperators(pc, &sA, &sA)); 173 PetscCall(MatGetSize(sA, &matproperties[0], &matproperties[1])); 174 PetscCall(MatGetBlockSize(sA, &bs)); 175 matproperties[2] = bs; 176 PetscCall(MatIsSymmetricKnown(sA, &isset, &issymmetric)); 177 matproperties[3] = !isset ? 0 : (issymmetric ? 1 : 2); 178 PetscCall(MatIsHermitianKnown(sA, &isset, &ishermitian)); 179 matproperties[4] = !isset ? 0 : (ishermitian ? 1 : 2); 180 PetscCall(MatIsSPDKnown(sA, &isset, &isspd)); 181 matproperties[5] = !isset ? 0 : (isspd ? 1 : 2); 182 PetscCall(MatIsStructurallySymmetricKnown(sA, &isset, &isstructurallysymmetric)); 183 matproperties[6] = !isset ? 0 : (isstructurallysymmetric ? 1 : 2); 184 /* Created Mat gets prefix of input Mat PLUS the mpi_linear_solver_server_ portion */ 185 PetscCall(MatGetOptionsPrefix(sA, &prefix)); 186 PetscCall(PetscStrallocpy(prefix, &cprefix)); 187 PetscCall(PetscStrlen(cprefix, &clen)); 188 matproperties[7] = (PetscInt)clen; 189 } 190 PetscCallMPI(MPI_Bcast(matproperties, PETSC_STATIC_ARRAY_LENGTH(matproperties), MPIU_INT, 0, comm)); 191 192 /* determine ownership ranges of matrix columns */ 193 PetscCall(PetscLayoutCreate(comm, &layout)); 194 PetscCall(PetscLayoutSetBlockSize(layout, matproperties[2])); 195 PetscCall(PetscLayoutSetSize(layout, matproperties[1])); 196 PetscCall(PetscLayoutSetUp(layout)); 197 PetscCall(PetscLayoutGetLocalSize(layout, &n)); 198 PetscCall(PetscLayoutDestroy(&layout)); 199 200 /* determine ownership ranges of matrix rows */ 201 PetscCall(PetscLayoutCreate(comm, &layout)); 202 PetscCall(PetscLayoutSetBlockSize(layout, matproperties[2])); 203 PetscCall(PetscLayoutSetSize(layout, matproperties[0])); 204 PetscCall(PetscLayoutSetUp(layout)); 205 PetscCall(PetscLayoutGetLocalSize(layout, &m)); 206 207 /* copy over the matrix nonzero structure and values */ 208 if (pc) { 209 NZ = km->NZ; 210 NZdispl = km->NZdispl; 211 PetscCall(PetscLayoutGetRanges(layout, &range)); 212 PetscCall(MatGetRowIJ(sA, 0, PETSC_FALSE, PETSC_FALSE, NULL, &IA, &JA, NULL)); 213 for (i = 0; i < size; i++) { 214 sendcounti[i] = (PetscMPIInt)(1 + range[i + 1] - range[i]); 215 NZ[i] = (PetscMPIInt)(IA[range[i + 1]] - IA[range[i]]); 216 } 217 displi[0] = 0; 218 NZdispl[0] = 0; 219 for (j = 1; j < size; j++) { 220 displi[j] = displi[j - 1] + sendcounti[j - 1] - 1; 221 NZdispl[j] = NZdispl[j - 1] + NZ[j - 1]; 222 } 223 PetscCall(MatSeqAIJGetArrayRead(sA, &sa)); 224 } 225 PetscCall(PetscLayoutDestroy(&layout)); 226 PetscCallMPI(MPI_Scatter(NZ, 1, MPI_INT, &nz, 1, MPI_INT, 0, comm)); 227 228 PetscCall(PetscMalloc3(n + 1, &ia, nz, &ja, nz, &a)); 229 PetscCallMPI(MPI_Scatterv(IA, sendcounti, displi, MPIU_INT, ia, n + 1, MPIU_INT, 0, comm)); 230 PetscCallMPI(MPI_Scatterv(JA, NZ, NZdispl, MPIU_INT, ja, nz, MPIU_INT, 0, comm)); 231 PetscCallMPI(MPI_Scatterv(sa, NZ, NZdispl, MPIU_SCALAR, a, nz, MPIU_SCALAR, 0, comm)); 232 233 if (pc) { 234 PetscCall(MatSeqAIJRestoreArrayRead(sA, &sa)); 235 PetscCall(MatRestoreRowIJ(sA, 0, PETSC_FALSE, PETSC_FALSE, NULL, &IA, &JA, NULL)); 236 } 237 238 for (j = 1; j < n + 1; j++) ia[j] -= ia[0]; 239 ia[0] = 0; 240 PetscCall(PetscLogStagePush(PCMPIStage)); 241 PetscCall(MatCreate(comm, &A)); 242 if (matproperties[7] > 0) { 243 if (!pc) PetscCall(PetscMalloc1(matproperties[7] + 1, &cprefix)); 244 PetscCallMPI(MPI_Bcast(cprefix, matproperties[7] + 1, MPI_CHAR, 0, comm)); 245 PetscCall(MatSetOptionsPrefix(A, cprefix)); 246 PetscCall(PetscFree(cprefix)); 247 } 248 PetscCall(MatAppendOptionsPrefix(A, "mpi_linear_solver_server_")); 249 PetscCall(MatSetSizes(A, m, n, matproperties[0], matproperties[1])); 250 PetscCall(MatSetType(A, MATMPIAIJ)); 251 PetscCall(MatMPIAIJSetPreallocationCSR(A, ia, ja, a)); 252 PetscCall(MatSetBlockSize(A, matproperties[2])); 253 254 if (matproperties[3]) PetscCall(MatSetOption(A, MAT_SYMMETRIC, matproperties[3] == 1 ? PETSC_TRUE : PETSC_FALSE)); 255 if (matproperties[4]) PetscCall(MatSetOption(A, MAT_HERMITIAN, matproperties[4] == 1 ? PETSC_TRUE : PETSC_FALSE)); 256 if (matproperties[5]) PetscCall(MatSetOption(A, MAT_SPD, matproperties[5] == 1 ? PETSC_TRUE : PETSC_FALSE)); 257 if (matproperties[6]) PetscCall(MatSetOption(A, MAT_STRUCTURALLY_SYMMETRIC, matproperties[6] == 1 ? PETSC_TRUE : PETSC_FALSE)); 258 259 PetscCall(PetscFree3(ia, ja, a)); 260 PetscCall(KSPSetOperators(ksp, A, A)); 261 if (!ksp->vec_sol) PetscCall(MatCreateVecs(A, &ksp->vec_sol, &ksp->vec_rhs)); 262 PetscCall(PetscLogStagePop()); 263 if (pc) { /* needed for scatterv/gatherv of rhs and solution */ 264 const PetscInt *range; 265 266 PetscCall(VecGetOwnershipRanges(ksp->vec_sol, &range)); 267 for (i = 0; i < size; i++) { 268 km->sendcount[i] = (PetscMPIInt)(range[i + 1] - range[i]); 269 km->displ[i] = (PetscMPIInt)range[i]; 270 } 271 } 272 PetscCall(MatDestroy(&A)); 273 PetscCall(KSPSetFromOptions(ksp)); 274 PetscFunctionReturn(PETSC_SUCCESS); 275 } 276 277 static PetscErrorCode PCMPIUpdateMatValues(PC pc) 278 { 279 PC_MPI *km = pc ? (PC_MPI *)pc->data : NULL; 280 KSP ksp; 281 Mat sA, A; 282 MPI_Comm comm = PC_MPI_COMM_WORLD; 283 PetscScalar *a; 284 PetscCount nz; 285 const PetscScalar *sa = NULL; 286 PetscMPIInt size; 287 PetscInt matproperties[4] = {0, 0, 0, 0}; 288 289 PetscFunctionBegin; 290 if (pc) { 291 PetscCall(PCGetOperators(pc, &sA, &sA)); 292 PetscCall(MatSeqAIJGetArrayRead(sA, &sa)); 293 } 294 PetscCallMPI(MPI_Scatter(pc ? km->ksps : NULL, 1, MPI_AINT, &ksp, 1, MPI_AINT, 0, comm)); 295 if (!ksp) PetscFunctionReturn(PETSC_SUCCESS); 296 PetscCall(PetscObjectGetComm((PetscObject)ksp, &comm)); 297 PetscCallMPI(MPI_Comm_size(comm, &size)); 298 PCMPIMatCounts[size - 1]++; 299 PetscCall(KSPGetOperators(ksp, NULL, &A)); 300 PetscCall(MatMPIAIJGetNumberNonzeros(A, &nz)); 301 PetscCall(PetscMalloc1(nz, &a)); 302 PetscCallMPI(MPI_Scatterv(sa, pc ? km->NZ : NULL, pc ? km->NZdispl : NULL, MPIU_SCALAR, a, nz, MPIU_SCALAR, 0, comm)); 303 if (pc) { 304 PetscBool isset, issymmetric, ishermitian, isspd, isstructurallysymmetric; 305 306 PetscCall(MatSeqAIJRestoreArrayRead(sA, &sa)); 307 308 PetscCall(MatIsSymmetricKnown(sA, &isset, &issymmetric)); 309 matproperties[0] = !isset ? 0 : (issymmetric ? 1 : 2); 310 PetscCall(MatIsHermitianKnown(sA, &isset, &ishermitian)); 311 matproperties[1] = !isset ? 0 : (ishermitian ? 1 : 2); 312 PetscCall(MatIsSPDKnown(sA, &isset, &isspd)); 313 matproperties[2] = !isset ? 0 : (isspd ? 1 : 2); 314 PetscCall(MatIsStructurallySymmetricKnown(sA, &isset, &isstructurallysymmetric)); 315 matproperties[3] = !isset ? 0 : (isstructurallysymmetric ? 1 : 2); 316 } 317 PetscCall(MatUpdateMPIAIJWithArray(A, a)); 318 PetscCall(PetscFree(a)); 319 PetscCallMPI(MPI_Bcast(matproperties, 4, MPIU_INT, 0, comm)); 320 /* if any of these properties was previously set and is now not set this will result in incorrect properties in A since there is no way to unset a property */ 321 if (matproperties[0]) PetscCall(MatSetOption(A, MAT_SYMMETRIC, matproperties[0] == 1 ? PETSC_TRUE : PETSC_FALSE)); 322 if (matproperties[1]) PetscCall(MatSetOption(A, MAT_HERMITIAN, matproperties[1] == 1 ? PETSC_TRUE : PETSC_FALSE)); 323 if (matproperties[2]) PetscCall(MatSetOption(A, MAT_SPD, matproperties[2] == 1 ? PETSC_TRUE : PETSC_FALSE)); 324 if (matproperties[3]) PetscCall(MatSetOption(A, MAT_STRUCTURALLY_SYMMETRIC, matproperties[3] == 1 ? PETSC_TRUE : PETSC_FALSE)); 325 PetscFunctionReturn(PETSC_SUCCESS); 326 } 327 328 static PetscErrorCode PCMPISolve(PC pc, Vec B, Vec X) 329 { 330 PC_MPI *km = pc ? (PC_MPI *)pc->data : NULL; 331 KSP ksp; 332 MPI_Comm comm = PC_MPI_COMM_WORLD; 333 const PetscScalar *sb = NULL, *x; 334 PetscScalar *b, *sx = NULL; 335 PetscInt its, n; 336 PetscMPIInt size; 337 338 PetscFunctionBegin; 339 PetscCallMPI(MPI_Scatter(pc ? km->ksps : &ksp, 1, MPI_AINT, &ksp, 1, MPI_AINT, 0, comm)); 340 if (!ksp) PetscFunctionReturn(PETSC_SUCCESS); 341 PetscCall(PetscObjectGetComm((PetscObject)ksp, &comm)); 342 343 /* TODO: optimize code to not require building counts/displ every time */ 344 345 /* scatterv rhs */ 346 PetscCallMPI(MPI_Comm_size(comm, &size)); 347 if (pc) { 348 PetscInt N; 349 350 PCMPISolveCounts[size - 1]++; 351 PetscCall(MatGetSize(pc->pmat, &N, NULL)); 352 PCMPISizes[size - 1] += N; 353 PetscCall(VecGetArrayRead(B, &sb)); 354 } 355 PetscCall(VecGetLocalSize(ksp->vec_rhs, &n)); 356 PetscCall(VecGetArray(ksp->vec_rhs, &b)); 357 PetscCallMPI(MPI_Scatterv(sb, pc ? km->sendcount : NULL, pc ? km->displ : NULL, MPIU_SCALAR, b, n, MPIU_SCALAR, 0, comm)); 358 PetscCall(VecRestoreArray(ksp->vec_rhs, &b)); 359 if (pc) PetscCall(VecRestoreArrayRead(B, &sb)); 360 361 PetscCall(PetscLogStagePush(PCMPIStage)); 362 PetscCall(KSPSolve(ksp, NULL, NULL)); 363 PetscCall(PetscLogStagePop()); 364 PetscCall(KSPGetIterationNumber(ksp, &its)); 365 PCMPIIterations[size - 1] += its; 366 367 /* gather solution */ 368 PetscCall(VecGetArrayRead(ksp->vec_sol, &x)); 369 if (pc) PetscCall(VecGetArray(X, &sx)); 370 PetscCallMPI(MPI_Gatherv(x, n, MPIU_SCALAR, sx, pc ? km->sendcount : NULL, pc ? km->displ : NULL, MPIU_SCALAR, 0, comm)); 371 if (pc) PetscCall(VecRestoreArray(X, &sx)); 372 PetscCall(VecRestoreArrayRead(ksp->vec_sol, &x)); 373 PetscFunctionReturn(PETSC_SUCCESS); 374 } 375 376 static PetscErrorCode PCMPIDestroy(PC pc) 377 { 378 PC_MPI *km = pc ? (PC_MPI *)pc->data : NULL; 379 KSP ksp; 380 MPI_Comm comm = PC_MPI_COMM_WORLD; 381 382 PetscFunctionBegin; 383 PetscCallMPI(MPI_Scatter(pc ? km->ksps : NULL, 1, MPI_AINT, &ksp, 1, MPI_AINT, 0, comm)); 384 if (!ksp) PetscFunctionReturn(PETSC_SUCCESS); 385 PetscCall(PetscLogStagePush(PCMPIStage)); 386 PetscCall(KSPDestroy(&ksp)); 387 PetscCall(PetscLogStagePop()); 388 PetscFunctionReturn(PETSC_SUCCESS); 389 } 390 391 PetscBool PCMPIServerActive = PETSC_FALSE; 392 393 /*@C 394 PCMPIServerBegin - starts a server that runs on the `rank != 0` MPI processes waiting to process requests for 395 parallel `KSP` solves and management of parallel `KSP` objects. 396 397 Logically Collective on all MPI processes except rank 0 398 399 Options Database Keys: 400 + -mpi_linear_solver_server - causes the PETSc program to start in MPI linear solver server mode where only the first MPI rank runs user code 401 - -mpi_linear_solver_server_view - displays information about all the linear systems solved by the MPI linear solver server at the conclusion of the program 402 403 Level: developer 404 405 Note: 406 This is normally started automatically in `PetscInitialize()` when the option is provided 407 408 See `PCMPI` for information on using the solver with a `KSP` object 409 410 Developer Notes: 411 When called on MPI rank 0 this sets `PETSC_COMM_WORLD` to `PETSC_COMM_SELF` to allow a main program 412 written with `PETSC_COMM_WORLD` to run correctly on the single rank while all the ranks 413 (that would normally be sharing `PETSC_COMM_WORLD`) to run the solver server. 414 415 Can this be integrated into the `PetscDevice` abstraction that is currently being developed? 416 417 Conceivably `PCREDISTRIBUTE` could be organized in a similar manner to simplify its usage 418 419 This could be implemented directly at the `KSP` level instead of using the `PCMPI` wrapper object 420 421 The code could be extended to allow an MPI + OpenMP application to use the linear solver server concept across all shared-memory 422 nodes with a single MPI process per node for the user application but multiple MPI processes per node for the linear solver. 423 424 The concept could also be extended for users's callbacks for `SNES`, `TS`, and `Tao` where the `SNESSolve()` for example, runs on 425 all MPI processes but the user callback only runs on one MPI process per node. 426 427 PETSc could also be extended with an MPI-less API that provides access to PETSc's solvers without any reference to MPI, essentially remove 428 the `MPI_Comm` argument from PETSc calls. 429 430 .seealso: [](sec_pcmpi), `PCMPIServerEnd()`, `PCMPI`, `KSPCheckPCMPI()` 431 @*/ 432 PetscErrorCode PCMPIServerBegin(void) 433 { 434 PetscMPIInt rank; 435 436 PetscFunctionBegin; 437 PetscCall(PetscInfo(NULL, "Starting MPI Linear Solver Server\n")); 438 if (PetscDefined(USE_SINGLE_LIBRARY)) { 439 PetscCall(VecInitializePackage()); 440 PetscCall(MatInitializePackage()); 441 PetscCall(DMInitializePackage()); 442 PetscCall(PCInitializePackage()); 443 PetscCall(KSPInitializePackage()); 444 PetscCall(SNESInitializePackage()); 445 PetscCall(TSInitializePackage()); 446 PetscCall(TaoInitializePackage()); 447 } 448 PetscCall(PetscLogStageRegister("PCMPI", &PCMPIStage)); 449 450 PetscCallMPI(MPI_Comm_rank(PC_MPI_COMM_WORLD, &rank)); 451 if (rank == 0) { 452 PETSC_COMM_WORLD = PETSC_COMM_SELF; 453 PCMPIServerActive = PETSC_TRUE; 454 PetscFunctionReturn(PETSC_SUCCESS); 455 } 456 457 while (PETSC_TRUE) { 458 PCMPICommand request = PCMPI_CREATE; 459 PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, PC_MPI_COMM_WORLD)); 460 switch (request) { 461 case PCMPI_CREATE: 462 PetscCall(PCMPICreate(NULL)); 463 break; 464 case PCMPI_SET_MAT: 465 PetscCall(PCMPISetMat(NULL)); 466 break; 467 case PCMPI_UPDATE_MAT_VALUES: 468 PetscCall(PCMPIUpdateMatValues(NULL)); 469 break; 470 case PCMPI_VIEW: 471 // PetscCall(PCMPIView(NULL)); 472 break; 473 case PCMPI_SOLVE: 474 PetscCall(PCMPISolve(NULL, NULL, NULL)); 475 break; 476 case PCMPI_DESTROY: 477 PetscCall(PCMPIDestroy(NULL)); 478 break; 479 case PCMPI_EXIT: 480 PetscCall(PetscFinalize()); 481 exit(0); /* not sure if this is a good idea, but cannot return because it will run users main program */ 482 break; 483 default: 484 break; 485 } 486 } 487 PetscFunctionReturn(PETSC_SUCCESS); 488 } 489 490 /*@C 491 PCMPIServerEnd - ends a server that runs on the rank != 0 MPI processes waiting to process requests for 492 parallel KSP solves and management of parallel `KSP` objects. 493 494 Logically Collective on all MPI ranks except 0 495 496 Level: developer 497 498 Note: 499 This is normally ended automatically in `PetscFinalize()` when the option is provided 500 501 .seealso: [](sec_pcmpi), `PCMPIServerBegin()`, `PCMPI`, `KSPCheckPCMPI()` 502 @*/ 503 PetscErrorCode PCMPIServerEnd(void) 504 { 505 PCMPICommand request = PCMPI_EXIT; 506 507 PetscFunctionBegin; 508 if (PetscGlobalRank == 0) { 509 PetscViewer viewer = NULL; 510 PetscViewerFormat format; 511 512 PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, PC_MPI_COMM_WORLD)); 513 PETSC_COMM_WORLD = MPI_COMM_WORLD; /* could use PC_MPI_COMM_WORLD */ 514 PetscOptionsBegin(PETSC_COMM_SELF, NULL, "MPI linear solver server options", NULL); 515 PetscCall(PetscOptionsViewer("-mpi_linear_solver_server_view", "View information about system solved with the server", "PCMPI", &viewer, &format, NULL)); 516 PetscOptionsEnd(); 517 if (viewer) { 518 PetscBool isascii; 519 520 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii)); 521 if (isascii) { 522 PetscMPIInt size; 523 PetscMPIInt i; 524 525 PetscCallMPI(MPI_Comm_size(PETSC_COMM_WORLD, &size)); 526 PetscCall(PetscViewerASCIIPrintf(viewer, "MPI linear solver server statistics:\n")); 527 PetscCall(PetscViewerASCIIPrintf(viewer, " Ranks KSPSolve()s Mats KSPs Avg. Size Avg. Its\n")); 528 if (PCMPIKSPCountsSeq) { 529 PetscCall(PetscViewerASCIIPrintf(viewer, " Sequential %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT "\n", PCMPISolveCountsSeq, PCMPIKSPCountsSeq, PCMPISizesSeq / PCMPISolveCountsSeq, PCMPIIterationsSeq / PCMPISolveCountsSeq)); 530 } 531 for (i = 0; i < size; i++) { 532 if (PCMPIKSPCounts[i]) { 533 PetscCall(PetscViewerASCIIPrintf(viewer, " %d %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT "\n", i + 1, PCMPISolveCounts[i], PCMPIMatCounts[i], PCMPIKSPCounts[i], PCMPISizes[i] / PCMPISolveCounts[i], PCMPIIterations[i] / PCMPISolveCounts[i])); 534 } 535 } 536 } 537 PetscCall(PetscOptionsRestoreViewer(&viewer)); 538 } 539 } 540 PetscCall(PCMPICommsDestroy()); 541 PCMPIServerActive = PETSC_FALSE; 542 PetscFunctionReturn(PETSC_SUCCESS); 543 } 544 545 /* 546 This version is used in the trivial case when the MPI parallel solver server is running on just the original MPI rank 0 547 because, for example, the problem is small. This version is more efficient because it does not require copying any data 548 */ 549 static PetscErrorCode PCSetUp_Seq(PC pc) 550 { 551 PC_MPI *km = (PC_MPI *)pc->data; 552 Mat sA; 553 const char *prefix; 554 char *found = NULL, *cprefix; 555 556 PetscFunctionBegin; 557 PetscCall(PCGetOperators(pc, NULL, &sA)); 558 PetscCall(PCGetOptionsPrefix(pc, &prefix)); 559 PetscCall(KSPCreate(PETSC_COMM_SELF, &km->ksps[0])); 560 PetscCall(KSPSetNestLevel(km->ksps[0], 1)); 561 PetscCall(PetscObjectSetTabLevel((PetscObject)km->ksps[0], 1)); 562 563 /* Created KSP gets prefix of PC minus the mpi_linear_solver_server_ portion */ 564 PetscCall(PCGetOptionsPrefix(pc, &prefix)); 565 PetscCheck(prefix, PETSC_COMM_SELF, PETSC_ERR_PLIB, "PCMPI missing required prefix"); 566 PetscCall(PetscStrallocpy(prefix, &cprefix)); 567 PetscCall(PetscStrstr(cprefix, "mpi_linear_solver_server_", &found)); 568 PetscCheck(found, PETSC_COMM_SELF, PETSC_ERR_PLIB, "PCMPI missing mpi_linear_solver_server_ portion of prefix"); 569 *found = 0; 570 PetscCall(KSPSetOptionsPrefix(km->ksps[0], cprefix)); 571 PetscCall(PetscFree(cprefix)); 572 573 PetscCall(KSPSetOperators(km->ksps[0], sA, sA)); 574 PetscCall(KSPSetFromOptions(km->ksps[0])); 575 PetscCall(KSPSetUp(km->ksps[0])); 576 PetscCall(PetscInfo((PetscObject)pc, "MPI parallel linear solver system is being solved directly on rank 0 due to its small size\n")); 577 PCMPIKSPCountsSeq++; 578 PetscFunctionReturn(PETSC_SUCCESS); 579 } 580 581 static PetscErrorCode PCApply_Seq(PC pc, Vec b, Vec x) 582 { 583 PC_MPI *km = (PC_MPI *)pc->data; 584 PetscInt its, n; 585 Mat A; 586 587 PetscFunctionBegin; 588 PetscCall(KSPSolve(km->ksps[0], b, x)); 589 PetscCall(KSPGetIterationNumber(km->ksps[0], &its)); 590 PCMPISolveCountsSeq++; 591 PCMPIIterationsSeq += its; 592 PetscCall(KSPGetOperators(km->ksps[0], NULL, &A)); 593 PetscCall(MatGetSize(A, &n, NULL)); 594 PCMPISizesSeq += n; 595 PetscFunctionReturn(PETSC_SUCCESS); 596 } 597 598 static PetscErrorCode PCView_Seq(PC pc, PetscViewer viewer) 599 { 600 PC_MPI *km = (PC_MPI *)pc->data; 601 602 PetscFunctionBegin; 603 PetscCall(PetscViewerASCIIPrintf(viewer, "Running MPI linear solver server directly on rank 0 due to its small size\n")); 604 PetscCall(PetscViewerASCIIPrintf(viewer, "Desired minimum number of nonzeros per rank for MPI parallel solve %d\n", (int)km->mincntperrank)); 605 PetscCall(PetscViewerASCIIPrintf(viewer, "*** Use -mpi_linear_solver_server_view to statistics on all the solves ***\n")); 606 PetscFunctionReturn(PETSC_SUCCESS); 607 } 608 609 static PetscErrorCode PCDestroy_Seq(PC pc) 610 { 611 PC_MPI *km = (PC_MPI *)pc->data; 612 613 PetscFunctionBegin; 614 PetscCall(KSPDestroy(&km->ksps[0])); 615 PetscCall(PetscFree(pc->data)); 616 PetscFunctionReturn(PETSC_SUCCESS); 617 } 618 619 /* 620 PCSetUp_MPI - Trigger the creation of the MPI parallel PC and copy parts of the matrix and 621 right-hand side to the parallel PC 622 */ 623 static PetscErrorCode PCSetUp_MPI(PC pc) 624 { 625 PC_MPI *km = (PC_MPI *)pc->data; 626 PetscMPIInt rank, size; 627 PCMPICommand request; 628 PetscBool newmatrix = PETSC_FALSE; 629 630 PetscFunctionBegin; 631 PetscCallMPI(MPI_Comm_rank(MPI_COMM_WORLD, &rank)); 632 PetscCheck(rank == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "PCMPI can only be used from 0th rank of MPI_COMM_WORLD. Perhaps a missing -mpi_linear_solver_server?"); 633 PetscCallMPI(MPI_Comm_size(MPI_COMM_WORLD, &size)); 634 635 if (!pc->setupcalled) { 636 if (!km->alwaysuseserver) { 637 PetscInt n; 638 Mat sA; 639 /* short circuit for small systems */ 640 PetscCall(PCGetOperators(pc, &sA, &sA)); 641 PetscCall(MatGetSize(sA, &n, NULL)); 642 if (n < 2 * km->mincntperrank - 1 || size == 1) { 643 pc->ops->setup = NULL; 644 pc->ops->apply = PCApply_Seq; 645 pc->ops->destroy = PCDestroy_Seq; 646 pc->ops->view = PCView_Seq; 647 PetscCall(PCSetUp_Seq(pc)); 648 PetscFunctionReturn(PETSC_SUCCESS); 649 } 650 } 651 652 request = PCMPI_CREATE; 653 PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, MPI_COMM_WORLD)); 654 PetscCall(PCMPICreate(pc)); 655 newmatrix = PETSC_TRUE; 656 } 657 if (pc->flag == DIFFERENT_NONZERO_PATTERN) newmatrix = PETSC_TRUE; 658 659 if (newmatrix) { 660 PetscCall(PetscInfo((PetscObject)pc, "New matrix or matrix has changed nonzero structure\n")); 661 request = PCMPI_SET_MAT; 662 PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, MPI_COMM_WORLD)); 663 PetscCall(PCMPISetMat(pc)); 664 } else { 665 PetscCall(PetscInfo((PetscObject)pc, "Matrix has only changed nonzero values\n")); 666 request = PCMPI_UPDATE_MAT_VALUES; 667 PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, MPI_COMM_WORLD)); 668 PetscCall(PCMPIUpdateMatValues(pc)); 669 } 670 PetscFunctionReturn(PETSC_SUCCESS); 671 } 672 673 static PetscErrorCode PCApply_MPI(PC pc, Vec b, Vec x) 674 { 675 PCMPICommand request = PCMPI_SOLVE; 676 677 PetscFunctionBegin; 678 PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, MPI_COMM_WORLD)); 679 PetscCall(PCMPISolve(pc, b, x)); 680 PetscFunctionReturn(PETSC_SUCCESS); 681 } 682 683 static PetscErrorCode PCDestroy_MPI(PC pc) 684 { 685 PCMPICommand request = PCMPI_DESTROY; 686 687 PetscFunctionBegin; 688 PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, MPI_COMM_WORLD)); 689 PetscCall(PCMPIDestroy(pc)); 690 PetscCall(PetscFree(pc->data)); 691 PetscFunctionReturn(PETSC_SUCCESS); 692 } 693 694 /* 695 PCView_MPI - Cannot call view on the MPI parallel KSP because other ranks do not have access to the viewer 696 */ 697 static PetscErrorCode PCView_MPI(PC pc, PetscViewer viewer) 698 { 699 PC_MPI *km = (PC_MPI *)pc->data; 700 MPI_Comm comm; 701 PetscMPIInt size; 702 703 PetscFunctionBegin; 704 PetscCall(PetscObjectGetComm((PetscObject)km->ksps[0], &comm)); 705 PetscCallMPI(MPI_Comm_size(comm, &size)); 706 PetscCall(PetscViewerASCIIPrintf(viewer, "Size of MPI communicator used for MPI parallel KSP solve %d\n", size)); 707 PetscCall(PetscViewerASCIIPrintf(viewer, "Desired minimum number of nonzeros per rank for MPI parallel solve %d\n", (int)km->mincntperrank)); 708 PetscCall(PetscViewerASCIIPrintf(viewer, "*** Use -mpi_linear_solver_server_view to statistics on all the solves ***\n")); 709 PetscFunctionReturn(PETSC_SUCCESS); 710 } 711 712 static PetscErrorCode PCSetFromOptions_MPI(PC pc, PetscOptionItems *PetscOptionsObject) 713 { 714 PC_MPI *km = (PC_MPI *)pc->data; 715 716 PetscFunctionBegin; 717 PetscOptionsHeadBegin(PetscOptionsObject, "MPI linear solver server options"); 718 PetscCall(PetscOptionsInt("-minimum_count_per_rank", "Desired minimum number of nonzeros per rank", "None", km->mincntperrank, &km->mincntperrank, NULL)); 719 PetscCall(PetscOptionsBool("-always_use_server", "Use the server even if only one rank is used for the solve (for debugging)", "None", km->alwaysuseserver, &km->alwaysuseserver, NULL)); 720 PetscOptionsHeadEnd(); 721 PetscFunctionReturn(PETSC_SUCCESS); 722 } 723 724 /*MC 725 PCMPI - Calls an MPI parallel `KSP` to solve a linear system from user code running on one process 726 727 Options Database Keys for the Server: 728 + -mpi_linear_solver_server - causes the PETSc program to start in MPI linear solver server mode where only the first MPI rank runs user code 729 - -mpi_linear_solver_server_view - displays information about all the linear systems solved by the MPI linear solver server 730 731 Options Database Keys for a specific `KSP` object 732 + -[any_ksp_prefix]_mpi_linear_solver_server_minimum_count_per_rank - sets the minimum size of the linear system per MPI rank that the solver will strive for 733 - -[any_ksp_prefix]_mpi_linear_solver_server_always_use_server - use the server solver code even if the particular system is only solved on the process (for debugging and testing purposes) 734 735 Level: developer 736 737 Notes: 738 The options database prefix for the actual solver is any prefix provided before use to the original `KSP` with `KSPSetOptionsPrefix()`, mostly commonly no prefix is used. 739 740 It can be particularly useful for user OpenMP code or potentially user GPU code. 741 742 When the program is running with a single MPI process then it directly uses the provided matrix and right-hand side 743 and does not need to distribute the matrix and vector to the various MPI processes; thus it incurs no extra overhead over just using the `KSP` directly. 744 745 The solver options for actual solving `KSP` and `PC` must be controlled via the options database, calls to set options directly on the user level `KSP` and `PC` have no effect 746 because they are not the actual solver objects. 747 748 When `-log_view` is used with this solver the events within the parallel solve are logging in their own stage. Some of the logging in the other 749 stages will be confusing since the event times are only recorded on the 0th MPI rank, thus the percent of time in the events will be misleading. 750 751 Developer Note: 752 This `PCType` is never directly selected by the user, it is set when the option `-mpi_linear_solver_server` is used and the `PC` is at the outer most nesting of 753 a `KSP`. The outer most `KSP` object is automatically set to `KSPPREONLY` and thus is not directly visible to the user. 754 755 .seealso: [](sec_pcmpi), `KSPCreate()`, `KSPSetType()`, `KSPType`, `KSP`, `PC`, `PCMPIServerBegin()`, `PCMPIServerEnd()`, `KSPCheckPCMPI()` 756 M*/ 757 PETSC_EXTERN PetscErrorCode PCCreate_MPI(PC pc) 758 { 759 PC_MPI *km; 760 char *found = NULL; 761 762 PetscFunctionBegin; 763 PetscCall(PetscStrstr(((PetscObject)pc)->prefix, "mpi_linear_solver_server_", &found)); 764 PetscCheck(found, PETSC_COMM_SELF, PETSC_ERR_PLIB, "PCMPI object prefix does not have mpi_linear_solver_server_"); 765 766 /* material from PCSetType() */ 767 PetscTryTypeMethod(pc, destroy); 768 pc->ops->destroy = NULL; 769 pc->data = NULL; 770 771 PetscCall(PetscFunctionListDestroy(&((PetscObject)pc)->qlist)); 772 PetscCall(PetscMemzero(pc->ops, sizeof(struct _PCOps))); 773 pc->modifysubmatrices = NULL; 774 pc->modifysubmatricesP = NULL; 775 pc->setupcalled = 0; 776 777 PetscCall(PetscNew(&km)); 778 pc->data = (void *)km; 779 780 km->mincntperrank = 10000; 781 782 pc->ops->setup = PCSetUp_MPI; 783 pc->ops->apply = PCApply_MPI; 784 pc->ops->destroy = PCDestroy_MPI; 785 pc->ops->view = PCView_MPI; 786 pc->ops->setfromoptions = PCSetFromOptions_MPI; 787 PetscCall(PetscObjectChangeTypeName((PetscObject)pc, PCMPI)); 788 PetscFunctionReturn(PETSC_SUCCESS); 789 } 790