/*
    This file creates an MPI parallel KSP from a sequential PC that lives on MPI rank 0.
    It is intended to allow using PETSc MPI parallel linear solvers from non-MPI codes.

    The user's program may use OpenMP to compute the right-hand side and matrix for the linear system.

    The code uses MPI_COMM_WORLD below, but perhaps it should use PETSC_COMM_WORLD.

    The resulting KSP and PC can only be controlled via the options database, though some common commands
    could be passed through the server.
*/
#include <petsc/private/pcimpl.h>
#include <petsc/private/kspimpl.h>
#include <petsc.h>

#define PC_MPI_MAX_RANKS  256
#define PC_MPI_COMM_WORLD MPI_COMM_WORLD

typedef struct {
  KSP         ksps[PC_MPI_MAX_RANKS];                               /* The addresses of the MPI parallel KSP on each rank, NULL when not on a rank. */
  PetscMPIInt sendcount[PC_MPI_MAX_RANKS], displ[PC_MPI_MAX_RANKS]; /* For scatter/gather of rhs/solution */
  PetscMPIInt NZ[PC_MPI_MAX_RANKS], NZdispl[PC_MPI_MAX_RANKS];      /* For scatter of nonzero values in matrix (and of nonzero column indices initially) */
  PetscInt    mincntperrank;                                        /* minimum number of desired nonzeros per active rank in MPI parallel KSP solve */
  PetscBool   alwaysuseserver;                                      /* for debugging, use the server infrastructure even if only one MPI rank is used for the solve */
} PC_MPI;

typedef enum {
  PCMPI_EXIT, /* exit the PC server loop; the controlling sequential program is done */
  PCMPI_CREATE,
  PCMPI_SET_MAT,           /* set original matrix (or one with a different nonzero pattern) */
  PCMPI_UPDATE_MAT_VALUES, /* update current matrix with new nonzero values */
  PCMPI_SOLVE,
  PCMPI_VIEW,
  PCMPI_DESTROY /* destroy a KSP that is no longer needed */
} PCMPICommand;

static MPI_Comm      PCMPIComms[PC_MPI_MAX_RANKS];
static PetscBool     PCMPICommSet = PETSC_FALSE;
static PetscInt      PCMPISolveCounts[PC_MPI_MAX_RANKS], PCMPIKSPCounts[PC_MPI_MAX_RANKS], PCMPIMatCounts[PC_MPI_MAX_RANKS], PCMPISolveCountsSeq = 0, PCMPIKSPCountsSeq = 0;
static PetscInt      PCMPIIterations[PC_MPI_MAX_RANKS], PCMPISizes[PC_MPI_MAX_RANKS], PCMPIIterationsSeq = 0, PCMPISizesSeq = 0;
static PetscLogStage PCMPIStage; /* logging stage for the work performed inside the MPI linear solver server */

static PetscErrorCode PCMPICommsCreate(void)
{
  MPI_Comm    comm = PC_MPI_COMM_WORLD;
  PetscMPIInt size, rank, i;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCheck(size <= PC_MPI_MAX_RANKS, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for using more than PC_MPI_MAX_RANKS MPI ranks in an MPI linear solver server solve");
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  /* comm for size 1 is useful only for debugging */
  for (i = 0; i < size; i++) {
    PetscMPIInt color = rank < i + 1 ? 0 : MPI_UNDEFINED;
    PetscCallMPI(MPI_Comm_split(comm, color, 0, &PCMPIComms[i]));
    PCMPISolveCounts[i] = 0;
    PCMPIKSPCounts[i]   = 0;
    PCMPIIterations[i]  = 0;
    PCMPISizes[i]       = 0;
  }
  PCMPICommSet = PETSC_TRUE;
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode PCMPICommsDestroy(void)
{
  MPI_Comm    comm = PC_MPI_COMM_WORLD;
  PetscMPIInt size, rank, i;

  PetscFunctionBegin;
  if (!PCMPICommSet) PetscFunctionReturn(PETSC_SUCCESS);
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  for (i = 0; i < size; i++) {
    if (PCMPIComms[i] != MPI_COMM_NULL) PetscCallMPI(MPI_Comm_free(&PCMPIComms[i]));
  }
  PCMPICommSet = PETSC_FALSE;
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode PCMPICreate(PC pc)
{
  PC_MPI     *km   = pc ? (PC_MPI *)pc->data : NULL;
  MPI_Comm    comm = PC_MPI_COMM_WORLD;
  KSP         ksp;
  PetscInt    N[2], mincntperrank = 0;
  PetscMPIInt size;
  Mat         sA;
  char       *prefix;
  PetscMPIInt len = 0;

  PetscFunctionBegin;
  if (!PCMPICommSet) PetscCall(PCMPICommsCreate());
  PetscCallMPI(MPI_Comm_size(comm, &size));
  if (pc) {
    if (size == 1) PetscCall(PetscPrintf(PETSC_COMM_SELF, "Warning: Running KSP of type MPI on a one-rank MPI run; this will be less efficient than not using this type\n"));
    PetscCall(PCGetOperators(pc, &sA, &sA));
    PetscCall(MatGetSize(sA, &N[0], &N[1]));
  }
  PetscCallMPI(MPI_Bcast(N, 2, MPIU_INT, 0, comm));

  /* choose a suitably sized MPI_Comm for the problem to be solved on */
  if (km) mincntperrank = km->mincntperrank;
  PetscCallMPI(MPI_Bcast(&mincntperrank, 1, MPI_INT, 0, comm));
  comm = PCMPIComms[PetscMin(size, PetscMax(1, N[0] / mincntperrank)) - 1];
  if (comm == MPI_COMM_NULL) {
    ksp = NULL;
    PetscFunctionReturn(PETSC_SUCCESS);
  }
  PetscCall(PetscLogStagePush(PCMPIStage));
  PetscCall(KSPCreate(comm, &ksp));
  PetscCall(PetscLogStagePop());
  /* rank 0 collects the addresses of the parallel KSPs so they can be sent back with later commands */
  PetscCallMPI(MPI_Gather(&ksp, 1, MPI_AINT, pc ? km->ksps : NULL, 1, MPI_AINT, 0, comm));
  if (pc) {
    size_t slen;

    PetscCallMPI(MPI_Comm_size(comm, &size));
    PCMPIKSPCounts[size - 1]++;
    PetscCall(PCGetOptionsPrefix(pc, (const char **)&prefix));
    PetscCall(PetscStrlen(prefix, &slen));
    len = (PetscMPIInt)slen;
  }
  PetscCallMPI(MPI_Bcast(&len, 1, MPI_INT, 0, comm));
  if (len) {
    if (!pc) PetscCall(PetscMalloc1(len + 1, &prefix));
    PetscCallMPI(MPI_Bcast(prefix, len + 1, MPI_CHAR, 0, comm));
    PetscCall(KSPSetOptionsPrefix(ksp, prefix));
  }
  PetscCall(KSPAppendOptionsPrefix(ksp, "mpi_"));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode PCMPISetMat(PC pc)
{
  PC_MPI            *km = pc ? (PC_MPI *)pc->data : NULL;
  Mat                A;
  PetscInt           m, n, *ia, *ja, j, bs;
  Mat                sA;
  MPI_Comm           comm = PC_MPI_COMM_WORLD;
  KSP                ksp;
  PetscLayout        layout;
  const PetscInt    *IA = NULL, *JA = NULL;
  const PetscInt    *range;
  PetscMPIInt       *NZ = NULL, sendcounti[PC_MPI_MAX_RANKS], displi[PC_MPI_MAX_RANKS], *NZdispl = NULL, nz, size, i;
  PetscScalar       *a;
  const PetscScalar *sa = NULL;
  PetscInt           matproperties[7] = {0, 0, 0, 0, 0, 0, 0};

  PetscFunctionBegin;
  PetscCallMPI(MPI_Scatter(pc ? km->ksps : NULL, 1, MPI_AINT, &ksp, 1, MPI_AINT, 0, comm));
  if (!ksp) PetscFunctionReturn(PETSC_SUCCESS);
  PetscCall(PetscObjectGetComm((PetscObject)ksp, &comm));
  if (pc) {
    PetscBool isset, issymmetric, ishermitian, isspd, isstructurallysymmetric;

    PetscCallMPI(MPI_Comm_size(comm, &size));
    PCMPIMatCounts[size - 1]++;
    PetscCall(PCGetOperators(pc, &sA, &sA));
    PetscCall(MatGetSize(sA, &matproperties[0], &matproperties[1]));
    PetscCall(MatGetBlockSize(sA, &bs));
    matproperties[2] = bs;
    PetscCall(MatIsSymmetricKnown(sA, &isset, &issymmetric));
    matproperties[3] = !isset ? 0 : (issymmetric ? 1 : 2);
    PetscCall(MatIsHermitianKnown(sA, &isset, &ishermitian));
    matproperties[4] = !isset ? 0 : (ishermitian ? 1 : 2);
    PetscCall(MatIsSPDKnown(sA, &isset, &isspd));
    matproperties[5] = !isset ? 0 : (isspd ? 1 : 2);
    PetscCall(MatIsStructurallySymmetricKnown(sA, &isset, &isstructurallysymmetric));
    matproperties[6] = !isset ? 0 : (isstructurallysymmetric ? 1 : 2);
  }
  PetscCallMPI(MPI_Bcast(matproperties, 7, MPIU_INT, 0, comm));

  /* determine ownership ranges of matrix columns */
  PetscCall(PetscLayoutCreate(comm, &layout));
  PetscCall(PetscLayoutSetBlockSize(layout, matproperties[2]));
  PetscCall(PetscLayoutSetSize(layout, matproperties[1]));
  PetscCall(PetscLayoutSetUp(layout));
  PetscCall(PetscLayoutGetLocalSize(layout, &n));
  PetscCall(PetscLayoutDestroy(&layout));

  /* determine ownership ranges of matrix rows */
  PetscCall(PetscLayoutCreate(comm, &layout));
  PetscCall(PetscLayoutSetBlockSize(layout, matproperties[2]));
  PetscCall(PetscLayoutSetSize(layout, matproperties[0]));
  PetscCall(PetscLayoutSetUp(layout));
  PetscCall(PetscLayoutGetLocalSize(layout, &m));

  /* copy over the matrix nonzero structure and values */
  if (pc) {
    NZ      = km->NZ;
    NZdispl = km->NZdispl;
    PetscCall(PetscLayoutGetRanges(layout, &range));
    PetscCall(MatGetRowIJ(sA, 0, PETSC_FALSE, PETSC_FALSE, NULL, &IA, &JA, NULL));
    for (i = 0; i < size; i++) {
      sendcounti[i] = (PetscMPIInt)(1 + range[i + 1] - range[i]);
      NZ[i]         = (PetscMPIInt)(IA[range[i + 1]] - IA[range[i]]);
    }
    displi[0]  = 0;
    NZdispl[0] = 0;
    for (j = 1; j < size; j++) {
      displi[j]  = displi[j - 1] + sendcounti[j - 1] - 1;
      NZdispl[j] = NZdispl[j - 1] + NZ[j - 1];
    }
    PetscCall(MatSeqAIJGetArrayRead(sA, &sa));
  }
  PetscCall(PetscLayoutDestroy(&layout));
  PetscCallMPI(MPI_Scatter(NZ, 1, MPI_INT, &nz, 1, MPI_INT, 0, comm));

  PetscCall(PetscMalloc3(n + 1, &ia, nz, &ja, nz, &a));
  PetscCallMPI(MPI_Scatterv(IA, sendcounti, displi, MPIU_INT, ia, n + 1, MPIU_INT, 0, comm));
  PetscCallMPI(MPI_Scatterv(JA, NZ, NZdispl, MPIU_INT, ja, nz, MPIU_INT, 0, comm));
  PetscCallMPI(MPI_Scatterv(sa, NZ, NZdispl, MPIU_SCALAR, a, nz, MPIU_SCALAR, 0, comm));

  if (pc) {
    PetscCall(MatSeqAIJRestoreArrayRead(sA, &sa));
    PetscCall(MatRestoreRowIJ(sA, 0, PETSC_FALSE, PETSC_FALSE, NULL, &IA, &JA, NULL));
  }

  for (j = 1; j < n + 1; j++) ia[j] -= ia[0];
  ia[0] = 0;
  PetscCall(PetscLogStagePush(PCMPIStage));
  PetscCall(MatCreateMPIAIJWithArrays(comm, m, n, matproperties[0], matproperties[1], ia, ja, a, &A));
  PetscCall(MatSetBlockSize(A, matproperties[2]));
  PetscCall(MatSetOptionsPrefix(A, "mpi_"));
  if (matproperties[3]) PetscCall(MatSetOption(A, MAT_SYMMETRIC, matproperties[3] == 1 ? PETSC_TRUE : PETSC_FALSE));
  if (matproperties[4]) PetscCall(MatSetOption(A, MAT_HERMITIAN, matproperties[4] == 1 ? PETSC_TRUE : PETSC_FALSE));
  if (matproperties[5]) PetscCall(MatSetOption(A, MAT_SPD, matproperties[5] == 1 ? PETSC_TRUE : PETSC_FALSE));
  if (matproperties[6]) PetscCall(MatSetOption(A, MAT_STRUCTURALLY_SYMMETRIC, matproperties[6] == 1 ? PETSC_TRUE : PETSC_FALSE));

  PetscCall(PetscFree3(ia, ja, a));
  PetscCall(KSPSetOperators(ksp, A, A));
  if (!ksp->vec_sol) PetscCall(MatCreateVecs(A, &ksp->vec_sol, &ksp->vec_rhs));
  PetscCall(PetscLogStagePop());
  if (pc) { /* needed for scatterv/gatherv of rhs and solution */
    const PetscInt *range;

    PetscCall(VecGetOwnershipRanges(ksp->vec_sol, &range));
    for (i = 0; i < size; i++) {
      km->sendcount[i] = (PetscMPIInt)(range[i + 1] - range[i]);
      km->displ[i]     = (PetscMPIInt)range[i];
    }
  }
  PetscCall(MatDestroy(&A));
  PetscCall(KSPSetFromOptions(ksp));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode PCMPIUpdateMatValues(PC pc)
{
  PC_MPI            *km = pc ? (PC_MPI *)pc->data : NULL;
  KSP                ksp;
  Mat                sA, A;
  MPI_Comm           comm = PC_MPI_COMM_WORLD;
  PetscScalar       *a;
  PetscCount         nz;
  const PetscScalar *sa = NULL;
  PetscMPIInt        size;
  PetscInt           matproperties[4] = {0, 0, 0, 0};

  PetscFunctionBegin;
  if (pc) {
    PetscCall(PCGetOperators(pc, &sA, &sA));
    PetscCall(MatSeqAIJGetArrayRead(sA, &sa));
  }
  PetscCallMPI(MPI_Scatter(pc ? km->ksps : NULL, 1, MPI_AINT, &ksp, 1, MPI_AINT, 0, comm));
  if (!ksp) PetscFunctionReturn(PETSC_SUCCESS);
  PetscCall(PetscObjectGetComm((PetscObject)ksp, &comm));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PCMPIMatCounts[size - 1]++;
  PetscCall(KSPGetOperators(ksp, NULL, &A));
  PetscCall(MatMPIAIJGetNumberNonzeros(A, &nz));
  PetscCall(PetscMalloc1(nz, &a));
  PetscCallMPI(MPI_Scatterv(sa, pc ? km->NZ : NULL, pc ? km->NZdispl : NULL, MPIU_SCALAR, a, nz, MPIU_SCALAR, 0, comm));
  if (pc) {
    PetscBool isset, issymmetric, ishermitian, isspd, isstructurallysymmetric;

    PetscCall(MatSeqAIJRestoreArrayRead(sA, &sa));

    PetscCall(MatIsSymmetricKnown(sA, &isset, &issymmetric));
    matproperties[0] = !isset ? 0 : (issymmetric ? 1 : 2);
    PetscCall(MatIsHermitianKnown(sA, &isset, &ishermitian));
    matproperties[1] = !isset ? 0 : (ishermitian ? 1 : 2);
    PetscCall(MatIsSPDKnown(sA, &isset, &isspd));
    matproperties[2] = !isset ? 0 : (isspd ? 1 : 2);
    PetscCall(MatIsStructurallySymmetricKnown(sA, &isset, &isstructurallysymmetric));
    matproperties[3] = !isset ? 0 : (isstructurallysymmetric ? 1 : 2);
  }
  PetscCall(MatUpdateMPIAIJWithArray(A, a));
  PetscCall(PetscFree(a));
  PetscCallMPI(MPI_Bcast(matproperties, 4, MPIU_INT, 0, comm));
  /* if any of these properties was previously set and is now not set this will result in incorrect properties in A since there is no way to unset a property */
  if (matproperties[0]) PetscCall(MatSetOption(A, MAT_SYMMETRIC, matproperties[0] == 1 ? PETSC_TRUE : PETSC_FALSE));
  if (matproperties[1]) PetscCall(MatSetOption(A, MAT_HERMITIAN, matproperties[1] == 1 ? PETSC_TRUE : PETSC_FALSE));
  if (matproperties[2]) PetscCall(MatSetOption(A, MAT_SPD, matproperties[2] == 1 ? PETSC_TRUE : PETSC_FALSE));
  if (matproperties[3]) PetscCall(MatSetOption(A, MAT_STRUCTURALLY_SYMMETRIC, matproperties[3] == 1 ? PETSC_TRUE : PETSC_FALSE));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode PCMPISolve(PC pc, Vec B, Vec X)
{
  PC_MPI            *km = pc ? (PC_MPI *)pc->data : NULL;
  KSP                ksp;
  MPI_Comm           comm = PC_MPI_COMM_WORLD;
  const PetscScalar *sb = NULL, *x;
  PetscScalar       *b, *sx = NULL;
  PetscInt           its, n;
  PetscMPIInt        size;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Scatter(pc ? km->ksps : &ksp, 1, MPI_AINT, &ksp, 1, MPI_AINT, 0, comm));
  if (!ksp) PetscFunctionReturn(PETSC_SUCCESS);
  PetscCall(PetscObjectGetComm((PetscObject)ksp, &comm));

  /* TODO: optimize code to not require building counts/displ every time */

  /* scatterv rhs */
  PetscCallMPI(MPI_Comm_size(comm, &size));
  if (pc) {
    PetscInt N;

    PCMPISolveCounts[size - 1]++;
    PetscCall(MatGetSize(pc->pmat, &N, NULL));
    PCMPISizes[size - 1] += N;
    PetscCall(VecGetArrayRead(B, &sb));
  }
  PetscCall(VecGetLocalSize(ksp->vec_rhs, &n));
  PetscCall(VecGetArray(ksp->vec_rhs, &b));
  PetscCallMPI(MPI_Scatterv(sb, pc ? km->sendcount : NULL, pc ? km->displ : NULL, MPIU_SCALAR, b, n, MPIU_SCALAR, 0, comm));
  PetscCall(VecRestoreArray(ksp->vec_rhs, &b));
  if (pc) PetscCall(VecRestoreArrayRead(B, &sb));

  PetscCall(PetscLogStagePush(PCMPIStage));
  PetscCall(KSPSolve(ksp, NULL, NULL));
  PetscCall(PetscLogStagePop());
  PetscCall(KSPGetIterationNumber(ksp, &its));
  PCMPIIterations[size - 1] += its;

  /* gather solution */
  PetscCall(VecGetArrayRead(ksp->vec_sol, &x));
  if (pc) PetscCall(VecGetArray(X, &sx));
  PetscCallMPI(MPI_Gatherv(x, n, MPIU_SCALAR, sx, pc ? km->sendcount : NULL, pc ? km->displ : NULL, MPIU_SCALAR, 0, comm));
  if (pc) PetscCall(VecRestoreArray(X, &sx));
  PetscCall(VecRestoreArrayRead(ksp->vec_sol, &x));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode PCMPIDestroy(PC pc)
{
  PC_MPI  *km = pc ? (PC_MPI *)pc->data : NULL;
  KSP      ksp;
  MPI_Comm comm = PC_MPI_COMM_WORLD;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Scatter(pc ? km->ksps : NULL, 1, MPI_AINT, &ksp, 1, MPI_AINT, 0, comm));
  if (!ksp) PetscFunctionReturn(PETSC_SUCCESS);
  PetscCall(KSPDestroy(&ksp));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
     PCMPIServerBegin - starts a server that runs on the rank != 0 MPI processes waiting to process requests for
     parallel `KSP` solves and management of parallel `KSP` objects.

   Logically collective on all MPI ranks except 0

   Options Database Keys:
+   -mpi_linear_solver_server - causes the PETSc program to start in MPI linear solver server mode where only the first MPI rank runs user code
-   -mpi_linear_solver_server_view - displays information about the linear systems solved by the MPI linear solver server

   Note:
   This is normally started automatically in `PetscInitialize()` when the option is provided

   Developer Notes:
   When called on rank zero this sets `PETSC_COMM_WORLD` to `PETSC_COMM_SELF` to allow a main program
   written with `PETSC_COMM_WORLD` to run correctly on that single rank, while all the other ranks
   (that would normally be sharing `PETSC_COMM_WORLD`) run the solver server.

   Can this be integrated into the `PetscDevice` abstraction that is currently being developed?

   Level: developer

.seealso: `PCMPIServerEnd()`, `PCMPI`
@*/
PetscErrorCode PCMPIServerBegin(void)
{
  PetscMPIInt rank;

  PetscFunctionBegin;
  PetscCall(PetscInfo(NULL, "Starting MPI Linear Solver Server\n"));
  if (PetscDefined(USE_SINGLE_LIBRARY)) {
    PetscCall(VecInitializePackage());
    PetscCall(MatInitializePackage());
    PetscCall(DMInitializePackage());
    PetscCall(PCInitializePackage());
    PetscCall(KSPInitializePackage());
    PetscCall(SNESInitializePackage());
    PetscCall(TSInitializePackage());
    PetscCall(TaoInitializePackage());
  }

  PetscCallMPI(MPI_Comm_rank(PC_MPI_COMM_WORLD, &rank));
  if (rank == 0) {
    PETSC_COMM_WORLD = PETSC_COMM_SELF;
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  while (PETSC_TRUE) {
    PCMPICommand request = PCMPI_CREATE;
    PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, PC_MPI_COMM_WORLD));
    switch (request) {
    case PCMPI_CREATE:
      PetscCall(PCMPICreate(NULL));
      break;
    case PCMPI_SET_MAT:
      PetscCall(PCMPISetMat(NULL));
      break;
    case PCMPI_UPDATE_MAT_VALUES:
      PetscCall(PCMPIUpdateMatValues(NULL));
      break;
    case PCMPI_VIEW:
      // PetscCall(PCMPIView(NULL));
      break;
    case PCMPI_SOLVE:
      PetscCall(PCMPISolve(NULL, NULL, NULL));
      break;
    case PCMPI_DESTROY:
      PetscCall(PCMPIDestroy(NULL));
      break;
    case PCMPI_EXIT:
      PetscCall(PetscFinalize());
      exit(0); /* not sure if this is a good idea, but cannot return because it would run the user's main program */
      break;
    default:
      break;
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
     PCMPIServerEnd - ends a server that runs on the rank != 0 MPI processes waiting to process requests for
     parallel `KSP` solves and management of parallel `KSP` objects.

   Logically collective on all MPI ranks except 0

   Note:
   This is normally ended automatically in `PetscFinalize()` when the option is provided

   Level: developer

.seealso: `PCMPIServerBegin()`, `PCMPI`
@*/
PetscErrorCode PCMPIServerEnd(void)
{
  PCMPICommand request = PCMPI_EXIT;

  PetscFunctionBegin;
  if (PetscGlobalRank == 0) {
    PetscViewer       viewer = NULL;
    PetscViewerFormat format;

    PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, PC_MPI_COMM_WORLD));
    PETSC_COMM_WORLD = MPI_COMM_WORLD; /* could use PC_MPI_COMM_WORLD */
    PetscOptionsBegin(PETSC_COMM_SELF, NULL, "MPI linear solver server options", NULL);
    PetscCall(PetscOptionsViewer("-mpi_linear_solver_server_view", "View information about systems solved with the server", "PCMPI", &viewer, &format, NULL));
    PetscOptionsEnd();
    if (viewer) {
      PetscBool isascii;

      PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii));
      if (isascii) {
        PetscMPIInt size;
        PetscMPIInt i;

        PetscCallMPI(MPI_Comm_size(PETSC_COMM_WORLD, &size));
        PetscCall(PetscViewerASCIIPrintf(viewer, "MPI linear solver server statistics:\n"));
        PetscCall(PetscViewerASCIIPrintf(viewer, "    Ranks        KSPSolve()s     Mats     KSPs       Avg. Size      Avg. Its\n"));
        if (PCMPIKSPCountsSeq) {
          PetscCall(PetscViewerASCIIPrintf(viewer, "  Sequential         %" PetscInt_FMT "                         %" PetscInt_FMT "            %" PetscInt_FMT "           %" PetscInt_FMT "\n", PCMPISolveCountsSeq, PCMPIKSPCountsSeq, PCMPISizesSeq / PCMPISolveCountsSeq, PCMPIIterationsSeq / PCMPISolveCountsSeq));
        }
        for (i = 0; i < size; i++) {
          if (PCMPIKSPCounts[i]) {
            PetscCall(PetscViewerASCIIPrintf(viewer, "     %d               %" PetscInt_FMT "            %" PetscInt_FMT "           %" PetscInt_FMT "            %" PetscInt_FMT "            %" PetscInt_FMT "\n", i + 1, PCMPISolveCounts[i], PCMPIMatCounts[i], PCMPIKSPCounts[i], PCMPISizes[i] / PCMPISolveCounts[i], PCMPIIterations[i] / PCMPISolveCounts[i]));
          }
        }
      }
      PetscCall(PetscViewerDestroy(&viewer));
    }
  }
  PetscCall(PCMPICommsDestroy());
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
     This version is used in the trivial case when the MPI parallel solver server is running on just the original MPI rank 0,
     for example because the problem is small. This version is more efficient because it does not require copying any data.
*/
static PetscErrorCode PCSetUp_Seq(PC pc)
{
  PC_MPI     *km = (PC_MPI *)pc->data;
  Mat         sA;
  const char *prefix;

  PetscFunctionBegin;
  PetscCall(PCGetOperators(pc, NULL, &sA));
  PetscCall(PCGetOptionsPrefix(pc, &prefix));
  PetscCall(KSPCreate(PETSC_COMM_SELF, &km->ksps[0]));
  PetscCall(KSPSetOptionsPrefix(km->ksps[0], prefix));
  PetscCall(KSPAppendOptionsPrefix(km->ksps[0], "mpi_"));
  PetscCall(KSPSetOperators(km->ksps[0], sA, sA));
  PetscCall(KSPSetFromOptions(km->ksps[0]));
  PetscCall(KSPSetUp(km->ksps[0]));
  PetscCall(PetscInfo((PetscObject)pc, "MPI parallel linear solver system is being solved directly on rank 0 due to its small size\n"));
  PCMPIKSPCountsSeq++;
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode PCApply_Seq(PC pc, Vec b, Vec x)
{
  PC_MPI  *km = (PC_MPI *)pc->data;
  PetscInt its, n;
  Mat      A;

  PetscFunctionBegin;
  PetscCall(KSPSolve(km->ksps[0], b, x));
  PetscCall(KSPGetIterationNumber(km->ksps[0], &its));
  PCMPISolveCountsSeq++;
  PCMPIIterationsSeq += its;
  PetscCall(KSPGetOperators(km->ksps[0], NULL, &A));
  PetscCall(MatGetSize(A, &n, NULL));
  PCMPISizesSeq += n;
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode PCView_Seq(PC pc, PetscViewer viewer)
{
  PC_MPI     *km = (PC_MPI *)pc->data;
  const char *prefix;

  PetscFunctionBegin;
  PetscCall(PetscViewerASCIIPrintf(viewer, "Running MPI linear solver server directly on rank 0 due to its small size\n"));
  PetscCall(PetscViewerASCIIPrintf(viewer, "Desired minimum number of nonzeros per rank for MPI parallel solve %d\n", (int)km->mincntperrank));
  PetscCall(PCGetOptionsPrefix(pc, &prefix));
  if (prefix) PetscCall(PetscViewerASCIIPrintf(viewer, "*** Use -%smpi_ksp_view to see the MPI KSP parameters ***\n", prefix));
  else PetscCall(PetscViewerASCIIPrintf(viewer, "*** Use -mpi_ksp_view to see the MPI KSP parameters ***\n"));
  PetscCall(PetscViewerASCIIPrintf(viewer, "*** Use -mpi_linear_solver_server_view to see statistics on all the solves ***\n"));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode PCDestroy_Seq(PC pc)
{
  PC_MPI *km = (PC_MPI *)pc->data;

  PetscFunctionBegin;
  PetscCall(KSPDestroy(&km->ksps[0]));
  PetscCall(PetscFree(pc->data));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
     PCSetUp_MPI - Trigger the creation of the MPI parallel PC and copy parts of the matrix and
                   right-hand side to the parallel PC
*/
static PetscErrorCode PCSetUp_MPI(PC pc)
{
  PC_MPI      *km = (PC_MPI *)pc->data;
  PetscMPIInt  rank, size;
  PCMPICommand request;
  PetscBool    newmatrix = PETSC_FALSE;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_rank(MPI_COMM_WORLD, &rank));
  PetscCheck(rank == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "PCMPI can only be used from 0th rank of MPI_COMM_WORLD. Perhaps a missing -mpi_linear_solver_server?");
  PetscCallMPI(MPI_Comm_size(MPI_COMM_WORLD, &size));

  if (!pc->setupcalled) {
    if (!km->alwaysuseserver) {
      PetscInt n;
      Mat      sA;
      /* short circuit for small systems */
      PetscCall(PCGetOperators(pc, &sA, &sA));
      PetscCall(MatGetSize(sA, &n, NULL));
      if (n < 2 * km->mincntperrank - 1 || size == 1) {
        pc->ops->setup   = NULL;
        pc->ops->apply   = PCApply_Seq;
        pc->ops->destroy = PCDestroy_Seq;
        pc->ops->view    = PCView_Seq;
        PetscCall(PCSetUp_Seq(pc));
        PetscFunctionReturn(PETSC_SUCCESS);
      }
    }

    request = PCMPI_CREATE;
    PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, MPI_COMM_WORLD));
    PetscCall(PCMPICreate(pc));
    newmatrix = PETSC_TRUE;
  }
  if (pc->flag == DIFFERENT_NONZERO_PATTERN) newmatrix = PETSC_TRUE;

  if (newmatrix) {
    PetscCall(PetscInfo((PetscObject)pc, "New matrix or matrix has changed nonzero structure\n"));
    request = PCMPI_SET_MAT;
    PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, MPI_COMM_WORLD));
    PetscCall(PCMPISetMat(pc));
  } else {
    PetscCall(PetscInfo((PetscObject)pc, "Matrix has only changed nonzero values\n"));
    request = PCMPI_UPDATE_MAT_VALUES;
    PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, MPI_COMM_WORLD));
    PetscCall(PCMPIUpdateMatValues(pc));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode PCApply_MPI(PC pc, Vec b, Vec x)
{
  PCMPICommand request = PCMPI_SOLVE;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, MPI_COMM_WORLD));
  PetscCall(PCMPISolve(pc, b, x));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode PCDestroy_MPI(PC pc)
{
  PCMPICommand request = PCMPI_DESTROY;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, MPI_COMM_WORLD));
  PetscCall(PCMPIDestroy(pc));
  PetscCall(PetscFree(pc->data));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
   PCView_MPI - Cannot call view on the MPI parallel KSP because the other ranks do not have access to the viewer
*/
PetscErrorCode PCView_MPI(PC pc, PetscViewer viewer)
{
  PC_MPI     *km = (PC_MPI *)pc->data;
  MPI_Comm    comm;
  PetscMPIInt size;
  const char *prefix;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)km->ksps[0], &comm));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCall(PetscViewerASCIIPrintf(viewer, "Size of MPI communicator used for MPI parallel KSP solve %d\n", size));
  PetscCall(PetscViewerASCIIPrintf(viewer, "Desired minimum number of nonzeros per rank for MPI parallel solve %d\n", (int)km->mincntperrank));
  PetscCall(PCGetOptionsPrefix(pc, &prefix));
  if (prefix) PetscCall(PetscViewerASCIIPrintf(viewer, "*** Use -%smpi_ksp_view to see the MPI KSP parameters ***\n", prefix));
  else PetscCall(PetscViewerASCIIPrintf(viewer, "*** Use -mpi_ksp_view to see the MPI KSP parameters ***\n"));
  PetscCall(PetscViewerASCIIPrintf(viewer, "*** Use -mpi_linear_solver_server_view to see statistics on all the solves ***\n"));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode PCSetFromOptions_MPI(PC pc, PetscOptionItems *PetscOptionsObject)
{
  PC_MPI *km = (PC_MPI *)pc->data;

  PetscFunctionBegin;
  PetscOptionsHeadBegin(PetscOptionsObject, "MPI linear solver server options");
  PetscCall(PetscOptionsInt("-pc_mpi_minimum_count_per_rank", "Desired minimum number of nonzeros per rank", "None", km->mincntperrank, &km->mincntperrank, NULL));
  PetscCall(PetscOptionsBool("-pc_mpi_always_use_server", "Use the server even if only one rank is used for the solve (for debugging)", "None", km->alwaysuseserver, &km->alwaysuseserver, NULL));
  PetscOptionsHeadEnd();
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*MC
     PCMPI - Calls an MPI parallel `KSP` to solve a linear system from user code running on one process

   Level: beginner

   Options Database Keys:
+  -mpi_linear_solver_server - causes the PETSc program to start in MPI linear solver server mode where only the first MPI rank runs user code
.  -mpi_linear_solver_server_view - displays information about the linear systems solved by the MPI linear solver server
.  -pc_mpi_minimum_count_per_rank - sets the minimum size of the linear system per MPI rank that the solver will strive for
-  -pc_mpi_always_use_server - use the server solver code even if the particular system is solved on only one process; this option is only for debugging and testing purposes

   Notes:
   The options database prefix for the MPI parallel `KSP` and `PC` is -mpi_

   The MPI linear solver server will not support scaling user code to utilize extremely large numbers of MPI ranks, but it should give reasonable speedup for
   potentially 4 to 8 MPI ranks depending on the linear system being solved, the solver algorithm, and the hardware.

   It can be particularly useful for user OpenMP code or potentially user GPU code.

   When the program is running with a single MPI rank then this directly uses the provided matrix and right-hand side (still in a `KSP` with the options prefix -mpi_)
   and does not need to distribute the matrix and vector to the various MPI ranks; thus it incurs no extra overhead over just using the `KSP` directly.

   The solver options for the `KSP` and `PC` must be controlled via the options database; calls to set options directly on the user-level `KSP` and `PC` have no effect
   because they are not the actual solver objects.

   When `-log_view` is used with this solver the events within the parallel solve are logged in their own stage. Some of the logging in the other
   stages will be confusing since the event times are only recorded on the 0th MPI rank; thus the percent of time in the events will be misleading.

   A sketch of how an application might use this solver is given in the comment at the end of this file.

.seealso: `KSPCreate()`, `KSPSetType()`, `KSPType`, `KSP`, `PC`, `PCMPIServerBegin()`, `PCMPIServerEnd()`
M*/
PETSC_EXTERN PetscErrorCode PCCreate_MPI(PC pc)
{
  PC_MPI *km;

  PetscFunctionBegin;
  PetscCall(PetscNew(&km));
  pc->data = (void *)km;

  km->mincntperrank = 10000;

  pc->ops->setup          = PCSetUp_MPI;
  pc->ops->apply          = PCApply_MPI;
  pc->ops->destroy        = PCDestroy_MPI;
  pc->ops->view           = PCView_MPI;
  pc->ops->setfromoptions = PCSetFromOptions_MPI;
  PetscFunctionReturn(PETSC_SUCCESS);
}
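/*
   Usage sketch (illustrative comment only, not compiled as part of this file): how a sequential-looking
   application might drive the MPI linear solver server. The assembly step and the specific inner solver
   options shown here are assumptions chosen for illustration; the pieces confirmed by this file are the
   -mpi_linear_solver_server startup option, the PCMPI preconditioner, and the "mpi_" options prefix used
   by the server-side KSP.

      #include <petscksp.h>

      int main(int argc, char **argv)
      {
        Mat A;    // sequential (SeqAIJ) matrix assembled only by the user code on rank 0
        Vec b, x; // sequential right-hand side and solution
        KSP ksp;

        // With -mpi_linear_solver_server the ranks > 0 never return from PetscInitialize();
        // they sit in PCMPIServerBegin() waiting for commands broadcast from rank 0.
        PetscCall(PetscInitialize(&argc, &argv, NULL, NULL));

        // ... assemble A and b here exactly as in a uniprocess code (possibly using OpenMP) ...

        PetscCall(KSPCreate(PETSC_COMM_SELF, &ksp));
        PetscCall(KSPSetOperators(ksp, A, A));
        PetscCall(KSPSetFromOptions(ksp)); // e.g. -ksp_type preonly -pc_type mpi selects this solver
        PetscCall(KSPSolve(ksp, b, x));
        PetscCall(KSPDestroy(&ksp));

        // When -mpi_linear_solver_server was given, PetscFinalize() normally ends the server
        // (PCMPIServerEnd()), which broadcasts PCMPI_EXIT to the waiting ranks.
        PetscCall(PetscFinalize());
        return 0;
      }

   A possible launch line, with the inner (parallel) solver controlled through the mpi_ prefix:

      mpiexec -n 8 ./app -mpi_linear_solver_server -ksp_type preonly -pc_type mpi \
              -mpi_ksp_type cg -mpi_pc_type gamg -mpi_linear_solver_server_view
*/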