1 /* 2 This file creates an MPI parallel KSP from a sequential PC that lives on MPI rank 0. 3 It is intended to allow using PETSc MPI parallel linear solvers from non-MPI codes. 4 5 That program may use OpenMP to compute the right hand side and matrix for the linear system 6 7 The code uses MPI_COMM_WORLD below but maybe it should be PETSC_COMM_WORLD 8 9 The resulting KSP and PC can only be controlled via the options database, though some common commands 10 could be passed through the server. 11 12 */ 13 #include <petsc/private/pcimpl.h> 14 #include <petsc/private/kspimpl.h> 15 #include <petsc.h> 16 17 #define PC_MPI_MAX_RANKS 256 18 #define PC_MPI_COMM_WORLD MPI_COMM_WORLD 19 20 typedef struct { 21 KSP ksps[PC_MPI_MAX_RANKS]; /* The addresses of the MPI parallel KSP on each rank, NULL when not on a rank. */ 22 PetscMPIInt sendcount[PC_MPI_MAX_RANKS], displ[PC_MPI_MAX_RANKS]; /* For scatter/gather of rhs/solution */ 23 PetscMPIInt NZ[PC_MPI_MAX_RANKS], NZdispl[PC_MPI_MAX_RANKS]; /* For scatter of nonzero values in matrix (and nonzero column indices initially */ 24 PetscInt mincntperrank; /* minimum number of desired nonzeros per active rank in MPI parallel KSP solve */ 25 PetscBool alwaysuseserver; /* for debugging use the server infrastructure even if only one MPI rank is used for the solve */ 26 } PC_MPI; 27 28 typedef enum { 29 PCMPI_EXIT, /* exit the PC server loop, means the controlling sequential program is done */ 30 PCMPI_CREATE, 31 PCMPI_SET_MAT, /* set original matrix (or one with different nonzero pattern) */ 32 PCMPI_UPDATE_MAT_VALUES, /* update current matrix with new nonzero values */ 33 PCMPI_SOLVE, 34 PCMPI_VIEW, 35 PCMPI_DESTROY /* destroy a KSP that is no longer needed */ 36 } PCMPICommand; 37 38 static MPI_Comm PCMPIComms[PC_MPI_MAX_RANKS]; 39 static PetscBool PCMPICommSet = PETSC_FALSE; 40 static PetscInt PCMPISolveCounts[PC_MPI_MAX_RANKS], PCMPIKSPCounts[PC_MPI_MAX_RANKS], PCMPIMatCounts[PC_MPI_MAX_RANKS], PCMPISolveCountsSeq = 0, PCMPIKSPCountsSeq = 0; 41 static PetscInt PCMPIIterations[PC_MPI_MAX_RANKS], PCMPISizes[PC_MPI_MAX_RANKS], PCMPIIterationsSeq = 0, PCMPISizesSeq = 0; 42 43 static PetscErrorCode PCMPICommsCreate(void) 44 { 45 MPI_Comm comm = PC_MPI_COMM_WORLD; 46 PetscMPIInt size, rank, i; 47 48 PetscFunctionBegin; 49 PetscCallMPI(MPI_Comm_size(comm, &size)); 50 PetscCheck(size <= PC_MPI_MAX_RANKS, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for using more than PC_MPI_MAX_RANKS MPI ranks in an MPI linear solver server solve"); 51 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 52 /* comm for size 1 is useful only for debugging */ 53 for (i = 0; i < size; i++) { 54 PetscMPIInt color = rank < i + 1 ? 0 : MPI_UNDEFINED; 55 PetscCallMPI(MPI_Comm_split(comm, color, 0, &PCMPIComms[i])); 56 PCMPISolveCounts[i] = 0; 57 PCMPIKSPCounts[i] = 0; 58 PCMPIIterations[i] = 0; 59 PCMPISizes[i] = 0; 60 } 61 PCMPICommSet = PETSC_TRUE; 62 PetscFunctionReturn(PETSC_SUCCESS); 63 } 64 65 PetscErrorCode PCMPICommsDestroy(void) 66 { 67 MPI_Comm comm = PC_MPI_COMM_WORLD; 68 PetscMPIInt size, rank, i; 69 70 PetscFunctionBegin; 71 if (!PCMPICommSet) PetscFunctionReturn(PETSC_SUCCESS); 72 PetscCallMPI(MPI_Comm_size(comm, &size)); 73 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 74 for (i = 0; i < size; i++) { 75 if (PCMPIComms[i] != MPI_COMM_NULL) PetscCallMPI(MPI_Comm_free(&PCMPIComms[i])); 76 } 77 PCMPICommSet = PETSC_FALSE; 78 PetscFunctionReturn(PETSC_SUCCESS); 79 } 80 81 static PetscErrorCode PCMPICreate(PC pc) 82 { 83 PC_MPI *km = pc ? (PC_MPI *)pc->data : NULL; 84 MPI_Comm comm = PC_MPI_COMM_WORLD; 85 KSP ksp; 86 PetscInt N[2], mincntperrank = 0; 87 PetscMPIInt size; 88 Mat sA; 89 char *prefix; 90 PetscMPIInt len = 0; 91 92 PetscFunctionBegin; 93 if (!PCMPICommSet) PetscCall(PCMPICommsCreate()); 94 PetscCallMPI(MPI_Comm_size(comm, &size)); 95 if (pc) { 96 if (size == 1) PetscCall(PetscPrintf(PETSC_COMM_SELF, "Warning: Running KSP type of MPI on a one rank MPI run, this will be less efficient then not using this type\n")); 97 PetscCall(PCGetOperators(pc, &sA, &sA)); 98 PetscCall(MatGetSize(sA, &N[0], &N[1])); 99 } 100 PetscCallMPI(MPI_Bcast(N, 2, MPIU_INT, 0, comm)); 101 102 /* choose a suitable sized MPI_Comm for the problem to be solved on */ 103 if (km) mincntperrank = km->mincntperrank; 104 PetscCallMPI(MPI_Bcast(&mincntperrank, 1, MPI_INT, 0, comm)); 105 comm = PCMPIComms[PetscMin(size, PetscMax(1, N[0] / mincntperrank)) - 1]; 106 if (comm == MPI_COMM_NULL) { 107 ksp = NULL; 108 PetscFunctionReturn(PETSC_SUCCESS); 109 } 110 PetscCall(PetscLogStagePush(PCMPIStage)); 111 PetscCall(KSPCreate(comm, &ksp)); 112 PetscCall(PetscLogStagePop()); 113 PetscCallMPI(MPI_Gather(&ksp, 1, MPI_AINT, pc ? km->ksps : NULL, 1, MPI_AINT, 0, comm)); 114 if (pc) { 115 size_t slen; 116 117 PetscCallMPI(MPI_Comm_size(comm, &size)); 118 PCMPIKSPCounts[size - 1]++; 119 PetscCall(PCGetOptionsPrefix(pc, (const char **)&prefix)); 120 PetscCall(PetscStrlen(prefix, &slen)); 121 len = (PetscMPIInt)slen; 122 } 123 PetscCallMPI(MPI_Bcast(&len, 1, MPI_INT, 0, comm)); 124 if (len) { 125 if (!pc) PetscCall(PetscMalloc1(len + 1, &prefix)); 126 PetscCallMPI(MPI_Bcast(prefix, len + 1, MPI_CHAR, 0, comm)); 127 PetscCall(KSPSetOptionsPrefix(ksp, prefix)); 128 } 129 PetscCall(KSPAppendOptionsPrefix(ksp, "mpi_")); 130 PetscFunctionReturn(PETSC_SUCCESS); 131 } 132 133 static PetscErrorCode PCMPISetMat(PC pc) 134 { 135 PC_MPI *km = pc ? (PC_MPI *)pc->data : NULL; 136 Mat A; 137 PetscInt m, n, *ia, *ja, j, bs; 138 Mat sA; 139 MPI_Comm comm = PC_MPI_COMM_WORLD; 140 KSP ksp; 141 PetscLayout layout; 142 const PetscInt *IA = NULL, *JA = NULL; 143 const PetscInt *range; 144 PetscMPIInt *NZ = NULL, sendcounti[PC_MPI_MAX_RANKS], displi[PC_MPI_MAX_RANKS], *NZdispl = NULL, nz, size, i; 145 PetscScalar *a; 146 const PetscScalar *sa = NULL; 147 PetscInt matproperties[7] = {0, 0, 0, 0, 0, 0, 0}; 148 149 PetscFunctionBegin; 150 PetscCallMPI(MPI_Scatter(pc ? km->ksps : NULL, 1, MPI_AINT, &ksp, 1, MPI_AINT, 0, comm)); 151 if (!ksp) PetscFunctionReturn(PETSC_SUCCESS); 152 PetscCall(PetscObjectGetComm((PetscObject)ksp, &comm)); 153 if (pc) { 154 PetscBool isset, issymmetric, ishermitian, isspd, isstructurallysymmetric; 155 156 PetscCallMPI(MPI_Comm_size(comm, &size)); 157 PCMPIMatCounts[size - 1]++; 158 PetscCall(PCGetOperators(pc, &sA, &sA)); 159 PetscCall(MatGetSize(sA, &matproperties[0], &matproperties[1])); 160 PetscCall(MatGetBlockSize(sA, &bs)); 161 matproperties[2] = bs; 162 PetscCall(MatIsSymmetricKnown(sA, &isset, &issymmetric)); 163 matproperties[3] = !isset ? 0 : (issymmetric ? 1 : 2); 164 PetscCall(MatIsHermitianKnown(sA, &isset, &ishermitian)); 165 matproperties[4] = !isset ? 0 : (ishermitian ? 1 : 2); 166 PetscCall(MatIsSPDKnown(sA, &isset, &isspd)); 167 matproperties[5] = !isset ? 0 : (isspd ? 1 : 2); 168 PetscCall(MatIsStructurallySymmetricKnown(sA, &isset, &isstructurallysymmetric)); 169 matproperties[6] = !isset ? 0 : (isstructurallysymmetric ? 1 : 2); 170 } 171 PetscCallMPI(MPI_Bcast(matproperties, 7, MPIU_INT, 0, comm)); 172 173 /* determine ownership ranges of matrix columns */ 174 PetscCall(PetscLayoutCreate(comm, &layout)); 175 PetscCall(PetscLayoutSetBlockSize(layout, matproperties[2])); 176 PetscCall(PetscLayoutSetSize(layout, matproperties[1])); 177 PetscCall(PetscLayoutSetUp(layout)); 178 PetscCall(PetscLayoutGetLocalSize(layout, &n)); 179 PetscCall(PetscLayoutDestroy(&layout)); 180 181 /* determine ownership ranges of matrix rows */ 182 PetscCall(PetscLayoutCreate(comm, &layout)); 183 PetscCall(PetscLayoutSetBlockSize(layout, matproperties[2])); 184 PetscCall(PetscLayoutSetSize(layout, matproperties[0])); 185 PetscCall(PetscLayoutSetUp(layout)); 186 PetscCall(PetscLayoutGetLocalSize(layout, &m)); 187 188 /* copy over the matrix nonzero structure and values */ 189 if (pc) { 190 NZ = km->NZ; 191 NZdispl = km->NZdispl; 192 PetscCall(PetscLayoutGetRanges(layout, &range)); 193 PetscCall(MatGetRowIJ(sA, 0, PETSC_FALSE, PETSC_FALSE, NULL, &IA, &JA, NULL)); 194 for (i = 0; i < size; i++) { 195 sendcounti[i] = (PetscMPIInt)(1 + range[i + 1] - range[i]); 196 NZ[i] = (PetscMPIInt)(IA[range[i + 1]] - IA[range[i]]); 197 } 198 displi[0] = 0; 199 NZdispl[0] = 0; 200 for (j = 1; j < size; j++) { 201 displi[j] = displi[j - 1] + sendcounti[j - 1] - 1; 202 NZdispl[j] = NZdispl[j - 1] + NZ[j - 1]; 203 } 204 PetscCall(MatSeqAIJGetArrayRead(sA, &sa)); 205 } 206 PetscCall(PetscLayoutDestroy(&layout)); 207 PetscCallMPI(MPI_Scatter(NZ, 1, MPI_INT, &nz, 1, MPI_INT, 0, comm)); 208 209 PetscCall(PetscMalloc3(n + 1, &ia, nz, &ja, nz, &a)); 210 PetscCallMPI(MPI_Scatterv(IA, sendcounti, displi, MPIU_INT, ia, n + 1, MPIU_INT, 0, comm)); 211 PetscCallMPI(MPI_Scatterv(JA, NZ, NZdispl, MPIU_INT, ja, nz, MPIU_INT, 0, comm)); 212 PetscCallMPI(MPI_Scatterv(sa, NZ, NZdispl, MPIU_SCALAR, a, nz, MPIU_SCALAR, 0, comm)); 213 214 if (pc) { 215 PetscCall(MatSeqAIJRestoreArrayRead(sA, &sa)); 216 PetscCall(MatRestoreRowIJ(sA, 0, PETSC_FALSE, PETSC_FALSE, NULL, &IA, &JA, NULL)); 217 } 218 219 for (j = 1; j < n + 1; j++) ia[j] -= ia[0]; 220 ia[0] = 0; 221 PetscCall(PetscLogStagePush(PCMPIStage)); 222 PetscCall(MatCreateMPIAIJWithArrays(comm, m, n, matproperties[0], matproperties[1], ia, ja, a, &A)); 223 PetscCall(MatSetBlockSize(A, matproperties[2])); 224 PetscCall(MatSetOptionsPrefix(A, "mpi_")); 225 if (matproperties[3]) PetscCall(MatSetOption(A, MAT_SYMMETRIC, matproperties[3] == 1 ? PETSC_TRUE : PETSC_FALSE)); 226 if (matproperties[4]) PetscCall(MatSetOption(A, MAT_HERMITIAN, matproperties[4] == 1 ? PETSC_TRUE : PETSC_FALSE)); 227 if (matproperties[5]) PetscCall(MatSetOption(A, MAT_SPD, matproperties[5] == 1 ? PETSC_TRUE : PETSC_FALSE)); 228 if (matproperties[6]) PetscCall(MatSetOption(A, MAT_STRUCTURALLY_SYMMETRIC, matproperties[6] == 1 ? PETSC_TRUE : PETSC_FALSE)); 229 230 PetscCall(PetscFree3(ia, ja, a)); 231 PetscCall(KSPSetOperators(ksp, A, A)); 232 if (!ksp->vec_sol) PetscCall(MatCreateVecs(A, &ksp->vec_sol, &ksp->vec_rhs)); 233 PetscCall(PetscLogStagePop()); 234 if (pc) { /* needed for scatterv/gatherv of rhs and solution */ 235 const PetscInt *range; 236 237 PetscCall(VecGetOwnershipRanges(ksp->vec_sol, &range)); 238 for (i = 0; i < size; i++) { 239 km->sendcount[i] = (PetscMPIInt)(range[i + 1] - range[i]); 240 km->displ[i] = (PetscMPIInt)range[i]; 241 } 242 } 243 PetscCall(MatDestroy(&A)); 244 PetscCall(KSPSetFromOptions(ksp)); 245 PetscFunctionReturn(PETSC_SUCCESS); 246 } 247 248 static PetscErrorCode PCMPIUpdateMatValues(PC pc) 249 { 250 PC_MPI *km = pc ? (PC_MPI *)pc->data : NULL; 251 KSP ksp; 252 Mat sA, A; 253 MPI_Comm comm = PC_MPI_COMM_WORLD; 254 PetscScalar *a; 255 PetscCount nz; 256 const PetscScalar *sa = NULL; 257 PetscMPIInt size; 258 PetscInt matproperties[4] = {0, 0, 0, 0}; 259 260 PetscFunctionBegin; 261 if (pc) { 262 PetscCall(PCGetOperators(pc, &sA, &sA)); 263 PetscCall(MatSeqAIJGetArrayRead(sA, &sa)); 264 } 265 PetscCallMPI(MPI_Scatter(pc ? km->ksps : NULL, 1, MPI_AINT, &ksp, 1, MPI_AINT, 0, comm)); 266 if (!ksp) PetscFunctionReturn(PETSC_SUCCESS); 267 PetscCall(PetscObjectGetComm((PetscObject)ksp, &comm)); 268 PetscCallMPI(MPI_Comm_size(comm, &size)); 269 PCMPIMatCounts[size - 1]++; 270 PetscCall(KSPGetOperators(ksp, NULL, &A)); 271 PetscCall(MatMPIAIJGetNumberNonzeros(A, &nz)); 272 PetscCall(PetscMalloc1(nz, &a)); 273 PetscCallMPI(MPI_Scatterv(sa, pc ? km->NZ : NULL, pc ? km->NZdispl : NULL, MPIU_SCALAR, a, nz, MPIU_SCALAR, 0, comm)); 274 if (pc) { 275 PetscBool isset, issymmetric, ishermitian, isspd, isstructurallysymmetric; 276 277 PetscCall(MatSeqAIJRestoreArrayRead(sA, &sa)); 278 279 PetscCall(MatIsSymmetricKnown(sA, &isset, &issymmetric)); 280 matproperties[0] = !isset ? 0 : (issymmetric ? 1 : 2); 281 PetscCall(MatIsHermitianKnown(sA, &isset, &ishermitian)); 282 matproperties[1] = !isset ? 0 : (ishermitian ? 1 : 2); 283 PetscCall(MatIsSPDKnown(sA, &isset, &isspd)); 284 matproperties[2] = !isset ? 0 : (isspd ? 1 : 2); 285 PetscCall(MatIsStructurallySymmetricKnown(sA, &isset, &isstructurallysymmetric)); 286 matproperties[3] = !isset ? 0 : (isstructurallysymmetric ? 1 : 2); 287 } 288 PetscCall(MatUpdateMPIAIJWithArray(A, a)); 289 PetscCall(PetscFree(a)); 290 PetscCallMPI(MPI_Bcast(matproperties, 4, MPIU_INT, 0, comm)); 291 /* if any of these properties was previously set and is now not set this will result in incorrect properties in A since there is no way to unset a property */ 292 if (matproperties[0]) PetscCall(MatSetOption(A, MAT_SYMMETRIC, matproperties[0] == 1 ? PETSC_TRUE : PETSC_FALSE)); 293 if (matproperties[1]) PetscCall(MatSetOption(A, MAT_HERMITIAN, matproperties[1] == 1 ? PETSC_TRUE : PETSC_FALSE)); 294 if (matproperties[2]) PetscCall(MatSetOption(A, MAT_SPD, matproperties[2] == 1 ? PETSC_TRUE : PETSC_FALSE)); 295 if (matproperties[3]) PetscCall(MatSetOption(A, MAT_STRUCTURALLY_SYMMETRIC, matproperties[3] == 1 ? PETSC_TRUE : PETSC_FALSE)); 296 PetscFunctionReturn(PETSC_SUCCESS); 297 } 298 299 static PetscErrorCode PCMPISolve(PC pc, Vec B, Vec X) 300 { 301 PC_MPI *km = pc ? (PC_MPI *)pc->data : NULL; 302 KSP ksp; 303 MPI_Comm comm = PC_MPI_COMM_WORLD; 304 const PetscScalar *sb = NULL, *x; 305 PetscScalar *b, *sx = NULL; 306 PetscInt its, n; 307 PetscMPIInt size; 308 309 PetscFunctionBegin; 310 PetscCallMPI(MPI_Scatter(pc ? km->ksps : &ksp, 1, MPI_AINT, &ksp, 1, MPI_AINT, 0, comm)); 311 if (!ksp) PetscFunctionReturn(PETSC_SUCCESS); 312 PetscCall(PetscObjectGetComm((PetscObject)ksp, &comm)); 313 314 /* TODO: optimize code to not require building counts/displ every time */ 315 316 /* scatterv rhs */ 317 PetscCallMPI(MPI_Comm_size(comm, &size)); 318 if (pc) { 319 PetscInt N; 320 321 PCMPISolveCounts[size - 1]++; 322 PetscCall(MatGetSize(pc->pmat, &N, NULL)); 323 PCMPISizes[size - 1] += N; 324 PetscCall(VecGetArrayRead(B, &sb)); 325 } 326 PetscCall(VecGetLocalSize(ksp->vec_rhs, &n)); 327 PetscCall(VecGetArray(ksp->vec_rhs, &b)); 328 PetscCallMPI(MPI_Scatterv(sb, pc ? km->sendcount : NULL, pc ? km->displ : NULL, MPIU_SCALAR, b, n, MPIU_SCALAR, 0, comm)); 329 PetscCall(VecRestoreArray(ksp->vec_rhs, &b)); 330 if (pc) PetscCall(VecRestoreArrayRead(B, &sb)); 331 332 PetscCall(PetscLogStagePush(PCMPIStage)); 333 PetscCall(KSPSolve(ksp, NULL, NULL)); 334 PetscCall(PetscLogStagePop()); 335 PetscCall(KSPGetIterationNumber(ksp, &its)); 336 PCMPIIterations[size - 1] += its; 337 338 /* gather solution */ 339 PetscCall(VecGetArrayRead(ksp->vec_sol, &x)); 340 if (pc) PetscCall(VecGetArray(X, &sx)); 341 PetscCallMPI(MPI_Gatherv(x, n, MPIU_SCALAR, sx, pc ? km->sendcount : NULL, pc ? km->displ : NULL, MPIU_SCALAR, 0, comm)); 342 if (pc) PetscCall(VecRestoreArray(X, &sx)); 343 PetscCall(VecRestoreArrayRead(ksp->vec_sol, &x)); 344 PetscFunctionReturn(PETSC_SUCCESS); 345 } 346 347 static PetscErrorCode PCMPIDestroy(PC pc) 348 { 349 PC_MPI *km = pc ? (PC_MPI *)pc->data : NULL; 350 KSP ksp; 351 MPI_Comm comm = PC_MPI_COMM_WORLD; 352 353 PetscFunctionBegin; 354 PetscCallMPI(MPI_Scatter(pc ? km->ksps : NULL, 1, MPI_AINT, &ksp, 1, MPI_AINT, 0, comm)); 355 if (!ksp) PetscFunctionReturn(PETSC_SUCCESS); 356 PetscCall(KSPDestroy(&ksp)); 357 PetscFunctionReturn(PETSC_SUCCESS); 358 } 359 360 /*@C 361 PCMPIServerBegin - starts a server that runs on the rank != 0 MPI processes waiting to process requests for 362 parallel `KSP` solves and management of parallel `KSP` objects. 363 364 Logically Collective on all MPI ranks except 0 365 366 Options Database Keys: 367 + -mpi_linear_solver_server - causes the PETSc program to start in MPI linear solver server mode where only the first MPI rank runs user code 368 - -mpi_linear_solver_server_view - displays information about the linear systems solved by the MPI linear solver server 369 370 Level: developer 371 372 Note: 373 This is normally started automatically in `PetscInitialize()` when the option is provided 374 375 Developer Notes: 376 When called on rank zero this sets `PETSC_COMM_WORLD` to `PETSC_COMM_SELF` to allow a main program 377 written with `PETSC_COMM_WORLD` to run correctly on the single rank while all the ranks 378 (that would normally be sharing `PETSC_COMM_WORLD`) to run the solver server. 379 380 Can this be integrated into the `PetscDevice` abstraction that is currently being developed? 381 382 .seealso: `PCMPIServerEnd()`, `PCMPI` 383 @*/ 384 PetscErrorCode PCMPIServerBegin(void) 385 { 386 PetscMPIInt rank; 387 388 PetscFunctionBegin; 389 PetscCall(PetscInfo(NULL, "Starting MPI Linear Solver Server")); 390 if (PetscDefined(USE_SINGLE_LIBRARY)) { 391 PetscCall(VecInitializePackage()); 392 PetscCall(MatInitializePackage()); 393 PetscCall(DMInitializePackage()); 394 PetscCall(PCInitializePackage()); 395 PetscCall(KSPInitializePackage()); 396 PetscCall(SNESInitializePackage()); 397 PetscCall(TSInitializePackage()); 398 PetscCall(TaoInitializePackage()); 399 } 400 401 PetscCallMPI(MPI_Comm_rank(PC_MPI_COMM_WORLD, &rank)); 402 if (rank == 0) { 403 PETSC_COMM_WORLD = PETSC_COMM_SELF; 404 PetscFunctionReturn(PETSC_SUCCESS); 405 } 406 407 while (PETSC_TRUE) { 408 PCMPICommand request = PCMPI_CREATE; 409 PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, PC_MPI_COMM_WORLD)); 410 switch (request) { 411 case PCMPI_CREATE: 412 PetscCall(PCMPICreate(NULL)); 413 break; 414 case PCMPI_SET_MAT: 415 PetscCall(PCMPISetMat(NULL)); 416 break; 417 case PCMPI_UPDATE_MAT_VALUES: 418 PetscCall(PCMPIUpdateMatValues(NULL)); 419 break; 420 case PCMPI_VIEW: 421 // PetscCall(PCMPIView(NULL)); 422 break; 423 case PCMPI_SOLVE: 424 PetscCall(PCMPISolve(NULL, NULL, NULL)); 425 break; 426 case PCMPI_DESTROY: 427 PetscCall(PCMPIDestroy(NULL)); 428 break; 429 case PCMPI_EXIT: 430 PetscCall(PetscFinalize()); 431 exit(0); /* not sure if this is a good idea, but cannot return because it will run users main program */ 432 break; 433 default: 434 break; 435 } 436 } 437 PetscFunctionReturn(PETSC_SUCCESS); 438 } 439 440 /*@C 441 PCMPIServerEnd - ends a server that runs on the rank != 0 MPI processes waiting to process requests for 442 parallel KSP solves and management of parallel `KSP` objects. 443 444 Logically Collective on all MPI ranks except 0 445 446 Level: developer 447 448 Note: 449 This is normally ended automatically in `PetscFinalize()` when the option is provided 450 451 .seealso: `PCMPIServerBegin()`, `PCMPI` 452 @*/ 453 PetscErrorCode PCMPIServerEnd(void) 454 { 455 PCMPICommand request = PCMPI_EXIT; 456 457 PetscFunctionBegin; 458 if (PetscGlobalRank == 0) { 459 PetscViewer viewer = NULL; 460 PetscViewerFormat format; 461 462 PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, PC_MPI_COMM_WORLD)); 463 PETSC_COMM_WORLD = MPI_COMM_WORLD; /* could use PC_MPI_COMM_WORLD */ 464 PetscOptionsBegin(PETSC_COMM_SELF, NULL, "MPI linear solver server options", NULL); 465 PetscCall(PetscOptionsViewer("-mpi_linear_solver_server_view", "View information about system solved with the server", "PCMPI", &viewer, &format, NULL)); 466 PetscOptionsEnd(); 467 if (viewer) { 468 PetscBool isascii; 469 470 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii)); 471 if (isascii) { 472 PetscMPIInt size; 473 PetscMPIInt i; 474 475 PetscCallMPI(MPI_Comm_size(PETSC_COMM_WORLD, &size)); 476 PetscCall(PetscViewerASCIIPrintf(viewer, "MPI linear solver server statistics:\n")); 477 PetscCall(PetscViewerASCIIPrintf(viewer, " Ranks KSPSolve()s Mats KSPs Avg. Size Avg. Its\n")); 478 if (PCMPIKSPCountsSeq) { 479 PetscCall(PetscViewerASCIIPrintf(viewer, " Sequential %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT "\n", PCMPISolveCountsSeq, PCMPIKSPCountsSeq, PCMPISizesSeq / PCMPISolveCountsSeq, PCMPIIterationsSeq / PCMPISolveCountsSeq)); 480 } 481 for (i = 0; i < size; i++) { 482 if (PCMPIKSPCounts[i]) { 483 PetscCall(PetscViewerASCIIPrintf(viewer, " %d %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT "\n", i + 1, PCMPISolveCounts[i], PCMPIMatCounts[i], PCMPIKSPCounts[i], PCMPISizes[i] / PCMPISolveCounts[i], PCMPIIterations[i] / PCMPISolveCounts[i])); 484 } 485 } 486 } 487 PetscCall(PetscViewerDestroy(&viewer)); 488 } 489 } 490 PetscCall(PCMPICommsDestroy()); 491 PetscFunctionReturn(PETSC_SUCCESS); 492 } 493 494 /* 495 This version is used in the trivial case when the MPI parallel solver server is running on just the original MPI rank 0 496 because, for example, the problem is small. This version is more efficient because it does not require copying any data 497 */ 498 static PetscErrorCode PCSetUp_Seq(PC pc) 499 { 500 PC_MPI *km = (PC_MPI *)pc->data; 501 Mat sA; 502 const char *prefix; 503 504 PetscFunctionBegin; 505 PetscCall(PCGetOperators(pc, NULL, &sA)); 506 PetscCall(PCGetOptionsPrefix(pc, &prefix)); 507 PetscCall(KSPCreate(PETSC_COMM_SELF, &km->ksps[0])); 508 PetscCall(KSPSetOptionsPrefix(km->ksps[0], prefix)); 509 PetscCall(KSPAppendOptionsPrefix(km->ksps[0], "mpi_")); 510 PetscCall(KSPSetOperators(km->ksps[0], sA, sA)); 511 PetscCall(KSPSetFromOptions(km->ksps[0])); 512 PetscCall(KSPSetUp(km->ksps[0])); 513 PetscCall(PetscInfo((PetscObject)pc, "MPI parallel linear solver system is being solved directly on rank 0 due to its small size\n")); 514 PCMPIKSPCountsSeq++; 515 PetscFunctionReturn(PETSC_SUCCESS); 516 } 517 518 static PetscErrorCode PCApply_Seq(PC pc, Vec b, Vec x) 519 { 520 PC_MPI *km = (PC_MPI *)pc->data; 521 PetscInt its, n; 522 Mat A; 523 524 PetscFunctionBegin; 525 PetscCall(KSPSolve(km->ksps[0], b, x)); 526 PetscCall(KSPGetIterationNumber(km->ksps[0], &its)); 527 PCMPISolveCountsSeq++; 528 PCMPIIterationsSeq += its; 529 PetscCall(KSPGetOperators(km->ksps[0], NULL, &A)); 530 PetscCall(MatGetSize(A, &n, NULL)); 531 PCMPISizesSeq += n; 532 PetscFunctionReturn(PETSC_SUCCESS); 533 } 534 535 static PetscErrorCode PCView_Seq(PC pc, PetscViewer viewer) 536 { 537 PC_MPI *km = (PC_MPI *)pc->data; 538 const char *prefix; 539 540 PetscFunctionBegin; 541 PetscCall(PetscViewerASCIIPrintf(viewer, "Running MPI linear solver server directly on rank 0 due to its small size\n")); 542 PetscCall(PetscViewerASCIIPrintf(viewer, "Desired minimum number of nonzeros per rank for MPI parallel solve %d\n", (int)km->mincntperrank)); 543 PetscCall(PCGetOptionsPrefix(pc, &prefix)); 544 if (prefix) PetscCall(PetscViewerASCIIPrintf(viewer, "*** Use -%smpi_ksp_view to see the MPI KSP parameters ***\n", prefix)); 545 else PetscCall(PetscViewerASCIIPrintf(viewer, "*** Use -mpi_ksp_view to see the MPI KSP parameters ***\n")); 546 PetscCall(PetscViewerASCIIPrintf(viewer, "*** Use -mpi_linear_solver_server_view to statistics on all the solves ***\n")); 547 PetscFunctionReturn(PETSC_SUCCESS); 548 } 549 550 static PetscErrorCode PCDestroy_Seq(PC pc) 551 { 552 PC_MPI *km = (PC_MPI *)pc->data; 553 554 PetscFunctionBegin; 555 PetscCall(KSPDestroy(&km->ksps[0])); 556 PetscCall(PetscFree(pc->data)); 557 PetscFunctionReturn(PETSC_SUCCESS); 558 } 559 560 /* 561 PCSetUp_MPI - Trigger the creation of the MPI parallel PC and copy parts of the matrix and 562 right hand side to the parallel PC 563 */ 564 static PetscErrorCode PCSetUp_MPI(PC pc) 565 { 566 PC_MPI *km = (PC_MPI *)pc->data; 567 PetscMPIInt rank, size; 568 PCMPICommand request; 569 PetscBool newmatrix = PETSC_FALSE; 570 571 PetscFunctionBegin; 572 PetscCallMPI(MPI_Comm_rank(MPI_COMM_WORLD, &rank)); 573 PetscCheck(rank == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "PCMPI can only be used from 0th rank of MPI_COMM_WORLD. Perhaps a missing -mpi_linear_solver_server?"); 574 PetscCallMPI(MPI_Comm_size(MPI_COMM_WORLD, &size)); 575 576 if (!pc->setupcalled) { 577 if (!km->alwaysuseserver) { 578 PetscInt n; 579 Mat sA; 580 /* short circuit for small systems */ 581 PetscCall(PCGetOperators(pc, &sA, &sA)); 582 PetscCall(MatGetSize(sA, &n, NULL)); 583 if (n < 2 * km->mincntperrank - 1 || size == 1) { 584 pc->ops->setup = NULL; 585 pc->ops->apply = PCApply_Seq; 586 pc->ops->destroy = PCDestroy_Seq; 587 pc->ops->view = PCView_Seq; 588 PetscCall(PCSetUp_Seq(pc)); 589 PetscFunctionReturn(PETSC_SUCCESS); 590 } 591 } 592 593 request = PCMPI_CREATE; 594 PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, MPI_COMM_WORLD)); 595 PetscCall(PCMPICreate(pc)); 596 newmatrix = PETSC_TRUE; 597 } 598 if (pc->flag == DIFFERENT_NONZERO_PATTERN) newmatrix = PETSC_TRUE; 599 600 if (newmatrix) { 601 PetscCall(PetscInfo((PetscObject)pc, "New matrix or matrix has changed nonzero structure\n")); 602 request = PCMPI_SET_MAT; 603 PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, MPI_COMM_WORLD)); 604 PetscCall(PCMPISetMat(pc)); 605 } else { 606 PetscCall(PetscInfo((PetscObject)pc, "Matrix has only changed nonzero values\n")); 607 request = PCMPI_UPDATE_MAT_VALUES; 608 PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, MPI_COMM_WORLD)); 609 PetscCall(PCMPIUpdateMatValues(pc)); 610 } 611 PetscFunctionReturn(PETSC_SUCCESS); 612 } 613 614 static PetscErrorCode PCApply_MPI(PC pc, Vec b, Vec x) 615 { 616 PCMPICommand request = PCMPI_SOLVE; 617 618 PetscFunctionBegin; 619 PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, MPI_COMM_WORLD)); 620 PetscCall(PCMPISolve(pc, b, x)); 621 PetscFunctionReturn(PETSC_SUCCESS); 622 } 623 624 PetscErrorCode PCDestroy_MPI(PC pc) 625 { 626 PCMPICommand request = PCMPI_DESTROY; 627 628 PetscFunctionBegin; 629 PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, MPI_COMM_WORLD)); 630 PetscCall(PCMPIDestroy(pc)); 631 PetscCall(PetscFree(pc->data)); 632 PetscFunctionReturn(PETSC_SUCCESS); 633 } 634 635 /* 636 PCView_MPI - Cannot call view on the MPI parallel KSP because other ranks do not have access to the viewer 637 */ 638 PetscErrorCode PCView_MPI(PC pc, PetscViewer viewer) 639 { 640 PC_MPI *km = (PC_MPI *)pc->data; 641 MPI_Comm comm; 642 PetscMPIInt size; 643 const char *prefix; 644 645 PetscFunctionBegin; 646 PetscCall(PetscObjectGetComm((PetscObject)km->ksps[0], &comm)); 647 PetscCallMPI(MPI_Comm_size(comm, &size)); 648 PetscCall(PetscViewerASCIIPrintf(viewer, "Size of MPI communicator used for MPI parallel KSP solve %d\n", size)); 649 PetscCall(PetscViewerASCIIPrintf(viewer, "Desired minimum number of nonzeros per rank for MPI parallel solve %d\n", (int)km->mincntperrank)); 650 PetscCall(PCGetOptionsPrefix(pc, &prefix)); 651 if (prefix) PetscCall(PetscViewerASCIIPrintf(viewer, "*** Use -%smpi_ksp_view to see the MPI KSP parameters ***\n", prefix)); 652 else PetscCall(PetscViewerASCIIPrintf(viewer, "*** Use -mpi_ksp_view to see the MPI KSP parameters ***\n")); 653 PetscCall(PetscViewerASCIIPrintf(viewer, "*** Use -mpi_linear_solver_server_view to statistics on all the solves ***\n")); 654 PetscFunctionReturn(PETSC_SUCCESS); 655 } 656 657 PetscErrorCode PCSetFromOptions_MPI(PC pc, PetscOptionItems *PetscOptionsObject) 658 { 659 PC_MPI *km = (PC_MPI *)pc->data; 660 661 PetscFunctionBegin; 662 PetscOptionsHeadBegin(PetscOptionsObject, "MPI linear solver server options"); 663 PetscCall(PetscOptionsInt("-pc_mpi_minimum_count_per_rank", "Desired minimum number of nonzeros per rank", "None", km->mincntperrank, &km->mincntperrank, NULL)); 664 PetscCall(PetscOptionsBool("-pc_mpi_always_use_server", "Use the server even if only one rank is used for the solve (for debugging)", "None", km->alwaysuseserver, &km->alwaysuseserver, NULL)); 665 PetscOptionsHeadEnd(); 666 PetscFunctionReturn(PETSC_SUCCESS); 667 } 668 669 /*MC 670 PCMPI - Calls an MPI parallel `KSP` to solve a linear system from user code running on one process 671 672 Options Database Keys: 673 + -mpi_linear_solver_server - causes the PETSc program to start in MPI linear solver server mode where only the first MPI rank runs user code 674 . -mpi_linear_solver_server_view - displays information about the linear systems solved by the MPI linear solver server 675 . -pc_mpi_minimum_count_per_rank - sets the minimum size of the linear system per MPI rank that the solver will strive for 676 - -pc_mpi_always_use_server - use the server solver code even if the particular system is only solved on the process, this option is only for debugging and testing purposes 677 678 Level: beginner 679 680 Notes: 681 The options database prefix for the MPI parallel `KSP` and `PC` is -mpi_ 682 683 The MPI linear solver server will not support scaling user code to utilize extremely large numbers of MPI ranks but should give reasonable speedup for 684 potentially 4 to 8 MPI ranks depending on the linear system being solved, solver algorithm, and the hardware. 685 686 It can be particularly useful for user OpenMP code or potentially user GPU code. 687 688 When the program is running with a single MPI rank then this directly uses the provided matrix and right hand side (still in a `KSP` with the options prefix of -mpi) 689 and does not need to distribute the matrix and vector to the various MPI ranks; thus it incurs no extra overhead over just using the `KSP` directly. 690 691 The solver options for `KSP` and `PC` must be controlled via the options database, calls to set options directly on the user level `KSP` and `PC` have no effect 692 because they are not the actual solver objects. 693 694 When `-log_view` is used with this solver the events within the parallel solve are logging in their own stage. Some of the logging in the other 695 stages will be confusing since the event times are only recorded on the 0th MPI rank, thus the percent of time in the events will be misleading. 696 697 .seealso: `KSPCreate()`, `KSPSetType()`, `KSPType`, `KSP`, `PC`, `PCMPIServerBegin()`, `PCMPIServerEnd()` 698 M*/ 699 PETSC_EXTERN PetscErrorCode PCCreate_MPI(PC pc) 700 { 701 PC_MPI *km; 702 703 PetscFunctionBegin; 704 PetscCall(PetscNew(&km)); 705 pc->data = (void *)km; 706 707 km->mincntperrank = 10000; 708 709 pc->ops->setup = PCSetUp_MPI; 710 pc->ops->apply = PCApply_MPI; 711 pc->ops->destroy = PCDestroy_MPI; 712 pc->ops->view = PCView_MPI; 713 pc->ops->setfromoptions = PCSetFromOptions_MPI; 714 PetscFunctionReturn(PETSC_SUCCESS); 715 } 716