1 /* 2 This file creates an MPI parallel KSP from a sequential PC that lives on MPI rank 0. 3 It is intended to allow using PETSc MPI parallel linear solvers from non-MPI codes. 4 5 That program may use OpenMP to compute the right-hand side and matrix for the linear system 6 7 The code uses MPI_COMM_WORLD below but maybe it should be PETSC_COMM_WORLD 8 9 The resulting KSP and PC can only be controlled via the options database, though some common commands 10 could be passed through the server. 11 12 */ 13 #include <petsc/private/pcimpl.h> /*I "petscksp.h" I*/ 14 #include <petsc/private/kspimpl.h> 15 #include <petscts.h> 16 #include <petsctao.h> 17 #if defined(PETSC_HAVE_PTHREAD_MUTEX) 18 #include <pthread.h> 19 #endif 20 21 #define PC_MPI_MAX_RANKS 256 22 #define PC_MPI_COMM_WORLD MPI_COMM_WORLD 23 24 typedef struct { 25 KSP ksps[PC_MPI_MAX_RANKS]; /* The addresses of the MPI parallel KSP on each process, NULL when not on a process. */ 26 PetscMPIInt sendcount[PC_MPI_MAX_RANKS], displ[PC_MPI_MAX_RANKS]; /* For scatter/gather of rhs/solution */ 27 PetscMPIInt NZ[PC_MPI_MAX_RANKS], NZdispl[PC_MPI_MAX_RANKS]; /* For scatter of nonzero values in matrix (and nonzero column indices initially */ 28 PetscInt mincntperrank; /* minimum number of desired matrix rows per active rank in MPI parallel KSP solve */ 29 PetscBool alwaysuseserver; /* for debugging use the server infrastructure even if only one MPI process is used for the solve */ 30 } PC_MPI; 31 32 typedef enum { 33 PCMPI_EXIT, /* exit the PC server loop, means the controlling sequential program is done */ 34 PCMPI_CREATE, 35 PCMPI_SET_MAT, /* set original matrix (or one with different nonzero pattern) */ 36 PCMPI_UPDATE_MAT_VALUES, /* update current matrix with new nonzero values */ 37 PCMPI_SOLVE, 38 PCMPI_VIEW, 39 PCMPI_DESTROY /* destroy a PC that is no longer needed */ 40 } PCMPICommand; 41 42 static MPI_Comm PCMPIComms[PC_MPI_MAX_RANKS]; 43 static PetscBool PCMPICommSet = PETSC_FALSE; 44 static PetscInt PCMPISolveCounts[PC_MPI_MAX_RANKS], PCMPIKSPCounts[PC_MPI_MAX_RANKS], PCMPIMatCounts[PC_MPI_MAX_RANKS], PCMPISolveCountsSeq = 0, PCMPIKSPCountsSeq = 0; 45 static PetscInt PCMPIIterations[PC_MPI_MAX_RANKS], PCMPISizes[PC_MPI_MAX_RANKS], PCMPIIterationsSeq = 0, PCMPISizesSeq = 0; 46 static PetscLogEvent EventServerDist, EventServerDistMPI; 47 #if defined(PETSC_HAVE_PTHREAD_MUTEX) 48 static pthread_mutex_t *PCMPIServerLocks; 49 #else 50 static void *PCMPIServerLocks; 51 #endif 52 53 static PetscErrorCode PCMPICommsCreate(void) 54 { 55 MPI_Comm comm = PC_MPI_COMM_WORLD; 56 PetscMPIInt size, rank, i; 57 58 PetscFunctionBegin; 59 PetscCallMPI(MPI_Comm_size(comm, &size)); 60 PetscCheck(size <= PC_MPI_MAX_RANKS, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for using more than PC_MPI_MAX_RANKS MPI ranks in an MPI linear solver server solve"); 61 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 62 /* comm for size 1 is useful only for debugging */ 63 for (i = 0; i < size; i++) { 64 PetscMPIInt color = rank < i + 1 ? 0 : MPI_UNDEFINED; 65 PetscCallMPI(MPI_Comm_split(comm, color, 0, &PCMPIComms[i])); 66 PCMPISolveCounts[i] = 0; 67 PCMPIKSPCounts[i] = 0; 68 PCMPIIterations[i] = 0; 69 PCMPISizes[i] = 0; 70 } 71 PCMPICommSet = PETSC_TRUE; 72 PetscFunctionReturn(PETSC_SUCCESS); 73 } 74 75 static PetscErrorCode PCMPICommsDestroy(void) 76 { 77 MPI_Comm comm = PC_MPI_COMM_WORLD; 78 PetscMPIInt size, rank, i; 79 80 PetscFunctionBegin; 81 if (!PCMPICommSet) PetscFunctionReturn(PETSC_SUCCESS); 82 PetscCallMPI(MPI_Comm_size(comm, &size)); 83 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 84 for (i = 0; i < size; i++) { 85 if (PCMPIComms[i] != MPI_COMM_NULL) PetscCallMPI(MPI_Comm_free(&PCMPIComms[i])); 86 } 87 PCMPICommSet = PETSC_FALSE; 88 PetscFunctionReturn(PETSC_SUCCESS); 89 } 90 91 static PetscErrorCode PCMPICreate(PC pc) 92 { 93 PC_MPI *km = pc ? (PC_MPI *)pc->data : NULL; 94 MPI_Comm comm = PC_MPI_COMM_WORLD; 95 KSP ksp; 96 PetscInt N[2], mincntperrank = 0; 97 PetscMPIInt size; 98 Mat sA; 99 char *cprefix = NULL; 100 PetscMPIInt len = 0; 101 102 PetscFunctionBegin; 103 PCMPIServerInSolve = PETSC_TRUE; 104 if (!PCMPICommSet) PetscCall(PCMPICommsCreate()); 105 PetscCallMPI(MPI_Comm_size(comm, &size)); 106 if (pc) { 107 if (size == 1) PetscCall(PetscPrintf(PETSC_COMM_SELF, "Warning: Running KSP type of MPI on a one rank MPI run, this will be less efficient then not using this type\n")); 108 PetscCall(PCGetOperators(pc, &sA, &sA)); 109 PetscCall(MatGetSize(sA, &N[0], &N[1])); 110 } 111 PetscCallMPI(MPI_Bcast(N, 2, MPIU_INT, 0, comm)); 112 113 /* choose a suitable sized MPI_Comm for the problem to be solved on */ 114 if (km) mincntperrank = km->mincntperrank; 115 PetscCallMPI(MPI_Bcast(&mincntperrank, 1, MPI_INT, 0, comm)); 116 comm = PCMPIComms[PetscMin(size, PetscMax(1, N[0] / mincntperrank)) - 1]; 117 if (comm == MPI_COMM_NULL) { 118 ksp = NULL; 119 PCMPIServerInSolve = PETSC_FALSE; 120 PetscFunctionReturn(PETSC_SUCCESS); 121 } 122 PetscCall(PetscLogStagePush(PCMPIStage)); 123 PetscCall(KSPCreate(comm, &ksp)); 124 PetscCall(KSPSetNestLevel(ksp, 1)); 125 PetscCall(PetscObjectSetTabLevel((PetscObject)ksp, 1)); 126 PetscCall(PetscLogStagePop()); 127 PetscCallMPI(MPI_Gather(&ksp, 1, MPI_AINT, pc ? km->ksps : NULL, 1, MPI_AINT, 0, comm)); 128 if (pc) { 129 size_t slen; 130 const char *prefix = NULL; 131 char *found = NULL; 132 133 PetscCallMPI(MPI_Comm_size(comm, &size)); 134 PCMPIKSPCounts[size - 1]++; 135 /* Created KSP gets prefix of PC minus the mpi_linear_solver_server_ portion */ 136 PetscCall(PCGetOptionsPrefix(pc, &prefix)); 137 PetscCheck(prefix, PETSC_COMM_SELF, PETSC_ERR_PLIB, "PCMPI missing required prefix"); 138 PetscCall(PetscStrallocpy(prefix, &cprefix)); 139 PetscCall(PetscStrstr(cprefix, "mpi_linear_solver_server_", &found)); 140 PetscCheck(found, PETSC_COMM_SELF, PETSC_ERR_PLIB, "PCMPI missing mpi_linear_solver_server_ portion of prefix"); 141 *found = 0; 142 PetscCall(PetscStrlen(cprefix, &slen)); 143 len = (PetscMPIInt)slen; 144 } 145 PetscCallMPI(MPI_Bcast(&len, 1, MPI_INT, 0, comm)); 146 if (len) { 147 if (!pc) PetscCall(PetscMalloc1(len + 1, &cprefix)); 148 PetscCallMPI(MPI_Bcast(cprefix, len + 1, MPI_CHAR, 0, comm)); 149 PetscCall(KSPSetOptionsPrefix(ksp, cprefix)); 150 } 151 PetscCall(PetscFree(cprefix)); 152 PCMPIServerInSolve = PETSC_FALSE; 153 PetscFunctionReturn(PETSC_SUCCESS); 154 } 155 156 static PetscErrorCode PCMPISetMat(PC pc) 157 { 158 PC_MPI *km = pc ? (PC_MPI *)pc->data : NULL; 159 Mat A; 160 PetscInt m, n, j, bs; 161 Mat sA; 162 MPI_Comm comm = PC_MPI_COMM_WORLD; 163 KSP ksp; 164 PetscLayout layout; 165 const PetscInt *IA = NULL, *JA = NULL, *ia, *ja; 166 const PetscInt *range; 167 PetscMPIInt *NZ = NULL, sendcounti[PC_MPI_MAX_RANKS], displi[PC_MPI_MAX_RANKS], *NZdispl = NULL, nz, size, i; 168 const PetscScalar *a = NULL, *sa; 169 PetscInt matproperties[8] = {0}, rstart, rend; 170 char *cprefix; 171 172 PetscFunctionBegin; 173 PetscCallMPI(MPI_Scatter(pc ? km->ksps : NULL, 1, MPI_AINT, &ksp, 1, MPI_AINT, 0, comm)); 174 if (!ksp) PetscFunctionReturn(PETSC_SUCCESS); 175 PCMPIServerInSolve = PETSC_TRUE; 176 PetscCall(PetscLogEventBegin(EventServerDist, NULL, NULL, NULL, NULL)); 177 PetscCall(PetscObjectGetComm((PetscObject)ksp, &comm)); 178 if (pc) { 179 PetscBool isset, issymmetric, ishermitian, isspd, isstructurallysymmetric; 180 const char *prefix; 181 size_t clen; 182 183 PetscCallMPI(MPI_Comm_size(comm, &size)); 184 PCMPIMatCounts[size - 1]++; 185 PetscCall(PCGetOperators(pc, &sA, &sA)); 186 PetscCall(MatGetSize(sA, &matproperties[0], &matproperties[1])); 187 PetscCall(MatGetBlockSize(sA, &bs)); 188 matproperties[2] = bs; 189 PetscCall(MatIsSymmetricKnown(sA, &isset, &issymmetric)); 190 matproperties[3] = !isset ? 0 : (issymmetric ? 1 : 2); 191 PetscCall(MatIsHermitianKnown(sA, &isset, &ishermitian)); 192 matproperties[4] = !isset ? 0 : (ishermitian ? 1 : 2); 193 PetscCall(MatIsSPDKnown(sA, &isset, &isspd)); 194 matproperties[5] = !isset ? 0 : (isspd ? 1 : 2); 195 PetscCall(MatIsStructurallySymmetricKnown(sA, &isset, &isstructurallysymmetric)); 196 matproperties[6] = !isset ? 0 : (isstructurallysymmetric ? 1 : 2); 197 /* Created Mat gets prefix of input Mat PLUS the mpi_linear_solver_server_ portion */ 198 PetscCall(MatGetOptionsPrefix(sA, &prefix)); 199 PetscCall(PetscStrallocpy(prefix, &cprefix)); 200 PetscCall(PetscStrlen(cprefix, &clen)); 201 matproperties[7] = (PetscInt)clen; 202 } 203 PetscCallMPI(MPI_Bcast(matproperties, PETSC_STATIC_ARRAY_LENGTH(matproperties), MPIU_INT, 0, comm)); 204 205 /* determine ownership ranges of matrix columns */ 206 PetscCall(PetscLayoutCreate(comm, &layout)); 207 PetscCall(PetscLayoutSetBlockSize(layout, matproperties[2])); 208 PetscCall(PetscLayoutSetSize(layout, matproperties[1])); 209 PetscCall(PetscLayoutSetUp(layout)); 210 PetscCall(PetscLayoutGetLocalSize(layout, &n)); 211 PetscCall(PetscLayoutDestroy(&layout)); 212 213 /* determine ownership ranges of matrix rows */ 214 PetscCall(PetscLayoutCreate(comm, &layout)); 215 PetscCall(PetscLayoutSetBlockSize(layout, matproperties[2])); 216 PetscCall(PetscLayoutSetSize(layout, matproperties[0])); 217 PetscCall(PetscLayoutSetUp(layout)); 218 PetscCall(PetscLayoutGetLocalSize(layout, &m)); 219 PetscCall(PetscLayoutGetRange(layout, &rstart, &rend)); 220 221 PetscCall(PetscLogEventBegin(EventServerDistMPI, NULL, NULL, NULL, NULL)); 222 /* copy over the matrix nonzero structure and values */ 223 if (pc) { 224 PetscCall(MatGetRowIJ(sA, 0, PETSC_FALSE, PETSC_FALSE, NULL, &IA, &JA, NULL)); 225 if (!PCMPIServerUseShmget) { 226 NZ = km->NZ; 227 NZdispl = km->NZdispl; 228 PetscCall(PetscLayoutGetRanges(layout, &range)); 229 for (i = 0; i < size; i++) { 230 sendcounti[i] = (PetscMPIInt)(1 + range[i + 1] - range[i]); 231 NZ[i] = (PetscMPIInt)(IA[range[i + 1]] - IA[range[i]]); 232 } 233 displi[0] = 0; 234 NZdispl[0] = 0; 235 for (j = 1; j < size; j++) { 236 displi[j] = displi[j - 1] + sendcounti[j - 1] - 1; 237 NZdispl[j] = NZdispl[j - 1] + NZ[j - 1]; 238 } 239 } 240 PetscCall(MatSeqAIJGetArrayRead(sA, &sa)); 241 } 242 PetscCall(PetscLayoutDestroy(&layout)); 243 244 PetscCall(MatCreate(comm, &A)); 245 if (matproperties[7] > 0) { 246 if (!pc) PetscCall(PetscMalloc1(matproperties[7] + 1, &cprefix)); 247 PetscCallMPI(MPI_Bcast(cprefix, (PetscMPIInt)(matproperties[7] + 1), MPI_CHAR, 0, comm)); 248 PetscCall(MatSetOptionsPrefix(A, cprefix)); 249 PetscCall(PetscFree(cprefix)); 250 } 251 PetscCall(MatAppendOptionsPrefix(A, "mpi_linear_solver_server_")); 252 PetscCall(MatSetSizes(A, m, n, matproperties[0], matproperties[1])); 253 PetscCall(MatSetType(A, MATMPIAIJ)); 254 255 if (!PCMPIServerUseShmget) { 256 PetscMPIInt in; 257 PetscCallMPI(MPI_Scatter(NZ, 1, MPI_INT, &nz, 1, MPI_INT, 0, comm)); 258 PetscCall(PetscMalloc3(n + 1, &ia, nz, &ja, nz, &a)); 259 PetscCall(PetscMPIIntCast(n, &in)); 260 PetscCallMPI(MPI_Scatterv(IA, sendcounti, displi, MPIU_INT, (void *)ia, in + 1, MPIU_INT, 0, comm)); 261 PetscCallMPI(MPI_Scatterv(JA, NZ, NZdispl, MPIU_INT, (void *)ja, nz, MPIU_INT, 0, comm)); 262 PetscCallMPI(MPI_Scatterv(sa, NZ, NZdispl, MPIU_SCALAR, (void *)a, nz, MPIU_SCALAR, 0, comm)); 263 } else { 264 const void *addr[3] = {(const void **)IA, (const void **)JA, (const void **)sa}; 265 PCMPIServerAddresses *addresses; 266 267 PetscCall(PetscNew(&addresses)); 268 addresses->n = 3; 269 PetscCall(PetscShmgetMapAddresses(comm, addresses->n, addr, addresses->addr)); 270 ia = rstart + (PetscInt *)addresses->addr[0]; 271 ja = ia[0] + (PetscInt *)addresses->addr[1]; 272 a = ia[0] + (PetscScalar *)addresses->addr[2]; 273 PetscCall(PetscObjectContainerCompose((PetscObject)A, "PCMPIServerAddresses", (void *)addresses, (PetscErrorCode(*)(void *))PCMPIServerAddressesDestroy)); 274 } 275 276 if (pc) { 277 PetscCall(MatSeqAIJRestoreArrayRead(sA, &sa)); 278 PetscCall(MatRestoreRowIJ(sA, 0, PETSC_FALSE, PETSC_FALSE, NULL, &IA, &JA, NULL)); 279 } 280 PetscCall(PetscLogEventEnd(EventServerDistMPI, NULL, NULL, NULL, NULL)); 281 282 PetscCall(PetscLogStagePush(PCMPIStage)); 283 PetscCall(MatMPIAIJSetPreallocationCSR(A, ia, ja, a)); 284 PetscCall(MatSetBlockSize(A, matproperties[2])); 285 286 if (matproperties[3]) PetscCall(MatSetOption(A, MAT_SYMMETRIC, matproperties[3] == 1 ? PETSC_TRUE : PETSC_FALSE)); 287 if (matproperties[4]) PetscCall(MatSetOption(A, MAT_HERMITIAN, matproperties[4] == 1 ? PETSC_TRUE : PETSC_FALSE)); 288 if (matproperties[5]) PetscCall(MatSetOption(A, MAT_SPD, matproperties[5] == 1 ? PETSC_TRUE : PETSC_FALSE)); 289 if (matproperties[6]) PetscCall(MatSetOption(A, MAT_STRUCTURALLY_SYMMETRIC, matproperties[6] == 1 ? PETSC_TRUE : PETSC_FALSE)); 290 291 if (!PCMPIServerUseShmget) PetscCall(PetscFree3(ia, ja, a)); 292 PetscCall(KSPSetOperators(ksp, A, A)); 293 if (!ksp->vec_sol) PetscCall(MatCreateVecs(A, &ksp->vec_sol, &ksp->vec_rhs)); 294 PetscCall(PetscLogStagePop()); 295 if (pc && !PCMPIServerUseShmget) { /* needed for scatterv/gatherv of rhs and solution */ 296 const PetscInt *range; 297 298 PetscCall(VecGetOwnershipRanges(ksp->vec_sol, &range)); 299 for (i = 0; i < size; i++) { 300 km->sendcount[i] = (PetscMPIInt)(range[i + 1] - range[i]); 301 km->displ[i] = (PetscMPIInt)range[i]; 302 } 303 } 304 PetscCall(MatDestroy(&A)); 305 PetscCall(PetscLogEventEnd(EventServerDist, NULL, NULL, NULL, NULL)); 306 PetscCall(KSPSetFromOptions(ksp)); 307 PCMPIServerInSolve = PETSC_FALSE; 308 PetscFunctionReturn(PETSC_SUCCESS); 309 } 310 311 static PetscErrorCode PCMPIUpdateMatValues(PC pc) 312 { 313 PC_MPI *km = pc ? (PC_MPI *)pc->data : NULL; 314 KSP ksp; 315 Mat sA, A; 316 MPI_Comm comm = PC_MPI_COMM_WORLD; 317 const PetscInt *ia, *IA; 318 const PetscScalar *a; 319 PetscCount nz; 320 const PetscScalar *sa = NULL; 321 PetscMPIInt size; 322 PetscInt rstart, matproperties[4] = {0, 0, 0, 0}; 323 324 PetscFunctionBegin; 325 if (pc) { 326 PetscCall(PCGetOperators(pc, &sA, &sA)); 327 PetscCall(MatSeqAIJGetArrayRead(sA, &sa)); 328 PetscCall(MatGetRowIJ(sA, 0, PETSC_FALSE, PETSC_FALSE, NULL, &IA, NULL, NULL)); 329 } 330 PetscCallMPI(MPI_Scatter(pc ? km->ksps : NULL, 1, MPI_AINT, &ksp, 1, MPI_AINT, 0, comm)); 331 if (!ksp) PetscFunctionReturn(PETSC_SUCCESS); 332 PCMPIServerInSolve = PETSC_TRUE; 333 PetscCall(PetscLogEventBegin(EventServerDist, NULL, NULL, NULL, NULL)); 334 PetscCall(PetscObjectGetComm((PetscObject)ksp, &comm)); 335 PetscCallMPI(MPI_Comm_size(comm, &size)); 336 PCMPIMatCounts[size - 1]++; 337 PetscCall(KSPGetOperators(ksp, NULL, &A)); 338 PetscCall(PetscLogEventBegin(EventServerDistMPI, NULL, NULL, NULL, NULL)); 339 if (!PCMPIServerUseShmget) { 340 PetscMPIInt mpi_nz; 341 342 PetscCall(MatMPIAIJGetNumberNonzeros(A, &nz)); 343 PetscCall(PetscMPIIntCast(nz, &mpi_nz)); 344 PetscCall(PetscMalloc1(nz, &a)); 345 PetscCallMPI(MPI_Scatterv(sa, pc ? km->NZ : NULL, pc ? km->NZdispl : NULL, MPIU_SCALAR, (void *)a, mpi_nz, MPIU_SCALAR, 0, comm)); 346 } else { 347 PetscCall(MatGetOwnershipRange(A, &rstart, NULL)); 348 PCMPIServerAddresses *addresses; 349 PetscCall(PetscObjectContainerQuery((PetscObject)A, "PCMPIServerAddresses", (void **)&addresses)); 350 ia = rstart + (PetscInt *)addresses->addr[0]; 351 a = ia[0] + (PetscScalar *)addresses->addr[2]; 352 } 353 PetscCall(PetscLogEventEnd(EventServerDistMPI, NULL, NULL, NULL, NULL)); 354 if (pc) { 355 PetscBool isset, issymmetric, ishermitian, isspd, isstructurallysymmetric; 356 357 PetscCall(MatSeqAIJRestoreArrayRead(sA, &sa)); 358 PetscCall(MatRestoreRowIJ(sA, 0, PETSC_FALSE, PETSC_FALSE, NULL, &IA, NULL, NULL)); 359 360 PetscCall(MatIsSymmetricKnown(sA, &isset, &issymmetric)); 361 matproperties[0] = !isset ? 0 : (issymmetric ? 1 : 2); 362 PetscCall(MatIsHermitianKnown(sA, &isset, &ishermitian)); 363 matproperties[1] = !isset ? 0 : (ishermitian ? 1 : 2); 364 PetscCall(MatIsSPDKnown(sA, &isset, &isspd)); 365 matproperties[2] = !isset ? 0 : (isspd ? 1 : 2); 366 PetscCall(MatIsStructurallySymmetricKnown(sA, &isset, &isstructurallysymmetric)); 367 matproperties[3] = !isset ? 0 : (isstructurallysymmetric ? 1 : 2); 368 } 369 PetscCall(MatUpdateMPIAIJWithArray(A, a)); 370 if (!PCMPIServerUseShmget) PetscCall(PetscFree(a)); 371 PetscCallMPI(MPI_Bcast(matproperties, 4, MPIU_INT, 0, comm)); 372 /* if any of these properties was previously set and is now not set this will result in incorrect properties in A since there is no way to unset a property */ 373 if (matproperties[0]) PetscCall(MatSetOption(A, MAT_SYMMETRIC, matproperties[0] == 1 ? PETSC_TRUE : PETSC_FALSE)); 374 if (matproperties[1]) PetscCall(MatSetOption(A, MAT_HERMITIAN, matproperties[1] == 1 ? PETSC_TRUE : PETSC_FALSE)); 375 if (matproperties[2]) PetscCall(MatSetOption(A, MAT_SPD, matproperties[2] == 1 ? PETSC_TRUE : PETSC_FALSE)); 376 if (matproperties[3]) PetscCall(MatSetOption(A, MAT_STRUCTURALLY_SYMMETRIC, matproperties[3] == 1 ? PETSC_TRUE : PETSC_FALSE)); 377 PetscCall(PetscLogEventEnd(EventServerDist, NULL, NULL, NULL, NULL)); 378 PCMPIServerInSolve = PETSC_FALSE; 379 PetscFunctionReturn(PETSC_SUCCESS); 380 } 381 382 static PetscErrorCode PCMPISolve(PC pc, Vec B, Vec X) 383 { 384 PC_MPI *km = pc ? (PC_MPI *)pc->data : NULL; 385 KSP ksp; 386 MPI_Comm comm = PC_MPI_COMM_WORLD; 387 const PetscScalar *sb = NULL, *x; 388 PetscScalar *b, *sx = NULL; 389 PetscInt its, n; 390 PetscMPIInt size; 391 void *addr[2]; 392 393 PetscFunctionBegin; 394 PetscCallMPI(MPI_Scatter(pc ? km->ksps : &ksp, 1, MPI_AINT, &ksp, 1, MPI_AINT, 0, comm)); 395 if (!ksp) PetscFunctionReturn(PETSC_SUCCESS); 396 PCMPIServerInSolve = PETSC_TRUE; 397 PetscCall(PetscLogEventBegin(EventServerDist, NULL, NULL, NULL, NULL)); 398 PetscCall(PetscObjectGetComm((PetscObject)ksp, &comm)); 399 400 /* scatterv rhs */ 401 PetscCallMPI(MPI_Comm_size(comm, &size)); 402 if (pc) { 403 PetscInt N; 404 405 PCMPISolveCounts[size - 1]++; 406 PetscCall(MatGetSize(pc->pmat, &N, NULL)); 407 PCMPISizes[size - 1] += N; 408 } 409 PetscCall(VecGetLocalSize(ksp->vec_rhs, &n)); 410 PetscCall(PetscLogEventBegin(EventServerDistMPI, NULL, NULL, NULL, NULL)); 411 if (!PCMPIServerUseShmget) { 412 PetscMPIInt in; 413 414 PetscCall(VecGetArray(ksp->vec_rhs, &b)); 415 if (pc) PetscCall(VecGetArrayRead(B, &sb)); 416 PetscCall(PetscMPIIntCast(n, &in)); 417 PetscCallMPI(MPI_Scatterv(sb, pc ? km->sendcount : NULL, pc ? km->displ : NULL, MPIU_SCALAR, b, in, MPIU_SCALAR, 0, comm)); 418 if (pc) PetscCall(VecRestoreArrayRead(B, &sb)); 419 PetscCall(VecRestoreArray(ksp->vec_rhs, &b)); 420 // TODO: scatter initial guess if needed 421 } else { 422 PetscInt rstart; 423 424 if (pc) PetscCall(VecGetArrayRead(B, &sb)); 425 if (pc) PetscCall(VecGetArray(X, &sx)); 426 const void *inaddr[2] = {(const void **)sb, (const void **)sx}; 427 if (pc) PetscCall(VecRestoreArray(X, &sx)); 428 if (pc) PetscCall(VecRestoreArrayRead(B, &sb)); 429 430 PetscCall(PetscShmgetMapAddresses(comm, 2, inaddr, addr)); 431 PetscCall(VecGetOwnershipRange(ksp->vec_rhs, &rstart, NULL)); 432 PetscCall(VecPlaceArray(ksp->vec_rhs, rstart + (PetscScalar *)addr[0])); 433 PetscCall(VecPlaceArray(ksp->vec_sol, rstart + (PetscScalar *)addr[1])); 434 } 435 PetscCall(PetscLogEventEnd(EventServerDistMPI, NULL, NULL, NULL, NULL)); 436 437 PetscCall(PetscLogEventEnd(EventServerDist, NULL, NULL, NULL, NULL)); 438 PetscCall(PetscLogStagePush(PCMPIStage)); 439 PetscCall(KSPSolve(ksp, NULL, NULL)); 440 PetscCall(PetscLogStagePop()); 441 PetscCall(PetscLogEventBegin(EventServerDist, NULL, NULL, NULL, NULL)); 442 PetscCall(KSPGetIterationNumber(ksp, &its)); 443 PCMPIIterations[size - 1] += its; 444 // TODO: send iterations up to outer KSP 445 446 if (PCMPIServerUseShmget) PetscCall(PetscShmgetUnmapAddresses(2, addr)); 447 448 /* gather solution */ 449 PetscCall(PetscLogEventBegin(EventServerDistMPI, NULL, NULL, NULL, NULL)); 450 if (!PCMPIServerUseShmget) { 451 PetscMPIInt in; 452 453 PetscCall(VecGetArrayRead(ksp->vec_sol, &x)); 454 if (pc) PetscCall(VecGetArray(X, &sx)); 455 PetscCall(PetscMPIIntCast(n, &in)); 456 PetscCallMPI(MPI_Gatherv(x, in, MPIU_SCALAR, sx, pc ? km->sendcount : NULL, pc ? km->displ : NULL, MPIU_SCALAR, 0, comm)); 457 if (pc) PetscCall(VecRestoreArray(X, &sx)); 458 PetscCall(VecRestoreArrayRead(ksp->vec_sol, &x)); 459 } else { 460 PetscCall(VecResetArray(ksp->vec_rhs)); 461 PetscCall(VecResetArray(ksp->vec_sol)); 462 } 463 PetscCall(PetscLogEventEnd(EventServerDistMPI, NULL, NULL, NULL, NULL)); 464 PetscCall(PetscLogEventEnd(EventServerDist, NULL, NULL, NULL, NULL)); 465 PCMPIServerInSolve = PETSC_FALSE; 466 PetscFunctionReturn(PETSC_SUCCESS); 467 } 468 469 static PetscErrorCode PCMPIDestroy(PC pc) 470 { 471 PC_MPI *km = pc ? (PC_MPI *)pc->data : NULL; 472 KSP ksp; 473 MPI_Comm comm = PC_MPI_COMM_WORLD; 474 475 PetscFunctionBegin; 476 PetscCallMPI(MPI_Scatter(pc ? km->ksps : NULL, 1, MPI_AINT, &ksp, 1, MPI_AINT, 0, comm)); 477 if (!ksp) PetscFunctionReturn(PETSC_SUCCESS); 478 PetscCall(PetscLogStagePush(PCMPIStage)); 479 PCMPIServerInSolve = PETSC_TRUE; 480 PetscCall(KSPDestroy(&ksp)); 481 PetscCall(PetscLogStagePop()); 482 PCMPIServerInSolve = PETSC_FALSE; 483 PetscFunctionReturn(PETSC_SUCCESS); 484 } 485 486 static PetscErrorCode PCMPIServerBroadcastRequest(PCMPICommand request) 487 { 488 #if defined(PETSC_HAVE_PTHREAD_MUTEX) 489 PetscMPIInt dummy1 = 1, dummy2; 490 #endif 491 492 PetscFunctionBegin; 493 #if defined(PETSC_HAVE_PTHREAD_MUTEX) 494 if (PCMPIServerUseShmget) { 495 for (PetscMPIInt i = 1; i < PetscGlobalSize; i++) pthread_mutex_unlock(&PCMPIServerLocks[i]); 496 } 497 #endif 498 PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, MPI_COMM_WORLD)); 499 /* next line ensures the sender has already taken the lock */ 500 #if defined(PETSC_HAVE_PTHREAD_MUTEX) 501 if (PCMPIServerUseShmget) { 502 PetscCallMPI(MPI_Reduce(&dummy1, &dummy2, 1, MPI_INT, MPI_SUM, 0, PC_MPI_COMM_WORLD)); 503 for (PetscMPIInt i = 1; i < PetscGlobalSize; i++) pthread_mutex_lock(&PCMPIServerLocks[i]); 504 } 505 #endif 506 PetscFunctionReturn(PETSC_SUCCESS); 507 } 508 509 /*@C 510 PCMPIServerBegin - starts a server that runs on the `rank != 0` MPI processes waiting to process requests for 511 parallel `KSP` solves and management of parallel `KSP` objects. 512 513 Logically Collective on all MPI processes except rank 0 514 515 Options Database Keys: 516 + -mpi_linear_solver_server - causes the PETSc program to start in MPI linear solver server mode where only the first MPI rank runs user code 517 . -mpi_linear_solver_server_view - displays information about all the linear systems solved by the MPI linear solver server at the conclusion of the program 518 - -mpi_linear_solver_server_use_shared_memory - use shared memory when communicating matrices and vectors to server processes (default where supported) 519 520 Level: developer 521 522 Note: 523 This is normally started automatically in `PetscInitialize()` when the option is provided 524 525 See `PCMPI` for information on using the solver with a `KSP` object 526 527 Developer Notes: 528 When called on MPI rank 0 this sets `PETSC_COMM_WORLD` to `PETSC_COMM_SELF` to allow a main program 529 written with `PETSC_COMM_WORLD` to run correctly on the single rank while all the ranks 530 (that would normally be sharing `PETSC_COMM_WORLD`) to run the solver server. 531 532 Can this be integrated into the `PetscDevice` abstraction that is currently being developed? 533 534 Conceivably `PCREDISTRIBUTE` could be organized in a similar manner to simplify its usage 535 536 This could be implemented directly at the `KSP` level instead of using the `PCMPI` wrapper object 537 538 The code could be extended to allow an MPI + OpenMP application to use the linear solver server concept across all shared-memory 539 nodes with a single MPI process per node for the user application but multiple MPI processes per node for the linear solver. 540 541 The concept could also be extended for users's callbacks for `SNES`, `TS`, and `Tao` where the `SNESSolve()` for example, runs on 542 all MPI processes but the user callback only runs on one MPI process per node. 543 544 PETSc could also be extended with an MPI-less API that provides access to PETSc's solvers without any reference to MPI, essentially remove 545 the `MPI_Comm` argument from PETSc calls. 546 547 .seealso: [](sec_pcmpi), `PCMPIServerEnd()`, `PCMPI`, `KSPCheckPCMPI()` 548 @*/ 549 PetscErrorCode PCMPIServerBegin(void) 550 { 551 PetscMPIInt rank; 552 553 PetscFunctionBegin; 554 PetscCall(PetscInfo(NULL, "Starting MPI Linear Solver Server\n")); 555 if (PetscDefined(USE_SINGLE_LIBRARY)) { 556 PetscCall(VecInitializePackage()); 557 PetscCall(MatInitializePackage()); 558 PetscCall(DMInitializePackage()); 559 PetscCall(PCInitializePackage()); 560 PetscCall(KSPInitializePackage()); 561 PetscCall(SNESInitializePackage()); 562 PetscCall(TSInitializePackage()); 563 PetscCall(TaoInitializePackage()); 564 } 565 PetscCall(PetscLogStageRegister("PCMPI", &PCMPIStage)); 566 PetscCall(PetscLogEventRegister("ServerDist", PC_CLASSID, &EventServerDist)); 567 PetscCall(PetscLogEventRegister("ServerDistMPI", PC_CLASSID, &EventServerDistMPI)); 568 569 if (!PetscDefined(HAVE_SHMGET)) PCMPIServerUseShmget = PETSC_FALSE; 570 PetscCall(PetscOptionsGetBool(NULL, NULL, "-mpi_linear_solver_server_use_shared_memory", &PCMPIServerUseShmget, NULL)); 571 572 PetscCallMPI(MPI_Comm_rank(PC_MPI_COMM_WORLD, &rank)); 573 if (PCMPIServerUseShmget) { 574 #if defined(PETSC_HAVE_PTHREAD_MUTEX) 575 PetscMPIInt size; 576 577 PetscCallMPI(MPI_Comm_size(PETSC_COMM_WORLD, &size)); 578 if (size > 1) { 579 pthread_mutex_t *locks; 580 581 if (rank == 0) { 582 PCMPIServerActive = PETSC_TRUE; 583 PetscCall(PetscShmgetAllocateArray(size, sizeof(pthread_mutex_t), (void **)&locks)); 584 } 585 PetscCall(PetscShmgetMapAddresses(PETSC_COMM_WORLD, 1, (const void **)&locks, (void **)&PCMPIServerLocks)); 586 if (rank == 0) { 587 pthread_mutexattr_t attr; 588 589 pthread_mutexattr_init(&attr); 590 pthread_mutexattr_setpshared(&attr, PTHREAD_PROCESS_SHARED); 591 592 for (int i = 1; i < size; i++) { 593 pthread_mutex_init(&PCMPIServerLocks[i], &attr); 594 pthread_mutex_lock(&PCMPIServerLocks[i]); 595 } 596 } 597 PetscCallMPI(MPI_Barrier(PETSC_COMM_WORLD)); 598 } 599 #endif 600 } 601 if (rank == 0) { 602 PETSC_COMM_WORLD = PETSC_COMM_SELF; 603 PCMPIServerActive = PETSC_TRUE; 604 PetscFunctionReturn(PETSC_SUCCESS); 605 } 606 607 while (PETSC_TRUE) { 608 PCMPICommand request = PCMPI_CREATE; 609 #if defined(PETSC_HAVE_PTHREAD_MUTEX) 610 PetscMPIInt dummy1 = 1, dummy2; 611 #endif 612 613 // TODO: can we broadcast the number of active ranks here so only the correct subset of proccesses waits on the later scatters? 614 #if defined(PETSC_HAVE_PTHREAD_MUTEX) 615 if (PCMPIServerUseShmget) pthread_mutex_lock(&PCMPIServerLocks[PetscGlobalRank]); 616 #endif 617 PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, PC_MPI_COMM_WORLD)); 618 #if defined(PETSC_HAVE_PTHREAD_MUTEX) 619 if (PCMPIServerUseShmget) { 620 /* next line ensures PetscGlobalRank has locked before rank 0 can take the lock back */ 621 PetscCallMPI(MPI_Reduce(&dummy1, &dummy2, 1, MPI_INT, MPI_SUM, 0, PC_MPI_COMM_WORLD)); 622 pthread_mutex_unlock(&PCMPIServerLocks[PetscGlobalRank]); 623 } 624 #endif 625 switch (request) { 626 case PCMPI_CREATE: 627 PetscCall(PCMPICreate(NULL)); 628 break; 629 case PCMPI_SET_MAT: 630 PetscCall(PCMPISetMat(NULL)); 631 break; 632 case PCMPI_UPDATE_MAT_VALUES: 633 PetscCall(PCMPIUpdateMatValues(NULL)); 634 break; 635 case PCMPI_VIEW: 636 // PetscCall(PCMPIView(NULL)); 637 break; 638 case PCMPI_SOLVE: 639 PetscCall(PCMPISolve(NULL, NULL, NULL)); 640 break; 641 case PCMPI_DESTROY: 642 PetscCall(PCMPIDestroy(NULL)); 643 break; 644 case PCMPI_EXIT: 645 if (PCMPIServerUseShmget) PetscCall(PetscShmgetUnmapAddresses(1, (void **)&PCMPIServerLocks)); 646 PetscCall(PetscFinalize()); 647 exit(0); /* not sure if this is a good idea, but cannot return because it will run users main program */ 648 break; 649 default: 650 break; 651 } 652 } 653 PetscFunctionReturn(PETSC_SUCCESS); 654 } 655 656 /*@C 657 PCMPIServerEnd - ends a server that runs on the rank != 0 MPI processes waiting to process requests for 658 parallel KSP solves and management of parallel `KSP` objects. 659 660 Logically Collective on all MPI ranks except 0 661 662 Level: developer 663 664 Note: 665 This is normally called automatically in `PetscFinalize()` 666 667 .seealso: [](sec_pcmpi), `PCMPIServerBegin()`, `PCMPI`, `KSPCheckPCMPI()` 668 @*/ 669 PetscErrorCode PCMPIServerEnd(void) 670 { 671 PetscFunctionBegin; 672 if (PetscGlobalRank == 0) { 673 PetscViewer viewer = NULL; 674 PetscViewerFormat format; 675 676 PetscCall(PetscShmgetAddressesFinalize()); 677 PetscCall(PCMPIServerBroadcastRequest(PCMPI_EXIT)); 678 if (PCMPIServerUseShmget) PetscCall(PetscShmgetUnmapAddresses(1, (void **)&PCMPIServerLocks)); 679 PETSC_COMM_WORLD = MPI_COMM_WORLD; /* could use PC_MPI_COMM_WORLD */ 680 PetscOptionsBegin(PETSC_COMM_SELF, NULL, "MPI linear solver server options", NULL); 681 PetscCall(PetscOptionsViewer("-mpi_linear_solver_server_view", "View information about system solved with the server", "PCMPI", &viewer, &format, NULL)); 682 PetscOptionsEnd(); 683 if (viewer) { 684 PetscBool isascii; 685 686 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii)); 687 if (isascii) { 688 PetscMPIInt size; 689 PetscMPIInt i; 690 691 PetscCallMPI(MPI_Comm_size(PETSC_COMM_WORLD, &size)); 692 PetscCall(PetscViewerASCIIPrintf(viewer, "MPI linear solver server statistics:\n")); 693 PetscCall(PetscViewerASCIIPrintf(viewer, " Ranks KSPSolve()s Mats KSPs Avg. Size Avg. Its\n")); 694 if (PCMPIKSPCountsSeq) { 695 PetscCall(PetscViewerASCIIPrintf(viewer, " Sequential %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT "\n", PCMPISolveCountsSeq, PCMPIKSPCountsSeq, PCMPISizesSeq / PCMPISolveCountsSeq, PCMPIIterationsSeq / PCMPISolveCountsSeq)); 696 } 697 for (i = 0; i < size; i++) { 698 if (PCMPIKSPCounts[i]) { 699 PetscCall(PetscViewerASCIIPrintf(viewer, " %d %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT "\n", i + 1, PCMPISolveCounts[i], PCMPIMatCounts[i], PCMPIKSPCounts[i], PCMPISizes[i] / PCMPISolveCounts[i], PCMPIIterations[i] / PCMPISolveCounts[i])); 700 } 701 } 702 PetscCall(PetscViewerASCIIPrintf(viewer, "MPI linear solver server %susing shared memory\n", PCMPIServerUseShmget ? "" : "not ")); 703 } 704 PetscCall(PetscViewerDestroy(&viewer)); 705 } 706 } 707 PetscCall(PCMPICommsDestroy()); 708 PCMPIServerActive = PETSC_FALSE; 709 PetscFunctionReturn(PETSC_SUCCESS); 710 } 711 712 /* 713 This version is used in the trivial case when the MPI parallel solver server is running on just the original MPI rank 0 714 because, for example, the problem is small. This version is more efficient because it does not require copying any data 715 */ 716 static PetscErrorCode PCSetUp_Seq(PC pc) 717 { 718 PC_MPI *km = (PC_MPI *)pc->data; 719 Mat sA; 720 const char *prefix; 721 char *found = NULL, *cprefix; 722 723 PetscFunctionBegin; 724 PCMPIServerInSolve = PETSC_TRUE; 725 PetscCall(PCGetOperators(pc, NULL, &sA)); 726 PetscCall(PCGetOptionsPrefix(pc, &prefix)); 727 PetscCall(KSPCreate(PETSC_COMM_SELF, &km->ksps[0])); 728 PetscCall(KSPSetNestLevel(km->ksps[0], 1)); 729 PetscCall(PetscObjectSetTabLevel((PetscObject)km->ksps[0], 1)); 730 731 /* Created KSP gets prefix of PC minus the mpi_linear_solver_server_ portion */ 732 PetscCall(PCGetOptionsPrefix(pc, &prefix)); 733 PetscCheck(prefix, PETSC_COMM_SELF, PETSC_ERR_PLIB, "PCMPI missing required prefix"); 734 PetscCall(PetscStrallocpy(prefix, &cprefix)); 735 PetscCall(PetscStrstr(cprefix, "mpi_linear_solver_server_", &found)); 736 PetscCheck(found, PETSC_COMM_SELF, PETSC_ERR_PLIB, "PCMPI missing mpi_linear_solver_server_ portion of prefix"); 737 *found = 0; 738 PetscCall(KSPSetOptionsPrefix(km->ksps[0], cprefix)); 739 PetscCall(PetscFree(cprefix)); 740 741 PetscCall(KSPSetOperators(km->ksps[0], sA, sA)); 742 PetscCall(KSPSetFromOptions(km->ksps[0])); 743 PetscCall(KSPSetUp(km->ksps[0])); 744 PetscCall(PetscInfo((PetscObject)pc, "MPI parallel linear solver system is being solved directly on rank 0 due to its small size\n")); 745 PCMPIKSPCountsSeq++; 746 PCMPIServerInSolve = PETSC_FALSE; 747 PetscFunctionReturn(PETSC_SUCCESS); 748 } 749 750 static PetscErrorCode PCApply_Seq(PC pc, Vec b, Vec x) 751 { 752 PC_MPI *km = (PC_MPI *)pc->data; 753 PetscInt its, n; 754 Mat A; 755 756 PetscFunctionBegin; 757 PCMPIServerInSolve = PETSC_TRUE; 758 PetscCall(KSPSolve(km->ksps[0], b, x)); 759 PetscCall(KSPGetIterationNumber(km->ksps[0], &its)); 760 PCMPISolveCountsSeq++; 761 PCMPIIterationsSeq += its; 762 PetscCall(KSPGetOperators(km->ksps[0], NULL, &A)); 763 PetscCall(MatGetSize(A, &n, NULL)); 764 PCMPISizesSeq += n; 765 PCMPIServerInSolve = PETSC_FALSE; 766 /* 767 do not keep reference to previous rhs and solution since destroying them in the next KSPSolve() 768 my use PetscFree() instead of PCMPIArrayDeallocate() 769 */ 770 PetscCall(VecDestroy(&km->ksps[0]->vec_rhs)); 771 PetscCall(VecDestroy(&km->ksps[0]->vec_sol)); 772 PetscFunctionReturn(PETSC_SUCCESS); 773 } 774 775 static PetscErrorCode PCView_Seq(PC pc, PetscViewer viewer) 776 { 777 PC_MPI *km = (PC_MPI *)pc->data; 778 779 PetscFunctionBegin; 780 PetscCall(PetscViewerASCIIPrintf(viewer, "Running MPI linear solver server directly on rank 0 due to its small size\n")); 781 PetscCall(PetscViewerASCIIPrintf(viewer, "Desired minimum number of nonzeros per rank for MPI parallel solve %d\n", (int)km->mincntperrank)); 782 PetscCall(PetscViewerASCIIPrintf(viewer, "*** Use -mpi_linear_solver_server_view to statistics on all the solves ***\n")); 783 PetscFunctionReturn(PETSC_SUCCESS); 784 } 785 786 static PetscErrorCode PCDestroy_Seq(PC pc) 787 { 788 PC_MPI *km = (PC_MPI *)pc->data; 789 Mat A, B; 790 Vec x, b; 791 792 PetscFunctionBegin; 793 PCMPIServerInSolve = PETSC_TRUE; 794 /* since matrices and vectors are shared with outer KSP we need to ensure they are not destroyed with PetscFree() */ 795 PetscCall(KSPGetOperators(km->ksps[0], &A, &B)); 796 PetscCall(PetscObjectReference((PetscObject)A)); 797 PetscCall(PetscObjectReference((PetscObject)B)); 798 PetscCall(KSPGetSolution(km->ksps[0], &x)); 799 PetscCall(PetscObjectReference((PetscObject)x)); 800 PetscCall(KSPGetRhs(km->ksps[0], &b)); 801 PetscCall(PetscObjectReference((PetscObject)b)); 802 PetscCall(KSPDestroy(&km->ksps[0])); 803 PetscCall(PetscFree(pc->data)); 804 PCMPIServerInSolve = PETSC_FALSE; 805 PetscCall(MatDestroy(&A)); 806 PetscCall(MatDestroy(&B)); 807 PetscCall(VecDestroy(&x)); 808 PetscCall(VecDestroy(&b)); 809 PetscFunctionReturn(PETSC_SUCCESS); 810 } 811 812 /* 813 PCSetUp_MPI - Trigger the creation of the MPI parallel PC and copy parts of the matrix and 814 right-hand side to the parallel PC 815 */ 816 static PetscErrorCode PCSetUp_MPI(PC pc) 817 { 818 PC_MPI *km = (PC_MPI *)pc->data; 819 PetscMPIInt rank, size; 820 PetscBool newmatrix = PETSC_FALSE; 821 822 PetscFunctionBegin; 823 PetscCallMPI(MPI_Comm_rank(MPI_COMM_WORLD, &rank)); 824 PetscCheck(rank == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "PCMPI can only be used from 0th rank of MPI_COMM_WORLD. Perhaps a missing -mpi_linear_solver_server?"); 825 PetscCallMPI(MPI_Comm_size(MPI_COMM_WORLD, &size)); 826 827 if (!pc->setupcalled) { 828 if (!km->alwaysuseserver) { 829 PetscInt n; 830 Mat sA; 831 /* short circuit for small systems */ 832 PetscCall(PCGetOperators(pc, &sA, &sA)); 833 PetscCall(MatGetSize(sA, &n, NULL)); 834 if (n < 2 * km->mincntperrank - 1 || size == 1) { 835 pc->ops->setup = NULL; 836 pc->ops->apply = PCApply_Seq; 837 pc->ops->destroy = PCDestroy_Seq; 838 pc->ops->view = PCView_Seq; 839 PetscCall(PCSetUp_Seq(pc)); 840 PetscFunctionReturn(PETSC_SUCCESS); 841 } 842 } 843 844 PetscCall(PCMPIServerBroadcastRequest(PCMPI_CREATE)); 845 PetscCall(PCMPICreate(pc)); 846 newmatrix = PETSC_TRUE; 847 } 848 if (pc->flag == DIFFERENT_NONZERO_PATTERN) newmatrix = PETSC_TRUE; 849 850 if (newmatrix) { 851 PetscCall(PetscInfo((PetscObject)pc, "New matrix or matrix has changed nonzero structure\n")); 852 PetscCall(PCMPIServerBroadcastRequest(PCMPI_SET_MAT)); 853 PetscCall(PCMPISetMat(pc)); 854 } else { 855 PetscCall(PetscInfo((PetscObject)pc, "Matrix has only changed nonzero values\n")); 856 PetscCall(PCMPIServerBroadcastRequest(PCMPI_UPDATE_MAT_VALUES)); 857 PetscCall(PCMPIUpdateMatValues(pc)); 858 } 859 PetscFunctionReturn(PETSC_SUCCESS); 860 } 861 862 static PetscErrorCode PCApply_MPI(PC pc, Vec b, Vec x) 863 { 864 PetscFunctionBegin; 865 PetscCall(PCMPIServerBroadcastRequest(PCMPI_SOLVE)); 866 PetscCall(PCMPISolve(pc, b, x)); 867 PetscFunctionReturn(PETSC_SUCCESS); 868 } 869 870 static PetscErrorCode PCDestroy_MPI(PC pc) 871 { 872 PetscFunctionBegin; 873 PetscCall(PCMPIServerBroadcastRequest(PCMPI_DESTROY)); 874 PetscCall(PCMPIDestroy(pc)); 875 PetscCall(PetscFree(pc->data)); 876 PetscFunctionReturn(PETSC_SUCCESS); 877 } 878 879 /* 880 PCView_MPI - Cannot call view on the MPI parallel KSP because other ranks do not have access to the viewer, use options database 881 */ 882 static PetscErrorCode PCView_MPI(PC pc, PetscViewer viewer) 883 { 884 PC_MPI *km = (PC_MPI *)pc->data; 885 MPI_Comm comm; 886 PetscMPIInt size; 887 888 PetscFunctionBegin; 889 PetscCall(PetscObjectGetComm((PetscObject)km->ksps[0], &comm)); 890 PetscCallMPI(MPI_Comm_size(comm, &size)); 891 PetscCall(PetscViewerASCIIPrintf(viewer, "Size of MPI communicator used for MPI parallel KSP solve %d\n", size)); 892 PetscCall(PetscViewerASCIIPrintf(viewer, "Desired minimum number of matrix rows on each MPI process for MPI parallel solve %d\n", (int)km->mincntperrank)); 893 PetscCall(PetscViewerASCIIPrintf(viewer, "*** Use -mpi_linear_solver_server_view to view statistics on all the solves ***\n")); 894 PetscFunctionReturn(PETSC_SUCCESS); 895 } 896 897 static PetscErrorCode PCSetFromOptions_MPI(PC pc, PetscOptionItems *PetscOptionsObject) 898 { 899 PC_MPI *km = (PC_MPI *)pc->data; 900 901 PetscFunctionBegin; 902 PetscOptionsHeadBegin(PetscOptionsObject, "MPI linear solver server options"); 903 PetscCall(PetscOptionsInt("-minimum_count_per_rank", "Desired minimum number of nonzeros per rank", "None", km->mincntperrank, &km->mincntperrank, NULL)); 904 PetscCall(PetscOptionsBool("-always_use_server", "Use the server even if only one rank is used for the solve (for debugging)", "None", km->alwaysuseserver, &km->alwaysuseserver, NULL)); 905 PetscOptionsHeadEnd(); 906 PetscFunctionReturn(PETSC_SUCCESS); 907 } 908 909 /*MC 910 PCMPI - Calls an MPI parallel `KSP` to solve a linear system from user code running on one process 911 912 Options Database Keys for the Server: 913 + -mpi_linear_solver_server - causes the PETSc program to start in MPI linear solver server mode where only the first MPI rank runs user code 914 . -mpi_linear_solver_server_view - displays information about all the linear systems solved by the MPI linear solver server 915 - -mpi_linear_solver_server_use_shared_memory <true, false> - use shared memory to distribute matrix and right hand side, defaults to true 916 917 Options Database Keys for a specific `KSP` object 918 + -[any_ksp_prefix]_mpi_linear_solver_server_minimum_count_per_rank - sets the minimum size of the linear system per MPI rank that the solver will strive for 919 - -[any_ksp_prefix]_mpi_linear_solver_server_always_use_server - use the server solver code even if the particular system is only solved on the process (for debugging and testing purposes) 920 921 Level: developer 922 923 Notes: 924 This cannot be used with vectors or matrices that are created using arrays provided by the user, such as `VecCreateWithArray()` or 925 `MatCreateSeqAIJWithArrays()` 926 927 The options database prefix for the actual solver is any prefix provided before use to the original `KSP` with `KSPSetOptionsPrefix()`, mostly commonly no prefix is used. 928 929 It can be particularly useful for user OpenMP code or potentially user GPU code. 930 931 When the program is running with a single MPI process then it directly uses the provided matrix and right-hand side 932 and does not need to distribute the matrix and vector to the various MPI processes; thus it incurs no extra overhead over just using the `KSP` directly. 933 934 The solver options for actual solving `KSP` and `PC` must be controlled via the options database, calls to set options directly on the user level `KSP` and `PC` have no effect 935 because they are not the actual solver objects. 936 937 When `-log_view` is used with this solver the events within the parallel solve are logging in their own stage. Some of the logging in the other 938 stages will be confusing since the event times are only recorded on the 0th MPI rank, thus the percent of time in the events will be misleading. 939 940 Developer Note: 941 This `PCType` is never directly selected by the user, it is set when the option `-mpi_linear_solver_server` is used and the `PC` is at the outer most nesting of 942 a `KSP`. The outer most `KSP` object is automatically set to `KSPPREONLY` and thus is not directly visible to the user. 943 944 .seealso: [](sec_pcmpi), `KSPCreate()`, `KSPSetType()`, `KSPType`, `KSP`, `PC`, `PCMPIServerBegin()`, `PCMPIServerEnd()`, `KSPCheckPCMPI()` 945 M*/ 946 PETSC_EXTERN PetscErrorCode PCCreate_MPI(PC pc) 947 { 948 PC_MPI *km; 949 char *found = NULL; 950 951 PetscFunctionBegin; 952 PetscCall(PetscStrstr(((PetscObject)pc)->prefix, "mpi_linear_solver_server_", &found)); 953 PetscCheck(found, PETSC_COMM_SELF, PETSC_ERR_PLIB, "PCMPI object prefix does not have mpi_linear_solver_server_"); 954 955 /* material from PCSetType() */ 956 PetscTryTypeMethod(pc, destroy); 957 pc->ops->destroy = NULL; 958 pc->data = NULL; 959 960 PetscCall(PetscFunctionListDestroy(&((PetscObject)pc)->qlist)); 961 PetscCall(PetscMemzero(pc->ops, sizeof(struct _PCOps))); 962 pc->modifysubmatrices = NULL; 963 pc->modifysubmatricesP = NULL; 964 pc->setupcalled = 0; 965 966 PetscCall(PetscNew(&km)); 967 pc->data = (void *)km; 968 969 km->mincntperrank = 10000; 970 971 pc->ops->setup = PCSetUp_MPI; 972 pc->ops->apply = PCApply_MPI; 973 pc->ops->destroy = PCDestroy_MPI; 974 pc->ops->view = PCView_MPI; 975 pc->ops->setfromoptions = PCSetFromOptions_MPI; 976 PetscCall(PetscObjectChangeTypeName((PetscObject)pc, PCMPI)); 977 PetscFunctionReturn(PETSC_SUCCESS); 978 } 979 980 /*@ 981 PCMPIGetKSP - Gets the `KSP` created by the `PCMPI` 982 983 Not Collective 984 985 Input Parameter: 986 . pc - the preconditioner context 987 988 Output Parameter: 989 . innerksp - the inner `KSP` 990 991 Level: advanced 992 993 .seealso: [](ch_ksp), `KSP`, `PCMPI`, `PCREDISTRIBUTE` 994 @*/ 995 PetscErrorCode PCMPIGetKSP(PC pc, KSP *innerksp) 996 { 997 PC_MPI *red = (PC_MPI *)pc->data; 998 999 PetscFunctionBegin; 1000 PetscValidHeaderSpecific(pc, PC_CLASSID, 1); 1001 PetscAssertPointer(innerksp, 2); 1002 *innerksp = red->ksps[0]; 1003 PetscFunctionReturn(PETSC_SUCCESS); 1004 } 1005