/*
     This file creates an MPI parallel KSP from a sequential PC that lives on MPI rank 0.
   It is intended to allow using PETSc MPI parallel linear solvers from non-MPI codes.

   That program may use OpenMP to compute the right-hand side and matrix for the linear system

   The code uses MPI_COMM_WORLD below but maybe it should be PETSC_COMM_WORLD

   The resulting KSP and PC can only be controlled via the options database, though some common commands
   could be passed through the server.
*/
#include <petsc/private/pcimpl.h> /*I "petscksp.h" I*/
#include <petsc/private/kspimpl.h>
#include <petscts.h>
#include <petsctao.h>
#if defined(PETSC_HAVE_PTHREAD_MUTEX)
  #include <pthread.h>
#endif

#define PC_MPI_MAX_RANKS  256
#define PC_MPI_COMM_WORLD MPI_COMM_WORLD

/* Rank-0-side bookkeeping for one MPI-server preconditioner */
typedef struct {
  KSP         ksps[PC_MPI_MAX_RANKS];                               /* The addresses of the MPI parallel KSP on each process, NULL when not on a process. */
  PetscMPIInt sendcount[PC_MPI_MAX_RANKS], displ[PC_MPI_MAX_RANKS]; /* For scatter/gather of rhs/solution */
  PetscMPIInt NZ[PC_MPI_MAX_RANKS], NZdispl[PC_MPI_MAX_RANKS];      /* For scatter of nonzero values in matrix (and nonzero column indices initially */
  PetscInt    mincntperrank;                                        /* minimum number of desired matrix rows per active rank in MPI parallel KSP solve */
  PetscBool   alwaysuseserver;                                      /* for debugging use the server infrastructure even if only one MPI process is used for the solve */
} PC_MPI;

/* Commands broadcast from rank 0 to the server loop running on the other ranks */
typedef enum {
  PCMPI_EXIT, /* exit the PC server loop, means the controlling sequential program is done */
  PCMPI_CREATE,
  PCMPI_SET_MAT,           /* set original matrix (or one with different nonzero pattern) */
  PCMPI_UPDATE_MAT_VALUES, /* update current matrix with new nonzero values */
  PCMPI_SOLVE,
  PCMPI_VIEW,
  PCMPI_DESTROY /* destroy a PC that is no longer needed */
} PCMPICommand;

/* PCMPIComms[i] is a communicator containing ranks 0..i; used to size a solve to the problem */
static MPI_Comm      PCMPIComms[PC_MPI_MAX_RANKS];
static PetscBool     PCMPICommSet = PETSC_FALSE;
/* Statistics, indexed by (number of ranks used in the solve) - 1; the *Seq variants count rank-0-only solves */
static PetscInt      PCMPISolveCounts[PC_MPI_MAX_RANKS], PCMPIKSPCounts[PC_MPI_MAX_RANKS], PCMPIMatCounts[PC_MPI_MAX_RANKS], PCMPISolveCountsSeq = 0, PCMPIKSPCountsSeq = 0;
static PetscInt      PCMPIIterations[PC_MPI_MAX_RANKS], PCMPISizes[PC_MPI_MAX_RANKS], PCMPIIterationsSeq = 0, PCMPISizesSeq = 0;
static PetscLogEvent EventServerDist, EventServerDistMPI;
#if defined(PETSC_HAVE_PTHREAD_MUTEX)
static pthread_mutex_t *PCMPIServerLocks;
#else
static void *PCMPIServerLocks;
#endif

/* Collectively create the nested communicators PCMPIComms[] and zero the per-size statistics.
   Collective on PC_MPI_COMM_WORLD. */
static PetscErrorCode PCMPICommsCreate(void)
{
  MPI_Comm    comm = PC_MPI_COMM_WORLD;
  PetscMPIInt size, rank, i;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCheck(size <= PC_MPI_MAX_RANKS, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for using more than PC_MPI_MAX_RANKS MPI ranks in an MPI linear solver server solve");
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  /* comm for size 1 is useful only for debugging */
  for (i = 0; i < size; i++) {
    /* ranks 0..i join PCMPIComms[i]; the rest get MPI_COMM_NULL */
    PetscMPIInt color = rank < i + 1 ? 0 : MPI_UNDEFINED;
    PetscCallMPI(MPI_Comm_split(comm, color, 0, &PCMPIComms[i]));
    PCMPISolveCounts[i] = 0;
    PCMPIKSPCounts[i]   = 0;
    PCMPIIterations[i]  = 0;
    PCMPISizes[i]       = 0;
  }
  PCMPICommSet = PETSC_TRUE;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Free the communicators created by PCMPICommsCreate(); no-op if they were never created */
static PetscErrorCode PCMPICommsDestroy(void)
{
  MPI_Comm    comm = PC_MPI_COMM_WORLD;
  PetscMPIInt size, rank, i;

  PetscFunctionBegin;
  if (!PCMPICommSet) PetscFunctionReturn(PETSC_SUCCESS);
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  for (i = 0; i < size; i++) {
    if (PCMPIComms[i] != MPI_COMM_NULL) PetscCallMPI(MPI_Comm_free(&PCMPIComms[i]));
  }
  PCMPICommSet = PETSC_FALSE;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Create the parallel KSP on a suitably sized subcommunicator.
   Called with the PC on rank 0 and pc == NULL on the server ranks. */
static PetscErrorCode PCMPICreate(PC pc)
{
  PC_MPI *km = pc ?
(PC_MPI *)pc->data : NULL;
  MPI_Comm    comm = PC_MPI_COMM_WORLD;
  KSP         ksp;
  PetscInt    N[2], mincntperrank = 0;
  PetscMPIInt size;
  Mat         sA;
  char       *cprefix = NULL;
  PetscMPIInt len     = 0;

  PetscFunctionBegin;
  PCMPIServerInSolve = PETSC_TRUE;
  if (!PCMPICommSet) PetscCall(PCMPICommsCreate());
  PetscCallMPI(MPI_Comm_size(comm, &size));
  if (pc) {
    if (size == 1) PetscCall(PetscPrintf(PETSC_COMM_SELF, "Warning: Running KSP type of MPI on a one rank MPI run, this will be less efficient then not using this type\n"));
    PetscCall(PCGetOperators(pc, &sA, &sA));
    PetscCall(MatGetSize(sA, &N[0], &N[1]));
  }
  /* ship the global matrix dimensions to every server rank */
  PetscCallMPI(MPI_Bcast(N, 2, MPIU_INT, 0, comm));

  /* choose a suitable sized MPI_Comm for the problem to be solved on */
  if (km) mincntperrank = km->mincntperrank;
  PetscCallMPI(MPI_Bcast(&mincntperrank, 1, MPI_INT, 0, comm));
  comm = PCMPIComms[PetscMin(size, PetscMax(1, N[0] / mincntperrank)) - 1];
  if (comm == MPI_COMM_NULL) {
    /* this rank does not participate in the solve */
    ksp                = NULL;
    PCMPIServerInSolve = PETSC_FALSE;
    PetscFunctionReturn(PETSC_SUCCESS);
  }
  PetscCall(PetscLogStagePush(PCMPIStage));
  PetscCall(KSPCreate(comm, &ksp));
  PetscCall(KSPSetNestLevel(ksp, 1));
  PetscCall(PetscObjectSetTabLevel((PetscObject)ksp, 1));
  PetscCall(PetscLogStagePop());
  /* rank 0 records each rank's KSP pointer so later commands can be routed to them.
     NOTE(review): MPI_AINT assumes a KSP pointer fits an MPI_Aint — true on supported platforms, confirm for new ports */
  PetscCallMPI(MPI_Gather(&ksp, 1, MPI_AINT, pc ? km->ksps : NULL, 1, MPI_AINT, 0, comm));
  if (pc) {
    size_t      slen;
    const char *prefix = NULL;
    char       *found  = NULL;

    PetscCallMPI(MPI_Comm_size(comm, &size));
    PCMPIKSPCounts[size - 1]++;
    /* Created KSP gets prefix of PC minus the mpi_linear_solver_server_ portion */
    PetscCall(PCGetOptionsPrefix(pc, &prefix));
    PetscCheck(prefix, PETSC_COMM_SELF, PETSC_ERR_PLIB, "PCMPI missing required prefix");
    PetscCall(PetscStrallocpy(prefix, &cprefix));
    PetscCall(PetscStrstr(cprefix, "mpi_linear_solver_server_", &found));
    PetscCheck(found, PETSC_COMM_SELF, PETSC_ERR_PLIB, "PCMPI missing mpi_linear_solver_server_ portion of prefix");
    *found = 0; /* truncate at the server marker */
    PetscCall(PetscStrlen(cprefix, &slen));
    len = (PetscMPIInt)slen;
  }
  PetscCallMPI(MPI_Bcast(&len, 1, MPI_INT, 0, comm));
  if (len) {
    if (!pc) PetscCall(PetscMalloc1(len + 1, &cprefix));
    PetscCallMPI(MPI_Bcast(cprefix, len + 1, MPI_CHAR, 0, comm));
    PetscCall(KSPSetOptionsPrefix(ksp, cprefix));
  }
  PetscCall(PetscFree(cprefix));
  PCMPIServerInSolve = PETSC_FALSE;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Distribute the rank-0 sequential AIJ matrix (structure and values) to the parallel KSP's MATMPIAIJ.
   pc is non-NULL only on rank 0. */
static PetscErrorCode PCMPISetMat(PC pc)
{
  PC_MPI            *km = pc ? (PC_MPI *)pc->data : NULL;
  Mat                A;
  PetscInt           m, n, j, bs;
  Mat                sA;
  MPI_Comm           comm = PC_MPI_COMM_WORLD;
  KSP                ksp;
  PetscLayout        layout;
  const PetscInt    *IA = NULL, *JA = NULL, *ia, *ja;
  const PetscInt    *range;
  PetscMPIInt       *NZ = NULL, sendcounti[PC_MPI_MAX_RANKS], displi[PC_MPI_MAX_RANKS], *NZdispl = NULL, nz, size, i;
  const PetscScalar *a = NULL, *sa;
  PetscInt           matproperties[8] = {0}, rstart, rend;
  char              *cprefix;

  PetscFunctionBegin;
  /* deliver to each rank its previously created KSP pointer; NULL means the rank is idle for this system */
  PetscCallMPI(MPI_Scatter(pc ?
km->ksps : NULL, 1, MPI_AINT, &ksp, 1, MPI_AINT, 0, comm));
  if (!ksp) PetscFunctionReturn(PETSC_SUCCESS);
  PCMPIServerInSolve = PETSC_TRUE;
  PetscCall(PetscLogEventBegin(EventServerDist, NULL, NULL, NULL, NULL));
  PetscCall(PetscObjectGetComm((PetscObject)ksp, &comm));
  if (pc) {
    PetscBool   isset, issymmetric, ishermitian, isspd, isstructurallysymmetric;
    const char *prefix;
    size_t      clen;

    PetscCallMPI(MPI_Comm_size(comm, &size));
    PCMPIMatCounts[size - 1]++;
    PetscCall(PCGetOperators(pc, &sA, &sA));
    /* pack sizes, block size, known symmetry flags and prefix length into one broadcast payload */
    PetscCall(MatGetSize(sA, &matproperties[0], &matproperties[1]));
    PetscCall(MatGetBlockSize(sA, &bs));
    matproperties[2] = bs;
    PetscCall(MatIsSymmetricKnown(sA, &isset, &issymmetric));
    matproperties[3] = !isset ? 0 : (issymmetric ? 1 : 2);
    PetscCall(MatIsHermitianKnown(sA, &isset, &ishermitian));
    matproperties[4] = !isset ? 0 : (ishermitian ? 1 : 2);
    PetscCall(MatIsSPDKnown(sA, &isset, &isspd));
    matproperties[5] = !isset ? 0 : (isspd ? 1 : 2);
    PetscCall(MatIsStructurallySymmetricKnown(sA, &isset, &isstructurallysymmetric));
    matproperties[6] = !isset ? 0 : (isstructurallysymmetric ? 1 : 2);
    /* Created Mat gets prefix of input Mat PLUS the mpi_linear_solver_server_ portion */
    PetscCall(MatGetOptionsPrefix(sA, &prefix));
    PetscCall(PetscStrallocpy(prefix, &cprefix));
    PetscCall(PetscStrlen(cprefix, &clen));
    matproperties[7] = (PetscInt)clen;
  }
  PetscCallMPI(MPI_Bcast(matproperties, PETSC_STATIC_ARRAY_LENGTH(matproperties), MPIU_INT, 0, comm));

  /* determine ownership ranges of matrix columns */
  PetscCall(PetscLayoutCreate(comm, &layout));
  PetscCall(PetscLayoutSetBlockSize(layout, matproperties[2]));
  PetscCall(PetscLayoutSetSize(layout, matproperties[1]));
  PetscCall(PetscLayoutSetUp(layout));
  PetscCall(PetscLayoutGetLocalSize(layout, &n));
  PetscCall(PetscLayoutDestroy(&layout));

  /* determine ownership ranges of matrix rows */
  PetscCall(PetscLayoutCreate(comm, &layout));
  PetscCall(PetscLayoutSetBlockSize(layout, matproperties[2]));
  PetscCall(PetscLayoutSetSize(layout, matproperties[0]));
  PetscCall(PetscLayoutSetUp(layout));
  PetscCall(PetscLayoutGetLocalSize(layout, &m));
  PetscCall(PetscLayoutGetRange(layout, &rstart, &rend));

  PetscCall(PetscLogEventBegin(EventServerDistMPI, NULL, NULL, NULL, NULL));
  /* copy over the matrix nonzero structure and values */
  if (pc) {
    PetscCall(MatGetRowIJ(sA, 0, PETSC_FALSE, PETSC_FALSE, NULL, &IA, &JA, NULL));
    if (!PCMPIServerUseShmget) {
      /* build scatterv counts/displacements per destination rank from the CSR row pointers */
      NZ      = km->NZ;
      NZdispl = km->NZdispl;
      PetscCall(PetscLayoutGetRanges(layout, &range));
      for (i = 0; i < size; i++) {
        sendcounti[i] = (PetscMPIInt)(1 + range[i + 1] - range[i]); /* row pointers overlap by one between ranks */
        NZ[i]         = (PetscMPIInt)(IA[range[i + 1]] - IA[range[i]]);
      }
      displi[0]  = 0;
      NZdispl[0] = 0;
      for (j = 1; j < size; j++) {
        displi[j]  = displi[j - 1] + sendcounti[j - 1] - 1;
        NZdispl[j] = NZdispl[j - 1] + NZ[j - 1];
      }
    }
    PetscCall(MatSeqAIJGetArrayRead(sA, &sa));
  }
  PetscCall(PetscLayoutDestroy(&layout));

  PetscCall(MatCreate(comm, &A));
  if (matproperties[7] > 0) {
    /* propagate the original matrix options prefix to all ranks */
    if (!pc) PetscCall(PetscMalloc1(matproperties[7] + 1, &cprefix));
    PetscCallMPI(MPI_Bcast(cprefix, matproperties[7] + 1, MPI_CHAR, 0, comm));
    PetscCall(MatSetOptionsPrefix(A, cprefix));
    PetscCall(PetscFree(cprefix));
  }
  PetscCall(MatAppendOptionsPrefix(A, "mpi_linear_solver_server_"));
  PetscCall(MatSetSizes(A, m, n, matproperties[0], matproperties[1]));
  PetscCall(MatSetType(A, MATMPIAIJ));

  if (!PCMPIServerUseShmget) {
    /* explicit message passing of the CSR pieces */
    PetscCallMPI(MPI_Scatter(NZ, 1, MPI_INT, &nz, 1, MPI_INT, 0, comm));
    PetscCall(PetscMalloc3(n + 1, &ia, nz, &ja, nz, &a));
    PetscCallMPI(MPI_Scatterv(IA, sendcounti, displi, MPIU_INT, (void *)ia, n + 1, MPIU_INT, 0, comm));
    PetscCallMPI(MPI_Scatterv(JA, NZ, NZdispl, MPIU_INT, (void *)ja, nz, MPIU_INT, 0, comm));
    PetscCallMPI(MPI_Scatterv(sa, NZ, NZdispl, MPIU_SCALAR, (void *)a, nz, MPIU_SCALAR, 0, comm));
  } else {
    /* map rank 0's arrays directly into each process via shared memory; no copies */
    const void           *addr[3] = {(const void **)IA, (const void **)JA, (const void **)sa};
    PCMPIServerAddresses *addresses;

    PetscCall(PetscNew(&addresses));
    addresses->n = 3;
    PetscCall(PetscShmgetMapAddresses(comm, addresses->n, addr, addresses->addr));
    ia = rstart + (PetscInt *)addresses->addr[0];
    ja = ia[0] + (PetscInt *)addresses->addr[1];
    a  = ia[0] + (PetscScalar *)addresses->addr[2];
    /* stash the mapped addresses on A so PCMPIUpdateMatValues() can reuse them and so they are unmapped with A */
    PetscCall(PetscObjectContainerCompose((PetscObject)A, "PCMPIServerAddresses", (void *)addresses, (PetscErrorCode(*)(void *))PCMPIServerAddressesDestroy));
  }

  if (pc) {
    PetscCall(MatSeqAIJRestoreArrayRead(sA, &sa));
    PetscCall(MatRestoreRowIJ(sA, 0, PETSC_FALSE, PETSC_FALSE, NULL, &IA, &JA, NULL));
  }
  PetscCall(PetscLogEventEnd(EventServerDistMPI, NULL, NULL, NULL, NULL));

  PetscCall(PetscLogStagePush(PCMPIStage));
  PetscCall(MatMPIAIJSetPreallocationCSR(A, ia, ja, a));
  PetscCall(MatSetBlockSize(A, matproperties[2]));

  /* replay the symmetry information recorded on rank 0; 1 == known true, 2 == known false */
  if (matproperties[3]) PetscCall(MatSetOption(A, MAT_SYMMETRIC, matproperties[3] == 1 ? PETSC_TRUE : PETSC_FALSE));
  if (matproperties[4]) PetscCall(MatSetOption(A, MAT_HERMITIAN, matproperties[4] == 1 ? PETSC_TRUE : PETSC_FALSE));
  if (matproperties[5]) PetscCall(MatSetOption(A, MAT_SPD, matproperties[5] == 1 ? PETSC_TRUE : PETSC_FALSE));
  if (matproperties[6]) PetscCall(MatSetOption(A, MAT_STRUCTURALLY_SYMMETRIC, matproperties[6] == 1 ? PETSC_TRUE : PETSC_FALSE));

  if (!PCMPIServerUseShmget) PetscCall(PetscFree3(ia, ja, a));
  PetscCall(KSPSetOperators(ksp, A, A));
  if (!ksp->vec_sol) PetscCall(MatCreateVecs(A, &ksp->vec_sol, &ksp->vec_rhs));
  PetscCall(PetscLogStagePop());
  if (pc && !PCMPIServerUseShmget) { /* needed for scatterv/gatherv of rhs and solution */
    const PetscInt *range;

    PetscCall(VecGetOwnershipRanges(ksp->vec_sol, &range));
    for (i = 0; i < size; i++) {
      km->sendcount[i] = (PetscMPIInt)(range[i + 1] - range[i]);
      km->displ[i]     = (PetscMPIInt)range[i];
    }
  }
  PetscCall(MatDestroy(&A));
  PetscCall(PetscLogEventEnd(EventServerDist, NULL, NULL, NULL, NULL));
  PetscCall(KSPSetFromOptions(ksp));
  PCMPIServerInSolve = PETSC_FALSE;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Refresh only the numerical values of the distributed matrix; the nonzero structure
   set by PCMPISetMat() is reused. pc is non-NULL only on rank 0. */
static PetscErrorCode PCMPIUpdateMatValues(PC pc)
{
  PC_MPI            *km = pc ? (PC_MPI *)pc->data : NULL;
  KSP                ksp;
  Mat                sA, A;
  MPI_Comm           comm = PC_MPI_COMM_WORLD;
  const PetscInt    *ia, *IA;
  const PetscScalar *a;
  PetscCount         nz;
  const PetscScalar *sa = NULL;
  PetscMPIInt        size;
  PetscInt           rstart, matproperties[4] = {0, 0, 0, 0};

  PetscFunctionBegin;
  if (pc) {
    PetscCall(PCGetOperators(pc, &sA, &sA));
    PetscCall(MatSeqAIJGetArrayRead(sA, &sa));
    PetscCall(MatGetRowIJ(sA, 0, PETSC_FALSE, PETSC_FALSE, NULL, &IA, NULL, NULL));
  }
  PetscCallMPI(MPI_Scatter(pc ?
km->ksps : NULL, 1, MPI_AINT, &ksp, 1, MPI_AINT, 0, comm));
  if (!ksp) PetscFunctionReturn(PETSC_SUCCESS);
  PCMPIServerInSolve = PETSC_TRUE;
  PetscCall(PetscLogEventBegin(EventServerDist, NULL, NULL, NULL, NULL));
  PetscCall(PetscObjectGetComm((PetscObject)ksp, &comm));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PCMPIMatCounts[size - 1]++;
  PetscCall(KSPGetOperators(ksp, NULL, &A));
  PetscCall(PetscLogEventBegin(EventServerDistMPI, NULL, NULL, NULL, NULL));
  if (!PCMPIServerUseShmget) {
    /* message-passing path: scatter the new values using the counts saved in PCMPISetMat() */
    PetscCall(MatMPIAIJGetNumberNonzeros(A, &nz));
    PetscCall(PetscMalloc1(nz, &a));
    PetscCallMPI(MPI_Scatterv(sa, pc ? km->NZ : NULL, pc ? km->NZdispl : NULL, MPIU_SCALAR, (void *)a, nz, MPIU_SCALAR, 0, comm));
  } else {
    /* shared-memory path: recompute this rank's view into rank 0's arrays */
    PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
    PCMPIServerAddresses *addresses;
    PetscCall(PetscObjectContainerQuery((PetscObject)A, "PCMPIServerAddresses", (void **)&addresses));
    ia = rstart + (PetscInt *)addresses->addr[0];
    a  = ia[0] + (PetscScalar *)addresses->addr[2];
  }
  PetscCall(PetscLogEventEnd(EventServerDistMPI, NULL, NULL, NULL, NULL));
  if (pc) {
    PetscBool isset, issymmetric, ishermitian, isspd, isstructurallysymmetric;

    PetscCall(MatSeqAIJRestoreArrayRead(sA, &sa));
    PetscCall(MatRestoreRowIJ(sA, 0, PETSC_FALSE, PETSC_FALSE, NULL, &IA, NULL, NULL));

    /* re-collect the symmetry flags since new values may change them; 1 == known true, 2 == known false */
    PetscCall(MatIsSymmetricKnown(sA, &isset, &issymmetric));
    matproperties[0] = !isset ? 0 : (issymmetric ? 1 : 2);
    PetscCall(MatIsHermitianKnown(sA, &isset, &ishermitian));
    matproperties[1] = !isset ? 0 : (ishermitian ? 1 : 2);
    PetscCall(MatIsSPDKnown(sA, &isset, &isspd));
    matproperties[2] = !isset ? 0 : (isspd ? 1 : 2);
    PetscCall(MatIsStructurallySymmetricKnown(sA, &isset, &isstructurallysymmetric));
    matproperties[3] = !isset ? 0 : (isstructurallysymmetric ? 1 : 2);
  }
  PetscCall(MatUpdateMPIAIJWithArray(A, a));
  if (!PCMPIServerUseShmget) PetscCall(PetscFree(a));
  PetscCallMPI(MPI_Bcast(matproperties, 4, MPIU_INT, 0, comm));
  /* if any of these properties was previously set and is now not set this will result in incorrect properties in A since there is no way to unset a property */
  if (matproperties[0]) PetscCall(MatSetOption(A, MAT_SYMMETRIC, matproperties[0] == 1 ? PETSC_TRUE : PETSC_FALSE));
  if (matproperties[1]) PetscCall(MatSetOption(A, MAT_HERMITIAN, matproperties[1] == 1 ? PETSC_TRUE : PETSC_FALSE));
  if (matproperties[2]) PetscCall(MatSetOption(A, MAT_SPD, matproperties[2] == 1 ? PETSC_TRUE : PETSC_FALSE));
  if (matproperties[3]) PetscCall(MatSetOption(A, MAT_STRUCTURALLY_SYMMETRIC, matproperties[3] == 1 ? PETSC_TRUE : PETSC_FALSE));
  PetscCall(PetscLogEventEnd(EventServerDist, NULL, NULL, NULL, NULL));
  PCMPIServerInSolve = PETSC_FALSE;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Scatter the rank-0 right-hand side, run the parallel KSPSolve(), and gather the solution back.
   B and X are the rank-0 sequential vectors; both are NULL on server ranks. */
static PetscErrorCode PCMPISolve(PC pc, Vec B, Vec X)
{
  PC_MPI            *km = pc ? (PC_MPI *)pc->data : NULL;
  KSP                ksp;
  MPI_Comm           comm = PC_MPI_COMM_WORLD;
  const PetscScalar *sb = NULL, *x;
  PetscScalar       *b, *sx = NULL;
  PetscInt           its, n;
  PetscMPIInt        size;
  void              *addr[2];

  PetscFunctionBegin;
  PetscCallMPI(MPI_Scatter(pc ?
km->ksps : &ksp, 1, MPI_AINT, &ksp, 1, MPI_AINT, 0, comm));
  if (!ksp) PetscFunctionReturn(PETSC_SUCCESS);
  PCMPIServerInSolve = PETSC_TRUE;
  PetscCall(PetscLogEventBegin(EventServerDist, NULL, NULL, NULL, NULL));
  PetscCall(PetscObjectGetComm((PetscObject)ksp, &comm));

  /* scatterv rhs */
  PetscCallMPI(MPI_Comm_size(comm, &size));
  if (pc) {
    PetscInt N;

    PCMPISolveCounts[size - 1]++;
    PetscCall(MatGetSize(pc->pmat, &N, NULL));
    PCMPISizes[size - 1] += N;
  }
  PetscCall(VecGetLocalSize(ksp->vec_rhs, &n));
  PetscCall(PetscLogEventBegin(EventServerDistMPI, NULL, NULL, NULL, NULL));
  if (!PCMPIServerUseShmget) {
    PetscCall(VecGetArray(ksp->vec_rhs, &b));
    if (pc) PetscCall(VecGetArrayRead(B, &sb));
    PetscCallMPI(MPI_Scatterv(sb, pc ? km->sendcount : NULL, pc ? km->displ : NULL, MPIU_SCALAR, b, n, MPIU_SCALAR, 0, comm));
    if (pc) PetscCall(VecRestoreArrayRead(B, &sb));
    PetscCall(VecRestoreArray(ksp->vec_rhs, &b));
    // TODO: scatter initial guess if needed
  } else {
    /* shared-memory path: point the parallel vectors directly at rank 0's arrays */
    PetscInt rstart;

    if (pc) PetscCall(VecGetArrayRead(B, &sb));
    if (pc) PetscCall(VecGetArray(X, &sx));
    const void *inaddr[2] = {(const void **)sb, (const void **)sx};
    if (pc) PetscCall(VecRestoreArray(X, &sx));
    if (pc) PetscCall(VecRestoreArrayRead(B, &sb));

    PetscCall(PetscShmgetMapAddresses(comm, 2, inaddr, addr));
    PetscCall(VecGetOwnershipRange(ksp->vec_rhs, &rstart, NULL));
    PetscCall(VecPlaceArray(ksp->vec_rhs, rstart + (PetscScalar *)addr[0]));
    PetscCall(VecPlaceArray(ksp->vec_sol, rstart + (PetscScalar *)addr[1]));
  }
  PetscCall(PetscLogEventEnd(EventServerDistMPI, NULL, NULL, NULL, NULL));

  PetscCall(PetscLogEventEnd(EventServerDist, NULL, NULL, NULL, NULL));
  PetscCall(PetscLogStagePush(PCMPIStage));
  PetscCall(KSPSolve(ksp, NULL, NULL));
  PetscCall(PetscLogStagePop());
  PetscCall(PetscLogEventBegin(EventServerDist, NULL, NULL, NULL, NULL));
  PetscCall(KSPGetIterationNumber(ksp, &its));
  PCMPIIterations[size - 1] += its;
  // TODO: send iterations up to outer KSP

  if (PCMPIServerUseShmget) PetscCall(PetscShmgetUnmapAddresses(2, addr));

  /* gather solution */
  PetscCall(PetscLogEventBegin(EventServerDistMPI, NULL, NULL, NULL, NULL));
  if (!PCMPIServerUseShmget) {
    PetscCall(VecGetArrayRead(ksp->vec_sol, &x));
    if (pc) PetscCall(VecGetArray(X, &sx));
    PetscCallMPI(MPI_Gatherv(x, n, MPIU_SCALAR, sx, pc ? km->sendcount : NULL, pc ? km->displ : NULL, MPIU_SCALAR, 0, comm));
    if (pc) PetscCall(VecRestoreArray(X, &sx));
    PetscCall(VecRestoreArrayRead(ksp->vec_sol, &x));
  } else {
    /* solution was written in place through the shared-memory mapping; just detach the arrays */
    PetscCall(VecResetArray(ksp->vec_rhs));
    PetscCall(VecResetArray(ksp->vec_sol));
  }
  PetscCall(PetscLogEventEnd(EventServerDistMPI, NULL, NULL, NULL, NULL));
  PetscCall(PetscLogEventEnd(EventServerDist, NULL, NULL, NULL, NULL));
  PCMPIServerInSolve = PETSC_FALSE;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Destroy the server-side KSP associated with this PC on all participating ranks */
static PetscErrorCode PCMPIDestroy(PC pc)
{
  PC_MPI  *km = pc ? (PC_MPI *)pc->data : NULL;
  KSP      ksp;
  MPI_Comm comm = PC_MPI_COMM_WORLD;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Scatter(pc ?
km->ksps : NULL, 1, MPI_AINT, &ksp, 1, MPI_AINT, 0, comm));
  if (!ksp) PetscFunctionReturn(PETSC_SUCCESS);
  PetscCall(PetscLogStagePush(PCMPIStage));
  PCMPIServerInSolve = PETSC_TRUE;
  PetscCall(KSPDestroy(&ksp));
  PetscCall(PetscLogStagePop());
  PCMPIServerInSolve = PETSC_FALSE;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Rank-0 side of the command protocol: wake the server ranks (when using shared-memory locks)
   and broadcast the next PCMPICommand to them */
static PetscErrorCode PCMPIServerBroadcastRequest(PCMPICommand request)
{
#if defined(PETSC_HAVE_PTHREAD_MUTEX)
  PetscMPIInt dummy1 = 1, dummy2;
#endif

  PetscFunctionBegin;
#if defined(PETSC_HAVE_PTHREAD_MUTEX)
  if (PCMPIServerUseShmget) {
    for (PetscMPIInt i = 1; i < PetscGlobalSize; i++) pthread_mutex_unlock(&PCMPIServerLocks[i]);
  }
#endif
  PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, MPI_COMM_WORLD));
  /* next line ensures the sender has already taken the lock */
#if defined(PETSC_HAVE_PTHREAD_MUTEX)
  if (PCMPIServerUseShmget) {
    PetscCallMPI(MPI_Reduce(&dummy1, &dummy2, 1, MPI_INT, MPI_SUM, 0, PC_MPI_COMM_WORLD));
    for (PetscMPIInt i = 1; i < PetscGlobalSize; i++) pthread_mutex_lock(&PCMPIServerLocks[i]);
  }
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  PCMPIServerBegin - starts a server that runs on the `rank != 0` MPI processes waiting to process requests for
  parallel `KSP` solves and management of parallel `KSP` objects.

  Logically Collective on all MPI processes except rank 0

  Options Database Keys:
+ -mpi_linear_solver_server - causes the PETSc program to start in MPI linear solver server mode where only the first MPI rank runs user code
. -mpi_linear_solver_server_view - displays information about all the linear systems solved by the MPI linear solver server at the conclusion of the program
- -mpi_linear_solver_server_use_shared_memory - use shared memory when communicating matrices and vectors to server processes (default where supported)

  Level: developer

  Note:
  This is normally started automatically in `PetscInitialize()` when the option is provided

  See `PCMPI` for information on using the solver with a `KSP` object

  Developer Notes:
  When called on MPI rank 0 this sets `PETSC_COMM_WORLD` to `PETSC_COMM_SELF` to allow a main program
  written with `PETSC_COMM_WORLD` to run correctly on the single rank while all the ranks
  (that would normally be sharing `PETSC_COMM_WORLD`) to run the solver server.

  Can this be integrated into the `PetscDevice` abstraction that is currently being developed?

  Conceivably `PCREDISTRIBUTE` could be organized in a similar manner to simplify its usage

  This could be implemented directly at the `KSP` level instead of using the `PCMPI` wrapper object

  The code could be extended to allow an MPI + OpenMP application to use the linear solver server concept across all shared-memory
  nodes with a single MPI process per node for the user application but multiple MPI processes per node for the linear solver.

  The concept could also be extended for users' callbacks for `SNES`, `TS`, and `Tao` where the `SNESSolve()` for example, runs on
  all MPI processes but the user callback only runs on one MPI process per node.

  PETSc could also be extended with an MPI-less API that provides access to PETSc's solvers without any reference to MPI, essentially remove
  the `MPI_Comm` argument from PETSc calls.

.seealso: [](sec_pcmpi), `PCMPIServerEnd()`, `PCMPI`, `KSPCheckPCMPI()`
@*/
PetscErrorCode PCMPIServerBegin(void)
{
  PetscMPIInt rank;

  PetscFunctionBegin;
  PetscCall(PetscInfo(NULL, "Starting MPI Linear Solver Server\n"));
  if (PetscDefined(USE_SINGLE_LIBRARY)) {
    PetscCall(VecInitializePackage());
    PetscCall(MatInitializePackage());
    PetscCall(DMInitializePackage());
    PetscCall(PCInitializePackage());
    PetscCall(KSPInitializePackage());
    PetscCall(SNESInitializePackage());
    PetscCall(TSInitializePackage());
    PetscCall(TaoInitializePackage());
  }
  PetscCall(PetscLogStageRegister("PCMPI", &PCMPIStage));
  PetscCall(PetscLogEventRegister("ServerDist", PC_CLASSID, &EventServerDist));
  PetscCall(PetscLogEventRegister("ServerDistMPI", PC_CLASSID, &EventServerDistMPI));

  if (!PetscDefined(HAVE_SHMGET)) PCMPIServerUseShmget = PETSC_FALSE;
  PetscCall(PetscOptionsGetBool(NULL, NULL, "-mpi_linear_solver_server_use_shared_memory", &PCMPIServerUseShmget, NULL));

  PetscCallMPI(MPI_Comm_rank(PC_MPI_COMM_WORLD, &rank));
  if (PCMPIServerUseShmget) {
#if defined(PETSC_HAVE_PTHREAD_MUTEX)
    PetscMPIInt size;

    PetscCallMPI(MPI_Comm_size(PETSC_COMM_WORLD, &size));
    if (size > 1) {
      pthread_mutex_t *locks;

      if (rank == 0) {
        PCMPIServerActive = PETSC_TRUE;
        PetscCall(PetscShmgetAllocateArray(size, sizeof(pthread_mutex_t), (void **)&locks));
      }
      PetscCall(PetscShmgetMapAddresses(PETSC_COMM_WORLD, 1, (const void **)&locks, (void **)&PCMPIServerLocks));
      if (rank == 0) {
        pthread_mutexattr_t attr;

        /* process-shared mutexes: one lock per server rank, initially held by rank 0 */
        pthread_mutexattr_init(&attr);
        pthread_mutexattr_setpshared(&attr, PTHREAD_PROCESS_SHARED);

        for (int i = 1; i < size; i++) {
          pthread_mutex_init(&PCMPIServerLocks[i], &attr);
          pthread_mutex_lock(&PCMPIServerLocks[i]);
        }
      }
      PetscCallMPI(MPI_Barrier(PETSC_COMM_WORLD));
    }
#endif
  }
  if (rank == 0) {
PETSC_COMM_WORLD = PETSC_COMM_SELF; 592 PCMPIServerActive = PETSC_TRUE; 593 PetscFunctionReturn(PETSC_SUCCESS); 594 } 595 596 while (PETSC_TRUE) { 597 PCMPICommand request = PCMPI_CREATE; 598 #if defined(PETSC_HAVE_PTHREAD_MUTEX) 599 PetscMPIInt dummy1 = 1, dummy2; 600 #endif 601 602 // TODO: can we broadcast the number of active ranks here so only the correct subset of proccesses waits on the later scatters? 603 #if defined(PETSC_HAVE_PTHREAD_MUTEX) 604 if (PCMPIServerUseShmget) pthread_mutex_lock(&PCMPIServerLocks[PetscGlobalRank]); 605 #endif 606 PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, PC_MPI_COMM_WORLD)); 607 #if defined(PETSC_HAVE_PTHREAD_MUTEX) 608 if (PCMPIServerUseShmget) { 609 /* next line ensures PetscGlobalRank has locked before rank 0 can take the lock back */ 610 PetscCallMPI(MPI_Reduce(&dummy1, &dummy2, 1, MPI_INT, MPI_SUM, 0, PC_MPI_COMM_WORLD)); 611 pthread_mutex_unlock(&PCMPIServerLocks[PetscGlobalRank]); 612 } 613 #endif 614 switch (request) { 615 case PCMPI_CREATE: 616 PetscCall(PCMPICreate(NULL)); 617 break; 618 case PCMPI_SET_MAT: 619 PetscCall(PCMPISetMat(NULL)); 620 break; 621 case PCMPI_UPDATE_MAT_VALUES: 622 PetscCall(PCMPIUpdateMatValues(NULL)); 623 break; 624 case PCMPI_VIEW: 625 // PetscCall(PCMPIView(NULL)); 626 break; 627 case PCMPI_SOLVE: 628 PetscCall(PCMPISolve(NULL, NULL, NULL)); 629 break; 630 case PCMPI_DESTROY: 631 PetscCall(PCMPIDestroy(NULL)); 632 break; 633 case PCMPI_EXIT: 634 if (PCMPIServerUseShmget) PetscCall(PetscShmgetUnmapAddresses(1, (void **)&PCMPIServerLocks)); 635 PetscCall(PetscFinalize()); 636 exit(0); /* not sure if this is a good idea, but cannot return because it will run users main program */ 637 break; 638 default: 639 break; 640 } 641 } 642 PetscFunctionReturn(PETSC_SUCCESS); 643 } 644 645 /*@C 646 PCMPIServerEnd - ends a server that runs on the rank != 0 MPI processes waiting to process requests for 647 parallel KSP solves and management of parallel `KSP` objects. 
648 649 Logically Collective on all MPI ranks except 0 650 651 Level: developer 652 653 Note: 654 This is normally called automatically in `PetscFinalize()` 655 656 .seealso: [](sec_pcmpi), `PCMPIServerBegin()`, `PCMPI`, `KSPCheckPCMPI()` 657 @*/ 658 PetscErrorCode PCMPIServerEnd(void) 659 { 660 PetscFunctionBegin; 661 if (PetscGlobalRank == 0) { 662 PetscViewer viewer = NULL; 663 PetscViewerFormat format; 664 665 PetscCall(PetscShmgetAddressesFinalize()); 666 PetscCall(PCMPIServerBroadcastRequest(PCMPI_EXIT)); 667 if (PCMPIServerUseShmget) PetscCall(PetscShmgetUnmapAddresses(1, (void **)&PCMPIServerLocks)); 668 PETSC_COMM_WORLD = MPI_COMM_WORLD; /* could use PC_MPI_COMM_WORLD */ 669 PetscOptionsBegin(PETSC_COMM_SELF, NULL, "MPI linear solver server options", NULL); 670 PetscCall(PetscOptionsViewer("-mpi_linear_solver_server_view", "View information about system solved with the server", "PCMPI", &viewer, &format, NULL)); 671 PetscOptionsEnd(); 672 if (viewer) { 673 PetscBool isascii; 674 675 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii)); 676 if (isascii) { 677 PetscMPIInt size; 678 PetscMPIInt i; 679 680 PetscCallMPI(MPI_Comm_size(PETSC_COMM_WORLD, &size)); 681 PetscCall(PetscViewerASCIIPrintf(viewer, "MPI linear solver server statistics:\n")); 682 PetscCall(PetscViewerASCIIPrintf(viewer, " Ranks KSPSolve()s Mats KSPs Avg. Size Avg. 
Its\n")); 683 if (PCMPIKSPCountsSeq) { 684 PetscCall(PetscViewerASCIIPrintf(viewer, " Sequential %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT "\n", PCMPISolveCountsSeq, PCMPIKSPCountsSeq, PCMPISizesSeq / PCMPISolveCountsSeq, PCMPIIterationsSeq / PCMPISolveCountsSeq)); 685 } 686 for (i = 0; i < size; i++) { 687 if (PCMPIKSPCounts[i]) { 688 PetscCall(PetscViewerASCIIPrintf(viewer, " %d %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT "\n", i + 1, PCMPISolveCounts[i], PCMPIMatCounts[i], PCMPIKSPCounts[i], PCMPISizes[i] / PCMPISolveCounts[i], PCMPIIterations[i] / PCMPISolveCounts[i])); 689 } 690 } 691 PetscCall(PetscViewerASCIIPrintf(viewer, "MPI linear solver server %susing shared memory\n", PCMPIServerUseShmget ? "" : "not ")); 692 } 693 PetscCall(PetscViewerDestroy(&viewer)); 694 } 695 } 696 PetscCall(PCMPICommsDestroy()); 697 PCMPIServerActive = PETSC_FALSE; 698 PetscFunctionReturn(PETSC_SUCCESS); 699 } 700 701 /* 702 This version is used in the trivial case when the MPI parallel solver server is running on just the original MPI rank 0 703 because, for example, the problem is small. 
This version is more efficient because it does not require copying any data 704 */ 705 static PetscErrorCode PCSetUp_Seq(PC pc) 706 { 707 PC_MPI *km = (PC_MPI *)pc->data; 708 Mat sA; 709 const char *prefix; 710 char *found = NULL, *cprefix; 711 712 PetscFunctionBegin; 713 PCMPIServerInSolve = PETSC_TRUE; 714 PetscCall(PCGetOperators(pc, NULL, &sA)); 715 PetscCall(PCGetOptionsPrefix(pc, &prefix)); 716 PetscCall(KSPCreate(PETSC_COMM_SELF, &km->ksps[0])); 717 PetscCall(KSPSetNestLevel(km->ksps[0], 1)); 718 PetscCall(PetscObjectSetTabLevel((PetscObject)km->ksps[0], 1)); 719 720 /* Created KSP gets prefix of PC minus the mpi_linear_solver_server_ portion */ 721 PetscCall(PCGetOptionsPrefix(pc, &prefix)); 722 PetscCheck(prefix, PETSC_COMM_SELF, PETSC_ERR_PLIB, "PCMPI missing required prefix"); 723 PetscCall(PetscStrallocpy(prefix, &cprefix)); 724 PetscCall(PetscStrstr(cprefix, "mpi_linear_solver_server_", &found)); 725 PetscCheck(found, PETSC_COMM_SELF, PETSC_ERR_PLIB, "PCMPI missing mpi_linear_solver_server_ portion of prefix"); 726 *found = 0; 727 PetscCall(KSPSetOptionsPrefix(km->ksps[0], cprefix)); 728 PetscCall(PetscFree(cprefix)); 729 730 PetscCall(KSPSetOperators(km->ksps[0], sA, sA)); 731 PetscCall(KSPSetFromOptions(km->ksps[0])); 732 PetscCall(KSPSetUp(km->ksps[0])); 733 PetscCall(PetscInfo((PetscObject)pc, "MPI parallel linear solver system is being solved directly on rank 0 due to its small size\n")); 734 PCMPIKSPCountsSeq++; 735 PCMPIServerInSolve = PETSC_FALSE; 736 PetscFunctionReturn(PETSC_SUCCESS); 737 } 738 739 static PetscErrorCode PCApply_Seq(PC pc, Vec b, Vec x) 740 { 741 PC_MPI *km = (PC_MPI *)pc->data; 742 PetscInt its, n; 743 Mat A; 744 745 PetscFunctionBegin; 746 PCMPIServerInSolve = PETSC_TRUE; 747 PetscCall(KSPSolve(km->ksps[0], b, x)); 748 PetscCall(KSPGetIterationNumber(km->ksps[0], &its)); 749 PCMPISolveCountsSeq++; 750 PCMPIIterationsSeq += its; 751 PetscCall(KSPGetOperators(km->ksps[0], NULL, &A)); 752 PetscCall(MatGetSize(A, &n, NULL)); 753 
PCMPISizesSeq += n; 754 PCMPIServerInSolve = PETSC_FALSE; 755 /* 756 do not keep reference to previous rhs and solution since destroying them in the next KSPSolve() 757 my use PetscFree() instead of PCMPIArrayDeallocate() 758 */ 759 PetscCall(VecDestroy(&km->ksps[0]->vec_rhs)); 760 PetscCall(VecDestroy(&km->ksps[0]->vec_sol)); 761 PetscFunctionReturn(PETSC_SUCCESS); 762 } 763 764 static PetscErrorCode PCView_Seq(PC pc, PetscViewer viewer) 765 { 766 PC_MPI *km = (PC_MPI *)pc->data; 767 768 PetscFunctionBegin; 769 PetscCall(PetscViewerASCIIPrintf(viewer, "Running MPI linear solver server directly on rank 0 due to its small size\n")); 770 PetscCall(PetscViewerASCIIPrintf(viewer, "Desired minimum number of nonzeros per rank for MPI parallel solve %d\n", (int)km->mincntperrank)); 771 PetscCall(PetscViewerASCIIPrintf(viewer, "*** Use -mpi_linear_solver_server_view to statistics on all the solves ***\n")); 772 PetscFunctionReturn(PETSC_SUCCESS); 773 } 774 775 static PetscErrorCode PCDestroy_Seq(PC pc) 776 { 777 PC_MPI *km = (PC_MPI *)pc->data; 778 Mat A, B; 779 Vec x, b; 780 781 PetscFunctionBegin; 782 PCMPIServerInSolve = PETSC_TRUE; 783 /* since matrices and vectors are shared with outer KSP we need to ensure they are not destroyed with PetscFree() */ 784 PetscCall(KSPGetOperators(km->ksps[0], &A, &B)); 785 PetscCall(PetscObjectReference((PetscObject)A)); 786 PetscCall(PetscObjectReference((PetscObject)B)); 787 PetscCall(KSPGetSolution(km->ksps[0], &x)); 788 PetscCall(PetscObjectReference((PetscObject)x)); 789 PetscCall(KSPGetRhs(km->ksps[0], &b)); 790 PetscCall(PetscObjectReference((PetscObject)b)); 791 PetscCall(KSPDestroy(&km->ksps[0])); 792 PetscCall(PetscFree(pc->data)); 793 PCMPIServerInSolve = PETSC_FALSE; 794 PetscCall(MatDestroy(&A)); 795 PetscCall(MatDestroy(&B)); 796 PetscCall(VecDestroy(&x)); 797 PetscCall(VecDestroy(&b)); 798 PetscFunctionReturn(PETSC_SUCCESS); 799 } 800 801 /* 802 PCSetUp_MPI - Trigger the creation of the MPI parallel PC and copy parts of 
   the matrix and
   right-hand side to the parallel PC
*/
static PetscErrorCode PCSetUp_MPI(PC pc)
{
  PC_MPI     *km = (PC_MPI *)pc->data;
  PetscMPIInt rank, size;
  PetscBool   newmatrix = PETSC_FALSE;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_rank(MPI_COMM_WORLD, &rank));
  PetscCheck(rank == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "PCMPI can only be used from 0th rank of MPI_COMM_WORLD. Perhaps a missing -mpi_linear_solver_server?");
  PetscCallMPI(MPI_Comm_size(MPI_COMM_WORLD, &size));

  if (!pc->setupcalled) {
    if (!km->alwaysuseserver) {
      PetscInt n;
      Mat      sA;
      /* short circuit for small systems: solve directly on rank 0 instead of distributing */
      PetscCall(PCGetOperators(pc, &sA, &sA));
      PetscCall(MatGetSize(sA, &n, NULL));
      if (n < 2 * km->mincntperrank - 1 || size == 1) {
        /* swap in the sequential implementations; ops->setup is cleared because setup is completed right here */
        pc->ops->setup   = NULL;
        pc->ops->apply   = PCApply_Seq;
        pc->ops->destroy = PCDestroy_Seq;
        pc->ops->view    = PCView_Seq;
        PetscCall(PCSetUp_Seq(pc));
        PetscFunctionReturn(PETSC_SUCCESS);
      }
    }

    /* wake the server ranks with the CREATE request, then participate in the collective creation */
    PetscCall(PCMPIServerBroadcastRequest(PCMPI_CREATE));
    PetscCall(PCMPICreate(pc));
    newmatrix = PETSC_TRUE;
  }
  if (pc->flag == DIFFERENT_NONZERO_PATTERN) newmatrix = PETSC_TRUE;

  if (newmatrix) {
    /* full matrix (values plus nonzero structure) must be shipped to the server ranks */
    PetscCall(PetscInfo((PetscObject)pc, "New matrix or matrix has changed nonzero structure\n"));
    PetscCall(PCMPIServerBroadcastRequest(PCMPI_SET_MAT));
    PetscCall(PCMPISetMat(pc));
  } else {
    /* same nonzero pattern: only the numerical values need to be shipped */
    PetscCall(PetscInfo((PetscObject)pc, "Matrix has only changed nonzero values\n"));
    PetscCall(PCMPIServerBroadcastRequest(PCMPI_UPDATE_MAT_VALUES));
    PetscCall(PCMPIUpdateMatValues(pc));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* PCApply_MPI - Broadcasts the SOLVE request to the server ranks, then joins the collective solve */
static PetscErrorCode PCApply_MPI(PC pc, Vec b, Vec x)
{
  PetscFunctionBegin;
  PetscCall(PCMPIServerBroadcastRequest(PCMPI_SOLVE));
  PetscCall(PCMPISolve(pc, b, x));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* PCDestroy_MPI - Tells the server ranks to destroy their parts of the parallel KSP, then frees the local data */
static PetscErrorCode PCDestroy_MPI(PC pc)
{
  PetscFunctionBegin;
  PetscCall(PCMPIServerBroadcastRequest(PCMPI_DESTROY));
  PetscCall(PCMPIDestroy(pc));
  PetscCall(PetscFree(pc->data));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
   PCView_MPI - Cannot call view on the MPI parallel KSP because other ranks do not have access to the viewer, use options database
*/
static PetscErrorCode PCView_MPI(PC pc, PetscViewer viewer)
{
  PC_MPI     *km = (PC_MPI *)pc->data;
  MPI_Comm    comm;
  PetscMPIInt size;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)km->ksps[0], &comm));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCall(PetscViewerASCIIPrintf(viewer, "Size of MPI communicator used for MPI parallel KSP solve %d\n", size));
  PetscCall(PetscViewerASCIIPrintf(viewer, "Desired minimum number of matrix rows on each MPI process for MPI parallel solve %d\n", (int)km->mincntperrank));
  PetscCall(PetscViewerASCIIPrintf(viewer, "*** Use -mpi_linear_solver_server_view to view statistics on all the solves ***\n"));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* PCSetFromOptions_MPI - Processes the server-specific options (note the PC's prefix already ends in mpi_linear_solver_server_) */
static PetscErrorCode PCSetFromOptions_MPI(PC pc, PetscOptionItems *PetscOptionsObject)
{
  PC_MPI *km = (PC_MPI *)pc->data;

  PetscFunctionBegin;
  PetscOptionsHeadBegin(PetscOptionsObject, "MPI linear solver server options");
  PetscCall(PetscOptionsInt("-minimum_count_per_rank", "Desired minimum number of nonzeros per rank", "None", km->mincntperrank, &km->mincntperrank, NULL));
  PetscCall(PetscOptionsBool("-always_use_server", "Use the server even if only one rank is used for the solve (for debugging)", "None", km->alwaysuseserver, &km->alwaysuseserver, NULL));
  PetscOptionsHeadEnd();
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*MC
   PCMPI - Calls an MPI parallel `KSP` to solve a linear system from user code running on one process

   Options Database Keys for the Server:
+  -mpi_linear_solver_server - causes the PETSc program to start in MPI
   linear solver server mode where only the first MPI rank runs user code
.  -mpi_linear_solver_server_view - displays information about all the linear systems solved by the MPI linear solver server
-  -mpi_linear_solver_server_use_shared_memory <true, false> - use shared memory to distribute matrix and right hand side, defaults to true

   Options Database Keys for a specific `KSP` object
+  -[any_ksp_prefix]_mpi_linear_solver_server_minimum_count_per_rank - sets the minimum size of the linear system per MPI rank that the solver will strive for
-  -[any_ksp_prefix]_mpi_linear_solver_server_always_use_server - use the server solver code even if the particular system is only solved on the process (for debugging and testing purposes)

   Level: developer

   Notes:
   This cannot be used with vectors or matrices that are created using arrays provided by the user, such as `VecCreateWithArray()` or
   `MatCreateSeqAIJWithArrays()`

   The options database prefix for the actual solver is any prefix provided before use to the original `KSP` with `KSPSetOptionsPrefix()`, most commonly no prefix is used.

   It can be particularly useful for user OpenMP code or potentially user GPU code.

   When the program is running with a single MPI process then it directly uses the provided matrix and right-hand side
   and does not need to distribute the matrix and vector to the various MPI processes; thus it incurs no extra overhead over just using the `KSP` directly.

   The solver options for actual solving `KSP` and `PC` must be controlled via the options database, calls to set options directly on the user level `KSP` and `PC` have no effect
   because they are not the actual solver objects.

   When `-log_view` is used with this solver the events within the parallel solve are logged in their own stage.
Some of the logging in the other 927 stages will be confusing since the event times are only recorded on the 0th MPI rank, thus the percent of time in the events will be misleading. 928 929 Developer Note: 930 This `PCType` is never directly selected by the user, it is set when the option `-mpi_linear_solver_server` is used and the `PC` is at the outer most nesting of 931 a `KSP`. The outer most `KSP` object is automatically set to `KSPPREONLY` and thus is not directly visible to the user. 932 933 .seealso: [](sec_pcmpi), `KSPCreate()`, `KSPSetType()`, `KSPType`, `KSP`, `PC`, `PCMPIServerBegin()`, `PCMPIServerEnd()`, `KSPCheckPCMPI()` 934 M*/ 935 PETSC_EXTERN PetscErrorCode PCCreate_MPI(PC pc) 936 { 937 PC_MPI *km; 938 char *found = NULL; 939 940 PetscFunctionBegin; 941 PetscCall(PetscStrstr(((PetscObject)pc)->prefix, "mpi_linear_solver_server_", &found)); 942 PetscCheck(found, PETSC_COMM_SELF, PETSC_ERR_PLIB, "PCMPI object prefix does not have mpi_linear_solver_server_"); 943 944 /* material from PCSetType() */ 945 PetscTryTypeMethod(pc, destroy); 946 pc->ops->destroy = NULL; 947 pc->data = NULL; 948 949 PetscCall(PetscFunctionListDestroy(&((PetscObject)pc)->qlist)); 950 PetscCall(PetscMemzero(pc->ops, sizeof(struct _PCOps))); 951 pc->modifysubmatrices = NULL; 952 pc->modifysubmatricesP = NULL; 953 pc->setupcalled = 0; 954 955 PetscCall(PetscNew(&km)); 956 pc->data = (void *)km; 957 958 km->mincntperrank = 10000; 959 960 pc->ops->setup = PCSetUp_MPI; 961 pc->ops->apply = PCApply_MPI; 962 pc->ops->destroy = PCDestroy_MPI; 963 pc->ops->view = PCView_MPI; 964 pc->ops->setfromoptions = PCSetFromOptions_MPI; 965 PetscCall(PetscObjectChangeTypeName((PetscObject)pc, PCMPI)); 966 PetscFunctionReturn(PETSC_SUCCESS); 967 } 968 969 /*@ 970 PCMPIGetKSP - Gets the `KSP` created by the `PCMPI` 971 972 Not Collective 973 974 Input Parameter: 975 . pc - the preconditioner context 976 977 Output Parameter: 978 . 
innerksp - the inner `KSP` 979 980 Level: advanced 981 982 .seealso: [](ch_ksp), `KSP`, `PCMPI`, `PCREDISTRIBUTE` 983 @*/ 984 PetscErrorCode PCMPIGetKSP(PC pc, KSP *innerksp) 985 { 986 PC_MPI *red = (PC_MPI *)pc->data; 987 988 PetscFunctionBegin; 989 PetscValidHeaderSpecific(pc, PC_CLASSID, 1); 990 PetscAssertPointer(innerksp, 2); 991 *innerksp = red->ksps[0]; 992 PetscFunctionReturn(PETSC_SUCCESS); 993 } 994