1 /* 2 This file creates an MPI parallel KSP from a sequential PC that lives on MPI rank 0. 3 It is intended to allow using PETSc MPI parallel linear solvers from non-MPI codes. 4 5 That program may use OpenMP to compute the right-hand side and matrix for the linear system 6 7 The code uses MPI_COMM_WORLD below but maybe it should be PETSC_COMM_WORLD 8 9 The resulting KSP and PC can only be controlled via the options database, though some common commands 10 could be passed through the server. 11 12 */ 13 #include <petsc/private/pcimpl.h> /*I "petscksp.h" I*/ 14 #include <petsc/private/kspimpl.h> 15 #include <petscts.h> 16 #include <petsctao.h> 17 #if defined(PETSC_HAVE_PTHREAD_MUTEX) 18 #include <pthread.h> 19 #endif 20 21 #define PC_MPI_MAX_RANKS 256 22 #define PC_MPI_COMM_WORLD MPI_COMM_WORLD 23 24 typedef struct { 25 KSP ksps[PC_MPI_MAX_RANKS]; /* The addresses of the MPI parallel KSP on each process, NULL when not on a process. */ 26 PetscInt sendcount[PC_MPI_MAX_RANKS], displ[PC_MPI_MAX_RANKS]; /* For scatter/gather of rhs/solution */ 27 PetscInt NZ[PC_MPI_MAX_RANKS], NZdispl[PC_MPI_MAX_RANKS]; /* For scatter of nonzero values in matrix (and nonzero column indices initially */ 28 PetscInt mincntperrank; /* minimum number of desired matrix rows per active rank in MPI parallel KSP solve */ 29 PetscBool alwaysuseserver; /* for debugging use the server infrastructure even if only one MPI process is used for the solve */ 30 } PC_MPI; 31 32 typedef enum { 33 PCMPI_EXIT, /* exit the PC server loop, means the controlling sequential program is done */ 34 PCMPI_CREATE, 35 PCMPI_SET_MAT, /* set original matrix (or one with different nonzero pattern) */ 36 PCMPI_UPDATE_MAT_VALUES, /* update current matrix with new nonzero values */ 37 PCMPI_SOLVE, 38 PCMPI_VIEW, 39 PCMPI_DESTROY /* destroy a PC that is no longer needed */ 40 } PCMPICommand; 41 42 static MPI_Comm PCMPIComms[PC_MPI_MAX_RANKS]; 43 static PetscBool PCMPICommSet = PETSC_FALSE; 44 static PetscInt PCMPISolveCounts[PC_MPI_MAX_RANKS], PCMPIKSPCounts[PC_MPI_MAX_RANKS], PCMPIMatCounts[PC_MPI_MAX_RANKS], PCMPISolveCountsSeq = 0, PCMPIKSPCountsSeq = 0; 45 static PetscInt PCMPIIterations[PC_MPI_MAX_RANKS], PCMPISizes[PC_MPI_MAX_RANKS], PCMPIIterationsSeq = 0, PCMPISizesSeq = 0; 46 static PetscLogEvent EventServerDist, EventServerDistMPI; 47 #if defined(PETSC_HAVE_PTHREAD_MUTEX) 48 static pthread_mutex_t *PCMPIServerLocks; 49 #else 50 static void *PCMPIServerLocks; 51 #endif 52 53 static PetscErrorCode PCMPICommsCreate(void) 54 { 55 MPI_Comm comm = PC_MPI_COMM_WORLD; 56 PetscMPIInt size, rank, i; 57 58 PetscFunctionBegin; 59 PetscCallMPI(MPI_Comm_size(comm, &size)); 60 PetscCheck(size <= PC_MPI_MAX_RANKS, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for using more than PC_MPI_MAX_RANKS MPI ranks in an MPI linear solver server solve"); 61 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 62 /* comm for size 1 is useful only for debugging */ 63 for (i = 0; i < size; i++) { 64 PetscMPIInt color = rank < i + 1 ? 0 : MPI_UNDEFINED; 65 PetscCallMPI(MPI_Comm_split(comm, color, 0, &PCMPIComms[i])); 66 PCMPISolveCounts[i] = 0; 67 PCMPIKSPCounts[i] = 0; 68 PCMPIIterations[i] = 0; 69 PCMPISizes[i] = 0; 70 } 71 PCMPICommSet = PETSC_TRUE; 72 PetscFunctionReturn(PETSC_SUCCESS); 73 } 74 75 static PetscErrorCode PCMPICommsDestroy(void) 76 { 77 MPI_Comm comm = PC_MPI_COMM_WORLD; 78 PetscMPIInt size, rank, i; 79 80 PetscFunctionBegin; 81 if (!PCMPICommSet) PetscFunctionReturn(PETSC_SUCCESS); 82 PetscCallMPI(MPI_Comm_size(comm, &size)); 83 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 84 for (i = 0; i < size; i++) { 85 if (PCMPIComms[i] != MPI_COMM_NULL) PetscCallMPI(MPI_Comm_free(&PCMPIComms[i])); 86 } 87 PCMPICommSet = PETSC_FALSE; 88 PetscFunctionReturn(PETSC_SUCCESS); 89 } 90 91 static PetscErrorCode PCMPICreate(PC pc) 92 { 93 PC_MPI *km = pc ? (PC_MPI *)pc->data : NULL; 94 MPI_Comm comm = PC_MPI_COMM_WORLD; 95 KSP ksp; 96 PetscInt N[2], mincntperrank = 0; 97 PetscMPIInt size; 98 Mat sA; 99 char *cprefix = NULL; 100 PetscMPIInt len = 0; 101 102 PetscFunctionBegin; 103 PCMPIServerInSolve = PETSC_TRUE; 104 if (!PCMPICommSet) PetscCall(PCMPICommsCreate()); 105 PetscCallMPI(MPI_Comm_size(comm, &size)); 106 if (pc) { 107 if (size == 1) PetscCall(PetscPrintf(PETSC_COMM_SELF, "Warning: Running KSP type of MPI on a one rank MPI run, this will be less efficient then not using this type\n")); 108 PetscCall(PCGetOperators(pc, &sA, &sA)); 109 PetscCall(MatGetSize(sA, &N[0], &N[1])); 110 } 111 PetscCallMPI(MPI_Bcast(N, 2, MPIU_INT, 0, comm)); 112 113 /* choose a suitable sized MPI_Comm for the problem to be solved on */ 114 if (km) mincntperrank = km->mincntperrank; 115 PetscCallMPI(MPI_Bcast(&mincntperrank, 1, MPI_INT, 0, comm)); 116 comm = PCMPIComms[PetscMin(size, PetscMax(1, N[0] / mincntperrank)) - 1]; 117 if (comm == MPI_COMM_NULL) { 118 ksp = NULL; 119 PCMPIServerInSolve = PETSC_FALSE; 120 PetscFunctionReturn(PETSC_SUCCESS); 121 } 122 PetscCall(PetscLogStagePush(PCMPIStage)); 123 PetscCall(KSPCreate(comm, &ksp)); 124 PetscCall(KSPSetNestLevel(ksp, 1)); 125 PetscCall(PetscObjectSetTabLevel((PetscObject)ksp, 1)); 126 PetscCall(PetscLogStagePop()); 127 PetscCallMPI(MPI_Gather(&ksp, 1, MPI_AINT, pc ? km->ksps : NULL, 1, MPI_AINT, 0, comm)); 128 if (pc) { 129 size_t slen; 130 const char *prefix = NULL; 131 char *found = NULL; 132 133 PetscCallMPI(MPI_Comm_size(comm, &size)); 134 PCMPIKSPCounts[size - 1]++; 135 /* Created KSP gets prefix of PC minus the mpi_linear_solver_server_ portion */ 136 PetscCall(PCGetOptionsPrefix(pc, &prefix)); 137 PetscCheck(prefix, PETSC_COMM_SELF, PETSC_ERR_PLIB, "PCMPI missing required prefix"); 138 PetscCall(PetscStrallocpy(prefix, &cprefix)); 139 PetscCall(PetscStrstr(cprefix, "mpi_linear_solver_server_", &found)); 140 PetscCheck(found, PETSC_COMM_SELF, PETSC_ERR_PLIB, "PCMPI missing mpi_linear_solver_server_ portion of prefix"); 141 *found = 0; 142 PetscCall(PetscStrlen(cprefix, &slen)); 143 PetscCall(PetscMPIIntCast(slen, &len)); 144 } 145 PetscCallMPI(MPI_Bcast(&len, 1, MPI_INT, 0, comm)); 146 if (len) { 147 if (!pc) PetscCall(PetscMalloc1(len + 1, &cprefix)); 148 PetscCallMPI(MPI_Bcast(cprefix, len + 1, MPI_CHAR, 0, comm)); 149 PetscCall(KSPSetOptionsPrefix(ksp, cprefix)); 150 } 151 PetscCall(PetscFree(cprefix)); 152 PCMPIServerInSolve = PETSC_FALSE; 153 PetscFunctionReturn(PETSC_SUCCESS); 154 } 155 156 static PetscErrorCode PCMPISetMat(PC pc) 157 { 158 PC_MPI *km = pc ? (PC_MPI *)pc->data : NULL; 159 Mat A; 160 PetscInt m, n, j, bs; 161 Mat sA; 162 MPI_Comm comm = PC_MPI_COMM_WORLD; 163 KSP ksp; 164 PetscLayout layout; 165 const PetscInt *IA = NULL, *JA = NULL, *ia, *ja; 166 const PetscInt *range; 167 PetscInt *NZ = NULL, sendcounti[PC_MPI_MAX_RANKS], displi[PC_MPI_MAX_RANKS], *NZdispl = NULL, nz; 168 PetscMPIInt size, i; 169 const PetscScalar *a = NULL, *sa; 170 PetscInt matproperties[8] = {0}, rstart, rend; 171 char *cprefix; 172 173 PetscFunctionBegin; 174 PetscCallMPI(MPI_Scatter(pc ? km->ksps : NULL, 1, MPI_AINT, &ksp, 1, MPI_AINT, 0, comm)); 175 if (!ksp) PetscFunctionReturn(PETSC_SUCCESS); 176 PCMPIServerInSolve = PETSC_TRUE; 177 PetscCall(PetscLogEventBegin(EventServerDist, NULL, NULL, NULL, NULL)); 178 PetscCall(PetscObjectGetComm((PetscObject)ksp, &comm)); 179 if (pc) { 180 PetscBool isset, issymmetric, ishermitian, isspd, isstructurallysymmetric; 181 const char *prefix; 182 size_t clen; 183 184 PetscCallMPI(MPI_Comm_size(comm, &size)); 185 PCMPIMatCounts[size - 1]++; 186 PetscCall(PCGetOperators(pc, &sA, &sA)); 187 PetscCall(MatGetSize(sA, &matproperties[0], &matproperties[1])); 188 PetscCall(MatGetBlockSize(sA, &bs)); 189 matproperties[2] = bs; 190 PetscCall(MatIsSymmetricKnown(sA, &isset, &issymmetric)); 191 matproperties[3] = !isset ? 0 : (issymmetric ? 1 : 2); 192 PetscCall(MatIsHermitianKnown(sA, &isset, &ishermitian)); 193 matproperties[4] = !isset ? 0 : (ishermitian ? 1 : 2); 194 PetscCall(MatIsSPDKnown(sA, &isset, &isspd)); 195 matproperties[5] = !isset ? 0 : (isspd ? 1 : 2); 196 PetscCall(MatIsStructurallySymmetricKnown(sA, &isset, &isstructurallysymmetric)); 197 matproperties[6] = !isset ? 0 : (isstructurallysymmetric ? 1 : 2); 198 /* Created Mat gets prefix of input Mat PLUS the mpi_linear_solver_server_ portion */ 199 PetscCall(MatGetOptionsPrefix(sA, &prefix)); 200 PetscCall(PetscStrallocpy(prefix, &cprefix)); 201 PetscCall(PetscStrlen(cprefix, &clen)); 202 matproperties[7] = (PetscInt)clen; 203 } 204 PetscCallMPI(MPI_Bcast(matproperties, PETSC_STATIC_ARRAY_LENGTH(matproperties), MPIU_INT, 0, comm)); 205 206 /* determine ownership ranges of matrix columns */ 207 PetscCall(PetscLayoutCreate(comm, &layout)); 208 PetscCall(PetscLayoutSetBlockSize(layout, matproperties[2])); 209 PetscCall(PetscLayoutSetSize(layout, matproperties[1])); 210 PetscCall(PetscLayoutSetUp(layout)); 211 PetscCall(PetscLayoutGetLocalSize(layout, &n)); 212 PetscCall(PetscLayoutDestroy(&layout)); 213 214 /* determine ownership ranges of matrix rows */ 215 PetscCall(PetscLayoutCreate(comm, &layout)); 216 PetscCall(PetscLayoutSetBlockSize(layout, matproperties[2])); 217 PetscCall(PetscLayoutSetSize(layout, matproperties[0])); 218 PetscCall(PetscLayoutSetUp(layout)); 219 PetscCall(PetscLayoutGetLocalSize(layout, &m)); 220 PetscCall(PetscLayoutGetRange(layout, &rstart, &rend)); 221 222 PetscCall(PetscLogEventBegin(EventServerDistMPI, NULL, NULL, NULL, NULL)); 223 /* copy over the matrix nonzero structure and values */ 224 if (pc) { 225 PetscCall(MatGetRowIJ(sA, 0, PETSC_FALSE, PETSC_FALSE, NULL, &IA, &JA, NULL)); 226 if (!PCMPIServerUseShmget) { 227 NZ = km->NZ; 228 NZdispl = km->NZdispl; 229 PetscCall(PetscLayoutGetRanges(layout, &range)); 230 for (i = 0; i < size; i++) { 231 sendcounti[i] = 1 + range[i + 1] - range[i]; 232 NZ[i] = IA[range[i + 1]] - IA[range[i]]; 233 } 234 displi[0] = 0; 235 NZdispl[0] = 0; 236 for (j = 1; j < size; j++) { 237 displi[j] = displi[j - 1] + sendcounti[j - 1] - 1; 238 NZdispl[j] = NZdispl[j - 1] + NZ[j - 1]; 239 } 240 } 241 PetscCall(MatSeqAIJGetArrayRead(sA, &sa)); 242 } 243 PetscCall(PetscLayoutDestroy(&layout)); 244 245 PetscCall(MatCreate(comm, &A)); 246 if (matproperties[7] > 0) { 247 PetscMPIInt ni; 248 249 PetscCall(PetscMPIIntCast(matproperties[7] + 1, &ni)); 250 if (!pc) PetscCall(PetscMalloc1(matproperties[7] + 1, &cprefix)); 251 PetscCallMPI(MPI_Bcast(cprefix, ni, MPI_CHAR, 0, comm)); 252 PetscCall(MatSetOptionsPrefix(A, cprefix)); 253 PetscCall(PetscFree(cprefix)); 254 } 255 PetscCall(MatAppendOptionsPrefix(A, "mpi_linear_solver_server_")); 256 PetscCall(MatSetSizes(A, m, n, matproperties[0], matproperties[1])); 257 PetscCall(MatSetType(A, MATMPIAIJ)); 258 259 if (!PCMPIServerUseShmget) { 260 PetscCallMPI(MPI_Scatter(NZ, 1, MPIU_INT, &nz, 1, MPIU_INT, 0, comm)); 261 PetscCall(PetscMalloc3(n + 1, &ia, nz, &ja, nz, &a)); 262 PetscCallMPI(MPIU_Scatterv(IA, sendcounti, displi, MPIU_INT, (void *)ia, n + 1, MPIU_INT, 0, comm)); 263 PetscCallMPI(MPIU_Scatterv(JA, NZ, NZdispl, MPIU_INT, (void *)ja, nz, MPIU_INT, 0, comm)); 264 PetscCallMPI(MPIU_Scatterv(sa, NZ, NZdispl, MPIU_SCALAR, (void *)a, nz, MPIU_SCALAR, 0, comm)); 265 } else { 266 const void *addr[3] = {(const void **)IA, (const void **)JA, (const void **)sa}; 267 PCMPIServerAddresses *addresses; 268 269 PetscCall(PetscNew(&addresses)); 270 addresses->n = 3; 271 PetscCall(PetscShmgetMapAddresses(comm, addresses->n, addr, addresses->addr)); 272 ia = rstart + (PetscInt *)addresses->addr[0]; 273 ja = ia[0] + (PetscInt *)addresses->addr[1]; 274 a = ia[0] + (PetscScalar *)addresses->addr[2]; 275 PetscCall(PetscObjectContainerCompose((PetscObject)A, "PCMPIServerAddresses", (void *)addresses, PCMPIServerAddressesDestroy)); 276 } 277 278 if (pc) { 279 PetscCall(MatSeqAIJRestoreArrayRead(sA, &sa)); 280 PetscCall(MatRestoreRowIJ(sA, 0, PETSC_FALSE, PETSC_FALSE, NULL, &IA, &JA, NULL)); 281 } 282 PetscCall(PetscLogEventEnd(EventServerDistMPI, NULL, NULL, NULL, NULL)); 283 284 PetscCall(PetscLogStagePush(PCMPIStage)); 285 PetscCall(MatMPIAIJSetPreallocationCSR(A, ia, ja, a)); 286 PetscCall(MatSetBlockSize(A, matproperties[2])); 287 288 if (matproperties[3]) PetscCall(MatSetOption(A, MAT_SYMMETRIC, matproperties[3] == 1 ? PETSC_TRUE : PETSC_FALSE)); 289 if (matproperties[4]) PetscCall(MatSetOption(A, MAT_HERMITIAN, matproperties[4] == 1 ? PETSC_TRUE : PETSC_FALSE)); 290 if (matproperties[5]) PetscCall(MatSetOption(A, MAT_SPD, matproperties[5] == 1 ? PETSC_TRUE : PETSC_FALSE)); 291 if (matproperties[6]) PetscCall(MatSetOption(A, MAT_STRUCTURALLY_SYMMETRIC, matproperties[6] == 1 ? PETSC_TRUE : PETSC_FALSE)); 292 293 if (!PCMPIServerUseShmget) PetscCall(PetscFree3(ia, ja, a)); 294 PetscCall(KSPSetOperators(ksp, A, A)); 295 if (!ksp->vec_sol) PetscCall(MatCreateVecs(A, &ksp->vec_sol, &ksp->vec_rhs)); 296 PetscCall(PetscLogStagePop()); 297 if (pc && !PCMPIServerUseShmget) { /* needed for scatterv/gatherv of rhs and solution */ 298 const PetscInt *range; 299 300 PetscCall(VecGetOwnershipRanges(ksp->vec_sol, &range)); 301 for (i = 0; i < size; i++) { 302 km->sendcount[i] = range[i + 1] - range[i]; 303 km->displ[i] = range[i]; 304 } 305 } 306 PetscCall(MatDestroy(&A)); 307 PetscCall(PetscLogEventEnd(EventServerDist, NULL, NULL, NULL, NULL)); 308 PetscCall(KSPSetFromOptions(ksp)); 309 PCMPIServerInSolve = PETSC_FALSE; 310 PetscFunctionReturn(PETSC_SUCCESS); 311 } 312 313 static PetscErrorCode PCMPIUpdateMatValues(PC pc) 314 { 315 PC_MPI *km = pc ? (PC_MPI *)pc->data : NULL; 316 KSP ksp; 317 Mat sA, A; 318 MPI_Comm comm = PC_MPI_COMM_WORLD; 319 const PetscInt *ia, *IA; 320 const PetscScalar *a; 321 PetscCount nz; 322 const PetscScalar *sa = NULL; 323 PetscMPIInt size; 324 PetscInt rstart, matproperties[4] = {0, 0, 0, 0}; 325 326 PetscFunctionBegin; 327 if (pc) { 328 PetscCall(PCGetOperators(pc, &sA, &sA)); 329 PetscCall(MatSeqAIJGetArrayRead(sA, &sa)); 330 PetscCall(MatGetRowIJ(sA, 0, PETSC_FALSE, PETSC_FALSE, NULL, &IA, NULL, NULL)); 331 } 332 PetscCallMPI(MPI_Scatter(pc ? km->ksps : NULL, 1, MPI_AINT, &ksp, 1, MPI_AINT, 0, comm)); 333 if (!ksp) PetscFunctionReturn(PETSC_SUCCESS); 334 PCMPIServerInSolve = PETSC_TRUE; 335 PetscCall(PetscLogEventBegin(EventServerDist, NULL, NULL, NULL, NULL)); 336 PetscCall(PetscObjectGetComm((PetscObject)ksp, &comm)); 337 PetscCallMPI(MPI_Comm_size(comm, &size)); 338 PCMPIMatCounts[size - 1]++; 339 PetscCall(KSPGetOperators(ksp, NULL, &A)); 340 PetscCall(PetscLogEventBegin(EventServerDistMPI, NULL, NULL, NULL, NULL)); 341 if (!PCMPIServerUseShmget) { 342 PetscInt petsc_nz; 343 344 PetscCall(MatMPIAIJGetNumberNonzeros(A, &nz)); 345 PetscCall(PetscIntCast(nz, &petsc_nz)); 346 PetscCall(PetscMalloc1(nz, &a)); 347 PetscCallMPI(MPIU_Scatterv(sa, pc ? km->NZ : NULL, pc ? km->NZdispl : NULL, MPIU_SCALAR, (void *)a, petsc_nz, MPIU_SCALAR, 0, comm)); 348 } else { 349 PetscCall(MatGetOwnershipRange(A, &rstart, NULL)); 350 PCMPIServerAddresses *addresses; 351 PetscCall(PetscObjectContainerQuery((PetscObject)A, "PCMPIServerAddresses", (void **)&addresses)); 352 ia = rstart + (PetscInt *)addresses->addr[0]; 353 a = ia[0] + (PetscScalar *)addresses->addr[2]; 354 } 355 PetscCall(PetscLogEventEnd(EventServerDistMPI, NULL, NULL, NULL, NULL)); 356 if (pc) { 357 PetscBool isset, issymmetric, ishermitian, isspd, isstructurallysymmetric; 358 359 PetscCall(MatSeqAIJRestoreArrayRead(sA, &sa)); 360 PetscCall(MatRestoreRowIJ(sA, 0, PETSC_FALSE, PETSC_FALSE, NULL, &IA, NULL, NULL)); 361 362 PetscCall(MatIsSymmetricKnown(sA, &isset, &issymmetric)); 363 matproperties[0] = !isset ? 0 : (issymmetric ? 1 : 2); 364 PetscCall(MatIsHermitianKnown(sA, &isset, &ishermitian)); 365 matproperties[1] = !isset ? 0 : (ishermitian ? 1 : 2); 366 PetscCall(MatIsSPDKnown(sA, &isset, &isspd)); 367 matproperties[2] = !isset ? 0 : (isspd ? 1 : 2); 368 PetscCall(MatIsStructurallySymmetricKnown(sA, &isset, &isstructurallysymmetric)); 369 matproperties[3] = !isset ? 0 : (isstructurallysymmetric ? 1 : 2); 370 } 371 PetscCall(MatUpdateMPIAIJWithArray(A, a)); 372 if (!PCMPIServerUseShmget) PetscCall(PetscFree(a)); 373 PetscCallMPI(MPI_Bcast(matproperties, 4, MPIU_INT, 0, comm)); 374 /* if any of these properties was previously set and is now not set this will result in incorrect properties in A since there is no way to unset a property */ 375 if (matproperties[0]) PetscCall(MatSetOption(A, MAT_SYMMETRIC, matproperties[0] == 1 ? PETSC_TRUE : PETSC_FALSE)); 376 if (matproperties[1]) PetscCall(MatSetOption(A, MAT_HERMITIAN, matproperties[1] == 1 ? PETSC_TRUE : PETSC_FALSE)); 377 if (matproperties[2]) PetscCall(MatSetOption(A, MAT_SPD, matproperties[2] == 1 ? PETSC_TRUE : PETSC_FALSE)); 378 if (matproperties[3]) PetscCall(MatSetOption(A, MAT_STRUCTURALLY_SYMMETRIC, matproperties[3] == 1 ? PETSC_TRUE : PETSC_FALSE)); 379 PetscCall(PetscLogEventEnd(EventServerDist, NULL, NULL, NULL, NULL)); 380 PCMPIServerInSolve = PETSC_FALSE; 381 PetscFunctionReturn(PETSC_SUCCESS); 382 } 383 384 static PetscErrorCode PCMPISolve(PC pc, Vec B, Vec X) 385 { 386 PC_MPI *km = pc ? (PC_MPI *)pc->data : NULL; 387 KSP ksp; 388 MPI_Comm comm = PC_MPI_COMM_WORLD; 389 const PetscScalar *sb = NULL, *x; 390 PetscScalar *b, *sx = NULL; 391 PetscInt its, n; 392 PetscMPIInt size; 393 void *addr[2]; 394 395 PetscFunctionBegin; 396 PetscCallMPI(MPI_Scatter(pc ? km->ksps : &ksp, 1, MPI_AINT, &ksp, 1, MPI_AINT, 0, comm)); 397 if (!ksp) PetscFunctionReturn(PETSC_SUCCESS); 398 PCMPIServerInSolve = PETSC_TRUE; 399 PetscCall(PetscLogEventBegin(EventServerDist, NULL, NULL, NULL, NULL)); 400 PetscCall(PetscObjectGetComm((PetscObject)ksp, &comm)); 401 402 /* scatterv rhs */ 403 PetscCallMPI(MPI_Comm_size(comm, &size)); 404 if (pc) { 405 PetscInt N; 406 407 PCMPISolveCounts[size - 1]++; 408 PetscCall(MatGetSize(pc->pmat, &N, NULL)); 409 PCMPISizes[size - 1] += N; 410 } 411 PetscCall(VecGetLocalSize(ksp->vec_rhs, &n)); 412 PetscCall(PetscLogEventBegin(EventServerDistMPI, NULL, NULL, NULL, NULL)); 413 if (!PCMPIServerUseShmget) { 414 PetscCall(VecGetArray(ksp->vec_rhs, &b)); 415 if (pc) PetscCall(VecGetArrayRead(B, &sb)); 416 PetscCallMPI(MPIU_Scatterv(sb, pc ? km->sendcount : NULL, pc ? km->displ : NULL, MPIU_SCALAR, b, n, MPIU_SCALAR, 0, comm)); 417 if (pc) PetscCall(VecRestoreArrayRead(B, &sb)); 418 PetscCall(VecRestoreArray(ksp->vec_rhs, &b)); 419 // TODO: scatter initial guess if needed 420 } else { 421 PetscInt rstart; 422 423 if (pc) PetscCall(VecGetArrayRead(B, &sb)); 424 if (pc) PetscCall(VecGetArray(X, &sx)); 425 const void *inaddr[2] = {(const void **)sb, (const void **)sx}; 426 if (pc) PetscCall(VecRestoreArray(X, &sx)); 427 if (pc) PetscCall(VecRestoreArrayRead(B, &sb)); 428 429 PetscCall(PetscShmgetMapAddresses(comm, 2, inaddr, addr)); 430 PetscCall(VecGetOwnershipRange(ksp->vec_rhs, &rstart, NULL)); 431 PetscCall(VecPlaceArray(ksp->vec_rhs, rstart + (PetscScalar *)addr[0])); 432 PetscCall(VecPlaceArray(ksp->vec_sol, rstart + (PetscScalar *)addr[1])); 433 } 434 PetscCall(PetscLogEventEnd(EventServerDistMPI, NULL, NULL, NULL, NULL)); 435 436 PetscCall(PetscLogEventEnd(EventServerDist, NULL, NULL, NULL, NULL)); 437 PetscCall(PetscLogStagePush(PCMPIStage)); 438 PetscCall(KSPSolve(ksp, NULL, NULL)); 439 PetscCall(PetscLogStagePop()); 440 PetscCall(PetscLogEventBegin(EventServerDist, NULL, NULL, NULL, NULL)); 441 PetscCall(KSPGetIterationNumber(ksp, &its)); 442 PCMPIIterations[size - 1] += its; 443 // TODO: send iterations up to outer KSP 444 445 if (PCMPIServerUseShmget) PetscCall(PetscShmgetUnmapAddresses(2, addr)); 446 447 /* gather solution */ 448 PetscCall(PetscLogEventBegin(EventServerDistMPI, NULL, NULL, NULL, NULL)); 449 if (!PCMPIServerUseShmget) { 450 PetscCall(VecGetArrayRead(ksp->vec_sol, &x)); 451 if (pc) PetscCall(VecGetArray(X, &sx)); 452 PetscCallMPI(MPIU_Gatherv(x, n, MPIU_SCALAR, sx, pc ? km->sendcount : NULL, pc ? km->displ : NULL, MPIU_SCALAR, 0, comm)); 453 if (pc) PetscCall(VecRestoreArray(X, &sx)); 454 PetscCall(VecRestoreArrayRead(ksp->vec_sol, &x)); 455 } else { 456 PetscCall(VecResetArray(ksp->vec_rhs)); 457 PetscCall(VecResetArray(ksp->vec_sol)); 458 } 459 PetscCall(PetscLogEventEnd(EventServerDistMPI, NULL, NULL, NULL, NULL)); 460 PetscCall(PetscLogEventEnd(EventServerDist, NULL, NULL, NULL, NULL)); 461 PCMPIServerInSolve = PETSC_FALSE; 462 PetscFunctionReturn(PETSC_SUCCESS); 463 } 464 465 static PetscErrorCode PCMPIDestroy(PC pc) 466 { 467 PC_MPI *km = pc ? (PC_MPI *)pc->data : NULL; 468 KSP ksp; 469 MPI_Comm comm = PC_MPI_COMM_WORLD; 470 471 PetscFunctionBegin; 472 PetscCallMPI(MPI_Scatter(pc ? km->ksps : NULL, 1, MPI_AINT, &ksp, 1, MPI_AINT, 0, comm)); 473 if (!ksp) PetscFunctionReturn(PETSC_SUCCESS); 474 PetscCall(PetscLogStagePush(PCMPIStage)); 475 PCMPIServerInSolve = PETSC_TRUE; 476 PetscCall(KSPDestroy(&ksp)); 477 PetscCall(PetscLogStagePop()); 478 PCMPIServerInSolve = PETSC_FALSE; 479 PetscFunctionReturn(PETSC_SUCCESS); 480 } 481 482 static PetscErrorCode PCMPIServerBroadcastRequest(PCMPICommand request) 483 { 484 #if defined(PETSC_HAVE_PTHREAD_MUTEX) 485 PetscMPIInt dummy1 = 1, dummy2; 486 #endif 487 488 PetscFunctionBegin; 489 #if defined(PETSC_HAVE_PTHREAD_MUTEX) 490 if (PCMPIServerUseShmget) { 491 for (PetscMPIInt i = 1; i < PetscGlobalSize; i++) pthread_mutex_unlock(&PCMPIServerLocks[i]); 492 } 493 #endif 494 PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, MPI_COMM_WORLD)); 495 /* next line ensures the sender has already taken the lock */ 496 #if defined(PETSC_HAVE_PTHREAD_MUTEX) 497 if (PCMPIServerUseShmget) { 498 PetscCallMPI(MPI_Reduce(&dummy1, &dummy2, 1, MPI_INT, MPI_SUM, 0, PC_MPI_COMM_WORLD)); 499 for (PetscMPIInt i = 1; i < PetscGlobalSize; i++) pthread_mutex_lock(&PCMPIServerLocks[i]); 500 } 501 #endif 502 PetscFunctionReturn(PETSC_SUCCESS); 503 } 504 505 /*@C 506 PCMPIServerBegin - starts a server that runs on the `rank != 0` MPI processes waiting to process requests for 507 parallel `KSP` solves and management of parallel `KSP` objects. 508 509 Logically Collective on all MPI processes except rank 0 510 511 Options Database Keys: 512 + -mpi_linear_solver_server - causes the PETSc program to start in MPI linear solver server mode where only the first MPI rank runs user code 513 . -mpi_linear_solver_server_view - displays information about all the linear systems solved by the MPI linear solver server at the conclusion of the program 514 - -mpi_linear_solver_server_use_shared_memory - use shared memory when communicating matrices and vectors to server processes (default where supported) 515 516 Level: developer 517 518 Note: 519 This is normally started automatically in `PetscInitialize()` when the option is provided 520 521 See `PCMPI` for information on using the solver with a `KSP` object 522 523 See `PetscShmgetAllocateArray()` for instructions on how to ensure the shared memory is available on your machine. 524 525 Developer Notes: 526 When called on MPI rank 0 this sets `PETSC_COMM_WORLD` to `PETSC_COMM_SELF` to allow a main program 527 written with `PETSC_COMM_WORLD` to run correctly on the single rank while all the ranks 528 (that would normally be sharing `PETSC_COMM_WORLD`) to run the solver server. 529 530 Can this be integrated into the `PetscDevice` abstraction that is currently being developed? 531 532 Conceivably `PCREDISTRIBUTE` could be organized in a similar manner to simplify its usage 533 534 This could be implemented directly at the `KSP` level instead of using the `PCMPI` wrapper object 535 536 The code could be extended to allow an MPI + OpenMP application to use the linear solver server concept across all shared-memory 537 nodes with a single MPI process per node for the user application but multiple MPI processes per node for the linear solver. 538 539 The concept could also be extended for users's callbacks for `SNES`, `TS`, and `Tao` where the `SNESSolve()` for example, runs on 540 all MPI processes but the user callback only runs on one MPI process per node. 541 542 PETSc could also be extended with an MPI-less API that provides access to PETSc's solvers without any reference to MPI, essentially remove 543 the `MPI_Comm` argument from PETSc calls. 544 545 .seealso: [](sec_pcmpi), `PCMPIServerEnd()`, `PCMPI`, `KSPCheckPCMPI()` 546 @*/ 547 PetscErrorCode PCMPIServerBegin(void) 548 { 549 PetscMPIInt rank; 550 551 PetscFunctionBegin; 552 PetscCall(PetscInfo(NULL, "Starting MPI Linear Solver Server\n")); 553 if (PetscDefined(USE_SINGLE_LIBRARY)) { 554 PetscCall(VecInitializePackage()); 555 PetscCall(MatInitializePackage()); 556 PetscCall(DMInitializePackage()); 557 PetscCall(PCInitializePackage()); 558 PetscCall(KSPInitializePackage()); 559 PetscCall(SNESInitializePackage()); 560 PetscCall(TSInitializePackage()); 561 PetscCall(TaoInitializePackage()); 562 } 563 PetscCall(PetscLogStageRegister("PCMPI", &PCMPIStage)); 564 PetscCall(PetscLogEventRegister("ServerDist", PC_CLASSID, &EventServerDist)); 565 PetscCall(PetscLogEventRegister("ServerDistMPI", PC_CLASSID, &EventServerDistMPI)); 566 567 if (!PetscDefined(HAVE_SHMGET)) PCMPIServerUseShmget = PETSC_FALSE; 568 PetscCall(PetscOptionsGetBool(NULL, NULL, "-mpi_linear_solver_server_use_shared_memory", &PCMPIServerUseShmget, NULL)); 569 570 PetscCallMPI(MPI_Comm_rank(PC_MPI_COMM_WORLD, &rank)); 571 if (PCMPIServerUseShmget) { 572 #if defined(PETSC_HAVE_PTHREAD_MUTEX) 573 PetscMPIInt size; 574 575 PetscCallMPI(MPI_Comm_size(PETSC_COMM_WORLD, &size)); 576 if (size > 1) { 577 pthread_mutex_t *locks; 578 579 if (rank == 0) { 580 PCMPIServerActive = PETSC_TRUE; 581 PetscCall(PetscShmgetAllocateArray(size, sizeof(pthread_mutex_t), (void **)&locks)); 582 } 583 PetscCall(PetscShmgetMapAddresses(PETSC_COMM_WORLD, 1, (const void **)&locks, (void **)&PCMPIServerLocks)); 584 if (rank == 0) { 585 pthread_mutexattr_t attr; 586 587 pthread_mutexattr_init(&attr); 588 pthread_mutexattr_setpshared(&attr, PTHREAD_PROCESS_SHARED); 589 590 for (int i = 1; i < size; i++) { 591 pthread_mutex_init(&PCMPIServerLocks[i], &attr); 592 pthread_mutex_lock(&PCMPIServerLocks[i]); 593 } 594 } 595 PetscCallMPI(MPI_Barrier(PETSC_COMM_WORLD)); 596 } 597 #endif 598 } 599 if (rank == 0) { 600 PETSC_COMM_WORLD = PETSC_COMM_SELF; 601 PCMPIServerActive = PETSC_TRUE; 602 PetscFunctionReturn(PETSC_SUCCESS); 603 } 604 605 while (PETSC_TRUE) { 606 PCMPICommand request = PCMPI_CREATE; 607 #if defined(PETSC_HAVE_PTHREAD_MUTEX) 608 PetscMPIInt dummy1 = 1, dummy2; 609 #endif 610 611 // TODO: can we broadcast the number of active ranks here so only the correct subset of processes waits on the later scatters? 612 #if defined(PETSC_HAVE_PTHREAD_MUTEX) 613 if (PCMPIServerUseShmget) pthread_mutex_lock(&PCMPIServerLocks[PetscGlobalRank]); 614 #endif 615 PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, PC_MPI_COMM_WORLD)); 616 #if defined(PETSC_HAVE_PTHREAD_MUTEX) 617 if (PCMPIServerUseShmget) { 618 /* next line ensures PetscGlobalRank has locked before rank 0 can take the lock back */ 619 PetscCallMPI(MPI_Reduce(&dummy1, &dummy2, 1, MPI_INT, MPI_SUM, 0, PC_MPI_COMM_WORLD)); 620 pthread_mutex_unlock(&PCMPIServerLocks[PetscGlobalRank]); 621 } 622 #endif 623 switch (request) { 624 case PCMPI_CREATE: 625 PetscCall(PCMPICreate(NULL)); 626 break; 627 case PCMPI_SET_MAT: 628 PetscCall(PCMPISetMat(NULL)); 629 break; 630 case PCMPI_UPDATE_MAT_VALUES: 631 PetscCall(PCMPIUpdateMatValues(NULL)); 632 break; 633 case PCMPI_VIEW: 634 // PetscCall(PCMPIView(NULL)); 635 break; 636 case PCMPI_SOLVE: 637 PetscCall(PCMPISolve(NULL, NULL, NULL)); 638 break; 639 case PCMPI_DESTROY: 640 PetscCall(PCMPIDestroy(NULL)); 641 break; 642 case PCMPI_EXIT: 643 if (PCMPIServerUseShmget) PetscCall(PetscShmgetUnmapAddresses(1, (void **)&PCMPIServerLocks)); 644 PetscCall(PetscFinalize()); 645 exit(0); /* not sure if this is a good idea, but cannot return because it will run users main program */ 646 break; 647 default: 648 break; 649 } 650 } 651 PetscFunctionReturn(PETSC_SUCCESS); 652 } 653 654 /*@C 655 PCMPIServerEnd - ends a server that runs on the rank != 0 MPI processes waiting to process requests for 656 parallel KSP solves and management of parallel `KSP` objects. 657 658 Logically Collective on all MPI ranks except 0 659 660 Level: developer 661 662 Note: 663 This is normally called automatically in `PetscFinalize()` 664 665 .seealso: [](sec_pcmpi), `PCMPIServerBegin()`, `PCMPI`, `KSPCheckPCMPI()` 666 @*/ 667 PetscErrorCode PCMPIServerEnd(void) 668 { 669 PetscFunctionBegin; 670 if (PetscGlobalRank == 0) { 671 PetscViewer viewer = NULL; 672 PetscViewerFormat format; 673 674 PetscCall(PetscShmgetAddressesFinalize()); 675 PetscCall(PCMPIServerBroadcastRequest(PCMPI_EXIT)); 676 if (PCMPIServerUseShmget) PetscCall(PetscShmgetUnmapAddresses(1, (void **)&PCMPIServerLocks)); 677 PETSC_COMM_WORLD = MPI_COMM_WORLD; /* could use PC_MPI_COMM_WORLD */ 678 PetscOptionsBegin(PETSC_COMM_SELF, NULL, "MPI linear solver server options", NULL); 679 PetscCall(PetscOptionsViewer("-mpi_linear_solver_server_view", "View information about system solved with the server", "PCMPI", &viewer, &format, NULL)); 680 PetscOptionsEnd(); 681 if (viewer) { 682 PetscBool isascii; 683 684 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii)); 685 if (isascii) { 686 PetscMPIInt size; 687 PetscMPIInt i; 688 689 PetscCallMPI(MPI_Comm_size(PETSC_COMM_WORLD, &size)); 690 PetscCall(PetscViewerASCIIPrintf(viewer, "MPI linear solver server statistics:\n")); 691 PetscCall(PetscViewerASCIIPrintf(viewer, " Ranks KSPSolve()s Mats KSPs Avg. Size Avg. Its\n")); 692 if (PCMPIKSPCountsSeq) { 693 PetscCall(PetscViewerASCIIPrintf(viewer, " Sequential %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT "\n", PCMPISolveCountsSeq, PCMPIKSPCountsSeq, PCMPISizesSeq / PCMPISolveCountsSeq, PCMPIIterationsSeq / PCMPISolveCountsSeq)); 694 } 695 for (i = 0; i < size; i++) { 696 if (PCMPIKSPCounts[i]) { 697 PetscCall(PetscViewerASCIIPrintf(viewer, " %d %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT "\n", i + 1, PCMPISolveCounts[i], PCMPIMatCounts[i], PCMPIKSPCounts[i], PCMPISizes[i] / PCMPISolveCounts[i], PCMPIIterations[i] / PCMPISolveCounts[i])); 698 } 699 } 700 PetscCall(PetscViewerASCIIPrintf(viewer, "MPI linear solver server %susing shared memory\n", PCMPIServerUseShmget ? "" : "not ")); 701 } 702 PetscCall(PetscViewerDestroy(&viewer)); 703 } 704 } 705 PetscCall(PCMPICommsDestroy()); 706 PCMPIServerActive = PETSC_FALSE; 707 PetscFunctionReturn(PETSC_SUCCESS); 708 } 709 710 /* 711 This version is used in the trivial case when the MPI parallel solver server is running on just the original MPI rank 0 712 because, for example, the problem is small. This version is more efficient because it does not require copying any data 713 */ 714 static PetscErrorCode PCSetUp_Seq(PC pc) 715 { 716 PC_MPI *km = (PC_MPI *)pc->data; 717 Mat sA; 718 const char *prefix; 719 char *found = NULL, *cprefix; 720 721 PetscFunctionBegin; 722 PCMPIServerInSolve = PETSC_TRUE; 723 PetscCall(PCGetOperators(pc, NULL, &sA)); 724 PetscCall(PCGetOptionsPrefix(pc, &prefix)); 725 PetscCall(KSPCreate(PETSC_COMM_SELF, &km->ksps[0])); 726 PetscCall(KSPSetNestLevel(km->ksps[0], 1)); 727 PetscCall(PetscObjectSetTabLevel((PetscObject)km->ksps[0], 1)); 728 729 /* Created KSP gets prefix of PC minus the mpi_linear_solver_server_ portion */ 730 PetscCall(PCGetOptionsPrefix(pc, &prefix)); 731 PetscCheck(prefix, PETSC_COMM_SELF, PETSC_ERR_PLIB, "PCMPI missing required prefix"); 732 PetscCall(PetscStrallocpy(prefix, &cprefix)); 733 PetscCall(PetscStrstr(cprefix, "mpi_linear_solver_server_", &found)); 734 PetscCheck(found, PETSC_COMM_SELF, PETSC_ERR_PLIB, "PCMPI missing mpi_linear_solver_server_ portion of prefix"); 735 *found = 0; 736 PetscCall(KSPSetOptionsPrefix(km->ksps[0], cprefix)); 737 PetscCall(PetscFree(cprefix)); 738 739 PetscCall(KSPSetOperators(km->ksps[0], sA, sA)); 740 PetscCall(KSPSetFromOptions(km->ksps[0])); 741 PetscCall(KSPSetUp(km->ksps[0])); 742 PetscCall(PetscInfo(pc, "MPI parallel linear solver system is being solved directly on rank 0 due to its small size\n")); 743 PCMPIKSPCountsSeq++; 744 PCMPIServerInSolve = PETSC_FALSE; 745 PetscFunctionReturn(PETSC_SUCCESS); 746 } 747 748 static PetscErrorCode PCApply_Seq(PC pc, Vec b, Vec x) 749 { 750 PC_MPI *km = (PC_MPI *)pc->data; 751 PetscInt its, n; 752 Mat A; 753 754 PetscFunctionBegin; 755 PCMPIServerInSolve = PETSC_TRUE; 756 PetscCall(KSPSolve(km->ksps[0], b, x)); 757 PetscCall(KSPGetIterationNumber(km->ksps[0], &its)); 758 PCMPISolveCountsSeq++; 759 PCMPIIterationsSeq += its; 760 PetscCall(KSPGetOperators(km->ksps[0], NULL, &A)); 761 PetscCall(MatGetSize(A, &n, NULL)); 762 PCMPISizesSeq += n; 763 PCMPIServerInSolve = PETSC_FALSE; 764 /* 765 do not keep reference to previous rhs and solution since destroying them in the next KSPSolve() 766 my use PetscFree() instead of PCMPIArrayDeallocate() 767 */ 768 PetscCall(VecDestroy(&km->ksps[0]->vec_rhs)); 769 PetscCall(VecDestroy(&km->ksps[0]->vec_sol)); 770 PetscFunctionReturn(PETSC_SUCCESS); 771 } 772 773 static PetscErrorCode PCView_Seq(PC pc, PetscViewer viewer) 774 { 775 PC_MPI *km = (PC_MPI *)pc->data; 776 777 PetscFunctionBegin; 778 PetscCall(PetscViewerASCIIPrintf(viewer, "Running MPI linear solver server directly on rank 0 due to its small size\n")); 779 PetscCall(PetscViewerASCIIPrintf(viewer, "Desired minimum number of nonzeros per rank for MPI parallel solve %" PetscInt_FMT "\n", km->mincntperrank)); 780 PetscCall(PetscViewerASCIIPrintf(viewer, "*** Use -mpi_linear_solver_server_view to statistics on all the solves ***\n")); 781 PetscFunctionReturn(PETSC_SUCCESS); 782 } 783 784 static PetscErrorCode PCDestroy_Seq(PC pc) 785 { 786 PC_MPI *km = (PC_MPI *)pc->data; 787 Mat A, B; 788 Vec x, b; 789 790 PetscFunctionBegin; 791 PCMPIServerInSolve = PETSC_TRUE; 792 /* since matrices and vectors are shared with outer KSP we need to ensure they are not destroyed with PetscFree() */ 793 PetscCall(KSPGetOperators(km->ksps[0], &A, &B)); 794 PetscCall(PetscObjectReference((PetscObject)A)); 795 PetscCall(PetscObjectReference((PetscObject)B)); 796 PetscCall(KSPGetSolution(km->ksps[0], &x)); 797 PetscCall(PetscObjectReference((PetscObject)x)); 798 PetscCall(KSPGetRhs(km->ksps[0], &b)); 799 PetscCall(PetscObjectReference((PetscObject)b)); 800 PetscCall(KSPDestroy(&km->ksps[0])); 801 PetscCall(PetscFree(pc->data)); 802 PCMPIServerInSolve = PETSC_FALSE; 803 PetscCall(MatDestroy(&A)); 804 PetscCall(MatDestroy(&B)); 805 PetscCall(VecDestroy(&x)); 806 PetscCall(VecDestroy(&b)); 807 PetscFunctionReturn(PETSC_SUCCESS); 808 } 809 810 /* 811 PCSetUp_MPI - Trigger the creation of the MPI parallel PC and copy parts of the matrix and 812 right-hand side to the parallel PC 813 */ 814 static PetscErrorCode PCSetUp_MPI(PC pc) 815 { 816 PC_MPI *km = (PC_MPI *)pc->data; 817 PetscMPIInt rank, size; 818 PetscBool newmatrix = PETSC_FALSE; 819 820 PetscFunctionBegin; 821 PetscCallMPI(MPI_Comm_rank(MPI_COMM_WORLD, &rank)); 822 PetscCheck(rank == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "PCMPI can only be used from 0th rank of MPI_COMM_WORLD. Perhaps a missing -mpi_linear_solver_server?"); 823 PetscCallMPI(MPI_Comm_size(MPI_COMM_WORLD, &size)); 824 825 if (!pc->setupcalled) { 826 if (!km->alwaysuseserver) { 827 PetscInt n; 828 Mat sA; 829 /* short circuit for small systems */ 830 PetscCall(PCGetOperators(pc, &sA, &sA)); 831 PetscCall(MatGetSize(sA, &n, NULL)); 832 if (n < 2 * km->mincntperrank - 1 || size == 1) { 833 pc->ops->setup = NULL; 834 pc->ops->apply = PCApply_Seq; 835 pc->ops->destroy = PCDestroy_Seq; 836 pc->ops->view = PCView_Seq; 837 PetscCall(PCSetUp_Seq(pc)); 838 PetscFunctionReturn(PETSC_SUCCESS); 839 } 840 } 841 842 PetscCall(PCMPIServerBroadcastRequest(PCMPI_CREATE)); 843 PetscCall(PCMPICreate(pc)); 844 newmatrix = PETSC_TRUE; 845 } 846 if (pc->flag == DIFFERENT_NONZERO_PATTERN) newmatrix = PETSC_TRUE; 847 848 if (newmatrix) { 849 PetscCall(PetscInfo(pc, "New matrix or matrix has changed nonzero structure\n")); 850 PetscCall(PCMPIServerBroadcastRequest(PCMPI_SET_MAT)); 851 PetscCall(PCMPISetMat(pc)); 852 } else { 853 PetscCall(PetscInfo(pc, "Matrix has only changed nonzero values\n")); 854 PetscCall(PCMPIServerBroadcastRequest(PCMPI_UPDATE_MAT_VALUES)); 855 PetscCall(PCMPIUpdateMatValues(pc)); 856 } 857 PetscFunctionReturn(PETSC_SUCCESS); 858 } 859 860 static PetscErrorCode PCApply_MPI(PC pc, Vec b, Vec x) 861 { 862 PetscFunctionBegin; 863 PetscCall(PCMPIServerBroadcastRequest(PCMPI_SOLVE)); 864 PetscCall(PCMPISolve(pc, b, x)); 865 PetscFunctionReturn(PETSC_SUCCESS); 866 } 867 868 static PetscErrorCode PCDestroy_MPI(PC pc) 869 { 870 PetscFunctionBegin; 871 PetscCall(PCMPIServerBroadcastRequest(PCMPI_DESTROY)); 872 PetscCall(PCMPIDestroy(pc)); 873 PetscCall(PetscFree(pc->data)); 874 PetscFunctionReturn(PETSC_SUCCESS); 875 } 876 877 /* 878 PCView_MPI - Cannot call view on the MPI parallel KSP because other ranks do not have access to the viewer, use options database 879 */ 880 static PetscErrorCode PCView_MPI(PC pc, PetscViewer viewer) 881 { 882 PC_MPI *km = (PC_MPI *)pc->data; 883 MPI_Comm comm; 884 PetscMPIInt size; 885 886 PetscFunctionBegin; 887 PetscCall(PetscObjectGetComm((PetscObject)km->ksps[0], &comm)); 888 PetscCallMPI(MPI_Comm_size(comm, &size)); 889 PetscCall(PetscViewerASCIIPrintf(viewer, "Size of MPI communicator used for MPI parallel KSP solve %d\n", size)); 890 PetscCall(PetscViewerASCIIPrintf(viewer, "Desired minimum number of matrix rows on each MPI process for MPI parallel solve %" PetscInt_FMT "\n", km->mincntperrank)); 891 PetscCall(PetscViewerASCIIPrintf(viewer, "*** Use -mpi_linear_solver_server_view to view statistics on all the solves ***\n")); 892 PetscFunctionReturn(PETSC_SUCCESS); 893 } 894 895 static PetscErrorCode PCSetFromOptions_MPI(PC pc, PetscOptionItems PetscOptionsObject) 896 { 897 PC_MPI *km = (PC_MPI *)pc->data; 898 899 PetscFunctionBegin; 900 PetscOptionsHeadBegin(PetscOptionsObject, "MPI linear solver server options"); 901 PetscCall(PetscOptionsInt("-minimum_count_per_rank", "Desired minimum number of nonzeros per rank", "None", km->mincntperrank, &km->mincntperrank, NULL)); 902 PetscCall(PetscOptionsBool("-always_use_server", "Use the server even if only one rank is used for the solve (for debugging)", "None", km->alwaysuseserver, &km->alwaysuseserver, NULL)); 903 PetscOptionsHeadEnd(); 904 PetscFunctionReturn(PETSC_SUCCESS); 905 } 906 907 /*MC 908 PCMPI - Calls an MPI parallel `KSP` to solve a linear system from user code running on one process 909 910 Options Database Keys for the Server: 911 + -mpi_linear_solver_server - causes the PETSc program to start in MPI linear solver server mode where only the first MPI rank runs user code 912 . -mpi_linear_solver_server_view - displays information about all the linear systems solved by the MPI linear solver server 913 - -mpi_linear_solver_server_use_shared_memory <true, false> - use shared memory to distribute matrix and right hand side, defaults to true 914 915 Options Database Keys for a specific `KSP` object 916 + -[any_ksp_prefix]_mpi_linear_solver_server_minimum_count_per_rank - sets the minimum size of the linear system per MPI rank that the solver will strive for 917 - -[any_ksp_prefix]_mpi_linear_solver_server_always_use_server - use the server solver code even if the particular system is only solved on the process (for debugging and testing purposes) 918 919 Level: developer 920 921 Notes: 922 This cannot be used with vectors or matrices that are created using arrays provided by the user, such as `VecCreateWithArray()` or 923 `MatCreateSeqAIJWithArrays()` 924 925 The options database prefix for the actual solver is any prefix provided before use to the original `KSP` with `KSPSetOptionsPrefix()`, mostly commonly no prefix is used. 926 927 It can be particularly useful for user OpenMP code or potentially user GPU code. 928 929 When the program is running with a single MPI process then it directly uses the provided matrix and right-hand side 930 and does not need to distribute the matrix and vector to the various MPI processes; thus it incurs no extra overhead over just using the `KSP` directly. 931 932 The solver options for actual solving `KSP` and `PC` must be controlled via the options database, calls to set options directly on the user level `KSP` and `PC` have no effect 933 because they are not the actual solver objects. 934 935 When `-log_view` is used with this solver the events within the parallel solve are logging in their own stage. Some of the logging in the other 936 stages will be confusing since the event times are only recorded on the 0th MPI rank, thus the percent of time in the events will be misleading. 937 938 Developer Note: 939 This `PCType` is never directly selected by the user, it is set when the option `-mpi_linear_solver_server` is used and the `PC` is at the outer most nesting of 940 a `KSP`. The outer most `KSP` object is automatically set to `KSPPREONLY` and thus is not directly visible to the user. 941 942 .seealso: [](sec_pcmpi), `KSPCreate()`, `KSPSetType()`, `KSPType`, `KSP`, `PC`, `PCMPIServerBegin()`, `PCMPIServerEnd()`, `KSPCheckPCMPI()` 943 M*/ 944 PETSC_EXTERN PetscErrorCode PCCreate_MPI(PC pc) 945 { 946 PC_MPI *km; 947 char *found = NULL; 948 949 PetscFunctionBegin; 950 PetscCall(PetscStrstr(((PetscObject)pc)->prefix, "mpi_linear_solver_server_", &found)); 951 PetscCheck(found, PETSC_COMM_SELF, PETSC_ERR_PLIB, "PCMPI object prefix does not have mpi_linear_solver_server_"); 952 953 /* material from PCSetType() */ 954 PetscTryTypeMethod(pc, destroy); 955 pc->ops->destroy = NULL; 956 pc->data = NULL; 957 958 PetscCall(PetscFunctionListDestroy(&((PetscObject)pc)->qlist)); 959 PetscCall(PetscMemzero(pc->ops, sizeof(struct _PCOps))); 960 pc->modifysubmatrices = NULL; 961 pc->modifysubmatricesP = NULL; 962 pc->setupcalled = 0; 963 964 PetscCall(PetscNew(&km)); 965 pc->data = (void *)km; 966 967 km->mincntperrank = 10000; 968 969 pc->ops->setup = PCSetUp_MPI; 970 pc->ops->apply = PCApply_MPI; 971 pc->ops->destroy = PCDestroy_MPI; 972 pc->ops->view = PCView_MPI; 973 pc->ops->setfromoptions = PCSetFromOptions_MPI; 974 PetscCall(PetscObjectChangeTypeName((PetscObject)pc, PCMPI)); 975 PetscFunctionReturn(PETSC_SUCCESS); 976 } 977 978 /*@ 979 PCMPIGetKSP - Gets the `KSP` created by the `PCMPI` 980 981 Not Collective 982 983 Input Parameter: 984 . pc - the preconditioner context 985 986 Output Parameter: 987 . innerksp - the inner `KSP` 988 989 Level: advanced 990 991 .seealso: [](ch_ksp), `KSP`, `PCMPI`, `PCREDISTRIBUTE` 992 @*/ 993 PetscErrorCode PCMPIGetKSP(PC pc, KSP *innerksp) 994 { 995 PC_MPI *red = (PC_MPI *)pc->data; 996 997 PetscFunctionBegin; 998 PetscValidHeaderSpecific(pc, PC_CLASSID, 1); 999 PetscAssertPointer(innerksp, 2); 1000 *innerksp = red->ksps[0]; 1001 PetscFunctionReturn(PETSC_SUCCESS); 1002 } 1003