1 /* 2 This file creates an MPI parallel KSP from a sequential PC that lives on MPI rank 0. 3 It is intended to allow using PETSc MPI parallel linear solvers from non-MPI codes. 4 5 That program may use OpenMP to compute the right-hand side and matrix for the linear system 6 7 The code uses MPI_COMM_WORLD below but maybe it should be PETSC_COMM_WORLD 8 9 The resulting KSP and PC can only be controlled via the options database, though some common commands 10 could be passed through the server. 11 12 */ 13 #include <petsc/private/pcimpl.h> /*I "petscksp.h" I*/ 14 #include <petsc/private/kspimpl.h> 15 #include <petscts.h> 16 #include <petsctao.h> 17 #if defined(PETSC_HAVE_PTHREAD_MUTEX) 18 #include <pthread.h> 19 #endif 20 21 #define PC_MPI_MAX_RANKS 256 22 #define PC_MPI_COMM_WORLD MPI_COMM_WORLD 23 24 typedef struct { 25 KSP ksps[PC_MPI_MAX_RANKS]; /* The addresses of the MPI parallel KSP on each process, NULL when not on a process. */ 26 PetscInt sendcount[PC_MPI_MAX_RANKS], displ[PC_MPI_MAX_RANKS]; /* For scatter/gather of rhs/solution */ 27 PetscInt NZ[PC_MPI_MAX_RANKS], NZdispl[PC_MPI_MAX_RANKS]; /* For scatter of nonzero values in matrix (and nonzero column indices initially */ 28 PetscInt mincntperrank; /* minimum number of desired matrix rows per active rank in MPI parallel KSP solve */ 29 PetscBool alwaysuseserver; /* for debugging use the server infrastructure even if only one MPI process is used for the solve */ 30 } PC_MPI; 31 32 typedef enum { 33 PCMPI_EXIT, /* exit the PC server loop, means the controlling sequential program is done */ 34 PCMPI_CREATE, 35 PCMPI_SET_MAT, /* set original matrix (or one with different nonzero pattern) */ 36 PCMPI_UPDATE_MAT_VALUES, /* update current matrix with new nonzero values */ 37 PCMPI_SOLVE, 38 PCMPI_VIEW, 39 PCMPI_DESTROY /* destroy a PC that is no longer needed */ 40 } PCMPICommand; 41 42 static MPI_Comm PCMPIComms[PC_MPI_MAX_RANKS]; 43 static PetscBool PCMPICommSet = PETSC_FALSE; 44 static PetscInt 
PCMPISolveCounts[PC_MPI_MAX_RANKS], PCMPIKSPCounts[PC_MPI_MAX_RANKS], PCMPIMatCounts[PC_MPI_MAX_RANKS], PCMPISolveCountsSeq = 0, PCMPIKSPCountsSeq = 0; 45 static PetscInt PCMPIIterations[PC_MPI_MAX_RANKS], PCMPISizes[PC_MPI_MAX_RANKS], PCMPIIterationsSeq = 0, PCMPISizesSeq = 0; 46 static PetscLogEvent EventServerDist, EventServerDistMPI; 47 #if defined(PETSC_HAVE_PTHREAD_MUTEX) 48 static pthread_mutex_t *PCMPIServerLocks; 49 #else 50 static void *PCMPIServerLocks; 51 #endif 52 53 static PetscErrorCode PCMPICommsCreate(void) 54 { 55 MPI_Comm comm = PC_MPI_COMM_WORLD; 56 PetscMPIInt size, rank, i; 57 58 PetscFunctionBegin; 59 PetscCallMPI(MPI_Comm_size(comm, &size)); 60 PetscCheck(size <= PC_MPI_MAX_RANKS, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for using more than PC_MPI_MAX_RANKS MPI ranks in an MPI linear solver server solve"); 61 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 62 /* comm for size 1 is useful only for debugging */ 63 for (i = 0; i < size; i++) { 64 PetscMPIInt color = rank < i + 1 ? 0 : MPI_UNDEFINED; 65 PetscCallMPI(MPI_Comm_split(comm, color, 0, &PCMPIComms[i])); 66 PCMPISolveCounts[i] = 0; 67 PCMPIKSPCounts[i] = 0; 68 PCMPIIterations[i] = 0; 69 PCMPISizes[i] = 0; 70 } 71 PCMPICommSet = PETSC_TRUE; 72 PetscFunctionReturn(PETSC_SUCCESS); 73 } 74 75 static PetscErrorCode PCMPICommsDestroy(void) 76 { 77 MPI_Comm comm = PC_MPI_COMM_WORLD; 78 PetscMPIInt size, rank, i; 79 80 PetscFunctionBegin; 81 if (!PCMPICommSet) PetscFunctionReturn(PETSC_SUCCESS); 82 PetscCallMPI(MPI_Comm_size(comm, &size)); 83 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 84 for (i = 0; i < size; i++) { 85 if (PCMPIComms[i] != MPI_COMM_NULL) PetscCallMPI(MPI_Comm_free(&PCMPIComms[i])); 86 } 87 PCMPICommSet = PETSC_FALSE; 88 PetscFunctionReturn(PETSC_SUCCESS); 89 } 90 91 static PetscErrorCode PCMPICreate(PC pc) 92 { 93 PC_MPI *km = pc ? 
(PC_MPI *)pc->data : NULL; 94 MPI_Comm comm = PC_MPI_COMM_WORLD; 95 KSP ksp; 96 PetscInt N[2], mincntperrank = 0; 97 PetscMPIInt size; 98 Mat sA; 99 char *cprefix = NULL; 100 PetscMPIInt len = 0; 101 102 PetscFunctionBegin; 103 PCMPIServerInSolve = PETSC_TRUE; 104 if (!PCMPICommSet) PetscCall(PCMPICommsCreate()); 105 PetscCallMPI(MPI_Comm_size(comm, &size)); 106 if (pc) { 107 if (size == 1) PetscCall(PetscPrintf(PETSC_COMM_SELF, "Warning: Running KSP type of MPI on a one rank MPI run, this will be less efficient then not using this type\n")); 108 PetscCall(PCGetOperators(pc, &sA, &sA)); 109 PetscCall(MatGetSize(sA, &N[0], &N[1])); 110 } 111 PetscCallMPI(MPI_Bcast(N, 2, MPIU_INT, 0, comm)); 112 113 /* choose a suitable sized MPI_Comm for the problem to be solved on */ 114 if (km) mincntperrank = km->mincntperrank; 115 PetscCallMPI(MPI_Bcast(&mincntperrank, 1, MPI_INT, 0, comm)); 116 comm = PCMPIComms[PetscMin(size, PetscMax(1, N[0] / mincntperrank)) - 1]; 117 if (comm == MPI_COMM_NULL) { 118 ksp = NULL; 119 PCMPIServerInSolve = PETSC_FALSE; 120 PetscFunctionReturn(PETSC_SUCCESS); 121 } 122 PetscCall(PetscLogStagePush(PCMPIStage)); 123 PetscCall(KSPCreate(comm, &ksp)); 124 PetscCall(KSPSetNestLevel(ksp, 1)); 125 PetscCall(PetscObjectSetTabLevel((PetscObject)ksp, 1)); 126 PetscCall(PetscLogStagePop()); 127 PetscCallMPI(MPI_Gather(&ksp, 1, MPI_AINT, pc ? 
km->ksps : NULL, 1, MPI_AINT, 0, comm)); 128 if (pc) { 129 size_t slen; 130 const char *prefix = NULL; 131 char *found = NULL; 132 133 PetscCallMPI(MPI_Comm_size(comm, &size)); 134 PCMPIKSPCounts[size - 1]++; 135 /* Created KSP gets prefix of PC minus the mpi_linear_solver_server_ portion */ 136 PetscCall(PCGetOptionsPrefix(pc, &prefix)); 137 PetscCheck(prefix, PETSC_COMM_SELF, PETSC_ERR_PLIB, "PCMPI missing required prefix"); 138 PetscCall(PetscStrallocpy(prefix, &cprefix)); 139 PetscCall(PetscStrstr(cprefix, "mpi_linear_solver_server_", &found)); 140 PetscCheck(found, PETSC_COMM_SELF, PETSC_ERR_PLIB, "PCMPI missing mpi_linear_solver_server_ portion of prefix"); 141 *found = 0; 142 PetscCall(PetscStrlen(cprefix, &slen)); 143 PetscCall(PetscMPIIntCast(slen, &len)); 144 } 145 PetscCallMPI(MPI_Bcast(&len, 1, MPI_INT, 0, comm)); 146 if (len) { 147 if (!pc) PetscCall(PetscMalloc1(len + 1, &cprefix)); 148 PetscCallMPI(MPI_Bcast(cprefix, len + 1, MPI_CHAR, 0, comm)); 149 PetscCall(KSPSetOptionsPrefix(ksp, cprefix)); 150 } 151 PetscCall(PetscFree(cprefix)); 152 PCMPIServerInSolve = PETSC_FALSE; 153 PetscFunctionReturn(PETSC_SUCCESS); 154 } 155 156 static PetscErrorCode PCMPISetMat(PC pc) 157 { 158 PC_MPI *km = pc ? (PC_MPI *)pc->data : NULL; 159 Mat A; 160 PetscInt m, n, j, bs; 161 Mat sA; 162 MPI_Comm comm = PC_MPI_COMM_WORLD; 163 KSP ksp; 164 PetscLayout layout; 165 const PetscInt *IA = NULL, *JA = NULL, *ia, *ja; 166 const PetscInt *range; 167 PetscInt *NZ = NULL, sendcounti[PC_MPI_MAX_RANKS], displi[PC_MPI_MAX_RANKS], *NZdispl = NULL, nz; 168 PetscMPIInt size, i; 169 const PetscScalar *a = NULL, *sa; 170 PetscInt matproperties[8] = {0}, rstart, rend; 171 char *cprefix; 172 173 PetscFunctionBegin; 174 PetscCallMPI(MPI_Scatter(pc ? 
km->ksps : NULL, 1, MPI_AINT, &ksp, 1, MPI_AINT, 0, comm)); 175 if (!ksp) PetscFunctionReturn(PETSC_SUCCESS); 176 PCMPIServerInSolve = PETSC_TRUE; 177 PetscCall(PetscLogEventBegin(EventServerDist, NULL, NULL, NULL, NULL)); 178 PetscCall(PetscObjectGetComm((PetscObject)ksp, &comm)); 179 if (pc) { 180 PetscBool isset, issymmetric, ishermitian, isspd, isstructurallysymmetric; 181 const char *prefix; 182 size_t clen; 183 184 PetscCallMPI(MPI_Comm_size(comm, &size)); 185 PCMPIMatCounts[size - 1]++; 186 PetscCall(PCGetOperators(pc, &sA, &sA)); 187 PetscCall(MatGetSize(sA, &matproperties[0], &matproperties[1])); 188 PetscCall(MatGetBlockSize(sA, &bs)); 189 matproperties[2] = bs; 190 PetscCall(MatIsSymmetricKnown(sA, &isset, &issymmetric)); 191 matproperties[3] = !isset ? 0 : (issymmetric ? 1 : 2); 192 PetscCall(MatIsHermitianKnown(sA, &isset, &ishermitian)); 193 matproperties[4] = !isset ? 0 : (ishermitian ? 1 : 2); 194 PetscCall(MatIsSPDKnown(sA, &isset, &isspd)); 195 matproperties[5] = !isset ? 0 : (isspd ? 1 : 2); 196 PetscCall(MatIsStructurallySymmetricKnown(sA, &isset, &isstructurallysymmetric)); 197 matproperties[6] = !isset ? 0 : (isstructurallysymmetric ? 
1 : 2); 198 /* Created Mat gets prefix of input Mat PLUS the mpi_linear_solver_server_ portion */ 199 PetscCall(MatGetOptionsPrefix(sA, &prefix)); 200 PetscCall(PetscStrallocpy(prefix, &cprefix)); 201 PetscCall(PetscStrlen(cprefix, &clen)); 202 matproperties[7] = (PetscInt)clen; 203 } 204 PetscCallMPI(MPI_Bcast(matproperties, PETSC_STATIC_ARRAY_LENGTH(matproperties), MPIU_INT, 0, comm)); 205 206 /* determine ownership ranges of matrix columns */ 207 PetscCall(PetscLayoutCreate(comm, &layout)); 208 PetscCall(PetscLayoutSetBlockSize(layout, matproperties[2])); 209 PetscCall(PetscLayoutSetSize(layout, matproperties[1])); 210 PetscCall(PetscLayoutSetUp(layout)); 211 PetscCall(PetscLayoutGetLocalSize(layout, &n)); 212 PetscCall(PetscLayoutDestroy(&layout)); 213 214 /* determine ownership ranges of matrix rows */ 215 PetscCall(PetscLayoutCreate(comm, &layout)); 216 PetscCall(PetscLayoutSetBlockSize(layout, matproperties[2])); 217 PetscCall(PetscLayoutSetSize(layout, matproperties[0])); 218 PetscCall(PetscLayoutSetUp(layout)); 219 PetscCall(PetscLayoutGetLocalSize(layout, &m)); 220 PetscCall(PetscLayoutGetRange(layout, &rstart, &rend)); 221 222 PetscCall(PetscLogEventBegin(EventServerDistMPI, NULL, NULL, NULL, NULL)); 223 /* copy over the matrix nonzero structure and values */ 224 if (pc) { 225 PetscCall(MatGetRowIJ(sA, 0, PETSC_FALSE, PETSC_FALSE, NULL, &IA, &JA, NULL)); 226 if (!PCMPIServerUseShmget) { 227 NZ = km->NZ; 228 NZdispl = km->NZdispl; 229 PetscCall(PetscLayoutGetRanges(layout, &range)); 230 for (i = 0; i < size; i++) { 231 sendcounti[i] = 1 + range[i + 1] - range[i]; 232 NZ[i] = IA[range[i + 1]] - IA[range[i]]; 233 } 234 displi[0] = 0; 235 NZdispl[0] = 0; 236 for (j = 1; j < size; j++) { 237 displi[j] = displi[j - 1] + sendcounti[j - 1] - 1; 238 NZdispl[j] = NZdispl[j - 1] + NZ[j - 1]; 239 } 240 } 241 PetscCall(MatSeqAIJGetArrayRead(sA, &sa)); 242 } 243 PetscCall(PetscLayoutDestroy(&layout)); 244 245 PetscCall(MatCreate(comm, &A)); 246 if (matproperties[7] > 
0) { 247 PetscMPIInt ni; 248 249 PetscCall(PetscMPIIntCast(matproperties[7] + 1, &ni)); 250 if (!pc) PetscCall(PetscMalloc1(matproperties[7] + 1, &cprefix)); 251 PetscCallMPI(MPI_Bcast(cprefix, ni, MPI_CHAR, 0, comm)); 252 PetscCall(MatSetOptionsPrefix(A, cprefix)); 253 PetscCall(PetscFree(cprefix)); 254 } 255 PetscCall(MatAppendOptionsPrefix(A, "mpi_linear_solver_server_")); 256 PetscCall(MatSetSizes(A, m, n, matproperties[0], matproperties[1])); 257 PetscCall(MatSetType(A, MATMPIAIJ)); 258 259 if (!PCMPIServerUseShmget) { 260 PetscCallMPI(MPI_Scatter(NZ, 1, MPIU_INT, &nz, 1, MPIU_INT, 0, comm)); 261 PetscCall(PetscMalloc3(n + 1, &ia, nz, &ja, nz, &a)); 262 PetscCallMPI(MPIU_Scatterv(IA, sendcounti, displi, MPIU_INT, (void *)ia, n + 1, MPIU_INT, 0, comm)); 263 PetscCallMPI(MPIU_Scatterv(JA, NZ, NZdispl, MPIU_INT, (void *)ja, nz, MPIU_INT, 0, comm)); 264 PetscCallMPI(MPIU_Scatterv(sa, NZ, NZdispl, MPIU_SCALAR, (void *)a, nz, MPIU_SCALAR, 0, comm)); 265 } else { 266 const void *addr[3] = {(const void **)IA, (const void **)JA, (const void **)sa}; 267 PCMPIServerAddresses *addresses; 268 269 PetscCall(PetscNew(&addresses)); 270 addresses->n = 3; 271 PetscCall(PetscShmgetMapAddresses(comm, addresses->n, addr, addresses->addr)); 272 ia = rstart + (PetscInt *)addresses->addr[0]; 273 ja = ia[0] + (PetscInt *)addresses->addr[1]; 274 a = ia[0] + (PetscScalar *)addresses->addr[2]; 275 PetscCall(PetscObjectContainerCompose((PetscObject)A, "PCMPIServerAddresses", (void *)addresses, PCMPIServerAddressesDestroy)); 276 } 277 278 if (pc) { 279 PetscCall(MatSeqAIJRestoreArrayRead(sA, &sa)); 280 PetscCall(MatRestoreRowIJ(sA, 0, PETSC_FALSE, PETSC_FALSE, NULL, &IA, &JA, NULL)); 281 } 282 PetscCall(PetscLogEventEnd(EventServerDistMPI, NULL, NULL, NULL, NULL)); 283 284 PetscCall(PetscLogStagePush(PCMPIStage)); 285 PetscCall(MatMPIAIJSetPreallocationCSR(A, ia, ja, a)); 286 PetscCall(MatSetBlockSize(A, matproperties[2])); 287 288 if (matproperties[3]) PetscCall(MatSetOption(A, 
MAT_SYMMETRIC, matproperties[3] == 1 ? PETSC_TRUE : PETSC_FALSE)); 289 if (matproperties[4]) PetscCall(MatSetOption(A, MAT_HERMITIAN, matproperties[4] == 1 ? PETSC_TRUE : PETSC_FALSE)); 290 if (matproperties[5]) PetscCall(MatSetOption(A, MAT_SPD, matproperties[5] == 1 ? PETSC_TRUE : PETSC_FALSE)); 291 if (matproperties[6]) PetscCall(MatSetOption(A, MAT_STRUCTURALLY_SYMMETRIC, matproperties[6] == 1 ? PETSC_TRUE : PETSC_FALSE)); 292 293 if (!PCMPIServerUseShmget) PetscCall(PetscFree3(ia, ja, a)); 294 PetscCall(KSPSetOperators(ksp, A, A)); 295 if (!ksp->vec_sol) PetscCall(MatCreateVecs(A, &ksp->vec_sol, &ksp->vec_rhs)); 296 PetscCall(PetscLogStagePop()); 297 if (pc && !PCMPIServerUseShmget) { /* needed for scatterv/gatherv of rhs and solution */ 298 const PetscInt *range; 299 300 PetscCall(VecGetOwnershipRanges(ksp->vec_sol, &range)); 301 for (i = 0; i < size; i++) { 302 km->sendcount[i] = range[i + 1] - range[i]; 303 km->displ[i] = range[i]; 304 } 305 } 306 PetscCall(MatDestroy(&A)); 307 PetscCall(PetscLogEventEnd(EventServerDist, NULL, NULL, NULL, NULL)); 308 PetscCall(KSPSetFromOptions(ksp)); 309 PCMPIServerInSolve = PETSC_FALSE; 310 PetscFunctionReturn(PETSC_SUCCESS); 311 } 312 313 static PetscErrorCode PCMPIUpdateMatValues(PC pc) 314 { 315 PC_MPI *km = pc ? (PC_MPI *)pc->data : NULL; 316 KSP ksp; 317 Mat sA, A; 318 MPI_Comm comm = PC_MPI_COMM_WORLD; 319 const PetscInt *ia, *IA; 320 const PetscScalar *a; 321 PetscCount nz; 322 const PetscScalar *sa = NULL; 323 PetscMPIInt size; 324 PetscInt rstart, matproperties[4] = {0, 0, 0, 0}; 325 326 PetscFunctionBegin; 327 if (pc) { 328 PetscCall(PCGetOperators(pc, &sA, &sA)); 329 PetscCall(MatSeqAIJGetArrayRead(sA, &sa)); 330 PetscCall(MatGetRowIJ(sA, 0, PETSC_FALSE, PETSC_FALSE, NULL, &IA, NULL, NULL)); 331 } 332 PetscCallMPI(MPI_Scatter(pc ? 
km->ksps : NULL, 1, MPI_AINT, &ksp, 1, MPI_AINT, 0, comm)); 333 if (!ksp) PetscFunctionReturn(PETSC_SUCCESS); 334 PCMPIServerInSolve = PETSC_TRUE; 335 PetscCall(PetscLogEventBegin(EventServerDist, NULL, NULL, NULL, NULL)); 336 PetscCall(PetscObjectGetComm((PetscObject)ksp, &comm)); 337 PetscCallMPI(MPI_Comm_size(comm, &size)); 338 PCMPIMatCounts[size - 1]++; 339 PetscCall(KSPGetOperators(ksp, NULL, &A)); 340 PetscCall(PetscLogEventBegin(EventServerDistMPI, NULL, NULL, NULL, NULL)); 341 if (!PCMPIServerUseShmget) { 342 PetscInt petsc_nz; 343 344 PetscCall(MatMPIAIJGetNumberNonzeros(A, &nz)); 345 PetscCall(PetscIntCast(nz, &petsc_nz)); 346 PetscCall(PetscMalloc1(nz, &a)); 347 PetscCallMPI(MPIU_Scatterv(sa, pc ? km->NZ : NULL, pc ? km->NZdispl : NULL, MPIU_SCALAR, (void *)a, petsc_nz, MPIU_SCALAR, 0, comm)); 348 } else { 349 PetscCall(MatGetOwnershipRange(A, &rstart, NULL)); 350 PCMPIServerAddresses *addresses; 351 PetscCall(PetscObjectContainerQuery((PetscObject)A, "PCMPIServerAddresses", (void **)&addresses)); 352 ia = rstart + (PetscInt *)addresses->addr[0]; 353 a = ia[0] + (PetscScalar *)addresses->addr[2]; 354 } 355 PetscCall(PetscLogEventEnd(EventServerDistMPI, NULL, NULL, NULL, NULL)); 356 if (pc) { 357 PetscBool isset, issymmetric, ishermitian, isspd, isstructurallysymmetric; 358 359 PetscCall(MatSeqAIJRestoreArrayRead(sA, &sa)); 360 PetscCall(MatRestoreRowIJ(sA, 0, PETSC_FALSE, PETSC_FALSE, NULL, &IA, NULL, NULL)); 361 362 PetscCall(MatIsSymmetricKnown(sA, &isset, &issymmetric)); 363 matproperties[0] = !isset ? 0 : (issymmetric ? 1 : 2); 364 PetscCall(MatIsHermitianKnown(sA, &isset, &ishermitian)); 365 matproperties[1] = !isset ? 0 : (ishermitian ? 1 : 2); 366 PetscCall(MatIsSPDKnown(sA, &isset, &isspd)); 367 matproperties[2] = !isset ? 0 : (isspd ? 1 : 2); 368 PetscCall(MatIsStructurallySymmetricKnown(sA, &isset, &isstructurallysymmetric)); 369 matproperties[3] = !isset ? 0 : (isstructurallysymmetric ? 
1 : 2); 370 } 371 PetscCall(MatUpdateMPIAIJWithArray(A, a)); 372 if (!PCMPIServerUseShmget) PetscCall(PetscFree(a)); 373 PetscCallMPI(MPI_Bcast(matproperties, 4, MPIU_INT, 0, comm)); 374 /* if any of these properties was previously set and is now not set this will result in incorrect properties in A since there is no way to unset a property */ 375 if (matproperties[0]) PetscCall(MatSetOption(A, MAT_SYMMETRIC, matproperties[0] == 1 ? PETSC_TRUE : PETSC_FALSE)); 376 if (matproperties[1]) PetscCall(MatSetOption(A, MAT_HERMITIAN, matproperties[1] == 1 ? PETSC_TRUE : PETSC_FALSE)); 377 if (matproperties[2]) PetscCall(MatSetOption(A, MAT_SPD, matproperties[2] == 1 ? PETSC_TRUE : PETSC_FALSE)); 378 if (matproperties[3]) PetscCall(MatSetOption(A, MAT_STRUCTURALLY_SYMMETRIC, matproperties[3] == 1 ? PETSC_TRUE : PETSC_FALSE)); 379 PetscCall(PetscLogEventEnd(EventServerDist, NULL, NULL, NULL, NULL)); 380 PCMPIServerInSolve = PETSC_FALSE; 381 PetscFunctionReturn(PETSC_SUCCESS); 382 } 383 384 static PetscErrorCode PCMPISolve(PC pc, Vec B, Vec X) 385 { 386 PC_MPI *km = pc ? (PC_MPI *)pc->data : NULL; 387 KSP ksp; 388 MPI_Comm comm = PC_MPI_COMM_WORLD; 389 const PetscScalar *sb = NULL, *x; 390 PetscScalar *b, *sx = NULL; 391 PetscInt its, n; 392 PetscMPIInt size; 393 void *addr[2]; 394 395 PetscFunctionBegin; 396 PetscCallMPI(MPI_Scatter(pc ? 
km->ksps : &ksp, 1, MPI_AINT, &ksp, 1, MPI_AINT, 0, comm)); 397 if (!ksp) PetscFunctionReturn(PETSC_SUCCESS); 398 PCMPIServerInSolve = PETSC_TRUE; 399 PetscCall(PetscLogEventBegin(EventServerDist, NULL, NULL, NULL, NULL)); 400 PetscCall(PetscObjectGetComm((PetscObject)ksp, &comm)); 401 402 /* scatterv rhs */ 403 PetscCallMPI(MPI_Comm_size(comm, &size)); 404 if (pc) { 405 PetscInt N; 406 407 PCMPISolveCounts[size - 1]++; 408 PetscCall(MatGetSize(pc->pmat, &N, NULL)); 409 PCMPISizes[size - 1] += N; 410 } 411 PetscCall(VecGetLocalSize(ksp->vec_rhs, &n)); 412 PetscCall(PetscLogEventBegin(EventServerDistMPI, NULL, NULL, NULL, NULL)); 413 if (!PCMPIServerUseShmget) { 414 PetscCall(VecGetArray(ksp->vec_rhs, &b)); 415 if (pc) PetscCall(VecGetArrayRead(B, &sb)); 416 PetscCallMPI(MPIU_Scatterv(sb, pc ? km->sendcount : NULL, pc ? km->displ : NULL, MPIU_SCALAR, b, n, MPIU_SCALAR, 0, comm)); 417 if (pc) PetscCall(VecRestoreArrayRead(B, &sb)); 418 PetscCall(VecRestoreArray(ksp->vec_rhs, &b)); 419 // TODO: scatter initial guess if needed 420 } else { 421 PetscInt rstart; 422 423 if (pc) PetscCall(VecGetArrayRead(B, &sb)); 424 if (pc) PetscCall(VecGetArray(X, &sx)); 425 const void *inaddr[2] = {(const void **)sb, (const void **)sx}; 426 if (pc) PetscCall(VecRestoreArray(X, &sx)); 427 if (pc) PetscCall(VecRestoreArrayRead(B, &sb)); 428 429 PetscCall(PetscShmgetMapAddresses(comm, 2, inaddr, addr)); 430 PetscCall(VecGetOwnershipRange(ksp->vec_rhs, &rstart, NULL)); 431 PetscCall(VecPlaceArray(ksp->vec_rhs, rstart + (PetscScalar *)addr[0])); 432 PetscCall(VecPlaceArray(ksp->vec_sol, rstart + (PetscScalar *)addr[1])); 433 } 434 PetscCall(PetscLogEventEnd(EventServerDistMPI, NULL, NULL, NULL, NULL)); 435 436 PetscCall(PetscLogEventEnd(EventServerDist, NULL, NULL, NULL, NULL)); 437 PetscCall(PetscLogStagePush(PCMPIStage)); 438 PetscCall(KSPSolve(ksp, NULL, NULL)); 439 PetscCall(PetscLogStagePop()); 440 PetscCall(PetscLogEventBegin(EventServerDist, NULL, NULL, NULL, NULL)); 441 
PetscCall(KSPGetIterationNumber(ksp, &its)); 442 PCMPIIterations[size - 1] += its; 443 // TODO: send iterations up to outer KSP 444 445 if (PCMPIServerUseShmget) PetscCall(PetscShmgetUnmapAddresses(2, addr)); 446 447 /* gather solution */ 448 PetscCall(PetscLogEventBegin(EventServerDistMPI, NULL, NULL, NULL, NULL)); 449 if (!PCMPIServerUseShmget) { 450 PetscCall(VecGetArrayRead(ksp->vec_sol, &x)); 451 if (pc) PetscCall(VecGetArray(X, &sx)); 452 PetscCallMPI(MPIU_Gatherv(x, n, MPIU_SCALAR, sx, pc ? km->sendcount : NULL, pc ? km->displ : NULL, MPIU_SCALAR, 0, comm)); 453 if (pc) PetscCall(VecRestoreArray(X, &sx)); 454 PetscCall(VecRestoreArrayRead(ksp->vec_sol, &x)); 455 } else { 456 PetscCallMPI(MPI_Barrier(comm)); 457 PetscCall(VecResetArray(ksp->vec_rhs)); 458 PetscCall(VecResetArray(ksp->vec_sol)); 459 } 460 PetscCall(PetscLogEventEnd(EventServerDistMPI, NULL, NULL, NULL, NULL)); 461 PetscCall(PetscLogEventEnd(EventServerDist, NULL, NULL, NULL, NULL)); 462 PCMPIServerInSolve = PETSC_FALSE; 463 PetscFunctionReturn(PETSC_SUCCESS); 464 } 465 466 static PetscErrorCode PCMPIDestroy(PC pc) 467 { 468 PC_MPI *km = pc ? (PC_MPI *)pc->data : NULL; 469 KSP ksp; 470 MPI_Comm comm = PC_MPI_COMM_WORLD; 471 472 PetscFunctionBegin; 473 PetscCallMPI(MPI_Scatter(pc ? 
km->ksps : NULL, 1, MPI_AINT, &ksp, 1, MPI_AINT, 0, comm)); 474 if (!ksp) PetscFunctionReturn(PETSC_SUCCESS); 475 PetscCall(PetscLogStagePush(PCMPIStage)); 476 PCMPIServerInSolve = PETSC_TRUE; 477 PetscCall(KSPDestroy(&ksp)); 478 PetscCall(PetscLogStagePop()); 479 PCMPIServerInSolve = PETSC_FALSE; 480 PetscFunctionReturn(PETSC_SUCCESS); 481 } 482 483 static PetscErrorCode PCMPIServerBroadcastRequest(PCMPICommand request) 484 { 485 #if defined(PETSC_HAVE_PTHREAD_MUTEX) 486 PetscMPIInt dummy1 = 1, dummy2; 487 #endif 488 489 PetscFunctionBegin; 490 #if defined(PETSC_HAVE_PTHREAD_MUTEX) 491 if (PCMPIServerUseShmget) { 492 for (PetscMPIInt i = 1; i < PetscGlobalSize; i++) pthread_mutex_unlock(&PCMPIServerLocks[i]); 493 } 494 #endif 495 PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, MPI_COMM_WORLD)); 496 /* next line ensures the sender has already taken the lock */ 497 #if defined(PETSC_HAVE_PTHREAD_MUTEX) 498 if (PCMPIServerUseShmget) { 499 PetscCallMPI(MPI_Reduce(&dummy1, &dummy2, 1, MPI_INT, MPI_SUM, 0, PC_MPI_COMM_WORLD)); 500 for (PetscMPIInt i = 1; i < PetscGlobalSize; i++) pthread_mutex_lock(&PCMPIServerLocks[i]); 501 } 502 #endif 503 PetscFunctionReturn(PETSC_SUCCESS); 504 } 505 506 /*@C 507 PCMPIServerBegin - starts a server that runs on the `rank != 0` MPI processes waiting to process requests for 508 parallel `KSP` solves and management of parallel `KSP` objects. 509 510 Logically Collective on all MPI processes except rank 0 511 512 Options Database Keys: 513 + -mpi_linear_solver_server - causes the PETSc program to start in MPI linear solver server mode where only the first MPI rank runs user code 514 . 
-mpi_linear_solver_server_view - displays information about all the linear systems solved by the MPI linear solver server at the conclusion of the program 515 - -mpi_linear_solver_server_use_shared_memory - use shared memory when communicating matrices and vectors to server processes (default where supported) 516 517 Level: developer 518 519 Note: 520 This is normally started automatically in `PetscInitialize()` when the option is provided 521 522 See `PCMPI` for information on using the solver with a `KSP` object 523 524 See `PetscShmgetAllocateArray()` for instructions on how to ensure the shared memory is available on your machine. 525 526 Developer Notes: 527 When called on MPI rank 0 this sets `PETSC_COMM_WORLD` to `PETSC_COMM_SELF` to allow a main program 528 written with `PETSC_COMM_WORLD` to run correctly on the single rank while all the ranks 529 (that would normally be sharing `PETSC_COMM_WORLD`) to run the solver server. 530 531 Can this be integrated into the `PetscDevice` abstraction that is currently being developed? 532 533 Conceivably `PCREDISTRIBUTE` could be organized in a similar manner to simplify its usage 534 535 This could be implemented directly at the `KSP` level instead of using the `PCMPI` wrapper object 536 537 The code could be extended to allow an MPI + OpenMP application to use the linear solver server concept across all shared-memory 538 nodes with a single MPI process per node for the user application but multiple MPI processes per node for the linear solver. 539 540 The concept could also be extended for users's callbacks for `SNES`, `TS`, and `Tao` where the `SNESSolve()` for example, runs on 541 all MPI processes but the user callback only runs on one MPI process per node. 542 543 PETSc could also be extended with an MPI-less API that provides access to PETSc's solvers without any reference to MPI, essentially remove 544 the `MPI_Comm` argument from PETSc calls. 
545 546 .seealso: [](sec_pcmpi), `PCMPIServerEnd()`, `PCMPI`, `KSPCheckPCMPI()` 547 @*/ 548 PetscErrorCode PCMPIServerBegin(void) 549 { 550 PetscMPIInt rank; 551 552 PetscFunctionBegin; 553 PetscCall(PetscInfo(NULL, "Starting MPI Linear Solver Server\n")); 554 if (PetscDefined(USE_SINGLE_LIBRARY)) { 555 PetscCall(VecInitializePackage()); 556 PetscCall(MatInitializePackage()); 557 PetscCall(DMInitializePackage()); 558 PetscCall(PCInitializePackage()); 559 PetscCall(KSPInitializePackage()); 560 PetscCall(SNESInitializePackage()); 561 PetscCall(TSInitializePackage()); 562 PetscCall(TaoInitializePackage()); 563 } 564 PetscCall(PetscLogStageRegister("PCMPI", &PCMPIStage)); 565 PetscCall(PetscLogEventRegister("ServerDist", PC_CLASSID, &EventServerDist)); 566 PetscCall(PetscLogEventRegister("ServerDistMPI", PC_CLASSID, &EventServerDistMPI)); 567 568 if (!PetscDefined(HAVE_SHMGET)) PCMPIServerUseShmget = PETSC_FALSE; 569 PetscCall(PetscOptionsGetBool(NULL, NULL, "-mpi_linear_solver_server_use_shared_memory", &PCMPIServerUseShmget, NULL)); 570 571 PetscCallMPI(MPI_Comm_rank(PC_MPI_COMM_WORLD, &rank)); 572 if (PCMPIServerUseShmget) { 573 #if defined(PETSC_HAVE_PTHREAD_MUTEX) 574 PetscMPIInt size; 575 576 PetscCallMPI(MPI_Comm_size(PETSC_COMM_WORLD, &size)); 577 if (size > 1) { 578 pthread_mutex_t *locks; 579 580 if (rank == 0) { 581 PCMPIServerActive = PETSC_TRUE; 582 PetscCall(PetscShmgetAllocateArray(size, sizeof(pthread_mutex_t), (void **)&locks)); 583 } 584 PetscCall(PetscShmgetMapAddresses(PETSC_COMM_WORLD, 1, (const void **)&locks, (void **)&PCMPIServerLocks)); 585 if (rank == 0) { 586 pthread_mutexattr_t attr; 587 588 pthread_mutexattr_init(&attr); 589 pthread_mutexattr_setpshared(&attr, PTHREAD_PROCESS_SHARED); 590 591 for (int i = 1; i < size; i++) { 592 pthread_mutex_init(&PCMPIServerLocks[i], &attr); 593 pthread_mutex_lock(&PCMPIServerLocks[i]); 594 } 595 } 596 PetscCallMPI(MPI_Barrier(PETSC_COMM_WORLD)); 597 } 598 #endif 599 } 600 if (rank == 0) { 601 
PETSC_COMM_WORLD = PETSC_COMM_SELF; 602 PCMPIServerActive = PETSC_TRUE; 603 PetscFunctionReturn(PETSC_SUCCESS); 604 } 605 606 while (PETSC_TRUE) { 607 PCMPICommand request = PCMPI_CREATE; 608 #if defined(PETSC_HAVE_PTHREAD_MUTEX) 609 PetscMPIInt dummy1 = 1, dummy2; 610 #endif 611 612 // TODO: can we broadcast the number of active ranks here so only the correct subset of processes waits on the later scatters? 613 #if defined(PETSC_HAVE_PTHREAD_MUTEX) 614 if (PCMPIServerUseShmget) pthread_mutex_lock(&PCMPIServerLocks[PetscGlobalRank]); 615 #endif 616 PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, PC_MPI_COMM_WORLD)); 617 #if defined(PETSC_HAVE_PTHREAD_MUTEX) 618 if (PCMPIServerUseShmget) { 619 /* next line ensures PetscGlobalRank has locked before rank 0 can take the lock back */ 620 PetscCallMPI(MPI_Reduce(&dummy1, &dummy2, 1, MPI_INT, MPI_SUM, 0, PC_MPI_COMM_WORLD)); 621 pthread_mutex_unlock(&PCMPIServerLocks[PetscGlobalRank]); 622 } 623 #endif 624 switch (request) { 625 case PCMPI_CREATE: 626 PetscCall(PCMPICreate(NULL)); 627 break; 628 case PCMPI_SET_MAT: 629 PetscCall(PCMPISetMat(NULL)); 630 break; 631 case PCMPI_UPDATE_MAT_VALUES: 632 PetscCall(PCMPIUpdateMatValues(NULL)); 633 break; 634 case PCMPI_VIEW: 635 // PetscCall(PCMPIView(NULL)); 636 break; 637 case PCMPI_SOLVE: 638 PetscCall(PCMPISolve(NULL, NULL, NULL)); 639 break; 640 case PCMPI_DESTROY: 641 PetscCall(PCMPIDestroy(NULL)); 642 break; 643 case PCMPI_EXIT: 644 if (PCMPIServerUseShmget) PetscCall(PetscShmgetUnmapAddresses(1, (void **)&PCMPIServerLocks)); 645 PetscCall(PetscFinalize()); 646 exit(0); /* not sure if this is a good idea, but cannot return because it will run users main program */ 647 break; 648 default: 649 break; 650 } 651 } 652 PetscFunctionReturn(PETSC_SUCCESS); 653 } 654 655 /*@C 656 PCMPIServerEnd - ends a server that runs on the rank != 0 MPI processes waiting to process requests for 657 parallel KSP solves and management of parallel `KSP` objects. 
658 659 Logically Collective on all MPI ranks except 0 660 661 Level: developer 662 663 Note: 664 This is normally called automatically in `PetscFinalize()` 665 666 .seealso: [](sec_pcmpi), `PCMPIServerBegin()`, `PCMPI`, `KSPCheckPCMPI()` 667 @*/ 668 PetscErrorCode PCMPIServerEnd(void) 669 { 670 PetscFunctionBegin; 671 if (PetscGlobalRank == 0) { 672 PetscViewer viewer = NULL; 673 PetscViewerFormat format; 674 675 PetscCall(PetscShmgetAddressesFinalize()); 676 PetscCall(PCMPIServerBroadcastRequest(PCMPI_EXIT)); 677 if (PCMPIServerUseShmget) PetscCall(PetscShmgetUnmapAddresses(1, (void **)&PCMPIServerLocks)); 678 PETSC_COMM_WORLD = MPI_COMM_WORLD; /* could use PC_MPI_COMM_WORLD */ 679 PetscOptionsBegin(PETSC_COMM_SELF, NULL, "MPI linear solver server options", NULL); 680 PetscCall(PetscOptionsViewer("-mpi_linear_solver_server_view", "View information about system solved with the server", "PCMPI", &viewer, &format, NULL)); 681 PetscOptionsEnd(); 682 if (viewer) { 683 PetscBool isascii; 684 685 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii)); 686 if (isascii) { 687 PetscMPIInt size; 688 PetscMPIInt i; 689 690 PetscCallMPI(MPI_Comm_size(PETSC_COMM_WORLD, &size)); 691 PetscCall(PetscViewerASCIIPrintf(viewer, "MPI linear solver server statistics:\n")); 692 PetscCall(PetscViewerASCIIPrintf(viewer, " Ranks KSPSolve()s Mats KSPs Avg. Size Avg. 
Its\n")); 693 if (PCMPIKSPCountsSeq) { 694 PetscCall(PetscViewerASCIIPrintf(viewer, " Sequential %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT "\n", PCMPISolveCountsSeq, PCMPIKSPCountsSeq, PCMPISizesSeq / PCMPISolveCountsSeq, PCMPIIterationsSeq / PCMPISolveCountsSeq)); 695 } 696 for (i = 0; i < size; i++) { 697 if (PCMPIKSPCounts[i]) { 698 PetscCall(PetscViewerASCIIPrintf(viewer, " %d %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT "\n", i + 1, PCMPISolveCounts[i], PCMPIMatCounts[i], PCMPIKSPCounts[i], PCMPISizes[i] / PCMPISolveCounts[i], PCMPIIterations[i] / PCMPISolveCounts[i])); 699 } 700 } 701 PetscCall(PetscViewerASCIIPrintf(viewer, "MPI linear solver server %susing shared memory\n", PCMPIServerUseShmget ? "" : "not ")); 702 } 703 PetscCall(PetscViewerDestroy(&viewer)); 704 } 705 } 706 PetscCall(PCMPICommsDestroy()); 707 PCMPIServerActive = PETSC_FALSE; 708 PetscFunctionReturn(PETSC_SUCCESS); 709 } 710 711 /* 712 This version is used in the trivial case when the MPI parallel solver server is running on just the original MPI rank 0 713 because, for example, the problem is small. 
This version is more efficient because it does not require copying any data 714 */ 715 static PetscErrorCode PCSetUp_Seq(PC pc) 716 { 717 PC_MPI *km = (PC_MPI *)pc->data; 718 Mat sA; 719 const char *prefix; 720 char *found = NULL, *cprefix; 721 722 PetscFunctionBegin; 723 PCMPIServerInSolve = PETSC_TRUE; 724 PetscCall(PCGetOperators(pc, NULL, &sA)); 725 PetscCall(PCGetOptionsPrefix(pc, &prefix)); 726 PetscCall(KSPCreate(PETSC_COMM_SELF, &km->ksps[0])); 727 PetscCall(KSPSetNestLevel(km->ksps[0], 1)); 728 PetscCall(PetscObjectSetTabLevel((PetscObject)km->ksps[0], 1)); 729 730 /* Created KSP gets prefix of PC minus the mpi_linear_solver_server_ portion */ 731 PetscCall(PCGetOptionsPrefix(pc, &prefix)); 732 PetscCheck(prefix, PETSC_COMM_SELF, PETSC_ERR_PLIB, "PCMPI missing required prefix"); 733 PetscCall(PetscStrallocpy(prefix, &cprefix)); 734 PetscCall(PetscStrstr(cprefix, "mpi_linear_solver_server_", &found)); 735 PetscCheck(found, PETSC_COMM_SELF, PETSC_ERR_PLIB, "PCMPI missing mpi_linear_solver_server_ portion of prefix"); 736 *found = 0; 737 PetscCall(KSPSetOptionsPrefix(km->ksps[0], cprefix)); 738 PetscCall(PetscFree(cprefix)); 739 740 PetscCall(KSPSetOperators(km->ksps[0], sA, sA)); 741 PetscCall(KSPSetFromOptions(km->ksps[0])); 742 PetscCall(KSPSetUp(km->ksps[0])); 743 PetscCall(PetscInfo(pc, "MPI parallel linear solver system is being solved directly on rank 0 due to its small size\n")); 744 PCMPIKSPCountsSeq++; 745 PCMPIServerInSolve = PETSC_FALSE; 746 PetscFunctionReturn(PETSC_SUCCESS); 747 } 748 749 static PetscErrorCode PCApply_Seq(PC pc, Vec b, Vec x) 750 { 751 PC_MPI *km = (PC_MPI *)pc->data; 752 PetscInt its, n; 753 Mat A; 754 755 PetscFunctionBegin; 756 PCMPIServerInSolve = PETSC_TRUE; 757 PetscCall(KSPSolve(km->ksps[0], b, x)); 758 PetscCall(KSPGetIterationNumber(km->ksps[0], &its)); 759 PCMPISolveCountsSeq++; 760 PCMPIIterationsSeq += its; 761 PetscCall(KSPGetOperators(km->ksps[0], NULL, &A)); 762 PetscCall(MatGetSize(A, &n, NULL)); 763 
PCMPISizesSeq += n; 764 PCMPIServerInSolve = PETSC_FALSE; 765 /* 766 do not keep reference to previous rhs and solution since destroying them in the next KSPSolve() 767 my use PetscFree() instead of PCMPIArrayDeallocate() 768 */ 769 PetscCall(VecDestroy(&km->ksps[0]->vec_rhs)); 770 PetscCall(VecDestroy(&km->ksps[0]->vec_sol)); 771 PetscFunctionReturn(PETSC_SUCCESS); 772 } 773 774 static PetscErrorCode PCView_Seq(PC pc, PetscViewer viewer) 775 { 776 PC_MPI *km = (PC_MPI *)pc->data; 777 778 PetscFunctionBegin; 779 PetscCall(PetscViewerASCIIPrintf(viewer, "Running MPI linear solver server directly on rank 0 due to its small size\n")); 780 PetscCall(PetscViewerASCIIPrintf(viewer, "Desired minimum number of nonzeros per rank for MPI parallel solve %" PetscInt_FMT "\n", km->mincntperrank)); 781 PetscCall(PetscViewerASCIIPrintf(viewer, "*** Use -mpi_linear_solver_server_view to statistics on all the solves ***\n")); 782 PetscFunctionReturn(PETSC_SUCCESS); 783 } 784 785 static PetscErrorCode PCDestroy_Seq(PC pc) 786 { 787 PC_MPI *km = (PC_MPI *)pc->data; 788 Mat A, B; 789 Vec x, b; 790 791 PetscFunctionBegin; 792 PCMPIServerInSolve = PETSC_TRUE; 793 /* since matrices and vectors are shared with outer KSP we need to ensure they are not destroyed with PetscFree() */ 794 PetscCall(KSPGetOperators(km->ksps[0], &A, &B)); 795 PetscCall(PetscObjectReference((PetscObject)A)); 796 PetscCall(PetscObjectReference((PetscObject)B)); 797 PetscCall(KSPGetSolution(km->ksps[0], &x)); 798 PetscCall(PetscObjectReference((PetscObject)x)); 799 PetscCall(KSPGetRhs(km->ksps[0], &b)); 800 PetscCall(PetscObjectReference((PetscObject)b)); 801 PetscCall(KSPDestroy(&km->ksps[0])); 802 PetscCall(PetscFree(pc->data)); 803 PCMPIServerInSolve = PETSC_FALSE; 804 PetscCall(MatDestroy(&A)); 805 PetscCall(MatDestroy(&B)); 806 PetscCall(VecDestroy(&x)); 807 PetscCall(VecDestroy(&b)); 808 PetscFunctionReturn(PETSC_SUCCESS); 809 } 810 811 /* 812 PCSetUp_MPI - Trigger the creation of the MPI parallel PC and copy 
parts of the matrix and 813 right-hand side to the parallel PC 814 */ 815 static PetscErrorCode PCSetUp_MPI(PC pc) 816 { 817 PC_MPI *km = (PC_MPI *)pc->data; 818 PetscMPIInt rank, size; 819 PetscBool newmatrix = PETSC_FALSE; 820 821 PetscFunctionBegin; 822 PetscCallMPI(MPI_Comm_rank(MPI_COMM_WORLD, &rank)); 823 PetscCheck(rank == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "PCMPI can only be used from 0th rank of MPI_COMM_WORLD. Perhaps a missing -mpi_linear_solver_server?"); 824 PetscCallMPI(MPI_Comm_size(MPI_COMM_WORLD, &size)); 825 826 if (!pc->setupcalled) { 827 if (!km->alwaysuseserver) { 828 PetscInt n; 829 Mat sA; 830 /* short circuit for small systems */ 831 PetscCall(PCGetOperators(pc, &sA, &sA)); 832 PetscCall(MatGetSize(sA, &n, NULL)); 833 if (n < 2 * km->mincntperrank - 1 || size == 1) { 834 pc->ops->setup = NULL; 835 pc->ops->apply = PCApply_Seq; 836 pc->ops->destroy = PCDestroy_Seq; 837 pc->ops->view = PCView_Seq; 838 PetscCall(PCSetUp_Seq(pc)); 839 PetscFunctionReturn(PETSC_SUCCESS); 840 } 841 } 842 843 PetscCall(PCMPIServerBroadcastRequest(PCMPI_CREATE)); 844 PetscCall(PCMPICreate(pc)); 845 newmatrix = PETSC_TRUE; 846 } 847 if (pc->flag == DIFFERENT_NONZERO_PATTERN) newmatrix = PETSC_TRUE; 848 849 if (newmatrix) { 850 PetscCall(PetscInfo(pc, "New matrix or matrix has changed nonzero structure\n")); 851 PetscCall(PCMPIServerBroadcastRequest(PCMPI_SET_MAT)); 852 PetscCall(PCMPISetMat(pc)); 853 } else { 854 PetscCall(PetscInfo(pc, "Matrix has only changed nonzero values\n")); 855 PetscCall(PCMPIServerBroadcastRequest(PCMPI_UPDATE_MAT_VALUES)); 856 PetscCall(PCMPIUpdateMatValues(pc)); 857 } 858 PetscFunctionReturn(PETSC_SUCCESS); 859 } 860 861 static PetscErrorCode PCApply_MPI(PC pc, Vec b, Vec x) 862 { 863 PetscFunctionBegin; 864 PetscCall(PCMPIServerBroadcastRequest(PCMPI_SOLVE)); 865 PetscCall(PCMPISolve(pc, b, x)); 866 PetscFunctionReturn(PETSC_SUCCESS); 867 } 868 869 static PetscErrorCode PCDestroy_MPI(PC pc) 870 { 871 PetscFunctionBegin; 872 
PetscCall(PCMPIServerBroadcastRequest(PCMPI_DESTROY)); 873 PetscCall(PCMPIDestroy(pc)); 874 PetscCall(PetscFree(pc->data)); 875 PetscFunctionReturn(PETSC_SUCCESS); 876 } 877 878 /* 879 PCView_MPI - Cannot call view on the MPI parallel KSP because other ranks do not have access to the viewer, use options database 880 */ 881 static PetscErrorCode PCView_MPI(PC pc, PetscViewer viewer) 882 { 883 PC_MPI *km = (PC_MPI *)pc->data; 884 MPI_Comm comm; 885 PetscMPIInt size; 886 887 PetscFunctionBegin; 888 PetscCall(PetscObjectGetComm((PetscObject)km->ksps[0], &comm)); 889 PetscCallMPI(MPI_Comm_size(comm, &size)); 890 PetscCall(PetscViewerASCIIPrintf(viewer, "Size of MPI communicator used for MPI parallel KSP solve %d\n", size)); 891 PetscCall(PetscViewerASCIIPrintf(viewer, "Desired minimum number of matrix rows on each MPI process for MPI parallel solve %" PetscInt_FMT "\n", km->mincntperrank)); 892 PetscCall(PetscViewerASCIIPrintf(viewer, "*** Use -mpi_linear_solver_server_view to view statistics on all the solves ***\n")); 893 PetscFunctionReturn(PETSC_SUCCESS); 894 } 895 896 static PetscErrorCode PCSetFromOptions_MPI(PC pc, PetscOptionItems PetscOptionsObject) 897 { 898 PC_MPI *km = (PC_MPI *)pc->data; 899 900 PetscFunctionBegin; 901 PetscOptionsHeadBegin(PetscOptionsObject, "MPI linear solver server options"); 902 PetscCall(PetscOptionsInt("-minimum_count_per_rank", "Desired minimum number of nonzeros per rank", "None", km->mincntperrank, &km->mincntperrank, NULL)); 903 PetscCall(PetscOptionsBool("-always_use_server", "Use the server even if only one rank is used for the solve (for debugging)", "None", km->alwaysuseserver, &km->alwaysuseserver, NULL)); 904 PetscOptionsHeadEnd(); 905 PetscFunctionReturn(PETSC_SUCCESS); 906 } 907 908 /*MC 909 PCMPI - Calls an MPI parallel `KSP` to solve a linear system from user code running on one process 910 911 Options Database Keys for the Server: 912 + -mpi_linear_solver_server - causes the PETSc program to start in MPI linear solver 
server mode where only the first MPI rank runs user code
.  -mpi_linear_solver_server_view - displays information about all the linear systems solved by the MPI linear solver server
-  -mpi_linear_solver_server_use_shared_memory <true, false> - use shared memory to distribute matrix and right hand side, defaults to true

   Options Database Keys for a specific `KSP` object
+  -[any_ksp_prefix]_mpi_linear_solver_server_minimum_count_per_rank - sets the minimum size of the linear system per MPI rank that the solver will strive for
-  -[any_ksp_prefix]_mpi_linear_solver_server_always_use_server - use the server solver code even if the particular system is only solved on the process (for debugging and testing purposes)

   Level: developer

   Notes:
   This cannot be used with vectors or matrices that are created using arrays provided by the user, such as `VecCreateWithArray()` or
   `MatCreateSeqAIJWithArrays()`

   The options database prefix for the actual solver is any prefix provided before use to the original `KSP` with `KSPSetOptionsPrefix()`; most commonly no prefix is used.

   It can be particularly useful for user OpenMP code or potentially user GPU code.

   When the program is running with a single MPI process then it directly uses the provided matrix and right-hand side
   and does not need to distribute the matrix and vector to the various MPI processes; thus it incurs no extra overhead over just using the `KSP` directly.

   The solver options for the actual solving `KSP` and `PC` must be controlled via the options database; calls to set options directly on the user level `KSP` and `PC` have no effect
   because they are not the actual solver objects.

   When `-log_view` is used with this solver the events within the parallel solve are logged in their own stage.
Some of the logging in the other
   stages will be confusing since the event times are only recorded on the 0th MPI rank, thus the percent of time in the events will be misleading.

   Developer Note:
   This `PCType` is never directly selected by the user, it is set when the option `-mpi_linear_solver_server` is used and the `PC` is at the outer most nesting of
   a `KSP`. The outer most `KSP` object is automatically set to `KSPPREONLY` and thus is not directly visible to the user.

.seealso: [](sec_pcmpi), `KSPCreate()`, `KSPSetType()`, `KSPType`, `KSP`, `PC`, `PCMPIServerBegin()`, `PCMPIServerEnd()`, `KSPCheckPCMPI()`
M*/
PETSC_EXTERN PetscErrorCode PCCreate_MPI(PC pc)
{
  PC_MPI *ctx;
  char   *marker = NULL;

  PetscFunctionBegin;
  /* a PCMPI is only ever attached to a KSP whose options prefix carries the server marker */
  PetscCall(PetscStrstr(((PetscObject)pc)->prefix, "mpi_linear_solver_server_", &marker));
  PetscCheck(marker, PETSC_COMM_SELF, PETSC_ERR_PLIB, "PCMPI object prefix does not have mpi_linear_solver_server_");

  /* material from PCSetType(): tear down whatever implementation the PC previously carried */
  PetscTryTypeMethod(pc, destroy);
  pc->ops->destroy = NULL;
  pc->data         = NULL;

  PetscCall(PetscFunctionListDestroy(&((PetscObject)pc)->qlist));
  PetscCall(PetscMemzero(pc->ops, sizeof(struct _PCOps)));
  pc->modifysubmatrices  = NULL;
  pc->modifysubmatricesP = NULL;
  pc->setupcalled        = PETSC_FALSE;

  /* fresh, zeroed implementation context */
  PetscCall(PetscNew(&ctx));
  pc->data = (void *)ctx;

  ctx->mincntperrank = 10000;

  /* install the MPI server implementation table */
  pc->ops->setup          = PCSetUp_MPI;
  pc->ops->apply          = PCApply_MPI;
  pc->ops->destroy        = PCDestroy_MPI;
  pc->ops->view           = PCView_MPI;
  pc->ops->setfromoptions = PCSetFromOptions_MPI;
  PetscCall(PetscObjectChangeTypeName((PetscObject)pc, PCMPI));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  PCMPIGetKSP - Gets the `KSP` created by the `PCMPI`

  Not Collective

  Input Parameter:
. pc - the preconditioner context

  Output Parameter:
. innerksp - the inner `KSP`

  Level: advanced

.seealso: [](ch_ksp), `KSP`, `PCMPI`, `PCREDISTRIBUTE`
@*/
PetscErrorCode PCMPIGetKSP(PC pc, KSP *innerksp)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc, PC_CLASSID, 1);
  PetscAssertPointer(innerksp, 2);
  *innerksp = ((PC_MPI *)pc->data)->ksps[0];
  PetscFunctionReturn(PETSC_SUCCESS);
}