/*
     This file creates an MPI parallel KSP from a sequential PC that lives on MPI rank 0.
     It is intended to allow using PETSc MPI parallel linear solvers from non-MPI codes.

     The user program may use OpenMP to compute the right-hand side and matrix for the linear system.

     The code uses MPI_COMM_WORLD below, but perhaps it should use PETSC_COMM_WORLD.

     The resulting KSP and PC can only be controlled via the options database, though some common commands
     could be passed through the server.
*/
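/*
   To make the mechanism concrete: below is a minimal sketch (illustrative only, not part of this
   file) of a sequential-looking user program that can exploit the server. All calls in the sketch
   are standard PETSc API; the problem setup (a 1d Laplacian) is an arbitrary example. Launched as,
   say, `mpiexec -n 4 ./app -mpi_linear_solver_server -ksp_type cg -pc_type gamg`, rank 0 runs
   main() below while ranks 1-3 sit in PCMPIServerBegin() (entered inside PetscInitialize())
   serving the parallel solves.

      #include <petscksp.h>
      int main(int argc, char **argv)
      {
        Mat         A;
        Vec         b, x;
        KSP         ksp;
        PetscInt    i, n = 100, col[3];
        PetscScalar v[3] = {-1.0, 2.0, -1.0};

        PetscCall(PetscInitialize(&argc, &argv, NULL, NULL)); // ranks > 0 enter the server loop here
        PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, n, n, 3, NULL, &A)); // sequential matrix on rank 0
        for (i = 0; i < n; i++) {
          col[0] = i - 1; col[1] = i; col[2] = i + 1;
          if (i == 0) PetscCall(MatSetValues(A, 1, &i, 2, col + 1, v + 1, INSERT_VALUES));
          else if (i == n - 1) PetscCall(MatSetValues(A, 1, &i, 2, col, v, INSERT_VALUES));
          else PetscCall(MatSetValues(A, 1, &i, 3, col, v, INSERT_VALUES));
        }
        PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
        PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
        PetscCall(MatCreateVecs(A, &x, &b));
        PetscCall(VecSet(b, 1.0));
        PetscCall(KSPCreate(PETSC_COMM_WORLD, &ksp)); // PETSC_COMM_WORLD is PETSC_COMM_SELF on rank 0 in server mode
        PetscCall(KSPSetOperators(ksp, A, A));
        PetscCall(KSPSetFromOptions(ksp));
        PetscCall(KSPSolve(ksp, b, x)); // forwarded to the MPI parallel server KSP by PCMPI
        PetscCall(KSPDestroy(&ksp));
        PetscCall(MatDestroy(&A));
        PetscCall(VecDestroy(&b));
        PetscCall(VecDestroy(&x));
        PetscCall(PetscFinalize()); // rank 0 broadcasts PCMPI_EXIT so the server ranks shut down
        return 0;
      }
*/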
#include <petsc/private/pcimpl.h> /*I "petscksp.h" I*/
#include <petsc/private/kspimpl.h>
#include <petscts.h>
#include <petsctao.h>
#if defined(PETSC_HAVE_PTHREAD_MUTEX)
  #include <pthread.h>
#endif

#define PC_MPI_MAX_RANKS  256
#define PC_MPI_COMM_WORLD MPI_COMM_WORLD

typedef struct {
  KSP       ksps[PC_MPI_MAX_RANKS];                               /* The addresses of the MPI parallel KSP on each process, NULL when not on a process. */
  PetscInt  sendcount[PC_MPI_MAX_RANKS], displ[PC_MPI_MAX_RANKS]; /* For scatter/gather of rhs/solution */
  PetscInt  NZ[PC_MPI_MAX_RANKS], NZdispl[PC_MPI_MAX_RANKS];      /* For scatter of nonzero values in matrix (and of the nonzero column indices initially) */
  PetscInt  mincntperrank;                                        /* minimum number of desired matrix rows per active rank in MPI parallel KSP solve */
  PetscBool alwaysuseserver;                                      /* for debugging, use the server infrastructure even if only one MPI process is used for the solve */
} PC_MPI;

typedef enum {
  PCMPI_EXIT, /* exit the PC server loop, meaning the controlling sequential program is done */
  PCMPI_CREATE,
  PCMPI_SET_MAT,           /* set original matrix (or one with different nonzero pattern) */
  PCMPI_UPDATE_MAT_VALUES, /* update current matrix with new nonzero values */
  PCMPI_SOLVE,
  PCMPI_VIEW,
  PCMPI_DESTROY /* destroy a PC that is no longer needed */
} PCMPICommand;

static MPI_Comm      PCMPIComms[PC_MPI_MAX_RANKS];
static PetscBool     PCMPICommSet = PETSC_FALSE;
static PetscInt      PCMPISolveCounts[PC_MPI_MAX_RANKS], PCMPIKSPCounts[PC_MPI_MAX_RANKS], PCMPIMatCounts[PC_MPI_MAX_RANKS], PCMPISolveCountsSeq = 0, PCMPIKSPCountsSeq = 0;
static PetscInt      PCMPIIterations[PC_MPI_MAX_RANKS], PCMPISizes[PC_MPI_MAX_RANKS], PCMPIIterationsSeq = 0, PCMPISizesSeq = 0;
static PetscLogEvent EventServerDist, EventServerDistMPI;
#if defined(PETSC_HAVE_PTHREAD_MUTEX)
static pthread_mutex_t *PCMPIServerLocks;
#else
static void *PCMPIServerLocks;
#endif

static PetscErrorCode PCMPICommsCreate(void)
{
  MPI_Comm    comm = PC_MPI_COMM_WORLD;
  PetscMPIInt size, rank, i;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCheck(size <= PC_MPI_MAX_RANKS, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for using more than PC_MPI_MAX_RANKS MPI ranks in an MPI linear solver server solve");
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  /* comm for size 1 is useful only for debugging */
  for (i = 0; i < size; i++) {
    PetscMPIInt color = rank < i + 1 ? 0 : MPI_UNDEFINED;

    PetscCallMPI(MPI_Comm_split(comm, color, 0, &PCMPIComms[i]));
    PCMPISolveCounts[i] = 0;
    PCMPIKSPCounts[i]   = 0;
    PCMPIIterations[i]  = 0;
    PCMPISizes[i]       = 0;
  }
  PCMPICommSet = PETSC_TRUE;
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode PCMPICommsDestroy(void)
{
  MPI_Comm    comm = PC_MPI_COMM_WORLD;
  PetscMPIInt size, rank, i;

  PetscFunctionBegin;
  if (!PCMPICommSet) PetscFunctionReturn(PETSC_SUCCESS);
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  for (i = 0; i < size; i++) {
    if (PCMPIComms[i] != MPI_COMM_NULL) PetscCallMPI(MPI_Comm_free(&PCMPIComms[i]));
  }
  PCMPICommSet = PETSC_FALSE;
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode PCMPICreate(PC pc)
{
  PC_MPI     *km   = pc ? (PC_MPI *)pc->data : NULL;
  MPI_Comm    comm = PC_MPI_COMM_WORLD;
  KSP         ksp;
  PetscInt    N[2], mincntperrank = 0;
  PetscMPIInt size;
  Mat         sA;
  char       *cprefix = NULL;
  PetscMPIInt len     = 0;

  PetscFunctionBegin;
  PCMPIServerInSolve = PETSC_TRUE;
  if (!PCMPICommSet) PetscCall(PCMPICommsCreate());
  PetscCallMPI(MPI_Comm_size(comm, &size));
  if (pc) {
    if (size == 1) PetscCall(PetscPrintf(PETSC_COMM_SELF, "Warning: Running KSP type of MPI on a one rank MPI run, this will be less efficient than not using this type\n"));
    PetscCall(PCGetOperators(pc, &sA, &sA));
    PetscCall(MatGetSize(sA, &N[0], &N[1]));
  }
  PetscCallMPI(MPI_Bcast(N, 2, MPIU_INT, 0, comm));

  /* choose a suitably sized MPI_Comm for the problem to be solved on */
  if (km) mincntperrank = km->mincntperrank;
  PetscCallMPI(MPI_Bcast(&mincntperrank, 1, MPI_INT, 0, comm));
  comm = PCMPIComms[PetscMin(size, PetscMax(1, N[0] / mincntperrank)) - 1];
  if (comm == MPI_COMM_NULL) {
    ksp                = NULL;
    PCMPIServerInSolve = PETSC_FALSE;
    PetscFunctionReturn(PETSC_SUCCESS);
  }
  PetscCall(PetscLogStagePush(PCMPIStage));
  PetscCall(KSPCreate(comm, &ksp));
  PetscCall(KSPSetNestLevel(ksp, 1));
  PetscCall(PetscObjectSetTabLevel((PetscObject)ksp, 1));
  PetscCall(PetscLogStagePop());
  PetscCallMPI(MPI_Gather(&ksp, 1, MPI_AINT, pc ? km->ksps : NULL, 1, MPI_AINT, 0, comm));
  if (pc) {
    size_t      slen;
    const char *prefix = NULL;
    char       *found  = NULL;

    PetscCallMPI(MPI_Comm_size(comm, &size));
    PCMPIKSPCounts[size - 1]++;
    /* Created KSP gets the prefix of the PC minus the mpi_linear_solver_server_ portion */
    PetscCall(PCGetOptionsPrefix(pc, &prefix));
    PetscCheck(prefix, PETSC_COMM_SELF, PETSC_ERR_PLIB, "PCMPI missing required prefix");
    PetscCall(PetscStrallocpy(prefix, &cprefix));
    PetscCall(PetscStrstr(cprefix, "mpi_linear_solver_server_", &found));
    PetscCheck(found, PETSC_COMM_SELF, PETSC_ERR_PLIB, "PCMPI missing mpi_linear_solver_server_ portion of prefix");
    *found = 0;
    PetscCall(PetscStrlen(cprefix, &slen));
    PetscCall(PetscMPIIntCast(slen, &len));
  }
  PetscCallMPI(MPI_Bcast(&len, 1, MPI_INT, 0, comm));
  if (len) {
    if (!pc) PetscCall(PetscMalloc1(len + 1, &cprefix));
    PetscCallMPI(MPI_Bcast(cprefix, len + 1, MPI_CHAR, 0, comm));
    PetscCall(KSPSetOptionsPrefix(ksp, cprefix));
  }
  PetscCall(PetscFree(cprefix));
  PCMPIServerInSolve = PETSC_FALSE;
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode PCMPISetMat(PC pc)
{
  PC_MPI            *km = pc ? (PC_MPI *)pc->data : NULL;
  Mat                A;
  PetscInt           m, n, j, bs;
  Mat                sA;
  MPI_Comm           comm = PC_MPI_COMM_WORLD;
  KSP                ksp;
  PetscLayout        layout;
  const PetscInt    *IA = NULL, *JA = NULL, *ia, *ja;
  const PetscInt    *range;
  PetscInt          *NZ = NULL, sendcounti[PC_MPI_MAX_RANKS], displi[PC_MPI_MAX_RANKS], *NZdispl = NULL, nz;
  PetscMPIInt        size, i;
  const PetscScalar *a = NULL, *sa;
  PetscInt           matproperties[8] = {0}, rstart, rend;
  char              *cprefix;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Scatter(pc ? km->ksps : NULL, 1, MPI_AINT, &ksp, 1, MPI_AINT, 0, comm));
  if (!ksp) PetscFunctionReturn(PETSC_SUCCESS);
  PCMPIServerInSolve = PETSC_TRUE;
  PetscCall(PetscLogEventBegin(EventServerDist, NULL, NULL, NULL, NULL));
  PetscCall(PetscObjectGetComm((PetscObject)ksp, &comm));
  if (pc) {
    PetscBool   isset, issymmetric, ishermitian, isspd, isstructurallysymmetric;
    const char *prefix;
    size_t      clen;

    PetscCallMPI(MPI_Comm_size(comm, &size));
    PCMPIMatCounts[size - 1]++;
    PetscCall(PCGetOperators(pc, &sA, &sA));
    PetscCall(MatGetSize(sA, &matproperties[0], &matproperties[1]));
    PetscCall(MatGetBlockSize(sA, &bs));
    matproperties[2] = bs;
    PetscCall(MatIsSymmetricKnown(sA, &isset, &issymmetric));
    matproperties[3] = !isset ? 0 : (issymmetric ? 1 : 2);
    PetscCall(MatIsHermitianKnown(sA, &isset, &ishermitian));
    matproperties[4] = !isset ? 0 : (ishermitian ? 1 : 2);
    PetscCall(MatIsSPDKnown(sA, &isset, &isspd));
    matproperties[5] = !isset ? 0 : (isspd ? 1 : 2);
    PetscCall(MatIsStructurallySymmetricKnown(sA, &isset, &isstructurallysymmetric));
    matproperties[6] = !isset ? 0 : (isstructurallysymmetric ? 1 : 2);
    /* Created Mat gets prefix of input Mat PLUS the mpi_linear_solver_server_ portion */
    PetscCall(MatGetOptionsPrefix(sA, &prefix));
    PetscCall(PetscStrallocpy(prefix, &cprefix));
    PetscCall(PetscStrlen(cprefix, &clen));
    matproperties[7] = (PetscInt)clen;
  }
  PetscCallMPI(MPI_Bcast(matproperties, PETSC_STATIC_ARRAY_LENGTH(matproperties), MPIU_INT, 0, comm));

  /* determine ownership ranges of matrix columns */
  PetscCall(PetscLayoutCreate(comm, &layout));
  PetscCall(PetscLayoutSetBlockSize(layout, matproperties[2]));
  PetscCall(PetscLayoutSetSize(layout, matproperties[1]));
  PetscCall(PetscLayoutSetUp(layout));
  PetscCall(PetscLayoutGetLocalSize(layout, &n));
  PetscCall(PetscLayoutDestroy(&layout));

  /* determine ownership ranges of matrix rows */
  PetscCall(PetscLayoutCreate(comm, &layout));
  PetscCall(PetscLayoutSetBlockSize(layout, matproperties[2]));
  PetscCall(PetscLayoutSetSize(layout, matproperties[0]));
  PetscCall(PetscLayoutSetUp(layout));
  PetscCall(PetscLayoutGetLocalSize(layout, &m));
  PetscCall(PetscLayoutGetRange(layout, &rstart, &rend));

  PetscCall(PetscLogEventBegin(EventServerDistMPI, NULL, NULL, NULL, NULL));
  /* copy over the matrix nonzero structure and values */
  if (pc) {
    PetscCall(MatGetRowIJ(sA, 0, PETSC_FALSE, PETSC_FALSE, NULL, &IA, &JA, NULL));
    if (!PCMPIServerUseShmget) {
      NZ      = km->NZ;
      NZdispl = km->NZdispl;
      PetscCall(PetscLayoutGetRanges(layout, &range));
      for (i = 0; i < size; i++) {
        sendcounti[i] = 1 + range[i + 1] - range[i];
        NZ[i]         = IA[range[i + 1]] - IA[range[i]];
      }
      displi[0]  = 0;
      NZdispl[0] = 0;
      for (j = 1; j < size; j++) {
        displi[j]  = displi[j - 1] + sendcounti[j - 1] - 1;
        NZdispl[j] = NZdispl[j - 1] + NZ[j - 1];
      }
    }
    PetscCall(MatSeqAIJGetArrayRead(sA, &sa));
  }
  PetscCall(PetscLayoutDestroy(&layout));

  PetscCall(MatCreate(comm, &A));
  if (matproperties[7] > 0) {
    PetscMPIInt ni;

    PetscCall(PetscMPIIntCast(matproperties[7] + 1, &ni));
    if (!pc) PetscCall(PetscMalloc1(matproperties[7] + 1, &cprefix));
    PetscCallMPI(MPI_Bcast(cprefix, ni, MPI_CHAR, 0, comm));
    PetscCall(MatSetOptionsPrefix(A, cprefix));
    PetscCall(PetscFree(cprefix));
  }
  PetscCall(MatAppendOptionsPrefix(A, "mpi_linear_solver_server_"));
  PetscCall(MatSetSizes(A, m, n, matproperties[0], matproperties[1]));
  PetscCall(MatSetType(A, MATMPIAIJ));

  if (!PCMPIServerUseShmget) {
    PetscCallMPI(MPI_Scatter(NZ, 1, MPIU_INT, &nz, 1, MPIU_INT, 0, comm));
    PetscCall(PetscMalloc3(n + 1, &ia, nz, &ja, nz, &a));
    PetscCallMPI(MPIU_Scatterv(IA, sendcounti, displi, MPIU_INT, (void *)ia, n + 1, MPIU_INT, 0, comm));
    PetscCallMPI(MPIU_Scatterv(JA, NZ, NZdispl, MPIU_INT, (void *)ja, nz, MPIU_INT, 0, comm));
    PetscCallMPI(MPIU_Scatterv(sa, NZ, NZdispl, MPIU_SCALAR, (void *)a, nz, MPIU_SCALAR, 0, comm));
  } else {
    const void           *addr[3] = {(const void **)IA, (const void **)JA, (const void **)sa};
    PCMPIServerAddresses *addresses;

    PetscCall(PetscNew(&addresses));
    addresses->n = 3;
    PetscCall(PetscShmgetMapAddresses(comm, addresses->n, addr, addresses->addr));
    ia = rstart + (PetscInt *)addresses->addr[0];
    ja = ia[0] + (PetscInt *)addresses->addr[1];
    a  = ia[0] + (PetscScalar *)addresses->addr[2];
    PetscCall(PetscObjectContainerCompose((PetscObject)A, "PCMPIServerAddresses", (void *)addresses, PCMPIServerAddressesDestroy));
  }

  if (pc) {
    PetscCall(MatSeqAIJRestoreArrayRead(sA, &sa));
    PetscCall(MatRestoreRowIJ(sA, 0, PETSC_FALSE, PETSC_FALSE, NULL, &IA, &JA, NULL));
  }
  PetscCall(PetscLogEventEnd(EventServerDistMPI, NULL, NULL, NULL, NULL));

  PetscCall(PetscLogStagePush(PCMPIStage));
  PetscCall(MatMPIAIJSetPreallocationCSR(A, ia, ja, a));
  PetscCall(MatSetBlockSize(A, matproperties[2]));

  if (matproperties[3]) PetscCall(MatSetOption(A, MAT_SYMMETRIC, matproperties[3] == 1 ? PETSC_TRUE : PETSC_FALSE));
  if (matproperties[4]) PetscCall(MatSetOption(A, MAT_HERMITIAN, matproperties[4] == 1 ? PETSC_TRUE : PETSC_FALSE));
  if (matproperties[5]) PetscCall(MatSetOption(A, MAT_SPD, matproperties[5] == 1 ? PETSC_TRUE : PETSC_FALSE));
  if (matproperties[6]) PetscCall(MatSetOption(A, MAT_STRUCTURALLY_SYMMETRIC, matproperties[6] == 1 ? PETSC_TRUE : PETSC_FALSE));

  if (!PCMPIServerUseShmget) PetscCall(PetscFree3(ia, ja, a));
  PetscCall(KSPSetOperators(ksp, A, A));
  if (!ksp->vec_sol) PetscCall(MatCreateVecs(A, &ksp->vec_sol, &ksp->vec_rhs));
  PetscCall(PetscLogStagePop());
  if (pc && !PCMPIServerUseShmget) { /* needed for scatterv/gatherv of rhs and solution */
    const PetscInt *range;

    PetscCall(VecGetOwnershipRanges(ksp->vec_sol, &range));
    for (i = 0; i < size; i++) {
      km->sendcount[i] = range[i + 1] - range[i];
      km->displ[i]     = range[i];
    }
  }
  PetscCall(MatDestroy(&A));
  PetscCall(PetscLogEventEnd(EventServerDist, NULL, NULL, NULL, NULL));
  PetscCall(KSPSetFromOptions(ksp));
  PCMPIServerInSolve = PETSC_FALSE;
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode PCMPIUpdateMatValues(PC pc)
{
  PC_MPI            *km = pc ? (PC_MPI *)pc->data : NULL;
  KSP                ksp;
  Mat                sA, A;
  MPI_Comm           comm = PC_MPI_COMM_WORLD;
  const PetscInt    *ia, *IA;
  const PetscScalar *a;
  PetscCount         nz;
  const PetscScalar *sa = NULL;
  PetscMPIInt        size;
  PetscInt           rstart, matproperties[4] = {0, 0, 0, 0};

  PetscFunctionBegin;
  if (pc) {
    PetscCall(PCGetOperators(pc, &sA, &sA));
    PetscCall(MatSeqAIJGetArrayRead(sA, &sa));
    PetscCall(MatGetRowIJ(sA, 0, PETSC_FALSE, PETSC_FALSE, NULL, &IA, NULL, NULL));
  }
  PetscCallMPI(MPI_Scatter(pc ? km->ksps : NULL, 1, MPI_AINT, &ksp, 1, MPI_AINT, 0, comm));
  if (!ksp) PetscFunctionReturn(PETSC_SUCCESS);
  PCMPIServerInSolve = PETSC_TRUE;
  PetscCall(PetscLogEventBegin(EventServerDist, NULL, NULL, NULL, NULL));
  PetscCall(PetscObjectGetComm((PetscObject)ksp, &comm));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PCMPIMatCounts[size - 1]++;
  PetscCall(KSPGetOperators(ksp, NULL, &A));
  PetscCall(PetscLogEventBegin(EventServerDistMPI, NULL, NULL, NULL, NULL));
  if (!PCMPIServerUseShmget) {
    PetscInt petsc_nz;

    PetscCall(MatMPIAIJGetNumberNonzeros(A, &nz));
    PetscCall(PetscIntCast(nz, &petsc_nz));
    PetscCall(PetscMalloc1(nz, &a));
    PetscCallMPI(MPIU_Scatterv(sa, pc ? km->NZ : NULL, pc ? km->NZdispl : NULL, MPIU_SCALAR, (void *)a, petsc_nz, MPIU_SCALAR, 0, comm));
  } else {
    PCMPIServerAddresses *addresses;

    PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
    PetscCall(PetscObjectContainerQuery((PetscObject)A, "PCMPIServerAddresses", (void **)&addresses));
    ia = rstart + (PetscInt *)addresses->addr[0];
    a  = ia[0] + (PetscScalar *)addresses->addr[2];
  }
  PetscCall(PetscLogEventEnd(EventServerDistMPI, NULL, NULL, NULL, NULL));
  if (pc) {
    PetscBool isset, issymmetric, ishermitian, isspd, isstructurallysymmetric;

    PetscCall(MatSeqAIJRestoreArrayRead(sA, &sa));
    PetscCall(MatRestoreRowIJ(sA, 0, PETSC_FALSE, PETSC_FALSE, NULL, &IA, NULL, NULL));

    PetscCall(MatIsSymmetricKnown(sA, &isset, &issymmetric));
    matproperties[0] = !isset ? 0 : (issymmetric ? 1 : 2);
    PetscCall(MatIsHermitianKnown(sA, &isset, &ishermitian));
    matproperties[1] = !isset ? 0 : (ishermitian ? 1 : 2);
    PetscCall(MatIsSPDKnown(sA, &isset, &isspd));
    matproperties[2] = !isset ? 0 : (isspd ? 1 : 2);
    PetscCall(MatIsStructurallySymmetricKnown(sA, &isset, &isstructurallysymmetric));
    matproperties[3] = !isset ? 0 : (isstructurallysymmetric ? 1 : 2);
  }
  PetscCall(MatUpdateMPIAIJWithArray(A, a));
  if (!PCMPIServerUseShmget) PetscCall(PetscFree(a));
  PetscCallMPI(MPI_Bcast(matproperties, 4, MPIU_INT, 0, comm));
  /* if any of these properties was previously set and is now not set this will result in incorrect properties in A since there is no way to unset a property */
  if (matproperties[0]) PetscCall(MatSetOption(A, MAT_SYMMETRIC, matproperties[0] == 1 ? PETSC_TRUE : PETSC_FALSE));
  if (matproperties[1]) PetscCall(MatSetOption(A, MAT_HERMITIAN, matproperties[1] == 1 ? PETSC_TRUE : PETSC_FALSE));
  if (matproperties[2]) PetscCall(MatSetOption(A, MAT_SPD, matproperties[2] == 1 ? PETSC_TRUE : PETSC_FALSE));
  if (matproperties[3]) PetscCall(MatSetOption(A, MAT_STRUCTURALLY_SYMMETRIC, matproperties[3] == 1 ? PETSC_TRUE : PETSC_FALSE));
  PetscCall(PetscLogEventEnd(EventServerDist, NULL, NULL, NULL, NULL));
  PCMPIServerInSolve = PETSC_FALSE;
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode PCMPISolve(PC pc, Vec B, Vec X)
{
  PC_MPI            *km   = pc ? (PC_MPI *)pc->data : NULL;
  KSP                ksp;
  MPI_Comm           comm = PC_MPI_COMM_WORLD;
  const PetscScalar *sb   = NULL, *x;
  PetscScalar       *b, *sx = NULL;
  PetscInt           its, n;
  PetscMPIInt        size;
  void              *addr[2];

  PetscFunctionBegin;
  PetscCallMPI(MPI_Scatter(pc ? km->ksps : &ksp, 1, MPI_AINT, &ksp, 1, MPI_AINT, 0, comm));
  if (!ksp) PetscFunctionReturn(PETSC_SUCCESS);
  PCMPIServerInSolve = PETSC_TRUE;
  PetscCall(PetscLogEventBegin(EventServerDist, NULL, NULL, NULL, NULL));
  PetscCall(PetscObjectGetComm((PetscObject)ksp, &comm));

  /* scatterv rhs */
  PetscCallMPI(MPI_Comm_size(comm, &size));
  if (pc) {
    PetscInt N;

    PCMPISolveCounts[size - 1]++;
    PetscCall(MatGetSize(pc->pmat, &N, NULL));
    PCMPISizes[size - 1] += N;
  }
  PetscCall(VecGetLocalSize(ksp->vec_rhs, &n));
  PetscCall(PetscLogEventBegin(EventServerDistMPI, NULL, NULL, NULL, NULL));
  if (!PCMPIServerUseShmget) {
    PetscCall(VecGetArray(ksp->vec_rhs, &b));
    if (pc) PetscCall(VecGetArrayRead(B, &sb));
    PetscCallMPI(MPIU_Scatterv(sb, pc ? km->sendcount : NULL, pc ? km->displ : NULL, MPIU_SCALAR, b, n, MPIU_SCALAR, 0, comm));
    if (pc) PetscCall(VecRestoreArrayRead(B, &sb));
    PetscCall(VecRestoreArray(ksp->vec_rhs, &b));
    // TODO: scatter initial guess if needed
  } else {
    PetscInt rstart;

    if (pc) PetscCall(VecGetArrayRead(B, &sb));
    if (pc) PetscCall(VecGetArray(X, &sx));
    const void *inaddr[2] = {(const void **)sb, (const void **)sx};
    if (pc) PetscCall(VecRestoreArray(X, &sx));
    if (pc) PetscCall(VecRestoreArrayRead(B, &sb));

    PetscCall(PetscShmgetMapAddresses(comm, 2, inaddr, addr));
    PetscCall(VecGetOwnershipRange(ksp->vec_rhs, &rstart, NULL));
    PetscCall(VecPlaceArray(ksp->vec_rhs, rstart + (PetscScalar *)addr[0]));
    PetscCall(VecPlaceArray(ksp->vec_sol, rstart + (PetscScalar *)addr[1]));
  }
  PetscCall(PetscLogEventEnd(EventServerDistMPI, NULL, NULL, NULL, NULL));

  PetscCall(PetscLogEventEnd(EventServerDist, NULL, NULL, NULL, NULL));
  PetscCall(PetscLogStagePush(PCMPIStage));
  PetscCall(KSPSolve(ksp, NULL, NULL));
  PetscCall(PetscLogStagePop());
  PetscCall(PetscLogEventBegin(EventServerDist, NULL, NULL, NULL, NULL));
  PetscCall(KSPGetIterationNumber(ksp, &its));
  PCMPIIterations[size - 1] += its;
  // TODO: send iterations up to outer KSP

  if (PCMPIServerUseShmget) PetscCall(PetscShmgetUnmapAddresses(2, addr));

  /* gather solution */
  PetscCall(PetscLogEventBegin(EventServerDistMPI, NULL, NULL, NULL, NULL));
  if (!PCMPIServerUseShmget) {
    PetscCall(VecGetArrayRead(ksp->vec_sol, &x));
    if (pc) PetscCall(VecGetArray(X, &sx));
    PetscCallMPI(MPIU_Gatherv(x, n, MPIU_SCALAR, sx, pc ? km->sendcount : NULL, pc ? km->displ : NULL, MPIU_SCALAR, 0, comm));
    if (pc) PetscCall(VecRestoreArray(X, &sx));
    PetscCall(VecRestoreArrayRead(ksp->vec_sol, &x));
  } else {
    PetscCall(VecResetArray(ksp->vec_rhs));
    PetscCall(VecResetArray(ksp->vec_sol));
  }
  PetscCall(PetscLogEventEnd(EventServerDistMPI, NULL, NULL, NULL, NULL));
  PetscCall(PetscLogEventEnd(EventServerDist, NULL, NULL, NULL, NULL));
  PCMPIServerInSolve = PETSC_FALSE;
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode PCMPIDestroy(PC pc)
{
  PC_MPI  *km   = pc ? (PC_MPI *)pc->data : NULL;
  KSP      ksp;
  MPI_Comm comm = PC_MPI_COMM_WORLD;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Scatter(pc ? km->ksps : NULL, 1, MPI_AINT, &ksp, 1, MPI_AINT, 0, comm));
  if (!ksp) PetscFunctionReturn(PETSC_SUCCESS);
  PetscCall(PetscLogStagePush(PCMPIStage));
  PCMPIServerInSolve = PETSC_TRUE;
  PetscCall(KSPDestroy(&ksp));
  PetscCall(PetscLogStagePop());
  PCMPIServerInSolve = PETSC_FALSE;
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode PCMPIServerBroadcastRequest(PCMPICommand request)
{
#if defined(PETSC_HAVE_PTHREAD_MUTEX)
  PetscMPIInt dummy1 = 1, dummy2;
#endif

  PetscFunctionBegin;
#if defined(PETSC_HAVE_PTHREAD_MUTEX)
  if (PCMPIServerUseShmget) {
    for (PetscMPIInt i = 1; i < PetscGlobalSize; i++) pthread_mutex_unlock(&PCMPIServerLocks[i]);
  }
#endif
  PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, MPI_COMM_WORLD));
  /* next line ensures the sender has already taken the lock */
#if defined(PETSC_HAVE_PTHREAD_MUTEX)
  if (PCMPIServerUseShmget) {
    PetscCallMPI(MPI_Reduce(&dummy1, &dummy2, 1, MPI_INT, MPI_SUM, 0, PC_MPI_COMM_WORLD));
    for (PetscMPIInt i = 1; i < PetscGlobalSize; i++) pthread_mutex_lock(&PCMPIServerLocks[i]);
  }
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  PCMPIServerBegin - starts a server that runs on the `rank != 0` MPI processes waiting to process requests for
  parallel `KSP` solves and management of parallel `KSP` objects.

  Logically Collective on all MPI processes except rank 0

  Options Database Keys:
+ -mpi_linear_solver_server - causes the PETSc program to start in MPI linear solver server mode where only the first MPI rank runs user code
. -mpi_linear_solver_server_view - displays information about all the linear systems solved by the MPI linear solver server at the conclusion of the program
- -mpi_linear_solver_server_use_shared_memory - use shared memory when communicating matrices and vectors to server processes (default where supported)

  Level: developer

  Note:
  This is normally started automatically in `PetscInitialize()` when the option `-mpi_linear_solver_server` is provided

  See `PCMPI` for information on using the solver with a `KSP` object

  Developer Notes:
  When called on MPI rank 0 this sets `PETSC_COMM_WORLD` to `PETSC_COMM_SELF` to allow a main program
  written with `PETSC_COMM_WORLD` to run correctly on the single rank while all the other ranks
  (that would normally be sharing `PETSC_COMM_WORLD`) run the solver server.

  Can this be integrated into the `PetscDevice` abstraction that is currently being developed?

  Conceivably `PCREDISTRIBUTE` could be organized in a similar manner to simplify its usage

  This could be implemented directly at the `KSP` level instead of using the `PCMPI` wrapper object

  The code could be extended to allow an MPI + OpenMP application to use the linear solver server concept across all shared-memory
  nodes with a single MPI process per node for the user application but multiple MPI processes per node for the linear solver.

  The concept could also be extended for users' callbacks for `SNES`, `TS`, and `Tao` where, for example, the `SNESSolve()` runs on
  all MPI processes but the user callback only runs on one MPI process per node.

  PETSc could also be extended with an MPI-less API that provides access to PETSc's solvers without any reference to MPI, essentially removing
  the `MPI_Comm` argument from PETSc calls.

.seealso: [](sec_pcmpi), `PCMPIServerEnd()`, `PCMPI`, `KSPCheckPCMPI()`
@*/
PetscErrorCode PCMPIServerBegin(void)
{
  PetscMPIInt rank;

  PetscFunctionBegin;
  PetscCall(PetscInfo(NULL, "Starting MPI Linear Solver Server\n"));
  if (PetscDefined(USE_SINGLE_LIBRARY)) {
    PetscCall(VecInitializePackage());
    PetscCall(MatInitializePackage());
    PetscCall(DMInitializePackage());
    PetscCall(PCInitializePackage());
    PetscCall(KSPInitializePackage());
    PetscCall(SNESInitializePackage());
    PetscCall(TSInitializePackage());
    PetscCall(TaoInitializePackage());
  }
  PetscCall(PetscLogStageRegister("PCMPI", &PCMPIStage));
  PetscCall(PetscLogEventRegister("ServerDist", PC_CLASSID, &EventServerDist));
  PetscCall(PetscLogEventRegister("ServerDistMPI", PC_CLASSID, &EventServerDistMPI));

  if (!PetscDefined(HAVE_SHMGET)) PCMPIServerUseShmget = PETSC_FALSE;
  PetscCall(PetscOptionsGetBool(NULL, NULL, "-mpi_linear_solver_server_use_shared_memory", &PCMPIServerUseShmget, NULL));

  PetscCallMPI(MPI_Comm_rank(PC_MPI_COMM_WORLD, &rank));
  if (PCMPIServerUseShmget) {
#if defined(PETSC_HAVE_PTHREAD_MUTEX)
    PetscMPIInt size;

    PetscCallMPI(MPI_Comm_size(PETSC_COMM_WORLD, &size));
    if (size > 1) {
      pthread_mutex_t *locks;

      if (rank == 0) {
        PCMPIServerActive = PETSC_TRUE;
        PetscCall(PetscShmgetAllocateArray(size, sizeof(pthread_mutex_t), (void **)&locks));
      }
      PetscCall(PetscShmgetMapAddresses(PETSC_COMM_WORLD, 1, (const void **)&locks, (void **)&PCMPIServerLocks));
      if (rank == 0) {
        pthread_mutexattr_t attr;

        pthread_mutexattr_init(&attr);
        pthread_mutexattr_setpshared(&attr, PTHREAD_PROCESS_SHARED);

        for (int i = 1; i < size; i++) {
          pthread_mutex_init(&PCMPIServerLocks[i], &attr);
          pthread_mutex_lock(&PCMPIServerLocks[i]);
        }
      }
      PetscCallMPI(MPI_Barrier(PETSC_COMM_WORLD));
    }
#endif
  }
  if (rank == 0) {
    PETSC_COMM_WORLD  = PETSC_COMM_SELF;
    PCMPIServerActive = PETSC_TRUE;
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  while (PETSC_TRUE) {
    PCMPICommand request = PCMPI_CREATE;
#if defined(PETSC_HAVE_PTHREAD_MUTEX)
    PetscMPIInt dummy1 = 1, dummy2;
#endif

    // TODO: can we broadcast the number of active ranks here so only the correct subset of processes waits on the later scatters?
#if defined(PETSC_HAVE_PTHREAD_MUTEX)
    if (PCMPIServerUseShmget) pthread_mutex_lock(&PCMPIServerLocks[PetscGlobalRank]);
#endif
    PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, PC_MPI_COMM_WORLD));
#if defined(PETSC_HAVE_PTHREAD_MUTEX)
    if (PCMPIServerUseShmget) {
      /* next line ensures PetscGlobalRank has locked before rank 0 can take the lock back */
      PetscCallMPI(MPI_Reduce(&dummy1, &dummy2, 1, MPI_INT, MPI_SUM, 0, PC_MPI_COMM_WORLD));
      pthread_mutex_unlock(&PCMPIServerLocks[PetscGlobalRank]);
    }
#endif
    switch (request) {
    case PCMPI_CREATE:
      PetscCall(PCMPICreate(NULL));
      break;
    case PCMPI_SET_MAT:
      PetscCall(PCMPISetMat(NULL));
      break;
    case PCMPI_UPDATE_MAT_VALUES:
      PetscCall(PCMPIUpdateMatValues(NULL));
      break;
    case PCMPI_VIEW:
      // PetscCall(PCMPIView(NULL));
      break;
    case PCMPI_SOLVE:
      PetscCall(PCMPISolve(NULL, NULL, NULL));
      break;
    case PCMPI_DESTROY:
      PetscCall(PCMPIDestroy(NULL));
      break;
    case PCMPI_EXIT:
      if (PCMPIServerUseShmget) PetscCall(PetscShmgetUnmapAddresses(1, (void **)&PCMPIServerLocks));
      PetscCall(PetscFinalize());
      exit(0); /* not sure if this is a good idea, but cannot return because it would run the user's main program */
      break;
    default:
      break;
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  PCMPIServerEnd - ends a server that runs on the `rank != 0` MPI processes waiting to process requests for
  parallel `KSP` solves and management of parallel `KSP` objects.

  Logically Collective on all MPI ranks except 0

  Level: developer

  Note:
  This is normally called automatically in `PetscFinalize()`

.seealso: [](sec_pcmpi), `PCMPIServerBegin()`, `PCMPI`, `KSPCheckPCMPI()`
@*/
PetscErrorCode PCMPIServerEnd(void)
{
  PetscFunctionBegin;
  if (PetscGlobalRank == 0) {
    PetscViewer       viewer = NULL;
    PetscViewerFormat format;

    PetscCall(PetscShmgetAddressesFinalize());
    PetscCall(PCMPIServerBroadcastRequest(PCMPI_EXIT));
    if (PCMPIServerUseShmget) PetscCall(PetscShmgetUnmapAddresses(1, (void **)&PCMPIServerLocks));
    PETSC_COMM_WORLD = MPI_COMM_WORLD; /* could use PC_MPI_COMM_WORLD */
    PetscOptionsBegin(PETSC_COMM_SELF, NULL, "MPI linear solver server options", NULL);
    PetscCall(PetscOptionsViewer("-mpi_linear_solver_server_view", "View information about systems solved with the server", "PCMPI", &viewer, &format, NULL));
    PetscOptionsEnd();
    if (viewer) {
      PetscBool isascii;

      PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii));
      if (isascii) {
        PetscMPIInt size;
        PetscMPIInt i;

        PetscCallMPI(MPI_Comm_size(PETSC_COMM_WORLD, &size));
        PetscCall(PetscViewerASCIIPrintf(viewer, "MPI linear solver server statistics:\n"));
        PetscCall(PetscViewerASCIIPrintf(viewer, "    Ranks  KSPSolve()s  Mats  KSPs  Avg. Size  Avg. Its\n"));
        if (PCMPIKSPCountsSeq) {
          PetscCall(PetscViewerASCIIPrintf(viewer, "  Sequential  %" PetscInt_FMT "  %" PetscInt_FMT "  %" PetscInt_FMT "  %" PetscInt_FMT "\n", PCMPISolveCountsSeq, PCMPIKSPCountsSeq, PCMPISizesSeq / PCMPISolveCountsSeq, PCMPIIterationsSeq / PCMPISolveCountsSeq));
        }
        for (i = 0; i < size; i++) {
          if (PCMPIKSPCounts[i]) {
            PetscCall(PetscViewerASCIIPrintf(viewer, "  %d  %" PetscInt_FMT "  %" PetscInt_FMT "  %" PetscInt_FMT "  %" PetscInt_FMT "  %" PetscInt_FMT "\n", i + 1, PCMPISolveCounts[i], PCMPIMatCounts[i], PCMPIKSPCounts[i], PCMPISizes[i] / PCMPISolveCounts[i], PCMPIIterations[i] / PCMPISolveCounts[i]));
          }
        }
        PetscCall(PetscViewerASCIIPrintf(viewer, "MPI linear solver server %susing shared memory\n", PCMPIServerUseShmget ? "" : "not "));
      }
      PetscCall(PetscViewerDestroy(&viewer));
    }
  }
  PetscCall(PCMPICommsDestroy());
  PCMPIServerActive = PETSC_FALSE;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  This version is used in the trivial case when the MPI parallel solver server is running on just the original MPI rank 0
  because, for example, the problem is small. This version is more efficient because it does not require copying any data
*/
static PetscErrorCode PCSetUp_Seq(PC pc)
{
  PC_MPI     *km = (PC_MPI *)pc->data;
  Mat         sA;
  const char *prefix;
  char       *found = NULL, *cprefix;

  PetscFunctionBegin;
  PCMPIServerInSolve = PETSC_TRUE;
  PetscCall(PCGetOperators(pc, NULL, &sA));
  PetscCall(PCGetOptionsPrefix(pc, &prefix));
  PetscCall(KSPCreate(PETSC_COMM_SELF, &km->ksps[0]));
  PetscCall(KSPSetNestLevel(km->ksps[0], 1));
  PetscCall(PetscObjectSetTabLevel((PetscObject)km->ksps[0], 1));

  /* Created KSP gets the prefix of the PC minus the mpi_linear_solver_server_ portion */
  PetscCall(PCGetOptionsPrefix(pc, &prefix));
  PetscCheck(prefix, PETSC_COMM_SELF, PETSC_ERR_PLIB, "PCMPI missing required prefix");
  PetscCall(PetscStrallocpy(prefix, &cprefix));
  PetscCall(PetscStrstr(cprefix, "mpi_linear_solver_server_", &found));
  PetscCheck(found, PETSC_COMM_SELF, PETSC_ERR_PLIB, "PCMPI missing mpi_linear_solver_server_ portion of prefix");
  *found = 0;
  PetscCall(KSPSetOptionsPrefix(km->ksps[0], cprefix));
  PetscCall(PetscFree(cprefix));

  PetscCall(KSPSetOperators(km->ksps[0], sA, sA));
  PetscCall(KSPSetFromOptions(km->ksps[0]));
  PetscCall(KSPSetUp(km->ksps[0]));
  PetscCall(PetscInfo(pc, "MPI parallel linear solver system is being solved directly on rank 0 due to its small size\n"));
  PCMPIKSPCountsSeq++;
  PCMPIServerInSolve = PETSC_FALSE;
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode PCApply_Seq(PC pc, Vec b, Vec x)
{
  PC_MPI  *km = (PC_MPI *)pc->data;
  PetscInt its, n;
  Mat      A;

  PetscFunctionBegin;
  PCMPIServerInSolve = PETSC_TRUE;
  PetscCall(KSPSolve(km->ksps[0], b, x));
  PetscCall(KSPGetIterationNumber(km->ksps[0], &its));
  PCMPISolveCountsSeq++;
  PCMPIIterationsSeq += its;
  PetscCall(KSPGetOperators(km->ksps[0], NULL, &A));
  PetscCall(MatGetSize(A, &n, NULL));
  PCMPISizesSeq += n;
  PCMPIServerInSolve = PETSC_FALSE;
  /*
    do not keep a reference to the previous rhs and solution since destroying them in the next KSPSolve()
    may use PetscFree() instead of PCMPIArrayDeallocate()
  */
  PetscCall(VecDestroy(&km->ksps[0]->vec_rhs));
  PetscCall(VecDestroy(&km->ksps[0]->vec_sol));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode PCView_Seq(PC pc, PetscViewer viewer)
{
  PC_MPI *km = (PC_MPI *)pc->data;

  PetscFunctionBegin;
  PetscCall(PetscViewerASCIIPrintf(viewer, "Running MPI linear solver server directly on rank 0 due to its small size\n"));
  PetscCall(PetscViewerASCIIPrintf(viewer, "Desired minimum number of matrix rows per rank for MPI parallel solve %" PetscInt_FMT "\n", km->mincntperrank));
  PetscCall(PetscViewerASCIIPrintf(viewer, "*** Use -mpi_linear_solver_server_view to view statistics on all the solves ***\n"));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode PCDestroy_Seq(PC pc)
{
  PC_MPI *km = (PC_MPI *)pc->data;
  Mat     A, B;
  Vec     x, b;

  PetscFunctionBegin;
  PCMPIServerInSolve = PETSC_TRUE;
  /* since matrices and vectors are shared with the outer KSP we need to ensure they are not destroyed with PetscFree() */
  PetscCall(KSPGetOperators(km->ksps[0], &A, &B));
  PetscCall(PetscObjectReference((PetscObject)A));
  PetscCall(PetscObjectReference((PetscObject)B));
  PetscCall(KSPGetSolution(km->ksps[0], &x));
  PetscCall(PetscObjectReference((PetscObject)x));
  PetscCall(KSPGetRhs(km->ksps[0], &b));
  PetscCall(PetscObjectReference((PetscObject)b));
  PetscCall(KSPDestroy(&km->ksps[0]));
  PetscCall(PetscFree(pc->data));
  PCMPIServerInSolve = PETSC_FALSE;
  PetscCall(MatDestroy(&A));
  PetscCall(MatDestroy(&B));
  PetscCall(VecDestroy(&x));
  PetscCall(VecDestroy(&b));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  PCSetUp_MPI - Trigger the creation of the MPI parallel PC and copy parts of the matrix and
  right-hand side to the parallel PC
*/
static PetscErrorCode PCSetUp_MPI(PC pc)
{
  PC_MPI     *km = (PC_MPI *)pc->data;
  PetscMPIInt rank, size;
  PetscBool   newmatrix = PETSC_FALSE;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_rank(MPI_COMM_WORLD, &rank));
  PetscCheck(rank == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "PCMPI can only be used from 0th rank of MPI_COMM_WORLD. Perhaps a missing -mpi_linear_solver_server?");
  PetscCallMPI(MPI_Comm_size(MPI_COMM_WORLD, &size));

  if (!pc->setupcalled) {
    if (!km->alwaysuseserver) {
      PetscInt n;
      Mat      sA;

      /* short circuit for small systems */
      PetscCall(PCGetOperators(pc, &sA, &sA));
      PetscCall(MatGetSize(sA, &n, NULL));
      if (n < 2 * km->mincntperrank - 1 || size == 1) {
        pc->ops->setup   = NULL;
        pc->ops->apply   = PCApply_Seq;
        pc->ops->destroy = PCDestroy_Seq;
        pc->ops->view    = PCView_Seq;
        PetscCall(PCSetUp_Seq(pc));
        PetscFunctionReturn(PETSC_SUCCESS);
      }
    }

    PetscCall(PCMPIServerBroadcastRequest(PCMPI_CREATE));
    PetscCall(PCMPICreate(pc));
    newmatrix = PETSC_TRUE;
  }
  if (pc->flag == DIFFERENT_NONZERO_PATTERN) newmatrix = PETSC_TRUE;

  if (newmatrix) {
    PetscCall(PetscInfo(pc, "New matrix or matrix has changed nonzero structure\n"));
    PetscCall(PCMPIServerBroadcastRequest(PCMPI_SET_MAT));
    PetscCall(PCMPISetMat(pc));
  } else {
    PetscCall(PetscInfo(pc, "Matrix has only changed nonzero values\n"));
    PetscCall(PCMPIServerBroadcastRequest(PCMPI_UPDATE_MAT_VALUES));
    PetscCall(PCMPIUpdateMatValues(pc));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode PCApply_MPI(PC pc, Vec b, Vec x)
{
  PetscFunctionBegin;
  PetscCall(PCMPIServerBroadcastRequest(PCMPI_SOLVE));
  PetscCall(PCMPISolve(pc, b, x));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode PCDestroy_MPI(PC pc)
{
  PetscFunctionBegin;
  PetscCall(PCMPIServerBroadcastRequest(PCMPI_DESTROY));
  PetscCall(PCMPIDestroy(pc));
  PetscCall(PetscFree(pc->data));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  PCView_MPI - Cannot call view on the MPI parallel KSP because the other ranks do not have access to the viewer; use the options database instead
*/
static PetscErrorCode PCView_MPI(PC pc, PetscViewer viewer)
{
  PC_MPI     *km = (PC_MPI *)pc->data;
  MPI_Comm    comm;
  PetscMPIInt size;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)km->ksps[0], &comm));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCall(PetscViewerASCIIPrintf(viewer, "Size of MPI communicator used for MPI parallel KSP solve %d\n", size));
  PetscCall(PetscViewerASCIIPrintf(viewer, "Desired minimum number of matrix rows on each MPI process for MPI parallel solve %" PetscInt_FMT "\n", km->mincntperrank));
  PetscCall(PetscViewerASCIIPrintf(viewer, "*** Use -mpi_linear_solver_server_view to view statistics on all the solves ***\n"));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode PCSetFromOptions_MPI(PC pc, PetscOptionItems *PetscOptionsObject)
{
  PC_MPI *km = (PC_MPI *)pc->data;

  PetscFunctionBegin;
  PetscOptionsHeadBegin(PetscOptionsObject, "MPI linear solver server options");
  PetscCall(PetscOptionsInt("-minimum_count_per_rank", "Desired minimum number of matrix rows per rank", "None", km->mincntperrank, &km->mincntperrank, NULL));
  PetscCall(PetscOptionsBool("-always_use_server", "Use the server even if only one rank is used for the solve (for debugging)", "None", km->alwaysuseserver, &km->alwaysuseserver, NULL));
  PetscOptionsHeadEnd();
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*MC
  PCMPI - Calls an MPI parallel `KSP` to solve a linear system from user code running on one process

  Options Database Keys for the Server:
+ -mpi_linear_solver_server - causes the PETSc program to start in MPI linear solver server mode where only the first MPI rank runs user code
. -mpi_linear_solver_server_view - displays information about all the linear systems solved by the MPI linear solver server
- -mpi_linear_solver_server_use_shared_memory <true, false> - use shared memory to distribute the matrix and right-hand side; defaults to true

  Options Database Keys for a specific `KSP` object:
+ -[any_ksp_prefix]_mpi_linear_solver_server_minimum_count_per_rank - sets the minimum size of the linear system per MPI rank that the solver will strive for
- -[any_ksp_prefix]_mpi_linear_solver_server_always_use_server - use the server solver code even if the particular system is only solved on the process (for debugging and testing purposes)

  Level: developer

  Notes:
  This cannot be used with vectors or matrices that are created using arrays provided by the user, such as `VecCreateWithArray()` or
  `MatCreateSeqAIJWithArrays()`

  The options database prefix for the actual solver is any prefix provided before use to the original `KSP` with `KSPSetOptionsPrefix()`; most commonly no prefix is used.

  It can be particularly useful for user OpenMP code or potentially user GPU code.

  When the program runs with a single MPI process it directly uses the provided matrix and right-hand side
  and does not need to distribute the matrix and vector to the various MPI processes; thus it incurs no extra overhead over just using the `KSP` directly.

  The solver options for the actual solving `KSP` and `PC` must be controlled via the options database; calls to set options directly on the user-level `KSP` and `PC` have no effect
  because they are not the actual solver objects.

  When `-log_view` is used with this solver the events within the parallel solve are logged in their own stage. Some of the logging in the other
  stages will be confusing since the event times are only recorded on the 0th MPI rank; thus the percentages of time in the events will be misleading.

  Developer Note:
  This `PCType` is never directly selected by the user; it is set when the option `-mpi_linear_solver_server` is used and the `PC` is at the outermost nesting of
  a `KSP`. The outermost `KSP` object is automatically set to `KSPPREONLY` and thus is not directly visible to the user.

.seealso: [](sec_pcmpi), `KSPCreate()`, `KSPSetType()`, `KSPType`, `KSP`, `PC`, `PCMPIServerBegin()`, `PCMPIServerEnd()`, `KSPCheckPCMPI()`
M*/
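/*
   Illustrative command lines (a sketch, not from the PETSc documentation; `./app` is a placeholder
   executable name). Because PCMPICreate() strips the mpi_linear_solver_server_ portion from the
   prefix, an unprefixed user KSP is controlled with plain -ksp_xxx/-pc_xxx options:

      mpiexec -n 16 ./app -mpi_linear_solver_server -ksp_type cg -pc_type gamg
          user code runs on rank 0; each solve runs CG/GAMG on up to 16 ranks

      mpiexec -n 16 ./app -mpi_linear_solver_server -mpi_linear_solver_server_view \
                          -mpi_linear_solver_server_minimum_count_per_rank 5000
          additionally prints solve statistics at PetscFinalize() and requests at least
          5000 matrix rows per active server rank (see PCSetFromOptions_MPI() above)
*/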
PETSC_EXTERN PetscErrorCode PCCreate_MPI(PC pc)
{
  PC_MPI *km;
  char   *found = NULL;

  PetscFunctionBegin;
  PetscCall(PetscStrstr(((PetscObject)pc)->prefix, "mpi_linear_solver_server_", &found));
  PetscCheck(found, PETSC_COMM_SELF, PETSC_ERR_PLIB, "PCMPI object prefix does not have mpi_linear_solver_server_");

  /* material from PCSetType() */
  PetscTryTypeMethod(pc, destroy);
  pc->ops->destroy = NULL;
  pc->data         = NULL;

  PetscCall(PetscFunctionListDestroy(&((PetscObject)pc)->qlist));
  PetscCall(PetscMemzero(pc->ops, sizeof(struct _PCOps)));
  pc->modifysubmatrices  = NULL;
  pc->modifysubmatricesP = NULL;
  pc->setupcalled        = 0;

  PetscCall(PetscNew(&km));
  pc->data = (void *)km;

  km->mincntperrank = 10000;

  pc->ops->setup          = PCSetUp_MPI;
  pc->ops->apply          = PCApply_MPI;
  pc->ops->destroy        = PCDestroy_MPI;
  pc->ops->view           = PCView_MPI;
  pc->ops->setfromoptions = PCSetFromOptions_MPI;
  PetscCall(PetscObjectChangeTypeName((PetscObject)pc, PCMPI));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  PCMPIGetKSP - Gets the `KSP` created by the `PCMPI`

  Not Collective

  Input Parameter:
. pc - the preconditioner context

  Output Parameter:
. innerksp - the inner `KSP`

  Level: advanced

.seealso: [](ch_ksp), `KSP`, `PCMPI`, `PCREDISTRIBUTE`
@*/
PetscErrorCode PCMPIGetKSP(PC pc, KSP *innerksp)
{
  PC_MPI *red = (PC_MPI *)pc->data;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc, PC_CLASSID, 1);
  PetscAssertPointer(innerksp, 2);
  *innerksp = red->ksps[0];
  PetscFunctionReturn(PETSC_SUCCESS);
}
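/*
   A small usage sketch for PCMPIGetKSP() (illustrative only; `outerksp` is assumed to be a KSP
   whose PC is of type PCMPI, as set up by -mpi_linear_solver_server):

      PC       pc;
      KSP      innerksp;
      PetscInt its;

      PetscCall(KSPGetPC(outerksp, &pc));
      PetscCall(PCMPIGetKSP(pc, &innerksp));
      // innerksp is the KSP that performs the actual (possibly MPI parallel) solve;
      // on rank 0 it can be inspected, for example
      PetscCall(KSPGetIterationNumber(innerksp, &its));
*/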