/*
   This file creates an MPI parallel KSP from a sequential PC that lives on MPI rank 0.
   It is intended to allow using PETSc MPI parallel linear solvers from non-MPI codes.

   That program may use OpenMP to compute the right-hand side and matrix for the linear system

   The code uses MPI_COMM_WORLD below but maybe it should be PETSC_COMM_WORLD

   The resulting KSP and PC can only be controlled via the options database, though some common commands
   could be passed through the server.

*/
#include <petsc/private/pcimpl.h> /*I "petscksp.h" I*/
#include <petsc/private/kspimpl.h>
#include <petscts.h>
#include <petsctao.h>
#if defined(PETSC_HAVE_PTHREAD_MUTEX)
  #include <pthread.h>
#endif

#define PC_MPI_MAX_RANKS  256
#define PC_MPI_COMM_WORLD MPI_COMM_WORLD

/* Rank-0-side bookkeeping for the server: addresses of the remote KSPs plus the
   counts/displacements needed for the Scatterv/Gatherv of matrix and vector data */
typedef struct {
  KSP         ksps[PC_MPI_MAX_RANKS];                               /* The addresses of the MPI parallel KSP on each process, NULL when not on a process. */
  PetscMPIInt sendcount[PC_MPI_MAX_RANKS], displ[PC_MPI_MAX_RANKS]; /* For scatter/gather of rhs/solution */
  PetscMPIInt NZ[PC_MPI_MAX_RANKS], NZdispl[PC_MPI_MAX_RANKS];      /* For scatter of nonzero values in matrix (and nonzero column indices initially */
  PetscInt    mincntperrank;                                        /* minimum number of desired matrix rows per active rank in MPI parallel KSP solve */
  PetscBool   alwaysuseserver;                                      /* for debugging use the server infrastructure even if only one MPI process is used for the solve */
} PC_MPI;

/* Requests broadcast from rank 0 to the server ranks; each value triggers the matching PCMPIXXX() routine */
typedef enum {
  PCMPI_EXIT, /* exit the PC server loop, means the controlling sequential program is done */
  PCMPI_CREATE,
  PCMPI_SET_MAT,           /* set original matrix (or one with different nonzero pattern) */
  PCMPI_UPDATE_MAT_VALUES, /* update current matrix with new nonzero values */
  PCMPI_SOLVE,
  PCMPI_VIEW,
  PCMPI_DESTROY /* destroy a PC that is no longer needed */
} PCMPICommand;

/* Global server state: one subcommunicator per possible active-rank count, plus
   statistics gathered for -mpi_linear_solver_server_view */
static MPI_Comm      PCMPIComms[PC_MPI_MAX_RANKS];
static PetscBool     PCMPICommSet = PETSC_FALSE;
static PetscInt      PCMPISolveCounts[PC_MPI_MAX_RANKS], PCMPIKSPCounts[PC_MPI_MAX_RANKS], PCMPIMatCounts[PC_MPI_MAX_RANKS], PCMPISolveCountsSeq = 0, PCMPIKSPCountsSeq = 0;
static PetscInt      PCMPIIterations[PC_MPI_MAX_RANKS], PCMPISizes[PC_MPI_MAX_RANKS], PCMPIIterationsSeq = 0, PCMPISizesSeq = 0;
static PetscLogEvent EventServerDist, EventServerDistMPI;
#if defined(PETSC_HAVE_PTHREAD_MUTEX)
static pthread_mutex_t *PCMPIServerLocks;
#else
static void *PCMPIServerLocks;
#endif

/*
   Creates PCMPIComms[i], a communicator containing ranks 0..i of PC_MPI_COMM_WORLD
   (MPI_COMM_NULL on the remaining ranks), and zeroes the per-size statistics.
   Collective on PC_MPI_COMM_WORLD.
*/
static PetscErrorCode PCMPICommsCreate(void)
{
  MPI_Comm    comm = PC_MPI_COMM_WORLD;
  PetscMPIInt size, rank, i;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCheck(size <= PC_MPI_MAX_RANKS, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for using more than PC_MPI_MAX_RANKS MPI ranks in an MPI linear solver server solve");
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  /* comm for size 1 is useful only for debugging */
  for (i = 0; i < size; i++) {
    /* ranks 0..i get color 0 and hence belong to PCMPIComms[i]; the rest get MPI_COMM_NULL */
    PetscMPIInt color = rank < i + 1 ? 0 : MPI_UNDEFINED;
    PetscCallMPI(MPI_Comm_split(comm, color, 0, &PCMPIComms[i]));
    PCMPISolveCounts[i] = 0;
    PCMPIKSPCounts[i]   = 0;
    PCMPIIterations[i]  = 0;
    PCMPISizes[i]       = 0;
  }
  PCMPICommSet = PETSC_TRUE;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Frees the subcommunicators created by PCMPICommsCreate(); no-op if they were never created */
static PetscErrorCode PCMPICommsDestroy(void)
{
  MPI_Comm    comm = PC_MPI_COMM_WORLD;
  PetscMPIInt size, rank, i;

  PetscFunctionBegin;
  if (!PCMPICommSet) PetscFunctionReturn(PETSC_SUCCESS);
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  for (i = 0; i < size; i++) {
    if (PCMPIComms[i] != MPI_COMM_NULL) PetscCallMPI(MPI_Comm_free(&PCMPIComms[i]));
  }
  PCMPICommSet = PETSC_FALSE;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
   Creates the MPI parallel KSP on an appropriately sized subcommunicator and records
   its address on rank 0 (km->ksps). pc is non-NULL only on rank 0; the other ranks
   run this in the server loop with pc == NULL and follow the same collective calls.
*/
static PetscErrorCode PCMPICreate(PC pc)
{
  PC_MPI     *km   = pc ? (PC_MPI *)pc->data : NULL;
  MPI_Comm    comm = PC_MPI_COMM_WORLD;
  KSP         ksp;
  PetscInt    N[2], mincntperrank = 0;
  PetscMPIInt size;
  Mat         sA;
  char       *cprefix = NULL;
  PetscMPIInt len     = 0;

  PetscFunctionBegin;
  PCMPIServerInSolve = PETSC_TRUE;
  if (!PCMPICommSet) PetscCall(PCMPICommsCreate());
  PetscCallMPI(MPI_Comm_size(comm, &size));
  if (pc) {
    if (size == 1) PetscCall(PetscPrintf(PETSC_COMM_SELF, "Warning: Running KSP type of MPI on a one rank MPI run, this will be less efficient then not using this type\n"));
    PetscCall(PCGetOperators(pc, &sA, &sA));
    PetscCall(MatGetSize(sA, &N[0], &N[1]));
  }
  PetscCallMPI(MPI_Bcast(N, 2, MPIU_INT, 0, comm));

  /* choose a suitable sized MPI_Comm for the problem to be solved on */
  if (km) mincntperrank = km->mincntperrank;
  PetscCallMPI(MPI_Bcast(&mincntperrank, 1, MPI_INT, 0, comm));
  comm = PCMPIComms[PetscMin(size, PetscMax(1, N[0] / mincntperrank)) - 1];
  if (comm == MPI_COMM_NULL) {
    /* this rank is not needed for a problem of this size */
    ksp                = NULL;
    PCMPIServerInSolve = PETSC_FALSE;
    PetscFunctionReturn(PETSC_SUCCESS);
  }
  PetscCall(PetscLogStagePush(PCMPIStage));
  PetscCall(KSPCreate(comm, &ksp));
  PetscCall(KSPSetNestLevel(ksp, 1));
  PetscCall(PetscObjectSetTabLevel((PetscObject)ksp, 1));
  PetscCall(PetscLogStagePop());
  /* rank 0 collects the address of every rank's KSP so later requests can be scattered back out */
  PetscCallMPI(MPI_Gather(&ksp, 1, MPI_AINT, pc ? km->ksps : NULL, 1, MPI_AINT, 0, comm));
  if (pc) {
    size_t      slen;
    const char *prefix = NULL;
    char       *found  = NULL;

    PetscCallMPI(MPI_Comm_size(comm, &size));
    PCMPIKSPCounts[size - 1]++;
    /* Created KSP gets prefix of PC minus the mpi_linear_solver_server_ portion */
    PetscCall(PCGetOptionsPrefix(pc, &prefix));
    PetscCheck(prefix, PETSC_COMM_SELF, PETSC_ERR_PLIB, "PCMPI missing required prefix");
    PetscCall(PetscStrallocpy(prefix, &cprefix));
    PetscCall(PetscStrstr(cprefix, "mpi_linear_solver_server_", &found));
    PetscCheck(found, PETSC_COMM_SELF, PETSC_ERR_PLIB, "PCMPI missing mpi_linear_solver_server_ portion of prefix");
    *found = 0; /* truncate the prefix at the server marker */
    PetscCall(PetscStrlen(cprefix, &slen));
    len = (PetscMPIInt)slen;
  }
  /* broadcast the (possibly empty) prefix so all ranks configure their KSP identically */
  PetscCallMPI(MPI_Bcast(&len, 1, MPI_INT, 0, comm));
  if (len) {
    if (!pc) PetscCall(PetscMalloc1(len + 1, &cprefix));
    PetscCallMPI(MPI_Bcast(cprefix, len + 1, MPI_CHAR, 0, comm));
    PetscCall(KSPSetOptionsPrefix(ksp, cprefix));
  }
  PetscCall(PetscFree(cprefix));
  PCMPIServerInSolve = PETSC_FALSE;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
   Distributes the rank-0 sequential AIJ matrix to the parallel Mat used by the server KSP,
   either by MPI_Scatterv of the CSR arrays or by mapping shared memory (PCMPIServerUseShmget).
   Also propagates matrix properties (sizes, block size, symmetry flags, options prefix).
   pc is non-NULL only on rank 0.
*/
static PetscErrorCode PCMPISetMat(PC pc)
{
  PC_MPI            *km = pc ? (PC_MPI *)pc->data : NULL;
  Mat                A;
  PetscInt           m, n, j, bs;
  Mat                sA;
  MPI_Comm           comm = PC_MPI_COMM_WORLD;
  KSP                ksp;
  PetscLayout        layout;
  const PetscInt    *IA = NULL, *JA = NULL, *ia, *ja;
  const PetscInt    *range;
  PetscMPIInt       *NZ = NULL, sendcounti[PC_MPI_MAX_RANKS], displi[PC_MPI_MAX_RANKS], *NZdispl = NULL, nz, size, i;
  const PetscScalar *a = NULL, *sa;
  PetscInt           matproperties[8] = {0}, rstart, rend;
  char              *cprefix;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Scatter(pc ? km->ksps : NULL, 1, MPI_AINT, &ksp, 1, MPI_AINT, 0, comm));
  if (!ksp) PetscFunctionReturn(PETSC_SUCCESS); /* this rank is not participating in the solve */
  PCMPIServerInSolve = PETSC_TRUE;
  PetscCall(PetscLogEventBegin(EventServerDist, NULL, NULL, NULL, NULL));
  PetscCall(PetscObjectGetComm((PetscObject)ksp, &comm));
  if (pc) {
    PetscBool   isset, issymmetric, ishermitian, isspd, isstructurallysymmetric;
    const char *prefix;
    size_t      clen;

    PetscCallMPI(MPI_Comm_size(comm, &size));
    PCMPIMatCounts[size - 1]++;
    /* pack all scalar matrix metadata into one buffer so a single Bcast suffices */
    PetscCall(PCGetOperators(pc, &sA, &sA));
    PetscCall(MatGetSize(sA, &matproperties[0], &matproperties[1]));
    PetscCall(MatGetBlockSize(sA, &bs));
    matproperties[2] = bs;
    PetscCall(MatIsSymmetricKnown(sA, &isset, &issymmetric));
    matproperties[3] = !isset ? 0 : (issymmetric ? 1 : 2);
    PetscCall(MatIsHermitianKnown(sA, &isset, &ishermitian));
    matproperties[4] = !isset ? 0 : (ishermitian ? 1 : 2);
    PetscCall(MatIsSPDKnown(sA, &isset, &isspd));
    matproperties[5] = !isset ? 0 : (isspd ? 1 : 2);
    PetscCall(MatIsStructurallySymmetricKnown(sA, &isset, &isstructurallysymmetric));
    matproperties[6] = !isset ? 0 : (isstructurallysymmetric ? 1 : 2);
    /* Created Mat gets prefix of input Mat PLUS the mpi_linear_solver_server_ portion */
    PetscCall(MatGetOptionsPrefix(sA, &prefix));
    PetscCall(PetscStrallocpy(prefix, &cprefix));
    PetscCall(PetscStrlen(cprefix, &clen));
    matproperties[7] = (PetscInt)clen;
  }
  PetscCallMPI(MPI_Bcast(matproperties, PETSC_STATIC_ARRAY_LENGTH(matproperties), MPIU_INT, 0, comm));

  /* determine ownership ranges of matrix columns */
  PetscCall(PetscLayoutCreate(comm, &layout));
  PetscCall(PetscLayoutSetBlockSize(layout, matproperties[2]));
  PetscCall(PetscLayoutSetSize(layout, matproperties[1]));
  PetscCall(PetscLayoutSetUp(layout));
  PetscCall(PetscLayoutGetLocalSize(layout, &n));
  PetscCall(PetscLayoutDestroy(&layout));

  /* determine ownership ranges of matrix rows */
  PetscCall(PetscLayoutCreate(comm, &layout));
  PetscCall(PetscLayoutSetBlockSize(layout, matproperties[2]));
  PetscCall(PetscLayoutSetSize(layout, matproperties[0]));
  PetscCall(PetscLayoutSetUp(layout));
  PetscCall(PetscLayoutGetLocalSize(layout, &m));
  PetscCall(PetscLayoutGetRange(layout, &rstart, &rend));

  PetscCall(PetscLogEventBegin(EventServerDistMPI, NULL, NULL, NULL, NULL));
  /* copy over the matrix nonzero structure and values */
  if (pc) {
    PetscCall(MatGetRowIJ(sA, 0, PETSC_FALSE, PETSC_FALSE, NULL, &IA, &JA, NULL));
    if (!PCMPIServerUseShmget) {
      /* compute the counts/displacements for scattering the CSR row pointers and nonzeros */
      NZ      = km->NZ;
      NZdispl = km->NZdispl;
      PetscCall(PetscLayoutGetRanges(layout, &range));
      for (i = 0; i < size; i++) {
        sendcounti[i] = (PetscMPIInt)(1 + range[i + 1] - range[i]); /* +1: row-pointer arrays overlap by one entry */
        NZ[i]         = (PetscMPIInt)(IA[range[i + 1]] - IA[range[i]]);
      }
      displi[0]  = 0;
      NZdispl[0] = 0;
      for (j = 1; j < size; j++) {
        displi[j]  = displi[j - 1] + sendcounti[j - 1] - 1;
        NZdispl[j] = NZdispl[j - 1] + NZ[j - 1];
      }
    }
    PetscCall(MatSeqAIJGetArrayRead(sA, &sa));
  }
  PetscCall(PetscLayoutDestroy(&layout));

  PetscCall(MatCreate(comm, &A));
  if (matproperties[7] > 0) {
    /* broadcast and apply the original matrix's options prefix */
    if (!pc) PetscCall(PetscMalloc1(matproperties[7] + 1, &cprefix));
    PetscCallMPI(MPI_Bcast(cprefix, (PetscMPIInt)(matproperties[7] + 1), MPI_CHAR, 0, comm));
    PetscCall(MatSetOptionsPrefix(A, cprefix));
    PetscCall(PetscFree(cprefix));
  }
  PetscCall(MatAppendOptionsPrefix(A, "mpi_linear_solver_server_"));
  PetscCall(MatSetSizes(A, m, n, matproperties[0], matproperties[1]));
  PetscCall(MatSetType(A, MATMPIAIJ));

  if (!PCMPIServerUseShmget) {
    PetscMPIInt in;
    /* receive this rank's slice of the CSR structure and values via Scatterv */
    PetscCallMPI(MPI_Scatter(NZ, 1, MPI_INT, &nz, 1, MPI_INT, 0, comm));
    PetscCall(PetscMalloc3(n + 1, &ia, nz, &ja, nz, &a));
    PetscCall(PetscMPIIntCast(n, &in));
    PetscCallMPI(MPI_Scatterv(IA, sendcounti, displi, MPIU_INT, (void *)ia, in + 1, MPIU_INT, 0, comm));
    PetscCallMPI(MPI_Scatterv(JA, NZ, NZdispl, MPIU_INT, (void *)ja, nz, MPIU_INT, 0, comm));
    PetscCallMPI(MPI_Scatterv(sa, NZ, NZdispl, MPIU_SCALAR, (void *)a, nz, MPIU_SCALAR, 0, comm));
  } else {
    /* map rank 0's CSR arrays directly into this process via shared memory; no copies */
    const void           *addr[3] = {(const void **)IA, (const void **)JA, (const void **)sa};
    PCMPIServerAddresses *addresses;

    PetscCall(PetscNew(&addresses));
    addresses->n = 3;
    PetscCall(PetscShmgetMapAddresses(comm, addresses->n, addr, addresses->addr));
    ia = rstart + (PetscInt *)addresses->addr[0];    /* offset row pointers to this rank's first row */
    ja = ia[0] + (PetscInt *)addresses->addr[1];     /* ia[0] is the global index of this rank's first nonzero */
    a  = ia[0] + (PetscScalar *)addresses->addr[2];
    /* keep the mapping alive for the lifetime of A so PCMPIUpdateMatValues() can reuse it */
    PetscCall(PetscObjectContainerCompose((PetscObject)A, "PCMPIServerAddresses", (void *)addresses, (PetscErrorCode (*)(void *))PCMPIServerAddressesDestroy));
  }

  if (pc) {
    PetscCall(MatSeqAIJRestoreArrayRead(sA, &sa));
    PetscCall(MatRestoreRowIJ(sA, 0, PETSC_FALSE, PETSC_FALSE, NULL, &IA, &JA, NULL));
  }
  PetscCall(PetscLogEventEnd(EventServerDistMPI, NULL, NULL, NULL, NULL));

  PetscCall(PetscLogStagePush(PCMPIStage));
  PetscCall(MatMPIAIJSetPreallocationCSR(A, ia, ja, a));
  PetscCall(MatSetBlockSize(A, matproperties[2]));

  if (matproperties[3]) PetscCall(MatSetOption(A, MAT_SYMMETRIC, matproperties[3] == 1 ? PETSC_TRUE : PETSC_FALSE));
  if (matproperties[4]) PetscCall(MatSetOption(A, MAT_HERMITIAN, matproperties[4] == 1 ? PETSC_TRUE : PETSC_FALSE));
  if (matproperties[5]) PetscCall(MatSetOption(A, MAT_SPD, matproperties[5] == 1 ? PETSC_TRUE : PETSC_FALSE));
  if (matproperties[6]) PetscCall(MatSetOption(A, MAT_STRUCTURALLY_SYMMETRIC, matproperties[6] == 1 ? PETSC_TRUE : PETSC_FALSE));

  if (!PCMPIServerUseShmget) PetscCall(PetscFree3(ia, ja, a));
  PetscCall(KSPSetOperators(ksp, A, A));
  if (!ksp->vec_sol) PetscCall(MatCreateVecs(A, &ksp->vec_sol, &ksp->vec_rhs));
  PetscCall(PetscLogStagePop());
  if (pc && !PCMPIServerUseShmget) { /* needed for scatterv/gatherv of rhs and solution */
    const PetscInt *range;

    PetscCall(VecGetOwnershipRanges(ksp->vec_sol, &range));
    for (i = 0; i < size; i++) {
      km->sendcount[i] = (PetscMPIInt)(range[i + 1] - range[i]);
      km->displ[i]     = (PetscMPIInt)range[i];
    }
  }
  PetscCall(MatDestroy(&A)); /* KSPSetOperators() took a reference; drop ours */
  PetscCall(PetscLogEventEnd(EventServerDist, NULL, NULL, NULL, NULL));
  PetscCall(KSPSetFromOptions(ksp));
  PCMPIServerInSolve = PETSC_FALSE;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
   Refreshes the numerical values of the server's parallel matrix from the rank-0 sequential
   matrix (same nonzero pattern as when PCMPISetMat() was called), and re-broadcasts the
   symmetry-type flags. pc is non-NULL only on rank 0.
*/
static PetscErrorCode PCMPIUpdateMatValues(PC pc)
{
  PC_MPI            *km = pc ? (PC_MPI *)pc->data : NULL;
  KSP                ksp;
  Mat                sA, A;
  MPI_Comm           comm = PC_MPI_COMM_WORLD;
  const PetscInt    *ia, *IA;
  const PetscScalar *a;
  PetscCount         nz;
  const PetscScalar *sa = NULL;
  PetscMPIInt        size;
  PetscInt           rstart, matproperties[4] = {0, 0, 0, 0};

  PetscFunctionBegin;
  if (pc) {
    PetscCall(PCGetOperators(pc, &sA, &sA));
    PetscCall(MatSeqAIJGetArrayRead(sA, &sa));
    PetscCall(MatGetRowIJ(sA, 0, PETSC_FALSE, PETSC_FALSE, NULL, &IA, NULL, NULL));
  }
  PetscCallMPI(MPI_Scatter(pc ? km->ksps : NULL, 1, MPI_AINT, &ksp, 1, MPI_AINT, 0, comm));
  if (!ksp) PetscFunctionReturn(PETSC_SUCCESS); /* this rank is not participating in the solve */
  PCMPIServerInSolve = PETSC_TRUE;
  PetscCall(PetscLogEventBegin(EventServerDist, NULL, NULL, NULL, NULL));
  PetscCall(PetscObjectGetComm((PetscObject)ksp, &comm));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PCMPIMatCounts[size - 1]++;
  PetscCall(KSPGetOperators(ksp, NULL, &A));
  PetscCall(PetscLogEventBegin(EventServerDistMPI, NULL, NULL, NULL, NULL));
  if (!PCMPIServerUseShmget) {
    PetscMPIInt mpi_nz;

    PetscCall(MatMPIAIJGetNumberNonzeros(A, &nz));
    PetscCall(PetscMPIIntCast(nz, &mpi_nz));
    PetscCall(PetscMalloc1(nz, &a));
    PetscCallMPI(MPI_Scatterv(sa, pc ? km->NZ : NULL, pc ? km->NZdispl : NULL, MPIU_SCALAR, (void *)a, mpi_nz, MPIU_SCALAR, 0, comm));
  } else {
    /* reuse the shared-memory mapping stored on A by PCMPISetMat() */
    PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
    PCMPIServerAddresses *addresses;
    PetscCall(PetscObjectContainerQuery((PetscObject)A, "PCMPIServerAddresses", (void **)&addresses));
    ia = rstart + (PetscInt *)addresses->addr[0];
    a  = ia[0] + (PetscScalar *)addresses->addr[2];
  }
  PetscCall(PetscLogEventEnd(EventServerDistMPI, NULL, NULL, NULL, NULL));
  if (pc) {
    PetscBool isset, issymmetric, ishermitian, isspd, isstructurallysymmetric;

    PetscCall(MatSeqAIJRestoreArrayRead(sA, &sa));
    PetscCall(MatRestoreRowIJ(sA, 0, PETSC_FALSE, PETSC_FALSE, NULL, &IA, NULL, NULL));

    PetscCall(MatIsSymmetricKnown(sA, &isset, &issymmetric));
    matproperties[0] = !isset ? 0 : (issymmetric ? 1 : 2);
    PetscCall(MatIsHermitianKnown(sA, &isset, &ishermitian));
    matproperties[1] = !isset ? 0 : (ishermitian ? 1 : 2);
    PetscCall(MatIsSPDKnown(sA, &isset, &isspd));
    matproperties[2] = !isset ? 0 : (isspd ? 1 : 2);
    PetscCall(MatIsStructurallySymmetricKnown(sA, &isset, &isstructurallysymmetric));
    matproperties[3] = !isset ? 0 : (isstructurallysymmetric ? 1 : 2);
  }
  PetscCall(MatUpdateMPIAIJWithArray(A, a));
  if (!PCMPIServerUseShmget) PetscCall(PetscFree(a));
  PetscCallMPI(MPI_Bcast(matproperties, 4, MPIU_INT, 0, comm));
  /* if any of these properties was previously set and is now not set this will result in incorrect properties in A since there is no way to unset a property */
  if (matproperties[0]) PetscCall(MatSetOption(A, MAT_SYMMETRIC, matproperties[0] == 1 ? PETSC_TRUE : PETSC_FALSE));
  if (matproperties[1]) PetscCall(MatSetOption(A, MAT_HERMITIAN, matproperties[1] == 1 ? PETSC_TRUE : PETSC_FALSE));
  if (matproperties[2]) PetscCall(MatSetOption(A, MAT_SPD, matproperties[2] == 1 ? PETSC_TRUE : PETSC_FALSE));
  if (matproperties[3]) PetscCall(MatSetOption(A, MAT_STRUCTURALLY_SYMMETRIC, matproperties[3] == 1 ? PETSC_TRUE : PETSC_FALSE));
  PetscCall(PetscLogEventEnd(EventServerDist, NULL, NULL, NULL, NULL));
  PCMPIServerInSolve = PETSC_FALSE;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
   Scatters the rank-0 right-hand side B to the server KSP's distributed rhs (or maps it via
   shared memory), runs KSPSolve(), then gathers the distributed solution back into X.
   pc, B, X are non-NULL only on rank 0; the server ranks pass NULL for all three.
*/
static PetscErrorCode PCMPISolve(PC pc, Vec B, Vec X)
{
  PC_MPI            *km = pc ? (PC_MPI *)pc->data : NULL;
  KSP                ksp;
  MPI_Comm           comm = PC_MPI_COMM_WORLD;
  const PetscScalar *sb   = NULL, *x;
  PetscScalar       *b, *sx = NULL;
  PetscInt           its, n;
  PetscMPIInt        size;
  void              *addr[2];

  PetscFunctionBegin;
  /* note: the send buffer (&ksp on non-root ranks) is ignored by MPI_Scatter except on rank 0 */
  PetscCallMPI(MPI_Scatter(pc ? km->ksps : &ksp, 1, MPI_AINT, &ksp, 1, MPI_AINT, 0, comm));
  if (!ksp) PetscFunctionReturn(PETSC_SUCCESS); /* this rank is not participating in the solve */
  PCMPIServerInSolve = PETSC_TRUE;
  PetscCall(PetscLogEventBegin(EventServerDist, NULL, NULL, NULL, NULL));
  PetscCall(PetscObjectGetComm((PetscObject)ksp, &comm));

  /* scatterv rhs */
  PetscCallMPI(MPI_Comm_size(comm, &size));
  if (pc) {
    PetscInt N;

    PCMPISolveCounts[size - 1]++;
    PetscCall(MatGetSize(pc->pmat, &N, NULL));
    PCMPISizes[size - 1] += N;
  }
  PetscCall(VecGetLocalSize(ksp->vec_rhs, &n));
  PetscCall(PetscLogEventBegin(EventServerDistMPI, NULL, NULL, NULL, NULL));
  if (!PCMPIServerUseShmget) {
    PetscMPIInt in;

    PetscCall(VecGetArray(ksp->vec_rhs, &b));
    if (pc) PetscCall(VecGetArrayRead(B, &sb));
    PetscCall(PetscMPIIntCast(n, &in));
    PetscCallMPI(MPI_Scatterv(sb, pc ? km->sendcount : NULL, pc ? km->displ : NULL, MPIU_SCALAR, b, in, MPIU_SCALAR, 0, comm));
    if (pc) PetscCall(VecRestoreArrayRead(B, &sb));
    PetscCall(VecRestoreArray(ksp->vec_rhs, &b));
    // TODO: scatter initial guess if needed
  } else {
    PetscInt rstart;

    /* obtain rank 0's B and X array addresses, then have every rank alias its
       slice of the shared memory directly into the distributed rhs/sol vectors */
    if (pc) PetscCall(VecGetArrayRead(B, &sb));
    if (pc) PetscCall(VecGetArray(X, &sx));
    const void *inaddr[2] = {(const void **)sb, (const void **)sx};
    if (pc) PetscCall(VecRestoreArray(X, &sx));
    if (pc) PetscCall(VecRestoreArrayRead(B, &sb));

    PetscCall(PetscShmgetMapAddresses(comm, 2, inaddr, addr));
    PetscCall(VecGetOwnershipRange(ksp->vec_rhs, &rstart, NULL));
    PetscCall(VecPlaceArray(ksp->vec_rhs, rstart + (PetscScalar *)addr[0]));
    PetscCall(VecPlaceArray(ksp->vec_sol, rstart + (PetscScalar *)addr[1]));
  }
  PetscCall(PetscLogEventEnd(EventServerDistMPI, NULL, NULL, NULL, NULL));

  PetscCall(PetscLogEventEnd(EventServerDist, NULL, NULL, NULL, NULL));
  PetscCall(PetscLogStagePush(PCMPIStage));
  PetscCall(KSPSolve(ksp, NULL, NULL));
  PetscCall(PetscLogStagePop());
  PetscCall(PetscLogEventBegin(EventServerDist, NULL, NULL, NULL, NULL));
  PetscCall(KSPGetIterationNumber(ksp, &its));
  PCMPIIterations[size - 1] += its;
  // TODO: send iterations up to outer KSP

  if (PCMPIServerUseShmget) PetscCall(PetscShmgetUnmapAddresses(2, addr));

  /* gather solution */
  PetscCall(PetscLogEventBegin(EventServerDistMPI, NULL, NULL, NULL, NULL));
  if (!PCMPIServerUseShmget) {
    PetscMPIInt in;

    PetscCall(VecGetArrayRead(ksp->vec_sol, &x));
    if (pc) PetscCall(VecGetArray(X, &sx));
    PetscCall(PetscMPIIntCast(n, &in));
    PetscCallMPI(MPI_Gatherv(x, in, MPIU_SCALAR, sx, pc ? km->sendcount : NULL, pc ? km->displ : NULL, MPIU_SCALAR, 0, comm));
    if (pc) PetscCall(VecRestoreArray(X, &sx));
    PetscCall(VecRestoreArrayRead(ksp->vec_sol, &x));
  } else {
    /* shared memory: the solution was written in place; just detach the aliased arrays */
    PetscCall(VecResetArray(ksp->vec_rhs));
    PetscCall(VecResetArray(ksp->vec_sol));
  }
  PetscCall(PetscLogEventEnd(EventServerDistMPI, NULL, NULL, NULL, NULL));
  PetscCall(PetscLogEventEnd(EventServerDist, NULL, NULL, NULL, NULL));
  PCMPIServerInSolve = PETSC_FALSE;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Destroys the server-side parallel KSP associated with this PC on every participating rank */
static PetscErrorCode PCMPIDestroy(PC pc)
{
  PC_MPI  *km = pc ? (PC_MPI *)pc->data : NULL;
  KSP      ksp;
  MPI_Comm comm = PC_MPI_COMM_WORLD;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Scatter(pc ? km->ksps : NULL, 1, MPI_AINT, &ksp, 1, MPI_AINT, 0, comm));
  if (!ksp) PetscFunctionReturn(PETSC_SUCCESS);
  PetscCall(PetscLogStagePush(PCMPIStage));
  PCMPIServerInSolve = PETSC_TRUE;
  PetscCall(KSPDestroy(&ksp));
  PetscCall(PetscLogStagePop());
  PCMPIServerInSolve = PETSC_FALSE;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
   Called on rank 0 to broadcast the next PCMPICommand to the waiting server ranks.
   When shared memory is in use, the per-rank pthread mutexes are released so the
   servers stop spinning, and reacquired once the servers acknowledge via MPI_Reduce.
*/
static PetscErrorCode PCMPIServerBroadcastRequest(PCMPICommand request)
{
#if defined(PETSC_HAVE_PTHREAD_MUTEX)
  PetscMPIInt dummy1 = 1, dummy2;
#endif

  PetscFunctionBegin;
#if defined(PETSC_HAVE_PTHREAD_MUTEX)
  if (PCMPIServerUseShmget) {
    for (PetscMPIInt i = 1; i < PetscGlobalSize; i++) pthread_mutex_unlock(&PCMPIServerLocks[i]);
  }
#endif
  PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, MPI_COMM_WORLD));
  /* next line ensures the sender has already taken the lock */
#if defined(PETSC_HAVE_PTHREAD_MUTEX)
  if (PCMPIServerUseShmget) {
    PetscCallMPI(MPI_Reduce(&dummy1, &dummy2, 1, MPI_INT, MPI_SUM, 0, PC_MPI_COMM_WORLD));
    for (PetscMPIInt i = 1; i < PetscGlobalSize; i++) pthread_mutex_lock(&PCMPIServerLocks[i]);
  }
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  PCMPIServerBegin - starts a server that runs on the `rank != 0` MPI processes waiting to process requests for
  parallel `KSP` solves and management of parallel `KSP` objects.

  Logically Collective on all MPI processes except rank 0

  Options Database Keys:
+ -mpi_linear_solver_server                   - causes the PETSc program to start in MPI linear solver server mode where only the first MPI rank runs user code
. -mpi_linear_solver_server_view              - displays information about all the linear systems solved by the MPI linear solver server at the conclusion of the program
- -mpi_linear_solver_server_use_shared_memory - use shared memory when communicating matrices and vectors to server processes (default where supported)

  Level: developer

  Note:
  This is normally started automatically in `PetscInitialize()` when the option is provided

  See `PCMPI` for information on using the solver with a `KSP` object

  See `PetscShmgetAllocateArray()` for instructions on how to ensure the shared memory is available on your machine.

  Developer Notes:
  When called on MPI rank 0 this sets `PETSC_COMM_WORLD` to `PETSC_COMM_SELF` to allow a main program
  written with `PETSC_COMM_WORLD` to run correctly on the single rank while all the ranks
  (that would normally be sharing `PETSC_COMM_WORLD`) to run the solver server.

  Can this be integrated into the `PetscDevice` abstraction that is currently being developed?

  Conceivably `PCREDISTRIBUTE` could be organized in a similar manner to simplify its usage

  This could be implemented directly at the `KSP` level instead of using the `PCMPI` wrapper object

  The code could be extended to allow an MPI + OpenMP application to use the linear solver server concept across all shared-memory
  nodes with a single MPI process per node for the user application but multiple MPI processes per node for the linear solver.

  The concept could also be extended for users's callbacks for `SNES`, `TS`, and `Tao` where the `SNESSolve()` for example, runs on
  all MPI processes but the user callback only runs on one MPI process per node.

  PETSc could also be extended with an MPI-less API that provides access to PETSc's solvers without any reference to MPI, essentially remove
  the `MPI_Comm` argument from PETSc calls.

.seealso: [](sec_pcmpi), `PCMPIServerEnd()`, `PCMPI`, `KSPCheckPCMPI()`
@*/
PetscErrorCode PCMPIServerBegin(void)
{
  PetscMPIInt rank;

  PetscFunctionBegin;
  PetscCall(PetscInfo(NULL, "Starting MPI Linear Solver Server\n"));
  if (PetscDefined(USE_SINGLE_LIBRARY)) {
    PetscCall(VecInitializePackage());
    PetscCall(MatInitializePackage());
    PetscCall(DMInitializePackage());
    PetscCall(PCInitializePackage());
    PetscCall(KSPInitializePackage());
    PetscCall(SNESInitializePackage());
    PetscCall(TSInitializePackage());
    PetscCall(TaoInitializePackage());
  }
  PetscCall(PetscLogStageRegister("PCMPI", &PCMPIStage));
  PetscCall(PetscLogEventRegister("ServerDist", PC_CLASSID, &EventServerDist));
  PetscCall(PetscLogEventRegister("ServerDistMPI", PC_CLASSID, &EventServerDistMPI));

  if (!PetscDefined(HAVE_SHMGET)) PCMPIServerUseShmget = PETSC_FALSE;
  PetscCall(PetscOptionsGetBool(NULL, NULL, "-mpi_linear_solver_server_use_shared_memory", &PCMPIServerUseShmget, NULL));

  PetscCallMPI(MPI_Comm_rank(PC_MPI_COMM_WORLD, &rank));
  if (PCMPIServerUseShmget) {
#if defined(PETSC_HAVE_PTHREAD_MUTEX)
    PetscMPIInt size;

    PetscCallMPI(MPI_Comm_size(PETSC_COMM_WORLD, &size));
    if (size > 1) {
      pthread_mutex_t *locks;

      if (rank == 0) {
        PCMPIServerActive = PETSC_TRUE;
        PetscCall(PetscShmgetAllocateArray(size, sizeof(pthread_mutex_t), (void **)&locks));
      }
      PetscCall(PetscShmgetMapAddresses(PETSC_COMM_WORLD, 1, (const void **)&locks, (void **)&PCMPIServerLocks));
      if (rank == 0) {
        pthread_mutexattr_t attr;

        /* locks live in shared memory, so they must be marked process-shared */
        pthread_mutexattr_init(&attr);
        pthread_mutexattr_setpshared(&attr, PTHREAD_PROCESS_SHARED);

        for (int i = 1; i < size; i++) {
          pthread_mutex_init(&PCMPIServerLocks[i], &attr);
          pthread_mutex_lock(&PCMPIServerLocks[i]); /* start locked; released when a request is broadcast */
        }
      }
      PetscCallMPI(MPI_Barrier(PETSC_COMM_WORLD));
    }
#endif
  }
  if (rank == 0) {
    PETSC_COMM_WORLD  = PETSC_COMM_SELF;
    PCMPIServerActive = PETSC_TRUE;
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  /* server ranks block here for requests from rank 0 until PCMPI_EXIT is received */
  while (PETSC_TRUE) {
    PCMPICommand request = PCMPI_CREATE;
#if defined(PETSC_HAVE_PTHREAD_MUTEX)
    PetscMPIInt dummy1 = 1, dummy2;
#endif

    // TODO: can we broadcast the number of active ranks here so only the correct subset of processes waits on the later scatters?
#if defined(PETSC_HAVE_PTHREAD_MUTEX)
    if (PCMPIServerUseShmget) pthread_mutex_lock(&PCMPIServerLocks[PetscGlobalRank]);
#endif
    PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, PC_MPI_COMM_WORLD));
#if defined(PETSC_HAVE_PTHREAD_MUTEX)
    if (PCMPIServerUseShmget) {
      /* next line ensures PetscGlobalRank has locked before rank 0 can take the lock back */
      PetscCallMPI(MPI_Reduce(&dummy1, &dummy2, 1, MPI_INT, MPI_SUM, 0, PC_MPI_COMM_WORLD));
      pthread_mutex_unlock(&PCMPIServerLocks[PetscGlobalRank]);
    }
#endif
    switch (request) {
    case PCMPI_CREATE:
      PetscCall(PCMPICreate(NULL));
      break;
    case PCMPI_SET_MAT:
      PetscCall(PCMPISetMat(NULL));
      break;
    case PCMPI_UPDATE_MAT_VALUES:
      PetscCall(PCMPIUpdateMatValues(NULL));
      break;
    case PCMPI_VIEW:
      // PetscCall(PCMPIView(NULL));
      break;
    case PCMPI_SOLVE:
      PetscCall(PCMPISolve(NULL, NULL, NULL));
      break;
    case PCMPI_DESTROY:
      PetscCall(PCMPIDestroy(NULL));
      break;
    case PCMPI_EXIT:
      if (PCMPIServerUseShmget) PetscCall(PetscShmgetUnmapAddresses(1, (void **)&PCMPIServerLocks));
      PetscCall(PetscFinalize());
      exit(0); /* not sure if this is a good idea, but cannot return because it will run users main program */
      break;
    default:
      break;
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  PCMPIServerEnd - ends a server that runs on the rank != 0 MPI processes waiting to process requests for
  parallel KSP solves and management of parallel `KSP` objects.

  Logically Collective on all MPI ranks except 0

  Level: developer

  Note:
  This is normally called automatically in `PetscFinalize()`

.seealso: [](sec_pcmpi), `PCMPIServerBegin()`, `PCMPI`, `KSPCheckPCMPI()`
@*/
PetscErrorCode PCMPIServerEnd(void)
{
  PetscFunctionBegin;
  if (PetscGlobalRank == 0) {
    PetscViewer       viewer = NULL;
    PetscViewerFormat format;

    PetscCall(PetscShmgetAddressesFinalize());
    PetscCall(PCMPIServerBroadcastRequest(PCMPI_EXIT));
    if (PCMPIServerUseShmget) PetscCall(PetscShmgetUnmapAddresses(1, (void **)&PCMPIServerLocks));
    PETSC_COMM_WORLD = MPI_COMM_WORLD; /* could use PC_MPI_COMM_WORLD */
    PetscOptionsBegin(PETSC_COMM_SELF, NULL, "MPI linear solver server options", NULL);
    PetscCall(PetscOptionsViewer("-mpi_linear_solver_server_view", "View information about system solved with the server", "PCMPI", &viewer, &format, NULL));
    PetscOptionsEnd();
    if (viewer) {
      PetscBool isascii;

      PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii));
      if (isascii) {
        PetscMPIInt size;
        PetscMPIInt i;

        PetscCallMPI(MPI_Comm_size(PETSC_COMM_WORLD, &size));
        PetscCall(PetscViewerASCIIPrintf(viewer, "MPI linear solver server statistics:\n"));
        PetscCall(PetscViewerASCIIPrintf(viewer, "    Ranks        KSPSolve()s     Mats        KSPs       Avg. Size      Avg. Its\n"));
        if (PCMPIKSPCountsSeq) {
          PetscCall(PetscViewerASCIIPrintf(viewer, "  Sequential         %" PetscInt_FMT "                         %" PetscInt_FMT "            %" PetscInt_FMT "           %" PetscInt_FMT "\n", PCMPISolveCountsSeq, PCMPIKSPCountsSeq, PCMPISizesSeq / PCMPISolveCountsSeq, PCMPIIterationsSeq / PCMPISolveCountsSeq));
        }
        for (i = 0; i < size; i++) {
          if (PCMPIKSPCounts[i]) {
            PetscCall(PetscViewerASCIIPrintf(viewer, "     %d               %" PetscInt_FMT "            %" PetscInt_FMT "           %" PetscInt_FMT "            %" PetscInt_FMT "            %" PetscInt_FMT "\n", i + 1, PCMPISolveCounts[i], PCMPIMatCounts[i], PCMPIKSPCounts[i], PCMPISizes[i] / PCMPISolveCounts[i], PCMPIIterations[i] / PCMPISolveCounts[i]));
          }
        }
        PetscCall(PetscViewerASCIIPrintf(viewer, "MPI linear solver server %susing shared memory\n", PCMPIServerUseShmget ? "" : "not "));
      }
      PetscCall(PetscViewerDestroy(&viewer));
    }
  }
  PetscCall(PCMPICommsDestroy());
  PCMPIServerActive = PETSC_FALSE;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
   This version is used in the trivial case when the MPI parallel solver server is running on just the original MPI rank 0
   because, for example, the problem is small. This version is more efficient because it does not require copying any data
*/
static PetscErrorCode PCSetUp_Seq(PC pc)
{
  PC_MPI     *km = (PC_MPI *)pc->data;
  Mat         sA;
  const char *prefix;
  char       *found = NULL, *cprefix;

  PetscFunctionBegin;
  PCMPIServerInSolve = PETSC_TRUE;
  PetscCall(PCGetOperators(pc, NULL, &sA));
  PetscCall(PCGetOptionsPrefix(pc, &prefix));
  PetscCall(KSPCreate(PETSC_COMM_SELF, &km->ksps[0]));
  PetscCall(KSPSetNestLevel(km->ksps[0], 1));
  PetscCall(PetscObjectSetTabLevel((PetscObject)km->ksps[0], 1));

  /* Created KSP gets prefix of PC minus the mpi_linear_solver_server_ portion */
  PetscCall(PCGetOptionsPrefix(pc, &prefix));
  PetscCheck(prefix, PETSC_COMM_SELF, PETSC_ERR_PLIB, "PCMPI missing required prefix");
  PetscCall(PetscStrallocpy(prefix, &cprefix));
  PetscCall(PetscStrstr(cprefix, "mpi_linear_solver_server_", &found));
  PetscCheck(found, PETSC_COMM_SELF, PETSC_ERR_PLIB, "PCMPI missing mpi_linear_solver_server_ portion of prefix");
  *found = 0; /* truncate the prefix at the server marker */
  PetscCall(KSPSetOptionsPrefix(km->ksps[0], cprefix));
  PetscCall(PetscFree(cprefix));

  PetscCall(KSPSetOperators(km->ksps[0], sA, sA));
  PetscCall(KSPSetFromOptions(km->ksps[0]));
  PetscCall(KSPSetUp(km->ksps[0]));
  PetscCall(PetscInfo((PetscObject)pc, "MPI parallel linear solver system is being solved directly on rank 0 due to its small size\n"));
  PCMPIKSPCountsSeq++;
  PCMPIServerInSolve = PETSC_FALSE;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Applies the rank-0-only (sequential) variant: forwards b and x straight to the inner KSPSolve() */
static PetscErrorCode PCApply_Seq(PC pc, Vec b, Vec x)
{
  PC_MPI  *km = (PC_MPI *)pc->data;
  PetscInt its, n;
  Mat      A;

  PetscFunctionBegin;
  PCMPIServerInSolve = PETSC_TRUE;
  PetscCall(KSPSolve(km->ksps[0], b, x));
  PetscCall(KSPGetIterationNumber(km->ksps[0], &its));
  PCMPISolveCountsSeq++;
  PCMPIIterationsSeq += its;
  PetscCall(KSPGetOperators(km->ksps[0], NULL, &A));
  PetscCall(MatGetSize(A, &n, NULL));
  PCMPISizesSeq += n;
  PCMPIServerInSolve = PETSC_FALSE;
  /*
     do not keep reference to previous rhs and solution since destroying them in the next KSPSolve()
     may use PetscFree() instead of PCMPIArrayDeallocate()
  */
  PetscCall(VecDestroy(&km->ksps[0]->vec_rhs));
  PetscCall(VecDestroy(&km->ksps[0]->vec_sol));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Viewer for the sequential (rank-0-only) variant of the solver */
static PetscErrorCode PCView_Seq(PC pc, PetscViewer viewer)
{
  PC_MPI *km = (PC_MPI *)pc->data;

  PetscFunctionBegin;
  PetscCall(PetscViewerASCIIPrintf(viewer, "Running MPI linear solver server directly on rank 0 due to its small size\n"));
  PetscCall(PetscViewerASCIIPrintf(viewer, "Desired minimum number of nonzeros per rank for MPI parallel solve %d\n", (int)km->mincntperrank));
  PetscCall(PetscViewerASCIIPrintf(viewer, "*** Use -mpi_linear_solver_server_view to statistics on all the solves ***\n"));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Destroys the sequential variant's inner KSP while protecting objects shared with the outer KSP */
static PetscErrorCode PCDestroy_Seq(PC pc)
{
  PC_MPI *km = (PC_MPI *)pc->data;
  Mat     A, B;
  Vec     x, b;

  PetscFunctionBegin;
  PCMPIServerInSolve = PETSC_TRUE;
  /* since matrices and vectors are shared with outer KSP we need to ensure they are not destroyed with PetscFree() */
  PetscCall(KSPGetOperators(km->ksps[0], &A, &B));
  PetscCall(PetscObjectReference((PetscObject)A));
  PetscCall(PetscObjectReference((PetscObject)B));
  PetscCall(KSPGetSolution(km->ksps[0], &x));
  PetscCall(PetscObjectReference((PetscObject)x));
  PetscCall(KSPGetRhs(km->ksps[0], &b));
  PetscCall(PetscObjectReference((PetscObject)b));
  PetscCall(KSPDestroy(&km->ksps[0]));
  PetscCall(PetscFree(pc->data));
  PCMPIServerInSolve = PETSC_FALSE;
  /* now drop the extra references outside the InSolve region so normal destruction paths run */
  PetscCall(MatDestroy(&A));
  PetscCall(MatDestroy(&B));
  PetscCall(VecDestroy(&x));
  PetscCall(VecDestroy(&b));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
     PCSetUp_MPI - Trigger the creation of the MPI parallel PC and copy parts of
the matrix and 816 right-hand side to the parallel PC 817 */ 818 static PetscErrorCode PCSetUp_MPI(PC pc) 819 { 820 PC_MPI *km = (PC_MPI *)pc->data; 821 PetscMPIInt rank, size; 822 PetscBool newmatrix = PETSC_FALSE; 823 824 PetscFunctionBegin; 825 PetscCallMPI(MPI_Comm_rank(MPI_COMM_WORLD, &rank)); 826 PetscCheck(rank == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "PCMPI can only be used from 0th rank of MPI_COMM_WORLD. Perhaps a missing -mpi_linear_solver_server?"); 827 PetscCallMPI(MPI_Comm_size(MPI_COMM_WORLD, &size)); 828 829 if (!pc->setupcalled) { 830 if (!km->alwaysuseserver) { 831 PetscInt n; 832 Mat sA; 833 /* short circuit for small systems */ 834 PetscCall(PCGetOperators(pc, &sA, &sA)); 835 PetscCall(MatGetSize(sA, &n, NULL)); 836 if (n < 2 * km->mincntperrank - 1 || size == 1) { 837 pc->ops->setup = NULL; 838 pc->ops->apply = PCApply_Seq; 839 pc->ops->destroy = PCDestroy_Seq; 840 pc->ops->view = PCView_Seq; 841 PetscCall(PCSetUp_Seq(pc)); 842 PetscFunctionReturn(PETSC_SUCCESS); 843 } 844 } 845 846 PetscCall(PCMPIServerBroadcastRequest(PCMPI_CREATE)); 847 PetscCall(PCMPICreate(pc)); 848 newmatrix = PETSC_TRUE; 849 } 850 if (pc->flag == DIFFERENT_NONZERO_PATTERN) newmatrix = PETSC_TRUE; 851 852 if (newmatrix) { 853 PetscCall(PetscInfo((PetscObject)pc, "New matrix or matrix has changed nonzero structure\n")); 854 PetscCall(PCMPIServerBroadcastRequest(PCMPI_SET_MAT)); 855 PetscCall(PCMPISetMat(pc)); 856 } else { 857 PetscCall(PetscInfo((PetscObject)pc, "Matrix has only changed nonzero values\n")); 858 PetscCall(PCMPIServerBroadcastRequest(PCMPI_UPDATE_MAT_VALUES)); 859 PetscCall(PCMPIUpdateMatValues(pc)); 860 } 861 PetscFunctionReturn(PETSC_SUCCESS); 862 } 863 864 static PetscErrorCode PCApply_MPI(PC pc, Vec b, Vec x) 865 { 866 PetscFunctionBegin; 867 PetscCall(PCMPIServerBroadcastRequest(PCMPI_SOLVE)); 868 PetscCall(PCMPISolve(pc, b, x)); 869 PetscFunctionReturn(PETSC_SUCCESS); 870 } 871 872 static PetscErrorCode PCDestroy_MPI(PC pc) 873 { 874 
PetscFunctionBegin; 875 PetscCall(PCMPIServerBroadcastRequest(PCMPI_DESTROY)); 876 PetscCall(PCMPIDestroy(pc)); 877 PetscCall(PetscFree(pc->data)); 878 PetscFunctionReturn(PETSC_SUCCESS); 879 } 880 881 /* 882 PCView_MPI - Cannot call view on the MPI parallel KSP because other ranks do not have access to the viewer, use options database 883 */ 884 static PetscErrorCode PCView_MPI(PC pc, PetscViewer viewer) 885 { 886 PC_MPI *km = (PC_MPI *)pc->data; 887 MPI_Comm comm; 888 PetscMPIInt size; 889 890 PetscFunctionBegin; 891 PetscCall(PetscObjectGetComm((PetscObject)km->ksps[0], &comm)); 892 PetscCallMPI(MPI_Comm_size(comm, &size)); 893 PetscCall(PetscViewerASCIIPrintf(viewer, "Size of MPI communicator used for MPI parallel KSP solve %d\n", size)); 894 PetscCall(PetscViewerASCIIPrintf(viewer, "Desired minimum number of matrix rows on each MPI process for MPI parallel solve %d\n", (int)km->mincntperrank)); 895 PetscCall(PetscViewerASCIIPrintf(viewer, "*** Use -mpi_linear_solver_server_view to view statistics on all the solves ***\n")); 896 PetscFunctionReturn(PETSC_SUCCESS); 897 } 898 899 static PetscErrorCode PCSetFromOptions_MPI(PC pc, PetscOptionItems *PetscOptionsObject) 900 { 901 PC_MPI *km = (PC_MPI *)pc->data; 902 903 PetscFunctionBegin; 904 PetscOptionsHeadBegin(PetscOptionsObject, "MPI linear solver server options"); 905 PetscCall(PetscOptionsInt("-minimum_count_per_rank", "Desired minimum number of nonzeros per rank", "None", km->mincntperrank, &km->mincntperrank, NULL)); 906 PetscCall(PetscOptionsBool("-always_use_server", "Use the server even if only one rank is used for the solve (for debugging)", "None", km->alwaysuseserver, &km->alwaysuseserver, NULL)); 907 PetscOptionsHeadEnd(); 908 PetscFunctionReturn(PETSC_SUCCESS); 909 } 910 911 /*MC 912 PCMPI - Calls an MPI parallel `KSP` to solve a linear system from user code running on one process 913 914 Options Database Keys for the Server: 915 + -mpi_linear_solver_server - causes the PETSc program to start in MPI 
   linear solver server mode where only the first MPI rank runs user code
.  -mpi_linear_solver_server_view - displays information about all the linear systems solved by the MPI linear solver server
-  -mpi_linear_solver_server_use_shared_memory <true, false> - use shared memory to distribute matrix and right hand side, defaults to true

   Options Database Keys for a specific `KSP` object
+  -[any_ksp_prefix]_mpi_linear_solver_server_minimum_count_per_rank - sets the minimum size of the linear system per MPI rank that the solver will strive for
-  -[any_ksp_prefix]_mpi_linear_solver_server_always_use_server - use the server solver code even if the particular system is only solved on the process (for debugging and testing purposes)

   Level: developer

   Notes:
   This cannot be used with vectors or matrices that are created using arrays provided by the user, such as `VecCreateWithArray()` or
   `MatCreateSeqAIJWithArrays()`

   The options database prefix for the actual solver is any prefix provided before use to the original `KSP` with `KSPSetOptionsPrefix()`, most commonly no prefix is used.

   It can be particularly useful for user OpenMP code or potentially user GPU code.

   When the program is running with a single MPI process then it directly uses the provided matrix and right-hand side
   and does not need to distribute the matrix and vector to the various MPI processes; thus it incurs no extra overhead over just using the `KSP` directly.

   The solver options for actual solving `KSP` and `PC` must be controlled via the options database, calls to set options directly on the user level `KSP` and `PC` have no effect
   because they are not the actual solver objects.

   When `-log_view` is used with this solver the events within the parallel solve are logged in their own stage. Some of the logging in the other
   stages will be confusing since the event times are only recorded on the 0th MPI rank, thus the percent of time in the events will be misleading.

   Developer Note:
   This `PCType` is never directly selected by the user, it is set when the option `-mpi_linear_solver_server` is used and the `PC` is at the outer most nesting of
   a `KSP`. The outer most `KSP` object is automatically set to `KSPPREONLY` and thus is not directly visible to the user.

.seealso: [](sec_pcmpi), `KSPCreate()`, `KSPSetType()`, `KSPType`, `KSP`, `PC`, `PCMPIServerBegin()`, `PCMPIServerEnd()`, `KSPCheckPCMPI()`
M*/
PETSC_EXTERN PetscErrorCode PCCreate_MPI(PC pc)
{
  PC_MPI *km;
  char   *found = NULL;

  PetscFunctionBegin;
  /* PCMPI is only legal when the PC carries the server prefix; reject any other use */
  PetscCall(PetscStrstr(((PetscObject)pc)->prefix, "mpi_linear_solver_server_", &found));
  PetscCheck(found, PETSC_COMM_SELF, PETSC_ERR_PLIB, "PCMPI object prefix does not have mpi_linear_solver_server_");

  /* material from PCSetType(): tear down whatever type the PC previously had */
  PetscTryTypeMethod(pc, destroy);
  pc->ops->destroy = NULL;
  pc->data         = NULL;

  PetscCall(PetscFunctionListDestroy(&((PetscObject)pc)->qlist));
  /* wipe all previous type-specific operations before installing the MPI-server ones */
  PetscCall(PetscMemzero(pc->ops, sizeof(struct _PCOps)));
  pc->modifysubmatrices  = NULL;
  pc->modifysubmatricesP = NULL;
  pc->setupcalled        = 0;

  PetscCall(PetscNew(&km));
  pc->data = (void *)km;

  /* default threshold for how many matrix rows each server rank should get at minimum */
  km->mincntperrank = 10000;

  pc->ops->setup          = PCSetUp_MPI;
  pc->ops->apply          = PCApply_MPI;
  pc->ops->destroy        = PCDestroy_MPI;
  pc->ops->view           = PCView_MPI;
  pc->ops->setfromoptions = PCSetFromOptions_MPI;
  PetscCall(PetscObjectChangeTypeName((PetscObject)pc, PCMPI));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  PCMPIGetKSP - Gets the `KSP` created by the `PCMPI`

  Not Collective

  Input Parameter:
. pc - the preconditioner context

  Output Parameter:
. innerksp - the inner `KSP`

  Level: advanced

.seealso: [](ch_ksp), `KSP`, `PCMPI`, `PCREDISTRIBUTE`
@*/
PetscErrorCode PCMPIGetKSP(PC pc, KSP *innerksp)
{
  PC_MPI *red = (PC_MPI *)pc->data;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc, PC_CLASSID, 1);
  PetscAssertPointer(innerksp, 2);
  /* returns a borrowed reference; caller must not destroy it */
  *innerksp = red->ksps[0];
  PetscFunctionReturn(PETSC_SUCCESS);
}