1 /*
2 This file creates an MPI parallel KSP from a sequential PC that lives on MPI rank 0.
3 It is intended to allow using PETSc MPI parallel linear solvers from non-MPI codes.
4
The calling sequential program may use OpenMP to compute the right-hand side and matrix for the linear system
6
7 The code uses MPI_COMM_WORLD below but maybe it should be PETSC_COMM_WORLD
8
9 The resulting KSP and PC can only be controlled via the options database, though some common commands
10 could be passed through the server.
11
12 */
13 #include <petsc/private/pcimpl.h> /*I "petscksp.h" I*/
14 #include <petsc/private/kspimpl.h>
15 #include <petscts.h>
16 #include <petsctao.h>
17 #if defined(PETSC_HAVE_PTHREAD_MUTEX)
18 #include <pthread.h>
19 #endif
20
21 #define PC_MPI_MAX_RANKS 256
22 #define PC_MPI_COMM_WORLD MPI_COMM_WORLD
23
/*
   Data for the MPI linear solver server PC. On MPI rank 0 this holds the handles and
   counts needed to scatter the matrix/rhs to the server ranks and gather the solution back;
   on the other ranks the PC (and hence this struct) does not exist.
*/
typedef struct {
  KSP       ksps[PC_MPI_MAX_RANKS];                               /* The addresses of the MPI parallel KSP on each process, NULL when not on a process. */
  PetscInt  sendcount[PC_MPI_MAX_RANKS], displ[PC_MPI_MAX_RANKS]; /* For scatter/gather of rhs/solution */
  PetscInt  NZ[PC_MPI_MAX_RANKS], NZdispl[PC_MPI_MAX_RANKS];      /* For scatter of nonzero values in matrix (and of nonzero column indices initially) */
  PetscInt  mincntperrank;                                        /* minimum number of desired matrix rows per active rank in MPI parallel KSP solve */
  PetscBool alwaysuseserver;                                      /* for debugging, use the server infrastructure even if only one MPI process is used for the solve */
} PC_MPI;
31
/*
   Commands broadcast from MPI rank 0 to the server ranks (see PCMPIServerBroadcastRequest()
   and the dispatch loop in PCMPIServerBegin()).
*/
typedef enum {
  PCMPI_EXIT, /* exit the PC server loop, means the controlling sequential program is done */
  PCMPI_CREATE,
  PCMPI_SET_MAT,           /* set original matrix (or one with different nonzero pattern) */
  PCMPI_UPDATE_MAT_VALUES, /* update current matrix with new nonzero values */
  PCMPI_SOLVE,
  PCMPI_VIEW,
  PCMPI_DESTROY /* destroy a PC that is no longer needed */
} PCMPICommand;
41
/* PCMPIComms[i] contains MPI ranks 0 through i of PC_MPI_COMM_WORLD (MPI_COMM_NULL on the other ranks); built by PCMPICommsCreate() */
static MPI_Comm  PCMPIComms[PC_MPI_MAX_RANKS];
static PetscBool PCMPICommSet = PETSC_FALSE; /* true once PCMPICommsCreate() has run */
/* Statistics reported by -mpi_linear_solver_server_view; the arrays are indexed by (number of active server ranks - 1) */
static PetscInt PCMPISolveCounts[PC_MPI_MAX_RANKS], PCMPIKSPCounts[PC_MPI_MAX_RANKS], PCMPIMatCounts[PC_MPI_MAX_RANKS], PCMPISolveCountsSeq = 0, PCMPIKSPCountsSeq = 0;
static PetscInt PCMPIIterations[PC_MPI_MAX_RANKS], PCMPISizes[PC_MPI_MAX_RANKS], PCMPIIterationsSeq = 0, PCMPISizesSeq = 0;
static PetscLogEvent EventServerDist, EventServerDistMPI;
#if defined(PETSC_HAVE_PTHREAD_MUTEX)
/* Per-rank locks (in shared memory) used to pace the server ranks when shmget-based communication is active */
static pthread_mutex_t *PCMPIServerLocks;
#else
static void *PCMPIServerLocks;
#endif
52
/*
   Builds the nested communicators PCMPIComms[0..size-1], where PCMPIComms[i] contains
   ranks 0..i of PC_MPI_COMM_WORLD (ranks > i receive MPI_COMM_NULL from the split),
   and zeroes the per-size solve statistics. Collective on PC_MPI_COMM_WORLD.
*/
static PetscErrorCode PCMPICommsCreate(void)
{
  MPI_Comm    comm = PC_MPI_COMM_WORLD;
  PetscMPIInt size, rank, i;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCheck(size <= PC_MPI_MAX_RANKS, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for using more than PC_MPI_MAX_RANKS MPI ranks in an MPI linear solver server solve");
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  /* comm for size 1 is useful only for debugging */
  for (i = 0; i < size; i++) {
    /* ranks beyond i opt out of the i-th communicator */
    PetscMPIInt color = rank < i + 1 ? 0 : MPI_UNDEFINED;
    PetscCallMPI(MPI_Comm_split(comm, color, 0, &PCMPIComms[i]));
    PCMPISolveCounts[i] = 0;
    PCMPIKSPCounts[i]   = 0;
    PCMPIIterations[i]  = 0;
    PCMPISizes[i]       = 0;
  }
  PCMPICommSet = PETSC_TRUE;
  PetscFunctionReturn(PETSC_SUCCESS);
}
74
PCMPICommsDestroy(void)75 static PetscErrorCode PCMPICommsDestroy(void)
76 {
77 MPI_Comm comm = PC_MPI_COMM_WORLD;
78 PetscMPIInt size, rank, i;
79
80 PetscFunctionBegin;
81 if (!PCMPICommSet) PetscFunctionReturn(PETSC_SUCCESS);
82 PetscCallMPI(MPI_Comm_size(comm, &size));
83 PetscCallMPI(MPI_Comm_rank(comm, &rank));
84 for (i = 0; i < size; i++) {
85 if (PCMPIComms[i] != MPI_COMM_NULL) PetscCallMPI(MPI_Comm_free(&PCMPIComms[i]));
86 }
87 PCMPICommSet = PETSC_FALSE;
88 PetscFunctionReturn(PETSC_SUCCESS);
89 }
90
PCMPICreate(PC pc)91 static PetscErrorCode PCMPICreate(PC pc)
92 {
93 PC_MPI *km = pc ? (PC_MPI *)pc->data : NULL;
94 MPI_Comm comm = PC_MPI_COMM_WORLD;
95 KSP ksp;
96 PetscInt N[2], mincntperrank = 0;
97 PetscMPIInt size;
98 Mat sA;
99 char *cprefix = NULL;
100 PetscMPIInt len = 0;
101
102 PetscFunctionBegin;
103 PCMPIServerInSolve = PETSC_TRUE;
104 if (!PCMPICommSet) PetscCall(PCMPICommsCreate());
105 PetscCallMPI(MPI_Comm_size(comm, &size));
106 if (pc) {
107 if (size == 1) PetscCall(PetscPrintf(PETSC_COMM_SELF, "Warning: Running KSP type of MPI on a one rank MPI run, this will be less efficient then not using this type\n"));
108 PetscCall(PCGetOperators(pc, &sA, &sA));
109 PetscCall(MatGetSize(sA, &N[0], &N[1]));
110 }
111 PetscCallMPI(MPI_Bcast(N, 2, MPIU_INT, 0, comm));
112
113 /* choose a suitable sized MPI_Comm for the problem to be solved on */
114 if (km) mincntperrank = km->mincntperrank;
115 PetscCallMPI(MPI_Bcast(&mincntperrank, 1, MPI_INT, 0, comm));
116 comm = PCMPIComms[PetscMin(size, PetscMax(1, N[0] / mincntperrank)) - 1];
117 if (comm == MPI_COMM_NULL) {
118 ksp = NULL;
119 PCMPIServerInSolve = PETSC_FALSE;
120 PetscFunctionReturn(PETSC_SUCCESS);
121 }
122 PetscCall(PetscLogStagePush(PCMPIStage));
123 PetscCall(KSPCreate(comm, &ksp));
124 PetscCall(KSPSetNestLevel(ksp, 1));
125 PetscCall(PetscObjectSetTabLevel((PetscObject)ksp, 1));
126 PetscCall(PetscLogStagePop());
127 PetscCallMPI(MPI_Gather(&ksp, 1, MPI_AINT, pc ? km->ksps : NULL, 1, MPI_AINT, 0, comm));
128 if (pc) {
129 size_t slen;
130 const char *prefix = NULL;
131 char *found = NULL;
132
133 PetscCallMPI(MPI_Comm_size(comm, &size));
134 PCMPIKSPCounts[size - 1]++;
135 /* Created KSP gets prefix of PC minus the mpi_linear_solver_server_ portion */
136 PetscCall(PCGetOptionsPrefix(pc, &prefix));
137 PetscCheck(prefix, PETSC_COMM_SELF, PETSC_ERR_PLIB, "PCMPI missing required prefix");
138 PetscCall(PetscStrallocpy(prefix, &cprefix));
139 PetscCall(PetscStrstr(cprefix, "mpi_linear_solver_server_", &found));
140 PetscCheck(found, PETSC_COMM_SELF, PETSC_ERR_PLIB, "PCMPI missing mpi_linear_solver_server_ portion of prefix");
141 *found = 0;
142 PetscCall(PetscStrlen(cprefix, &slen));
143 PetscCall(PetscMPIIntCast(slen, &len));
144 }
145 PetscCallMPI(MPI_Bcast(&len, 1, MPI_INT, 0, comm));
146 if (len) {
147 if (!pc) PetscCall(PetscMalloc1(len + 1, &cprefix));
148 PetscCallMPI(MPI_Bcast(cprefix, len + 1, MPI_CHAR, 0, comm));
149 PetscCall(KSPSetOptionsPrefix(ksp, cprefix));
150 }
151 PetscCall(PetscFree(cprefix));
152 PCMPIServerInSolve = PETSC_FALSE;
153 PetscFunctionReturn(PETSC_SUCCESS);
154 }
155
/*
   Distributes the sequential matrix held on rank 0 to the server ranks and sets it as the
   operator of the parallel KSP. Runs on all server ranks; pc is non-NULL only on rank 0.
   Two transfer paths exist: MPI scatters of the CSR arrays, or (with shmget) directly
   mapping rank 0's arrays into the address space of the other server ranks.

   matproperties[] slots broadcast from rank 0:
     [0] global rows   [1] global cols   [2] block size
     [3] symmetric     [4] hermitian     [5] SPD            [6] structurally symmetric
         (each 0 = unknown, 1 = true, 2 = false)
     [7] length of the matrix options prefix
*/
static PetscErrorCode PCMPISetMat(PC pc)
{
  PC_MPI            *km = pc ? (PC_MPI *)pc->data : NULL;
  Mat                A;
  PetscInt           m, n, j, bs;
  Mat                sA;
  MPI_Comm           comm = PC_MPI_COMM_WORLD;
  KSP                ksp;
  PetscLayout        layout;
  const PetscInt    *IA = NULL, *JA = NULL, *ia, *ja;
  const PetscInt    *range;
  PetscInt          *NZ = NULL, sendcounti[PC_MPI_MAX_RANKS], displi[PC_MPI_MAX_RANKS], *NZdispl = NULL, nz;
  PetscMPIInt        size, i;
  const PetscScalar *a = NULL, *sa;
  PetscInt           matproperties[8] = {0}, rstart, rend;
  char              *cprefix;

  PetscFunctionBegin;
  /* learn which (if any) parallel KSP this rank participates in */
  PetscCallMPI(MPI_Scatter(pc ? km->ksps : NULL, 1, MPI_AINT, &ksp, 1, MPI_AINT, 0, comm));
  if (!ksp) PetscFunctionReturn(PETSC_SUCCESS);
  PCMPIServerInSolve = PETSC_TRUE;
  PetscCall(PetscLogEventBegin(EventServerDist, NULL, NULL, NULL, NULL));
  PetscCall(PetscObjectGetComm((PetscObject)ksp, &comm));
  if (pc) {
    PetscBool   isset, issymmetric, ishermitian, isspd, isstructurallysymmetric;
    const char *prefix;
    size_t      clen;

    PetscCallMPI(MPI_Comm_size(comm, &size));
    PCMPIMatCounts[size - 1]++;
    PetscCall(PCGetOperators(pc, &sA, &sA));
    PetscCall(MatGetSize(sA, &matproperties[0], &matproperties[1]));
    PetscCall(MatGetBlockSize(sA, &bs));
    matproperties[2] = bs;
    PetscCall(MatIsSymmetricKnown(sA, &isset, &issymmetric));
    matproperties[3] = !isset ? 0 : (issymmetric ? 1 : 2);
    PetscCall(MatIsHermitianKnown(sA, &isset, &ishermitian));
    matproperties[4] = !isset ? 0 : (ishermitian ? 1 : 2);
    PetscCall(MatIsSPDKnown(sA, &isset, &isspd));
    matproperties[5] = !isset ? 0 : (isspd ? 1 : 2);
    PetscCall(MatIsStructurallySymmetricKnown(sA, &isset, &isstructurallysymmetric));
    matproperties[6] = !isset ? 0 : (isstructurallysymmetric ? 1 : 2);
    /* Created Mat gets prefix of input Mat PLUS the mpi_linear_solver_server_ portion */
    PetscCall(MatGetOptionsPrefix(sA, &prefix));
    PetscCall(PetscStrallocpy(prefix, &cprefix));
    PetscCall(PetscStrlen(cprefix, &clen));
    matproperties[7] = (PetscInt)clen;
  }
  PetscCallMPI(MPI_Bcast(matproperties, PETSC_STATIC_ARRAY_LENGTH(matproperties), MPIU_INT, 0, comm));

  /* determine ownership ranges of matrix columns */
  PetscCall(PetscLayoutCreate(comm, &layout));
  PetscCall(PetscLayoutSetBlockSize(layout, matproperties[2]));
  PetscCall(PetscLayoutSetSize(layout, matproperties[1]));
  PetscCall(PetscLayoutSetUp(layout));
  PetscCall(PetscLayoutGetLocalSize(layout, &n));
  PetscCall(PetscLayoutDestroy(&layout));

  /* determine ownership ranges of matrix rows */
  PetscCall(PetscLayoutCreate(comm, &layout));
  PetscCall(PetscLayoutSetBlockSize(layout, matproperties[2]));
  PetscCall(PetscLayoutSetSize(layout, matproperties[0]));
  PetscCall(PetscLayoutSetUp(layout));
  PetscCall(PetscLayoutGetLocalSize(layout, &m));
  PetscCall(PetscLayoutGetRange(layout, &rstart, &rend));

  PetscCall(PetscLogEventBegin(EventServerDistMPI, NULL, NULL, NULL, NULL));
  /* copy over the matrix nonzero structure and values */
  if (pc) {
    PetscCall(MatGetRowIJ(sA, 0, PETSC_FALSE, PETSC_FALSE, NULL, &IA, &JA, NULL));
    if (!PCMPIServerUseShmget) {
      /* compute the scatterv counts/displacements for the CSR row starts and nonzeros */
      NZ      = km->NZ;
      NZdispl = km->NZdispl;
      PetscCall(PetscLayoutGetRanges(layout, &range));
      for (i = 0; i < size; i++) {
        sendcounti[i] = 1 + range[i + 1] - range[i]; /* each rank also gets the closing row pointer */
        NZ[i]         = IA[range[i + 1]] - IA[range[i]];
      }
      displi[0]  = 0;
      NZdispl[0] = 0;
      for (j = 1; j < size; j++) {
        displi[j]  = displi[j - 1] + sendcounti[j - 1] - 1; /* overlap of one entry between adjacent row-start slices */
        NZdispl[j] = NZdispl[j - 1] + NZ[j - 1];
      }
    }
    PetscCall(MatSeqAIJGetArrayRead(sA, &sa));
  }
  PetscCall(PetscLayoutDestroy(&layout));

  PetscCall(MatCreate(comm, &A));
  if (matproperties[7] > 0) {
    /* propagate the matrix options prefix to every server rank */
    PetscMPIInt ni;

    PetscCall(PetscMPIIntCast(matproperties[7] + 1, &ni));
    if (!pc) PetscCall(PetscMalloc1(matproperties[7] + 1, &cprefix));
    PetscCallMPI(MPI_Bcast(cprefix, ni, MPI_CHAR, 0, comm));
    PetscCall(MatSetOptionsPrefix(A, cprefix));
    PetscCall(PetscFree(cprefix));
  }
  PetscCall(MatAppendOptionsPrefix(A, "mpi_linear_solver_server_"));
  PetscCall(MatSetSizes(A, m, n, matproperties[0], matproperties[1]));
  PetscCall(MatSetType(A, MATMPIAIJ));

  if (!PCMPIServerUseShmget) {
    /* scatter each rank's slice of the CSR arrays */
    PetscCallMPI(MPI_Scatter(NZ, 1, MPIU_INT, &nz, 1, MPIU_INT, 0, comm));
    PetscCall(PetscMalloc3(n + 1, &ia, nz, &ja, nz, &a));
    PetscCallMPI(MPIU_Scatterv(IA, sendcounti, displi, MPIU_INT, (void *)ia, n + 1, MPIU_INT, 0, comm));
    PetscCallMPI(MPIU_Scatterv(JA, NZ, NZdispl, MPIU_INT, (void *)ja, nz, MPIU_INT, 0, comm));
    PetscCallMPI(MPIU_Scatterv(sa, NZ, NZdispl, MPIU_SCALAR, (void *)a, nz, MPIU_SCALAR, 0, comm));
  } else {
    /* map rank 0's CSR arrays directly into this rank's address space; offsets select this rank's slice */
    const void           *addr[3] = {(const void **)IA, (const void **)JA, (const void **)sa};
    PCMPIServerAddresses *addresses;

    PetscCall(PetscNew(&addresses));
    addresses->n = 3;
    PetscCall(PetscShmgetMapAddresses(comm, addresses->n, addr, addresses->addr));
    ia = rstart + (PetscInt *)addresses->addr[0];
    ja = ia[0] + (PetscInt *)addresses->addr[1];
    a  = ia[0] + (PetscScalar *)addresses->addr[2];
    /* keep the mapping alive for the lifetime of A (used again by PCMPIUpdateMatValues()) */
    PetscCall(PetscObjectContainerCompose((PetscObject)A, "PCMPIServerAddresses", (void *)addresses, PCMPIServerAddressesDestroy));
  }

  if (pc) {
    PetscCall(MatSeqAIJRestoreArrayRead(sA, &sa));
    PetscCall(MatRestoreRowIJ(sA, 0, PETSC_FALSE, PETSC_FALSE, NULL, &IA, &JA, NULL));
  }
  PetscCall(PetscLogEventEnd(EventServerDistMPI, NULL, NULL, NULL, NULL));

  PetscCall(PetscLogStagePush(PCMPIStage));
  PetscCall(MatMPIAIJSetPreallocationCSR(A, ia, ja, a));
  PetscCall(MatSetBlockSize(A, matproperties[2]));

  if (matproperties[3]) PetscCall(MatSetOption(A, MAT_SYMMETRIC, matproperties[3] == 1 ? PETSC_TRUE : PETSC_FALSE));
  if (matproperties[4]) PetscCall(MatSetOption(A, MAT_HERMITIAN, matproperties[4] == 1 ? PETSC_TRUE : PETSC_FALSE));
  if (matproperties[5]) PetscCall(MatSetOption(A, MAT_SPD, matproperties[5] == 1 ? PETSC_TRUE : PETSC_FALSE));
  if (matproperties[6]) PetscCall(MatSetOption(A, MAT_STRUCTURALLY_SYMMETRIC, matproperties[6] == 1 ? PETSC_TRUE : PETSC_FALSE));

  if (!PCMPIServerUseShmget) PetscCall(PetscFree3(ia, ja, a));
  PetscCall(KSPSetOperators(ksp, A, A));
  if (!ksp->vec_sol) PetscCall(MatCreateVecs(A, &ksp->vec_sol, &ksp->vec_rhs));
  PetscCall(PetscLogStagePop());
  if (pc && !PCMPIServerUseShmget) { /* needed for scatterv/gatherv of rhs and solution */
    const PetscInt *range;

    PetscCall(VecGetOwnershipRanges(ksp->vec_sol, &range));
    for (i = 0; i < size; i++) {
      km->sendcount[i] = range[i + 1] - range[i];
      km->displ[i]     = range[i];
    }
  }
  PetscCall(MatDestroy(&A)); /* KSPSetOperators() retains its own reference */
  PetscCall(PetscLogEventEnd(EventServerDist, NULL, NULL, NULL, NULL));
  PetscCall(KSPSetFromOptions(ksp));
  PCMPIServerInSolve = PETSC_FALSE;
  PetscFunctionReturn(PETSC_SUCCESS);
}
312
/*
   Updates the numerical values of the already-distributed parallel matrix with the
   current values of the sequential matrix on rank 0 (same nonzero pattern assumed —
   see PCMPI_SET_MAT vs PCMPI_UPDATE_MAT_VALUES). Runs on all server ranks; pc is
   non-NULL only on rank 0. The symmetry-type flags are re-broadcast since they may
   have changed with the new values.
*/
static PetscErrorCode PCMPIUpdateMatValues(PC pc)
{
  PC_MPI            *km = pc ? (PC_MPI *)pc->data : NULL;
  KSP                ksp;
  Mat                sA, A;
  MPI_Comm           comm = PC_MPI_COMM_WORLD;
  const PetscInt    *ia, *IA;
  const PetscScalar *a;
  PetscCount         nz;
  const PetscScalar *sa = NULL;
  PetscMPIInt        size;
  PetscInt           rstart, matproperties[4] = {0, 0, 0, 0};

  PetscFunctionBegin;
  if (pc) {
    PetscCall(PCGetOperators(pc, &sA, &sA));
    PetscCall(MatSeqAIJGetArrayRead(sA, &sa));
    PetscCall(MatGetRowIJ(sA, 0, PETSC_FALSE, PETSC_FALSE, NULL, &IA, NULL, NULL));
  }
  /* learn which (if any) parallel KSP this rank participates in */
  PetscCallMPI(MPI_Scatter(pc ? km->ksps : NULL, 1, MPI_AINT, &ksp, 1, MPI_AINT, 0, comm));
  if (!ksp) PetscFunctionReturn(PETSC_SUCCESS);
  PCMPIServerInSolve = PETSC_TRUE;
  PetscCall(PetscLogEventBegin(EventServerDist, NULL, NULL, NULL, NULL));
  PetscCall(PetscObjectGetComm((PetscObject)ksp, &comm));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PCMPIMatCounts[size - 1]++;
  PetscCall(KSPGetOperators(ksp, NULL, &A));
  PetscCall(PetscLogEventBegin(EventServerDistMPI, NULL, NULL, NULL, NULL));
  if (!PCMPIServerUseShmget) {
    /* scatter this rank's slice of the new values using the counts computed in PCMPISetMat() */
    PetscInt petsc_nz;

    PetscCall(MatMPIAIJGetNumberNonzeros(A, &nz));
    PetscCall(PetscIntCast(nz, &petsc_nz));
    PetscCall(PetscMalloc1(nz, &a));
    PetscCallMPI(MPIU_Scatterv(sa, pc ? km->NZ : NULL, pc ? km->NZdispl : NULL, MPIU_SCALAR, (void *)a, petsc_nz, MPIU_SCALAR, 0, comm));
  } else {
    /* reuse the shared-memory mapping attached to A by PCMPISetMat() */
    PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
    PCMPIServerAddresses *addresses;
    PetscCall(PetscObjectContainerQuery((PetscObject)A, "PCMPIServerAddresses", &addresses));
    ia = rstart + (PetscInt *)addresses->addr[0];
    a  = ia[0] + (PetscScalar *)addresses->addr[2];
  }
  PetscCall(PetscLogEventEnd(EventServerDistMPI, NULL, NULL, NULL, NULL));
  if (pc) {
    PetscBool isset, issymmetric, ishermitian, isspd, isstructurallysymmetric;

    PetscCall(MatSeqAIJRestoreArrayRead(sA, &sa));
    PetscCall(MatRestoreRowIJ(sA, 0, PETSC_FALSE, PETSC_FALSE, NULL, &IA, NULL, NULL));

    /* 0 = unknown, 1 = true, 2 = false; same encoding as in PCMPISetMat() */
    PetscCall(MatIsSymmetricKnown(sA, &isset, &issymmetric));
    matproperties[0] = !isset ? 0 : (issymmetric ? 1 : 2);
    PetscCall(MatIsHermitianKnown(sA, &isset, &ishermitian));
    matproperties[1] = !isset ? 0 : (ishermitian ? 1 : 2);
    PetscCall(MatIsSPDKnown(sA, &isset, &isspd));
    matproperties[2] = !isset ? 0 : (isspd ? 1 : 2);
    PetscCall(MatIsStructurallySymmetricKnown(sA, &isset, &isstructurallysymmetric));
    matproperties[3] = !isset ? 0 : (isstructurallysymmetric ? 1 : 2);
  }
  PetscCall(MatUpdateMPIAIJWithArray(A, a));
  if (!PCMPIServerUseShmget) PetscCall(PetscFree(a));
  PetscCallMPI(MPI_Bcast(matproperties, 4, MPIU_INT, 0, comm));
  /* if any of these properties was previously set and is now not set this will result in incorrect properties in A since there is no way to unset a property */
  if (matproperties[0]) PetscCall(MatSetOption(A, MAT_SYMMETRIC, matproperties[0] == 1 ? PETSC_TRUE : PETSC_FALSE));
  if (matproperties[1]) PetscCall(MatSetOption(A, MAT_HERMITIAN, matproperties[1] == 1 ? PETSC_TRUE : PETSC_FALSE));
  if (matproperties[2]) PetscCall(MatSetOption(A, MAT_SPD, matproperties[2] == 1 ? PETSC_TRUE : PETSC_FALSE));
  if (matproperties[3]) PetscCall(MatSetOption(A, MAT_STRUCTURALLY_SYMMETRIC, matproperties[3] == 1 ? PETSC_TRUE : PETSC_FALSE));
  PetscCall(PetscLogEventEnd(EventServerDist, NULL, NULL, NULL, NULL));
  PCMPIServerInSolve = PETSC_FALSE;
  PetscFunctionReturn(PETSC_SUCCESS);
}
383
/*
   Performs the parallel KSP solve: scatters the rhs B from rank 0 (or maps it via
   shared memory), solves, and gathers the solution back into X on rank 0. Runs on
   all server ranks; pc, B, X are non-NULL only on rank 0.
*/
static PetscErrorCode PCMPISolve(PC pc, Vec B, Vec X)
{
  PC_MPI            *km = pc ? (PC_MPI *)pc->data : NULL;
  KSP                ksp;
  MPI_Comm           comm = PC_MPI_COMM_WORLD;
  const PetscScalar *sb   = NULL, *x;
  PetscScalar       *b, *sx = NULL;
  PetscInt           its, n;
  PetscMPIInt        size;
  void              *addr[2];

  PetscFunctionBegin;
  /* learn which (if any) parallel KSP this rank participates in */
  PetscCallMPI(MPI_Scatter(pc ? km->ksps : &ksp, 1, MPI_AINT, &ksp, 1, MPI_AINT, 0, comm));
  if (!ksp) PetscFunctionReturn(PETSC_SUCCESS);
  PCMPIServerInSolve = PETSC_TRUE;
  PetscCall(PetscLogEventBegin(EventServerDist, NULL, NULL, NULL, NULL));
  PetscCall(PetscObjectGetComm((PetscObject)ksp, &comm));

  /* scatterv rhs */
  PetscCallMPI(MPI_Comm_size(comm, &size));
  if (pc) {
    PetscInt N;

    /* accumulate statistics for -mpi_linear_solver_server_view */
    PCMPISolveCounts[size - 1]++;
    PetscCall(MatGetSize(pc->pmat, &N, NULL));
    PCMPISizes[size - 1] += N;
  }
  PetscCall(VecGetLocalSize(ksp->vec_rhs, &n));
  PetscCall(PetscLogEventBegin(EventServerDistMPI, NULL, NULL, NULL, NULL));
  if (!PCMPIServerUseShmget) {
    PetscCall(VecGetArray(ksp->vec_rhs, &b));
    if (pc) PetscCall(VecGetArrayRead(B, &sb));
    PetscCallMPI(MPIU_Scatterv(sb, pc ? km->sendcount : NULL, pc ? km->displ : NULL, MPIU_SCALAR, b, n, MPIU_SCALAR, 0, comm));
    if (pc) PetscCall(VecRestoreArrayRead(B, &sb));
    PetscCall(VecRestoreArray(ksp->vec_rhs, &b));
    // TODO: scatter initial guess if needed
  } else {
    /* expose rank 0's B and X arrays to every server rank and alias the KSP work vectors onto this rank's slices */
    PetscInt rstart;

    if (pc) PetscCall(VecGetArrayRead(B, &sb));
    if (pc) PetscCall(VecGetArray(X, &sx));
    const void *inaddr[2] = {(const void **)sb, (const void **)sx};
    if (pc) PetscCall(VecRestoreArray(X, &sx));
    if (pc) PetscCall(VecRestoreArrayRead(B, &sb));

    PetscCall(PetscShmgetMapAddresses(comm, 2, inaddr, addr));
    PetscCall(VecGetOwnershipRange(ksp->vec_rhs, &rstart, NULL));
    PetscCall(VecPlaceArray(ksp->vec_rhs, rstart + (PetscScalar *)addr[0]));
    PetscCall(VecPlaceArray(ksp->vec_sol, rstart + (PetscScalar *)addr[1]));
  }
  PetscCall(PetscLogEventEnd(EventServerDistMPI, NULL, NULL, NULL, NULL));

  PetscCall(PetscLogEventEnd(EventServerDist, NULL, NULL, NULL, NULL));
  PetscCall(PetscLogStagePush(PCMPIStage));
  PetscCall(KSPSolve(ksp, NULL, NULL)); /* NULL: rhs/solution were placed in ksp->vec_rhs/vec_sol above */
  PetscCall(PetscLogStagePop());
  PetscCall(PetscLogEventBegin(EventServerDist, NULL, NULL, NULL, NULL));
  PetscCall(KSPGetIterationNumber(ksp, &its));
  PCMPIIterations[size - 1] += its;
  // TODO: send iterations up to outer KSP

  if (PCMPIServerUseShmget) PetscCall(PetscShmgetUnmapAddresses(2, addr));

  /* gather solution */
  PetscCall(PetscLogEventBegin(EventServerDistMPI, NULL, NULL, NULL, NULL));
  if (!PCMPIServerUseShmget) {
    PetscCall(VecGetArrayRead(ksp->vec_sol, &x));
    if (pc) PetscCall(VecGetArray(X, &sx));
    PetscCallMPI(MPIU_Gatherv(x, n, MPIU_SCALAR, sx, pc ? km->sendcount : NULL, pc ? km->displ : NULL, MPIU_SCALAR, 0, comm));
    if (pc) PetscCall(VecRestoreArray(X, &sx));
    PetscCall(VecRestoreArrayRead(ksp->vec_sol, &x));
  } else {
    /* barrier ensures every rank is done writing into the shared solution before the aliases are dropped */
    PetscCallMPI(MPI_Barrier(comm));
    PetscCall(VecResetArray(ksp->vec_rhs));
    PetscCall(VecResetArray(ksp->vec_sol));
  }
  PetscCall(PetscLogEventEnd(EventServerDistMPI, NULL, NULL, NULL, NULL));
  PetscCall(PetscLogEventEnd(EventServerDist, NULL, NULL, NULL, NULL));
  PCMPIServerInSolve = PETSC_FALSE;
  PetscFunctionReturn(PETSC_SUCCESS);
}
465
PCMPIDestroy(PC pc)466 static PetscErrorCode PCMPIDestroy(PC pc)
467 {
468 PC_MPI *km = pc ? (PC_MPI *)pc->data : NULL;
469 KSP ksp;
470 MPI_Comm comm = PC_MPI_COMM_WORLD;
471
472 PetscFunctionBegin;
473 PetscCallMPI(MPI_Scatter(pc ? km->ksps : NULL, 1, MPI_AINT, &ksp, 1, MPI_AINT, 0, comm));
474 if (!ksp) PetscFunctionReturn(PETSC_SUCCESS);
475 PetscCall(PetscLogStagePush(PCMPIStage));
476 PCMPIServerInSolve = PETSC_TRUE;
477 PetscCall(KSPDestroy(&ksp));
478 PetscCall(PetscLogStagePop());
479 PCMPIServerInSolve = PETSC_FALSE;
480 PetscFunctionReturn(PETSC_SUCCESS);
481 }
482
/*
   Called on rank 0 to send a command to the waiting server ranks. When shared memory
   is in use the server ranks block on per-rank pthread mutexes rather than spinning in
   MPI_Bcast; rank 0 releases those locks, broadcasts the command, then re-acquires the
   locks. The MPI_Reduce pairs with the one in PCMPIServerBegin() and guarantees each
   server rank has re-taken its own lock before rank 0 locks it back — the ordering of
   these calls is essential.
*/
static PetscErrorCode PCMPIServerBroadcastRequest(PCMPICommand request)
{
#if defined(PETSC_HAVE_PTHREAD_MUTEX)
  PetscMPIInt dummy1 = 1, dummy2;
#endif

  PetscFunctionBegin;
#if defined(PETSC_HAVE_PTHREAD_MUTEX)
  if (PCMPIServerUseShmget) {
    /* wake each sleeping server rank */
    for (PetscMPIInt i = 1; i < PetscGlobalSize; i++) pthread_mutex_unlock(&PCMPIServerLocks[i]);
  }
#endif
  PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, MPI_COMM_WORLD));
  /* next line ensures the sender has already taken the lock */
#if defined(PETSC_HAVE_PTHREAD_MUTEX)
  if (PCMPIServerUseShmget) {
    PetscCallMPI(MPI_Reduce(&dummy1, &dummy2, 1, MPI_INT, MPI_SUM, 0, PC_MPI_COMM_WORLD));
    for (PetscMPIInt i = 1; i < PetscGlobalSize; i++) pthread_mutex_lock(&PCMPIServerLocks[i]);
  }
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}
505
/*@C
  PCMPIServerBegin - starts a server that runs on the `rank != 0` MPI processes waiting to process requests for
  parallel `KSP` solves and management of parallel `KSP` objects.

  Logically Collective on all MPI processes except rank 0

  Options Database Keys:
+ -mpi_linear_solver_server - causes the PETSc program to start in MPI linear solver server mode where only the first MPI rank runs user code
. -mpi_linear_solver_server_view - displays information about all the linear systems solved by the MPI linear solver server at the conclusion of the program
- -mpi_linear_solver_server_use_shared_memory - use shared memory when communicating matrices and vectors to server processes (default where supported)

  Level: developer

  Note:
  This is normally started automatically in `PetscInitialize()` when the option is provided

  See `PCMPI` for information on using the solver with a `KSP` object

  See `PetscShmgetAllocateArray()` for instructions on how to ensure the shared memory is available on your machine.

  Developer Notes:
  When called on MPI rank 0 this sets `PETSC_COMM_WORLD` to `PETSC_COMM_SELF` to allow a main program
  written with `PETSC_COMM_WORLD` to run correctly on the single rank while all the ranks
  (that would normally be sharing `PETSC_COMM_WORLD`) to run the solver server.

  Can this be integrated into the `PetscDevice` abstraction that is currently being developed?

  Conceivably `PCREDISTRIBUTE` could be organized in a similar manner to simplify its usage

  This could be implemented directly at the `KSP` level instead of using the `PCMPI` wrapper object

  The code could be extended to allow an MPI + OpenMP application to use the linear solver server concept across all shared-memory
  nodes with a single MPI process per node for the user application but multiple MPI processes per node for the linear solver.

  The concept could also be extended for users' callbacks for `SNES`, `TS`, and `Tao` where the `SNESSolve()` for example, runs on
  all MPI processes but the user callback only runs on one MPI process per node.

  PETSc could also be extended with an MPI-less API that provides access to PETSc's solvers without any reference to MPI, essentially remove
  the `MPI_Comm` argument from PETSc calls.

.seealso: [](sec_pcmpi), `PCMPIServerEnd()`, `PCMPI`, `KSPCheckPCMPI()`
@*/
PetscErrorCode PCMPIServerBegin(void)
{
  PetscMPIInt rank;

  PetscFunctionBegin;
  PetscCall(PetscInfo(NULL, "Starting MPI Linear Solver Server\n"));
  if (PetscDefined(USE_SINGLE_LIBRARY)) {
    PetscCall(VecInitializePackage());
    PetscCall(MatInitializePackage());
    PetscCall(DMInitializePackage());
    PetscCall(PCInitializePackage());
    PetscCall(KSPInitializePackage());
    PetscCall(SNESInitializePackage());
    PetscCall(TSInitializePackage());
    PetscCall(TaoInitializePackage());
  }
  PetscCall(PetscLogStageRegister("PCMPI", &PCMPIStage));
  PetscCall(PetscLogEventRegister("ServerDist", PC_CLASSID, &EventServerDist));
  PetscCall(PetscLogEventRegister("ServerDistMPI", PC_CLASSID, &EventServerDistMPI));

  /* shared-memory transport requires shmget support in the build */
  if (!PetscDefined(HAVE_SHMGET)) PCMPIServerUseShmget = PETSC_FALSE;
  PetscCall(PetscOptionsGetBool(NULL, NULL, "-mpi_linear_solver_server_use_shared_memory", &PCMPIServerUseShmget, NULL));

  PetscCallMPI(MPI_Comm_rank(PC_MPI_COMM_WORLD, &rank));
  if (PCMPIServerUseShmget) {
#if defined(PETSC_HAVE_PTHREAD_MUTEX)
    PetscMPIInt size;

    PetscCallMPI(MPI_Comm_size(PETSC_COMM_WORLD, &size));
    if (size > 1) {
      pthread_mutex_t *locks;

      /* rank 0 allocates process-shared mutexes; every rank maps them into its address space */
      if (rank == 0) {
        PCMPIServerActive = PETSC_TRUE;
        PetscCall(PetscShmgetAllocateArray(size, sizeof(pthread_mutex_t), (void **)&locks));
      }
      PetscCall(PetscShmgetMapAddresses(PETSC_COMM_WORLD, 1, (const void **)&locks, (void **)&PCMPIServerLocks));
      if (rank == 0) {
        pthread_mutexattr_t attr;

        pthread_mutexattr_init(&attr);
        pthread_mutexattr_setpshared(&attr, PTHREAD_PROCESS_SHARED);

        /* locks start held by rank 0 so the server ranks sleep until a request arrives */
        for (int i = 1; i < size; i++) {
          pthread_mutex_init(&PCMPIServerLocks[i], &attr);
          pthread_mutex_lock(&PCMPIServerLocks[i]);
        }
      }
      PetscCallMPI(MPI_Barrier(PETSC_COMM_WORLD));
    }
#endif
  }
  if (rank == 0) {
    /* rank 0 returns to run the user's sequential program on PETSC_COMM_SELF */
    PETSC_COMM_WORLD  = PETSC_COMM_SELF;
    PCMPIServerActive = PETSC_TRUE;
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  /* the remaining ranks loop forever, servicing commands broadcast from rank 0 */
  while (PETSC_TRUE) {
    PCMPICommand request = PCMPI_CREATE;
#if defined(PETSC_HAVE_PTHREAD_MUTEX)
    PetscMPIInt dummy1 = 1, dummy2;
#endif

    // TODO: can we broadcast the number of active ranks here so only the correct subset of processes waits on the later scatters?
#if defined(PETSC_HAVE_PTHREAD_MUTEX)
    /* sleep on our lock (held by rank 0) instead of spinning inside MPI_Bcast */
    if (PCMPIServerUseShmget) pthread_mutex_lock(&PCMPIServerLocks[PetscGlobalRank]);
#endif
    PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, PC_MPI_COMM_WORLD));
#if defined(PETSC_HAVE_PTHREAD_MUTEX)
    if (PCMPIServerUseShmget) {
      /* next line ensures PetscGlobalRank has locked before rank 0 can take the lock back */
      PetscCallMPI(MPI_Reduce(&dummy1, &dummy2, 1, MPI_INT, MPI_SUM, 0, PC_MPI_COMM_WORLD));
      pthread_mutex_unlock(&PCMPIServerLocks[PetscGlobalRank]);
    }
#endif
    switch (request) {
    case PCMPI_CREATE:
      PetscCall(PCMPICreate(NULL));
      break;
    case PCMPI_SET_MAT:
      PetscCall(PCMPISetMat(NULL));
      break;
    case PCMPI_UPDATE_MAT_VALUES:
      PetscCall(PCMPIUpdateMatValues(NULL));
      break;
    case PCMPI_VIEW:
      // PetscCall(PCMPIView(NULL));
      break;
    case PCMPI_SOLVE:
      PetscCall(PCMPISolve(NULL, NULL, NULL));
      break;
    case PCMPI_DESTROY:
      PetscCall(PCMPIDestroy(NULL));
      break;
    case PCMPI_EXIT:
      if (PCMPIServerUseShmget) PetscCall(PetscShmgetUnmapAddresses(1, (void **)&PCMPIServerLocks));
      PetscCall(PetscFinalize());
      exit(0); /* not sure if this is a good idea, but cannot return because it will run users main program */
      break;
    default:
      break;
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
654
/*@C
  PCMPIServerEnd - ends a server that runs on the rank != 0 MPI processes waiting to process requests for
  parallel KSP solves and management of parallel `KSP` objects.

  Logically Collective on all MPI ranks except 0

  Level: developer

  Note:
  This is normally called automatically in `PetscFinalize()`

.seealso: [](sec_pcmpi), `PCMPIServerBegin()`, `PCMPI`, `KSPCheckPCMPI()`
@*/
PetscErrorCode PCMPIServerEnd(void)
{
  PetscFunctionBegin;
  if (PetscGlobalRank == 0) {
    PetscViewer       viewer = NULL;
    PetscViewerFormat format;

    PetscCall(PetscShmgetAddressesFinalize());
    /* tell the server ranks to finalize and exit (they never return from PCMPIServerBegin()) */
    PetscCall(PCMPIServerBroadcastRequest(PCMPI_EXIT));
    if (PCMPIServerUseShmget) PetscCall(PetscShmgetUnmapAddresses(1, (void **)&PCMPIServerLocks));
    PETSC_COMM_WORLD = MPI_COMM_WORLD; /* could use PC_MPI_COMM_WORLD */
    PetscOptionsBegin(PETSC_COMM_SELF, NULL, "MPI linear solver server options", NULL);
    PetscCall(PetscOptionsViewer("-mpi_linear_solver_server_view", "View information about system solved with the server", "PCMPI", &viewer, &format, NULL));
    PetscOptionsEnd();
    if (viewer) {
      PetscBool isascii;

      PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii));
      if (isascii) {
        PetscMPIInt size;
        PetscMPIInt i;

        PetscCallMPI(MPI_Comm_size(PETSC_COMM_WORLD, &size));
        PetscCall(PetscViewerASCIIPrintf(viewer, "MPI linear solver server statistics:\n"));
        PetscCall(PetscViewerASCIIPrintf(viewer, "    Ranks        KSPSolve()s     Mats        KSPs       Avg. Size      Avg. Its\n"));
        if (PCMPIKSPCountsSeq) {
          /* sequential path: no Mats column is tracked (compare PCApply_Seq) */
          PetscCall(PetscViewerASCIIPrintf(viewer, "  Sequential         %" PetscInt_FMT "                         %" PetscInt_FMT "            %" PetscInt_FMT "           %" PetscInt_FMT "\n", PCMPISolveCountsSeq, PCMPIKSPCountsSeq, PCMPISizesSeq / PCMPISolveCountsSeq, PCMPIIterationsSeq / PCMPISolveCountsSeq));
        }
        for (i = 0; i < size; i++) {
          if (PCMPIKSPCounts[i]) {
            PetscCall(PetscViewerASCIIPrintf(viewer, "     %d               %" PetscInt_FMT "            %" PetscInt_FMT "           %" PetscInt_FMT "            %" PetscInt_FMT "            %" PetscInt_FMT "\n", i + 1, PCMPISolveCounts[i], PCMPIMatCounts[i], PCMPIKSPCounts[i], PCMPISizes[i] / PCMPISolveCounts[i], PCMPIIterations[i] / PCMPISolveCounts[i]));
          }
        }
        PetscCall(PetscViewerASCIIPrintf(viewer, "MPI linear solver server %susing shared memory\n", PCMPIServerUseShmget ? "" : "not "));
      }
      PetscCall(PetscViewerDestroy(&viewer));
    }
  }
  PetscCall(PCMPICommsDestroy());
  PCMPIServerActive = PETSC_FALSE;
  PetscFunctionReturn(PETSC_SUCCESS);
}
710
711 /*
712 This version is used in the trivial case when the MPI parallel solver server is running on just the original MPI rank 0
713 because, for example, the problem is small. This version is more efficient because it does not require copying any data
714 */
PCSetUp_Seq(PC pc)715 static PetscErrorCode PCSetUp_Seq(PC pc)
716 {
717 PC_MPI *km = (PC_MPI *)pc->data;
718 Mat sA;
719 const char *prefix;
720 char *found = NULL, *cprefix;
721
722 PetscFunctionBegin;
723 PCMPIServerInSolve = PETSC_TRUE;
724 PetscCall(PCGetOperators(pc, NULL, &sA));
725 PetscCall(PCGetOptionsPrefix(pc, &prefix));
726 PetscCall(KSPCreate(PETSC_COMM_SELF, &km->ksps[0]));
727 PetscCall(KSPSetNestLevel(km->ksps[0], 1));
728 PetscCall(PetscObjectSetTabLevel((PetscObject)km->ksps[0], 1));
729
730 /* Created KSP gets prefix of PC minus the mpi_linear_solver_server_ portion */
731 PetscCall(PCGetOptionsPrefix(pc, &prefix));
732 PetscCheck(prefix, PETSC_COMM_SELF, PETSC_ERR_PLIB, "PCMPI missing required prefix");
733 PetscCall(PetscStrallocpy(prefix, &cprefix));
734 PetscCall(PetscStrstr(cprefix, "mpi_linear_solver_server_", &found));
735 PetscCheck(found, PETSC_COMM_SELF, PETSC_ERR_PLIB, "PCMPI missing mpi_linear_solver_server_ portion of prefix");
736 *found = 0;
737 PetscCall(KSPSetOptionsPrefix(km->ksps[0], cprefix));
738 PetscCall(PetscFree(cprefix));
739
740 PetscCall(KSPSetOperators(km->ksps[0], sA, sA));
741 PetscCall(KSPSetFromOptions(km->ksps[0]));
742 PetscCall(KSPSetUp(km->ksps[0]));
743 PetscCall(PetscInfo(pc, "MPI parallel linear solver system is being solved directly on rank 0 due to its small size\n"));
744 PCMPIKSPCountsSeq++;
745 PCMPIServerInSolve = PETSC_FALSE;
746 PetscFunctionReturn(PETSC_SUCCESS);
747 }
748
/*
  PCApply_Seq - Applies the rank-0-only variant of the MPI linear solver server:
  solves the system with the inner sequential KSP and updates the global
  solve/iteration/size statistics counters.
*/
static PetscErrorCode PCApply_Seq(PC pc, Vec b, Vec x)
{
  PC_MPI *km = (PC_MPI *)pc->data;
  PetscInt its, n;
  Mat A;

  PetscFunctionBegin;
  PCMPIServerInSolve = PETSC_TRUE;
  PetscCall(KSPSolve(km->ksps[0], b, x));
  PetscCall(KSPGetIterationNumber(km->ksps[0], &its));
  /* accumulate statistics reported by -mpi_linear_solver_server_view */
  PCMPISolveCountsSeq++;
  PCMPIIterationsSeq += its;
  PetscCall(KSPGetOperators(km->ksps[0], NULL, &A));
  PetscCall(MatGetSize(A, &n, NULL));
  PCMPISizesSeq += n;
  PCMPIServerInSolve = PETSC_FALSE;
  /*
     do not keep a reference to the previous rhs and solution since destroying them in the next KSPSolve()
     may use PetscFree() instead of PCMPIArrayDeallocate()
  */
  PetscCall(VecDestroy(&km->ksps[0]->vec_rhs));
  PetscCall(VecDestroy(&km->ksps[0]->vec_sol));
  PetscFunctionReturn(PETSC_SUCCESS);
}
773
PCView_Seq(PC pc,PetscViewer viewer)774 static PetscErrorCode PCView_Seq(PC pc, PetscViewer viewer)
775 {
776 PC_MPI *km = (PC_MPI *)pc->data;
777
778 PetscFunctionBegin;
779 PetscCall(PetscViewerASCIIPrintf(viewer, "Running MPI linear solver server directly on rank 0 due to its small size\n"));
780 PetscCall(PetscViewerASCIIPrintf(viewer, "Desired minimum number of nonzeros per rank for MPI parallel solve %" PetscInt_FMT "\n", km->mincntperrank));
781 PetscCall(PetscViewerASCIIPrintf(viewer, "*** Use -mpi_linear_solver_server_view to statistics on all the solves ***\n"));
782 PetscFunctionReturn(PETSC_SUCCESS);
783 }
784
/*
  PCDestroy_Seq - Destroys the rank-0-only variant of the MPI linear solver server.

  The extra reference/destroy dance ensures the matrices and vectors shared with the
  outer KSP are destroyed OUTSIDE the PCMPIServerInSolve region, so their memory is
  released through the normal path rather than with PetscFree().
*/
static PetscErrorCode PCDestroy_Seq(PC pc)
{
  PC_MPI *km = (PC_MPI *)pc->data;
  Mat A, B;
  Vec x, b;

  PetscFunctionBegin;
  PCMPIServerInSolve = PETSC_TRUE;
  /* since matrices and vectors are shared with outer KSP we need to ensure they are not destroyed with PetscFree() */
  PetscCall(KSPGetOperators(km->ksps[0], &A, &B));
  PetscCall(PetscObjectReference((PetscObject)A));
  PetscCall(PetscObjectReference((PetscObject)B));
  PetscCall(KSPGetSolution(km->ksps[0], &x));
  PetscCall(PetscObjectReference((PetscObject)x));
  PetscCall(KSPGetRhs(km->ksps[0], &b));
  PetscCall(PetscObjectReference((PetscObject)b));
  PetscCall(KSPDestroy(&km->ksps[0]));
  PetscCall(PetscFree(pc->data));
  PCMPIServerInSolve = PETSC_FALSE;
  /* final dereference happens outside the server-in-solve region */
  PetscCall(MatDestroy(&A));
  PetscCall(MatDestroy(&B));
  PetscCall(VecDestroy(&x));
  PetscCall(VecDestroy(&b));
  PetscFunctionReturn(PETSC_SUCCESS);
}
810
811 /*
812 PCSetUp_MPI - Trigger the creation of the MPI parallel PC and copy parts of the matrix and
813 right-hand side to the parallel PC
814 */
/*
  PCSetUp_MPI - Trigger the creation of the MPI parallel PC and copy parts of the matrix and
  right-hand side to the parallel PC

  On first setup, small systems (fewer than 2*mincntperrank-1 rows) or single-rank runs are
  short-circuited to the _Seq implementations, which solve directly on rank 0 with no copying.
  Otherwise the server ranks are told (via PCMPIServerBroadcastRequest()) to create the
  parallel KSP and receive the matrix; on later setups only the changed values are sent
  unless the nonzero pattern changed.
*/
static PetscErrorCode PCSetUp_MPI(PC pc)
{
  PC_MPI *km = (PC_MPI *)pc->data;
  PetscMPIInt rank, size;
  PetscBool newmatrix = PETSC_FALSE;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_rank(MPI_COMM_WORLD, &rank));
  PetscCheck(rank == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "PCMPI can only be used from 0th rank of MPI_COMM_WORLD. Perhaps a missing -mpi_linear_solver_server?");
  PetscCallMPI(MPI_Comm_size(MPI_COMM_WORLD, &size));

  if (!pc->setupcalled) {
    if (!km->alwaysuseserver) {
      PetscInt n;
      Mat sA;
      /* short circuit for small systems */
      PetscCall(PCGetOperators(pc, &sA, &sA));
      PetscCall(MatGetSize(sA, &n, NULL));
      if (n < 2 * km->mincntperrank - 1 || size == 1) {
        /* switch this PC permanently to the sequential implementations */
        pc->ops->setup = NULL;
        pc->ops->apply = PCApply_Seq;
        pc->ops->destroy = PCDestroy_Seq;
        pc->ops->view = PCView_Seq;
        PetscCall(PCSetUp_Seq(pc));
        PetscFunctionReturn(PETSC_SUCCESS);
      }
    }

    /* the broadcast wakes the server ranks so they participate in the matching collective call */
    PetscCall(PCMPIServerBroadcastRequest(PCMPI_CREATE));
    PetscCall(PCMPICreate(pc));
    newmatrix = PETSC_TRUE;
  }
  if (pc->flag == DIFFERENT_NONZERO_PATTERN) newmatrix = PETSC_TRUE;

  if (newmatrix) {
    PetscCall(PetscInfo(pc, "New matrix or matrix has changed nonzero structure\n"));
    PetscCall(PCMPIServerBroadcastRequest(PCMPI_SET_MAT));
    PetscCall(PCMPISetMat(pc));
  } else {
    PetscCall(PetscInfo(pc, "Matrix has only changed nonzero values\n"));
    PetscCall(PCMPIServerBroadcastRequest(PCMPI_UPDATE_MAT_VALUES));
    PetscCall(PCMPIUpdateMatValues(pc));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
860
PCApply_MPI(PC pc,Vec b,Vec x)861 static PetscErrorCode PCApply_MPI(PC pc, Vec b, Vec x)
862 {
863 PetscFunctionBegin;
864 PetscCall(PCMPIServerBroadcastRequest(PCMPI_SOLVE));
865 PetscCall(PCMPISolve(pc, b, x));
866 PetscFunctionReturn(PETSC_SUCCESS);
867 }
868
PCDestroy_MPI(PC pc)869 static PetscErrorCode PCDestroy_MPI(PC pc)
870 {
871 PetscFunctionBegin;
872 PetscCall(PCMPIServerBroadcastRequest(PCMPI_DESTROY));
873 PetscCall(PCMPIDestroy(pc));
874 PetscCall(PetscFree(pc->data));
875 PetscFunctionReturn(PETSC_SUCCESS);
876 }
877
878 /*
879 PCView_MPI - Cannot call view on the MPI parallel KSP because other ranks do not have access to the viewer, use options database
880 */
PCView_MPI(PC pc,PetscViewer viewer)881 static PetscErrorCode PCView_MPI(PC pc, PetscViewer viewer)
882 {
883 PC_MPI *km = (PC_MPI *)pc->data;
884 MPI_Comm comm;
885 PetscMPIInt size;
886
887 PetscFunctionBegin;
888 PetscCall(PetscObjectGetComm((PetscObject)km->ksps[0], &comm));
889 PetscCallMPI(MPI_Comm_size(comm, &size));
890 PetscCall(PetscViewerASCIIPrintf(viewer, "Size of MPI communicator used for MPI parallel KSP solve %d\n", size));
891 PetscCall(PetscViewerASCIIPrintf(viewer, "Desired minimum number of matrix rows on each MPI process for MPI parallel solve %" PetscInt_FMT "\n", km->mincntperrank));
892 PetscCall(PetscViewerASCIIPrintf(viewer, "*** Use -mpi_linear_solver_server_view to view statistics on all the solves ***\n"));
893 PetscFunctionReturn(PETSC_SUCCESS);
894 }
895
PCSetFromOptions_MPI(PC pc,PetscOptionItems PetscOptionsObject)896 static PetscErrorCode PCSetFromOptions_MPI(PC pc, PetscOptionItems PetscOptionsObject)
897 {
898 PC_MPI *km = (PC_MPI *)pc->data;
899
900 PetscFunctionBegin;
901 PetscOptionsHeadBegin(PetscOptionsObject, "MPI linear solver server options");
902 PetscCall(PetscOptionsInt("-minimum_count_per_rank", "Desired minimum number of nonzeros per rank", "None", km->mincntperrank, &km->mincntperrank, NULL));
903 PetscCall(PetscOptionsBool("-always_use_server", "Use the server even if only one rank is used for the solve (for debugging)", "None", km->alwaysuseserver, &km->alwaysuseserver, NULL));
904 PetscOptionsHeadEnd();
905 PetscFunctionReturn(PETSC_SUCCESS);
906 }
907
908 /*MC
909 PCMPI - Calls an MPI parallel `KSP` to solve a linear system from user code running on one process
910
911 Options Database Keys for the Server:
912 + -mpi_linear_solver_server - causes the PETSc program to start in MPI linear solver server mode where only the first MPI rank runs user code
913 . -mpi_linear_solver_server_view - displays information about all the linear systems solved by the MPI linear solver server
914 - -mpi_linear_solver_server_use_shared_memory <true, false> - use shared memory to distribute matrix and right hand side, defaults to true
915
916 Options Database Keys for a specific `KSP` object
917 + -[any_ksp_prefix]_mpi_linear_solver_server_minimum_count_per_rank - sets the minimum size of the linear system per MPI rank that the solver will strive for
918 - -[any_ksp_prefix]_mpi_linear_solver_server_always_use_server - use the server solver code even if the particular system is only solved on the process (for debugging and testing purposes)
919
920 Level: developer
921
922 Notes:
923 This cannot be used with vectors or matrices that are created using arrays provided by the user, such as `VecCreateWithArray()` or
924 `MatCreateSeqAIJWithArrays()`
925
926   The options database prefix for the actual solver is any prefix provided before use to the original `KSP` with `KSPSetOptionsPrefix()`; most commonly no prefix is used.
927
928 It can be particularly useful for user OpenMP code or potentially user GPU code.
929
930 When the program is running with a single MPI process then it directly uses the provided matrix and right-hand side
931 and does not need to distribute the matrix and vector to the various MPI processes; thus it incurs no extra overhead over just using the `KSP` directly.
932
933 The solver options for actual solving `KSP` and `PC` must be controlled via the options database, calls to set options directly on the user level `KSP` and `PC` have no effect
934 because they are not the actual solver objects.
935
936   When `-log_view` is used with this solver the events within the parallel solve are logged in their own stage. Some of the logging in the other
937   stages will be confusing since the event times are only recorded on the 0th MPI rank, thus the percent of time in the events will be misleading.
938
939 Developer Note:
940 This `PCType` is never directly selected by the user, it is set when the option `-mpi_linear_solver_server` is used and the `PC` is at the outer most nesting of
941 a `KSP`. The outer most `KSP` object is automatically set to `KSPPREONLY` and thus is not directly visible to the user.
942
943 .seealso: [](sec_pcmpi), `KSPCreate()`, `KSPSetType()`, `KSPType`, `KSP`, `PC`, `PCMPIServerBegin()`, `PCMPIServerEnd()`, `KSPCheckPCMPI()`
944 M*/
PCCreate_MPI(PC pc)945 PETSC_EXTERN PetscErrorCode PCCreate_MPI(PC pc)
946 {
947 PC_MPI *km;
948 char *found = NULL;
949
950 PetscFunctionBegin;
951 PetscCall(PetscStrstr(((PetscObject)pc)->prefix, "mpi_linear_solver_server_", &found));
952 PetscCheck(found, PETSC_COMM_SELF, PETSC_ERR_PLIB, "PCMPI object prefix does not have mpi_linear_solver_server_");
953
954 /* material from PCSetType() */
955 PetscTryTypeMethod(pc, destroy);
956 pc->ops->destroy = NULL;
957 pc->data = NULL;
958
959 PetscCall(PetscFunctionListDestroy(&((PetscObject)pc)->qlist));
960 PetscCall(PetscMemzero(pc->ops, sizeof(struct _PCOps)));
961 pc->modifysubmatrices = NULL;
962 pc->modifysubmatricesP = NULL;
963 pc->setupcalled = PETSC_FALSE;
964
965 PetscCall(PetscNew(&km));
966 pc->data = (void *)km;
967
968 km->mincntperrank = 10000;
969
970 pc->ops->setup = PCSetUp_MPI;
971 pc->ops->apply = PCApply_MPI;
972 pc->ops->destroy = PCDestroy_MPI;
973 pc->ops->view = PCView_MPI;
974 pc->ops->setfromoptions = PCSetFromOptions_MPI;
975 PetscCall(PetscObjectChangeTypeName((PetscObject)pc, PCMPI));
976 PetscFunctionReturn(PETSC_SUCCESS);
977 }
978
979 /*@
980 PCMPIGetKSP - Gets the `KSP` created by the `PCMPI`
981
982 Not Collective
983
984 Input Parameter:
985 . pc - the preconditioner context
986
987 Output Parameter:
988 . innerksp - the inner `KSP`
989
990 Level: advanced
991
992 .seealso: [](ch_ksp), `KSP`, `PCMPI`, `PCREDISTRIBUTE`
993 @*/
PCMPIGetKSP(PC pc,KSP * innerksp)994 PetscErrorCode PCMPIGetKSP(PC pc, KSP *innerksp)
995 {
996 PC_MPI *red = (PC_MPI *)pc->data;
997
998 PetscFunctionBegin;
999 PetscValidHeaderSpecific(pc, PC_CLASSID, 1);
1000 PetscAssertPointer(innerksp, 2);
1001 *innerksp = red->ksps[0];
1002 PetscFunctionReturn(PETSC_SUCCESS);
1003 }
1004