xref: /petsc/src/ksp/pc/impls/mpi/pcmpi.c (revision d8e47b638cf8f604a99e9678e1df24f82d959cd7)
1 /*
2     This file creates an MPI parallel KSP from a sequential PC that lives on MPI rank 0.
3     It is intended to allow using PETSc MPI parallel linear solvers from non-MPI codes.
4 
5     The user's program may use OpenMP to compute the right-hand side and matrix for the linear system
6 
7     The code uses MPI_COMM_WORLD below but maybe it should be PETSC_COMM_WORLD
8 
9     The resulting KSP and PC can only be controlled via the options database, though some common commands
10     could be passed through the server.
11 
12 */
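/*
    A sketch of typical use from the (otherwise sequential) user program; the executable name is
    illustrative and the matrix/vector assembly is omitted:

       Mat A;         the sequential AIJ matrix assembled by the user code on MPI rank 0
       Vec b, x;
       KSP ksp;

       PetscCall(KSPCreate(PETSC_COMM_WORLD, &ksp));    PETSC_COMM_WORLD is PETSC_COMM_SELF on this rank
       PetscCall(KSPSetOperators(ksp, A, A));
       PetscCall(KSPSetFromOptions(ksp));
       PetscCall(KSPSolve(ksp, b, x));

    launched with, for example,

       mpiexec -n 4 ./app -mpi_linear_solver_server -mpi_linear_solver_server_view
*/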
13 #include <petsc/private/pcimpl.h> /*I "petscksp.h" I*/
14 #include <petsc/private/kspimpl.h>
15 #include <petscts.h>
16 #include <petsctao.h>
17 #if defined(PETSC_HAVE_PTHREAD_MUTEX)
18   #include <pthread.h>
19 #endif
20 
21 #define PC_MPI_MAX_RANKS  256
22 #define PC_MPI_COMM_WORLD MPI_COMM_WORLD
23 
24 typedef struct {
25   KSP         ksps[PC_MPI_MAX_RANKS];                               /* The addresses of the MPI parallel KSP on each process, NULL when not on a process. */
26   PetscMPIInt sendcount[PC_MPI_MAX_RANKS], displ[PC_MPI_MAX_RANKS]; /* For scatter/gather of rhs/solution */
27   PetscMPIInt NZ[PC_MPI_MAX_RANKS], NZdispl[PC_MPI_MAX_RANKS];      /* For scatter of nonzero values in matrix (and nonzero column indices initially) */
28   PetscInt    mincntperrank;                                        /* minimum number of desired matrix rows per active rank in MPI parallel KSP solve */
29   PetscBool   alwaysuseserver;                                      /* for debugging use the server infrastructure even if only one MPI process is used for the solve */
30 } PC_MPI;
31 
32 typedef enum {
33   PCMPI_EXIT, /* exit the PC server loop; the controlling sequential program is done */
34   PCMPI_CREATE,
35   PCMPI_SET_MAT,           /* set original matrix (or one with different nonzero pattern) */
36   PCMPI_UPDATE_MAT_VALUES, /* update current matrix with new nonzero values */
37   PCMPI_SOLVE,
38   PCMPI_VIEW,
39   PCMPI_DESTROY /* destroy a PC that is no longer needed */
40 } PCMPICommand;
41 
42 static MPI_Comm      PCMPIComms[PC_MPI_MAX_RANKS];
43 static PetscBool     PCMPICommSet = PETSC_FALSE;
44 static PetscInt      PCMPISolveCounts[PC_MPI_MAX_RANKS], PCMPIKSPCounts[PC_MPI_MAX_RANKS], PCMPIMatCounts[PC_MPI_MAX_RANKS], PCMPISolveCountsSeq = 0, PCMPIKSPCountsSeq = 0;
45 static PetscInt      PCMPIIterations[PC_MPI_MAX_RANKS], PCMPISizes[PC_MPI_MAX_RANKS], PCMPIIterationsSeq = 0, PCMPISizesSeq = 0;
46 static PetscLogEvent EventServerDist, EventServerDistMPI;
47 #if defined(PETSC_HAVE_PTHREAD_MUTEX)
48 static pthread_mutex_t *PCMPIServerLocks;
49 #else
50 static void *PCMPIServerLocks;
51 #endif
52 
53 static PetscErrorCode PCMPICommsCreate(void)
54 {
55   MPI_Comm    comm = PC_MPI_COMM_WORLD;
56   PetscMPIInt size, rank, i;
57 
58   PetscFunctionBegin;
59   PetscCallMPI(MPI_Comm_size(comm, &size));
60   PetscCheck(size <= PC_MPI_MAX_RANKS, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for using more than PC_MPI_MAX_RANKS MPI ranks in an MPI linear solver server solve");
61   PetscCallMPI(MPI_Comm_rank(comm, &rank));
62   /* comm for size 1 is useful only for debugging */
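  /* PCMPIComms[i] contains the first i + 1 ranks of PC_MPI_COMM_WORLD; the remaining ranks get MPI_COMM_NULL for that entry */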
63   for (i = 0; i < size; i++) {
64     PetscMPIInt color = rank < i + 1 ? 0 : MPI_UNDEFINED;
65     PetscCallMPI(MPI_Comm_split(comm, color, 0, &PCMPIComms[i]));
66     PCMPISolveCounts[i] = 0;
67     PCMPIKSPCounts[i]   = 0;
68     PCMPIIterations[i]  = 0;
69     PCMPISizes[i]       = 0;
70   }
71   PCMPICommSet = PETSC_TRUE;
72   PetscFunctionReturn(PETSC_SUCCESS);
73 }
74 
75 static PetscErrorCode PCMPICommsDestroy(void)
76 {
77   MPI_Comm    comm = PC_MPI_COMM_WORLD;
78   PetscMPIInt size, rank, i;
79 
80   PetscFunctionBegin;
81   if (!PCMPICommSet) PetscFunctionReturn(PETSC_SUCCESS);
82   PetscCallMPI(MPI_Comm_size(comm, &size));
83   PetscCallMPI(MPI_Comm_rank(comm, &rank));
84   for (i = 0; i < size; i++) {
85     if (PCMPIComms[i] != MPI_COMM_NULL) PetscCallMPI(MPI_Comm_free(&PCMPIComms[i]));
86   }
87   PCMPICommSet = PETSC_FALSE;
88   PetscFunctionReturn(PETSC_SUCCESS);
89 }
90 
91 static PetscErrorCode PCMPICreate(PC pc)
92 {
93   PC_MPI     *km   = pc ? (PC_MPI *)pc->data : NULL;
94   MPI_Comm    comm = PC_MPI_COMM_WORLD;
95   KSP         ksp;
96   PetscInt    N[2], mincntperrank = 0;
97   PetscMPIInt size;
98   Mat         sA;
99   char       *cprefix = NULL;
100   PetscMPIInt len     = 0;
101 
102   PetscFunctionBegin;
103   PCMPIServerInSolve = PETSC_TRUE;
104   if (!PCMPICommSet) PetscCall(PCMPICommsCreate());
105   PetscCallMPI(MPI_Comm_size(comm, &size));
106   if (pc) {
107     if (size == 1) PetscCall(PetscPrintf(PETSC_COMM_SELF, "Warning: Running KSP type of MPI on a one-rank MPI run; this will be less efficient than not using this type\n"));
108     PetscCall(PCGetOperators(pc, &sA, &sA));
109     PetscCall(MatGetSize(sA, &N[0], &N[1]));
110   }
111   PetscCallMPI(MPI_Bcast(N, 2, MPIU_INT, 0, comm));
112 
113   /* choose a suitable sized MPI_Comm for the problem to be solved on */
114   if (km) mincntperrank = km->mincntperrank;
115   PetscCallMPI(MPI_Bcast(&mincntperrank, 1, MPI_INT, 0, comm));
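  /* number of active ranks is roughly one rank per mincntperrank matrix rows, at least 1 and at most the number of available ranks */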
116   comm = PCMPIComms[PetscMin(size, PetscMax(1, N[0] / mincntperrank)) - 1];
117   if (comm == MPI_COMM_NULL) {
118     ksp                = NULL;
119     PCMPIServerInSolve = PETSC_FALSE;
120     PetscFunctionReturn(PETSC_SUCCESS);
121   }
122   PetscCall(PetscLogStagePush(PCMPIStage));
123   PetscCall(KSPCreate(comm, &ksp));
124   PetscCall(KSPSetNestLevel(ksp, 1));
125   PetscCall(PetscObjectSetTabLevel((PetscObject)ksp, 1));
126   PetscCall(PetscLogStagePop());
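  /* rank 0 gathers the address of each rank's KSP so later requests can be scattered back to the owning ranks */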
127   PetscCallMPI(MPI_Gather(&ksp, 1, MPI_AINT, pc ? km->ksps : NULL, 1, MPI_AINT, 0, comm));
128   if (pc) {
129     size_t      slen;
130     const char *prefix = NULL;
131     char       *found  = NULL;
132 
133     PetscCallMPI(MPI_Comm_size(comm, &size));
134     PCMPIKSPCounts[size - 1]++;
135     /* Created KSP gets prefix of PC minus the mpi_linear_solver_server_ portion */
136     PetscCall(PCGetOptionsPrefix(pc, &prefix));
137     PetscCheck(prefix, PETSC_COMM_SELF, PETSC_ERR_PLIB, "PCMPI missing required prefix");
138     PetscCall(PetscStrallocpy(prefix, &cprefix));
139     PetscCall(PetscStrstr(cprefix, "mpi_linear_solver_server_", &found));
140     PetscCheck(found, PETSC_COMM_SELF, PETSC_ERR_PLIB, "PCMPI missing mpi_linear_solver_server_ portion of prefix");
141     *found = 0;
142     PetscCall(PetscStrlen(cprefix, &slen));
143     len = (PetscMPIInt)slen;
144   }
145   PetscCallMPI(MPI_Bcast(&len, 1, MPI_INT, 0, comm));
146   if (len) {
147     if (!pc) PetscCall(PetscMalloc1(len + 1, &cprefix));
148     PetscCallMPI(MPI_Bcast(cprefix, len + 1, MPI_CHAR, 0, comm));
149     PetscCall(KSPSetOptionsPrefix(ksp, cprefix));
150   }
151   PetscCall(PetscFree(cprefix));
152   PCMPIServerInSolve = PETSC_FALSE;
153   PetscFunctionReturn(PETSC_SUCCESS);
154 }
155 
156 static PetscErrorCode PCMPISetMat(PC pc)
157 {
158   PC_MPI            *km = pc ? (PC_MPI *)pc->data : NULL;
159   Mat                A;
160   PetscInt           m, n, j, bs;
161   Mat                sA;
162   MPI_Comm           comm = PC_MPI_COMM_WORLD;
163   KSP                ksp;
164   PetscLayout        layout;
165   const PetscInt    *IA = NULL, *JA = NULL, *ia, *ja;
166   const PetscInt    *range;
167   PetscMPIInt       *NZ = NULL, sendcounti[PC_MPI_MAX_RANKS], displi[PC_MPI_MAX_RANKS], *NZdispl = NULL, nz, size, i;
168   const PetscScalar *a                = NULL, *sa;
169   PetscInt           matproperties[8] = {0}, rstart, rend;
170   char              *cprefix;
171 
172   PetscFunctionBegin;
173   PetscCallMPI(MPI_Scatter(pc ? km->ksps : NULL, 1, MPI_AINT, &ksp, 1, MPI_AINT, 0, comm));
174   if (!ksp) PetscFunctionReturn(PETSC_SUCCESS);
175   PCMPIServerInSolve = PETSC_TRUE;
176   PetscCall(PetscLogEventBegin(EventServerDist, NULL, NULL, NULL, NULL));
177   PetscCall(PetscObjectGetComm((PetscObject)ksp, &comm));
178   if (pc) {
179     PetscBool   isset, issymmetric, ishermitian, isspd, isstructurallysymmetric;
180     const char *prefix;
181     size_t      clen;
182 
183     PetscCallMPI(MPI_Comm_size(comm, &size));
184     PCMPIMatCounts[size - 1]++;
185     PetscCall(PCGetOperators(pc, &sA, &sA));
186     PetscCall(MatGetSize(sA, &matproperties[0], &matproperties[1]));
187     PetscCall(MatGetBlockSize(sA, &bs));
188     matproperties[2] = bs;
189     PetscCall(MatIsSymmetricKnown(sA, &isset, &issymmetric));
190     matproperties[3] = !isset ? 0 : (issymmetric ? 1 : 2);
191     PetscCall(MatIsHermitianKnown(sA, &isset, &ishermitian));
192     matproperties[4] = !isset ? 0 : (ishermitian ? 1 : 2);
193     PetscCall(MatIsSPDKnown(sA, &isset, &isspd));
194     matproperties[5] = !isset ? 0 : (isspd ? 1 : 2);
195     PetscCall(MatIsStructurallySymmetricKnown(sA, &isset, &isstructurallysymmetric));
196     matproperties[6] = !isset ? 0 : (isstructurallysymmetric ? 1 : 2);
197     /* Created Mat gets prefix of input Mat PLUS the mpi_linear_solver_server_ portion */
198     PetscCall(MatGetOptionsPrefix(sA, &prefix));
199     PetscCall(PetscStrallocpy(prefix, &cprefix));
200     PetscCall(PetscStrlen(cprefix, &clen));
201     matproperties[7] = (PetscInt)clen;
202   }
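  /* matproperties = {global rows, global cols, block size, symmetric, hermitian, SPD, structurally symmetric, prefix length};
     the four flags use 0 = unknown, 1 = true, 2 = false; packing them allows a single broadcast */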
203   PetscCallMPI(MPI_Bcast(matproperties, PETSC_STATIC_ARRAY_LENGTH(matproperties), MPIU_INT, 0, comm));
204 
205   /* determine ownership ranges of matrix columns */
206   PetscCall(PetscLayoutCreate(comm, &layout));
207   PetscCall(PetscLayoutSetBlockSize(layout, matproperties[2]));
208   PetscCall(PetscLayoutSetSize(layout, matproperties[1]));
209   PetscCall(PetscLayoutSetUp(layout));
210   PetscCall(PetscLayoutGetLocalSize(layout, &n));
211   PetscCall(PetscLayoutDestroy(&layout));
212 
213   /* determine ownership ranges of matrix rows */
214   PetscCall(PetscLayoutCreate(comm, &layout));
215   PetscCall(PetscLayoutSetBlockSize(layout, matproperties[2]));
216   PetscCall(PetscLayoutSetSize(layout, matproperties[0]));
217   PetscCall(PetscLayoutSetUp(layout));
218   PetscCall(PetscLayoutGetLocalSize(layout, &m));
219   PetscCall(PetscLayoutGetRange(layout, &rstart, &rend));
220 
221   PetscCall(PetscLogEventBegin(EventServerDistMPI, NULL, NULL, NULL, NULL));
222   /* copy over the matrix nonzero structure and values */
223   if (pc) {
224     PetscCall(MatGetRowIJ(sA, 0, PETSC_FALSE, PETSC_FALSE, NULL, &IA, &JA, NULL));
225     if (!PCMPIServerUseShmget) {
226       NZ      = km->NZ;
227       NZdispl = km->NZdispl;
228       PetscCall(PetscLayoutGetRanges(layout, &range));
229       for (i = 0; i < size; i++) {
230         sendcounti[i] = (PetscMPIInt)(1 + range[i + 1] - range[i]);
231         NZ[i]         = (PetscMPIInt)(IA[range[i + 1]] - IA[range[i]]);
232       }
233       displi[0]  = 0;
234       NZdispl[0] = 0;
235       for (j = 1; j < size; j++) {
236         displi[j]  = displi[j - 1] + sendcounti[j - 1] - 1;
237         NZdispl[j] = NZdispl[j - 1] + NZ[j - 1];
238       }
239     }
240     PetscCall(MatSeqAIJGetArrayRead(sA, &sa));
241   }
242   PetscCall(PetscLayoutDestroy(&layout));
243 
244   PetscCall(MatCreate(comm, &A));
245   if (matproperties[7] > 0) {
246     if (!pc) PetscCall(PetscMalloc1(matproperties[7] + 1, &cprefix));
247     PetscCallMPI(MPI_Bcast(cprefix, (PetscMPIInt)(matproperties[7] + 1), MPI_CHAR, 0, comm));
248     PetscCall(MatSetOptionsPrefix(A, cprefix));
249     PetscCall(PetscFree(cprefix));
250   }
251   PetscCall(MatAppendOptionsPrefix(A, "mpi_linear_solver_server_"));
252   PetscCall(MatSetSizes(A, m, n, matproperties[0], matproperties[1]));
253   PetscCall(MatSetType(A, MATMPIAIJ));
254 
255   if (!PCMPIServerUseShmget) {
256     PetscMPIInt in;
257     PetscCallMPI(MPI_Scatter(NZ, 1, MPI_INT, &nz, 1, MPI_INT, 0, comm));
258     PetscCall(PetscMalloc3(n + 1, &ia, nz, &ja, nz, &a));
259     PetscCall(PetscMPIIntCast(n, &in));
260     PetscCallMPI(MPI_Scatterv(IA, sendcounti, displi, MPIU_INT, (void *)ia, in + 1, MPIU_INT, 0, comm));
261     PetscCallMPI(MPI_Scatterv(JA, NZ, NZdispl, MPIU_INT, (void *)ja, nz, MPIU_INT, 0, comm));
262     PetscCallMPI(MPI_Scatterv(sa, NZ, NZdispl, MPIU_SCALAR, (void *)a, nz, MPIU_SCALAR, 0, comm));
263   } else {
264     const void           *addr[3] = {(const void **)IA, (const void **)JA, (const void **)sa};
265     PCMPIServerAddresses *addresses;
266 
267     PetscCall(PetscNew(&addresses));
268     addresses->n = 3;
269     PetscCall(PetscShmgetMapAddresses(comm, addresses->n, addr, addresses->addr));
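    /* each rank addresses rank 0's CSR arrays directly through shared memory: the local row pointers start at entry rstart of IA,
       and ia[0] (the number of nonzeros before the first local row) offsets into the shared column index and value arrays */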
270     ia = rstart + (PetscInt *)addresses->addr[0];
271     ja = ia[0] + (PetscInt *)addresses->addr[1];
272     a  = ia[0] + (PetscScalar *)addresses->addr[2];
273     PetscCall(PetscObjectContainerCompose((PetscObject)A, "PCMPIServerAddresses", (void *)addresses, (PetscErrorCode(*)(void *))PCMPIServerAddressesDestroy));
274   }
275 
276   if (pc) {
277     PetscCall(MatSeqAIJRestoreArrayRead(sA, &sa));
278     PetscCall(MatRestoreRowIJ(sA, 0, PETSC_FALSE, PETSC_FALSE, NULL, &IA, &JA, NULL));
279   }
280   PetscCall(PetscLogEventEnd(EventServerDistMPI, NULL, NULL, NULL, NULL));
281 
282   PetscCall(PetscLogStagePush(PCMPIStage));
283   PetscCall(MatMPIAIJSetPreallocationCSR(A, ia, ja, a));
284   PetscCall(MatSetBlockSize(A, matproperties[2]));
285 
286   if (matproperties[3]) PetscCall(MatSetOption(A, MAT_SYMMETRIC, matproperties[3] == 1 ? PETSC_TRUE : PETSC_FALSE));
287   if (matproperties[4]) PetscCall(MatSetOption(A, MAT_HERMITIAN, matproperties[4] == 1 ? PETSC_TRUE : PETSC_FALSE));
288   if (matproperties[5]) PetscCall(MatSetOption(A, MAT_SPD, matproperties[5] == 1 ? PETSC_TRUE : PETSC_FALSE));
289   if (matproperties[6]) PetscCall(MatSetOption(A, MAT_STRUCTURALLY_SYMMETRIC, matproperties[6] == 1 ? PETSC_TRUE : PETSC_FALSE));
290 
291   if (!PCMPIServerUseShmget) PetscCall(PetscFree3(ia, ja, a));
292   PetscCall(KSPSetOperators(ksp, A, A));
293   if (!ksp->vec_sol) PetscCall(MatCreateVecs(A, &ksp->vec_sol, &ksp->vec_rhs));
294   PetscCall(PetscLogStagePop());
295   if (pc && !PCMPIServerUseShmget) { /* needed for scatterv/gatherv of rhs and solution */
296     const PetscInt *range;
297 
298     PetscCall(VecGetOwnershipRanges(ksp->vec_sol, &range));
299     for (i = 0; i < size; i++) {
300       km->sendcount[i] = (PetscMPIInt)(range[i + 1] - range[i]);
301       km->displ[i]     = (PetscMPIInt)range[i];
302     }
303   }
304   PetscCall(MatDestroy(&A));
305   PetscCall(PetscLogEventEnd(EventServerDist, NULL, NULL, NULL, NULL));
306   PetscCall(KSPSetFromOptions(ksp));
307   PCMPIServerInSolve = PETSC_FALSE;
308   PetscFunctionReturn(PETSC_SUCCESS);
309 }
310 
311 static PetscErrorCode PCMPIUpdateMatValues(PC pc)
312 {
313   PC_MPI            *km = pc ? (PC_MPI *)pc->data : NULL;
314   KSP                ksp;
315   Mat                sA, A;
316   MPI_Comm           comm = PC_MPI_COMM_WORLD;
317   const PetscInt    *ia, *IA;
318   const PetscScalar *a;
319   PetscCount         nz;
320   const PetscScalar *sa = NULL;
321   PetscMPIInt        size;
322   PetscInt           rstart, matproperties[4] = {0, 0, 0, 0};
323 
324   PetscFunctionBegin;
325   if (pc) {
326     PetscCall(PCGetOperators(pc, &sA, &sA));
327     PetscCall(MatSeqAIJGetArrayRead(sA, &sa));
328     PetscCall(MatGetRowIJ(sA, 0, PETSC_FALSE, PETSC_FALSE, NULL, &IA, NULL, NULL));
329   }
330   PetscCallMPI(MPI_Scatter(pc ? km->ksps : NULL, 1, MPI_AINT, &ksp, 1, MPI_AINT, 0, comm));
331   if (!ksp) PetscFunctionReturn(PETSC_SUCCESS);
332   PCMPIServerInSolve = PETSC_TRUE;
333   PetscCall(PetscLogEventBegin(EventServerDist, NULL, NULL, NULL, NULL));
334   PetscCall(PetscObjectGetComm((PetscObject)ksp, &comm));
335   PetscCallMPI(MPI_Comm_size(comm, &size));
336   PCMPIMatCounts[size - 1]++;
337   PetscCall(KSPGetOperators(ksp, NULL, &A));
338   PetscCall(PetscLogEventBegin(EventServerDistMPI, NULL, NULL, NULL, NULL));
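  /* only the numerical values are redistributed; the nonzero structure established by PCMPISetMat() is reused */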
339   if (!PCMPIServerUseShmget) {
340     PetscMPIInt mpi_nz;
341 
342     PetscCall(MatMPIAIJGetNumberNonzeros(A, &nz));
343     PetscCall(PetscMPIIntCast(nz, &mpi_nz));
344     PetscCall(PetscMalloc1(nz, &a));
345     PetscCallMPI(MPI_Scatterv(sa, pc ? km->NZ : NULL, pc ? km->NZdispl : NULL, MPIU_SCALAR, (void *)a, mpi_nz, MPIU_SCALAR, 0, comm));
346   } else {
347     PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
348     PCMPIServerAddresses *addresses;
349     PetscCall(PetscObjectContainerQuery((PetscObject)A, "PCMPIServerAddresses", (void **)&addresses));
350     ia = rstart + (PetscInt *)addresses->addr[0];
351     a  = ia[0] + (PetscScalar *)addresses->addr[2];
352   }
353   PetscCall(PetscLogEventEnd(EventServerDistMPI, NULL, NULL, NULL, NULL));
354   if (pc) {
355     PetscBool isset, issymmetric, ishermitian, isspd, isstructurallysymmetric;
356 
357     PetscCall(MatSeqAIJRestoreArrayRead(sA, &sa));
358     PetscCall(MatRestoreRowIJ(sA, 0, PETSC_FALSE, PETSC_FALSE, NULL, &IA, NULL, NULL));
359 
360     PetscCall(MatIsSymmetricKnown(sA, &isset, &issymmetric));
361     matproperties[0] = !isset ? 0 : (issymmetric ? 1 : 2);
362     PetscCall(MatIsHermitianKnown(sA, &isset, &ishermitian));
363     matproperties[1] = !isset ? 0 : (ishermitian ? 1 : 2);
364     PetscCall(MatIsSPDKnown(sA, &isset, &isspd));
365     matproperties[2] = !isset ? 0 : (isspd ? 1 : 2);
366     PetscCall(MatIsStructurallySymmetricKnown(sA, &isset, &isstructurallysymmetric));
367     matproperties[3] = !isset ? 0 : (isstructurallysymmetric ? 1 : 2);
368   }
369   PetscCall(MatUpdateMPIAIJWithArray(A, a));
370   if (!PCMPIServerUseShmget) PetscCall(PetscFree(a));
371   PetscCallMPI(MPI_Bcast(matproperties, 4, MPIU_INT, 0, comm));
372   /* if any of these properties was previously set and is now not set this will result in incorrect properties in A since there is no way to unset a property */
373   if (matproperties[0]) PetscCall(MatSetOption(A, MAT_SYMMETRIC, matproperties[0] == 1 ? PETSC_TRUE : PETSC_FALSE));
374   if (matproperties[1]) PetscCall(MatSetOption(A, MAT_HERMITIAN, matproperties[1] == 1 ? PETSC_TRUE : PETSC_FALSE));
375   if (matproperties[2]) PetscCall(MatSetOption(A, MAT_SPD, matproperties[2] == 1 ? PETSC_TRUE : PETSC_FALSE));
376   if (matproperties[3]) PetscCall(MatSetOption(A, MAT_STRUCTURALLY_SYMMETRIC, matproperties[3] == 1 ? PETSC_TRUE : PETSC_FALSE));
377   PetscCall(PetscLogEventEnd(EventServerDist, NULL, NULL, NULL, NULL));
378   PCMPIServerInSolve = PETSC_FALSE;
379   PetscFunctionReturn(PETSC_SUCCESS);
380 }
381 
382 static PetscErrorCode PCMPISolve(PC pc, Vec B, Vec X)
383 {
384   PC_MPI            *km = pc ? (PC_MPI *)pc->data : NULL;
385   KSP                ksp;
386   MPI_Comm           comm = PC_MPI_COMM_WORLD;
387   const PetscScalar *sb   = NULL, *x;
388   PetscScalar       *b, *sx = NULL;
389   PetscInt           its, n;
390   PetscMPIInt        size;
391   void              *addr[2];
392 
393   PetscFunctionBegin;
394   PetscCallMPI(MPI_Scatter(pc ? km->ksps : &ksp, 1, MPI_AINT, &ksp, 1, MPI_AINT, 0, comm));
395   if (!ksp) PetscFunctionReturn(PETSC_SUCCESS);
396   PCMPIServerInSolve = PETSC_TRUE;
397   PetscCall(PetscLogEventBegin(EventServerDist, NULL, NULL, NULL, NULL));
398   PetscCall(PetscObjectGetComm((PetscObject)ksp, &comm));
399 
400   /* scatterv rhs */
401   PetscCallMPI(MPI_Comm_size(comm, &size));
402   if (pc) {
403     PetscInt N;
404 
405     PCMPISolveCounts[size - 1]++;
406     PetscCall(MatGetSize(pc->pmat, &N, NULL));
407     PCMPISizes[size - 1] += N;
408   }
409   PetscCall(VecGetLocalSize(ksp->vec_rhs, &n));
410   PetscCall(PetscLogEventBegin(EventServerDistMPI, NULL, NULL, NULL, NULL));
411   if (!PCMPIServerUseShmget) {
412     PetscMPIInt in;
413 
414     PetscCall(VecGetArray(ksp->vec_rhs, &b));
415     if (pc) PetscCall(VecGetArrayRead(B, &sb));
416     PetscCall(PetscMPIIntCast(n, &in));
417     PetscCallMPI(MPI_Scatterv(sb, pc ? km->sendcount : NULL, pc ? km->displ : NULL, MPIU_SCALAR, b, in, MPIU_SCALAR, 0, comm));
418     if (pc) PetscCall(VecRestoreArrayRead(B, &sb));
419     PetscCall(VecRestoreArray(ksp->vec_rhs, &b));
420     // TODO: scatter initial guess if needed
421   } else {
422     PetscInt rstart;
423 
424     if (pc) PetscCall(VecGetArrayRead(B, &sb));
425     if (pc) PetscCall(VecGetArray(X, &sx));
426     const void *inaddr[2] = {(const void **)sb, (const void **)sx};
427     if (pc) PetscCall(VecRestoreArray(X, &sx));
428     if (pc) PetscCall(VecRestoreArrayRead(B, &sb));
429 
430     PetscCall(PetscShmgetMapAddresses(comm, 2, inaddr, addr));
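    /* the parallel rhs and solution vectors use rank 0's arrays directly through shared memory; each rank's piece begins at its ownership offset rstart */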
431     PetscCall(VecGetOwnershipRange(ksp->vec_rhs, &rstart, NULL));
432     PetscCall(VecPlaceArray(ksp->vec_rhs, rstart + (PetscScalar *)addr[0]));
433     PetscCall(VecPlaceArray(ksp->vec_sol, rstart + (PetscScalar *)addr[1]));
434   }
435   PetscCall(PetscLogEventEnd(EventServerDistMPI, NULL, NULL, NULL, NULL));
436 
437   PetscCall(PetscLogEventEnd(EventServerDist, NULL, NULL, NULL, NULL));
438   PetscCall(PetscLogStagePush(PCMPIStage));
439   PetscCall(KSPSolve(ksp, NULL, NULL));
440   PetscCall(PetscLogStagePop());
441   PetscCall(PetscLogEventBegin(EventServerDist, NULL, NULL, NULL, NULL));
442   PetscCall(KSPGetIterationNumber(ksp, &its));
443   PCMPIIterations[size - 1] += its;
444   // TODO: send iterations up to outer KSP
445 
446   if (PCMPIServerUseShmget) PetscCall(PetscShmgetUnmapAddresses(2, addr));
447 
448   /* gather solution */
449   PetscCall(PetscLogEventBegin(EventServerDistMPI, NULL, NULL, NULL, NULL));
450   if (!PCMPIServerUseShmget) {
451     PetscMPIInt in;
452 
453     PetscCall(VecGetArrayRead(ksp->vec_sol, &x));
454     if (pc) PetscCall(VecGetArray(X, &sx));
455     PetscCall(PetscMPIIntCast(n, &in));
456     PetscCallMPI(MPI_Gatherv(x, in, MPIU_SCALAR, sx, pc ? km->sendcount : NULL, pc ? km->displ : NULL, MPIU_SCALAR, 0, comm));
457     if (pc) PetscCall(VecRestoreArray(X, &sx));
458     PetscCall(VecRestoreArrayRead(ksp->vec_sol, &x));
459   } else {
460     PetscCall(VecResetArray(ksp->vec_rhs));
461     PetscCall(VecResetArray(ksp->vec_sol));
462   }
463   PetscCall(PetscLogEventEnd(EventServerDistMPI, NULL, NULL, NULL, NULL));
464   PetscCall(PetscLogEventEnd(EventServerDist, NULL, NULL, NULL, NULL));
465   PCMPIServerInSolve = PETSC_FALSE;
466   PetscFunctionReturn(PETSC_SUCCESS);
467 }
468 
469 static PetscErrorCode PCMPIDestroy(PC pc)
470 {
471   PC_MPI  *km = pc ? (PC_MPI *)pc->data : NULL;
472   KSP      ksp;
473   MPI_Comm comm = PC_MPI_COMM_WORLD;
474 
475   PetscFunctionBegin;
476   PetscCallMPI(MPI_Scatter(pc ? km->ksps : NULL, 1, MPI_AINT, &ksp, 1, MPI_AINT, 0, comm));
477   if (!ksp) PetscFunctionReturn(PETSC_SUCCESS);
478   PetscCall(PetscLogStagePush(PCMPIStage));
479   PCMPIServerInSolve = PETSC_TRUE;
480   PetscCall(KSPDestroy(&ksp));
481   PetscCall(PetscLogStagePop());
482   PCMPIServerInSolve = PETSC_FALSE;
483   PetscFunctionReturn(PETSC_SUCCESS);
484 }
485 
486 static PetscErrorCode PCMPIServerBroadcastRequest(PCMPICommand request)
487 {
488 #if defined(PETSC_HAVE_PTHREAD_MUTEX)
489   PetscMPIInt dummy1 = 1, dummy2;
490 #endif
491 
492   PetscFunctionBegin;
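  /* When shared memory is in use, rank 0 holds one pthread mutex per server rank; each server rank blocks trying to lock its own
     mutex, so releasing the locks here signals the server ranks that a request broadcast follows */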
493 #if defined(PETSC_HAVE_PTHREAD_MUTEX)
494   if (PCMPIServerUseShmget) {
495     for (PetscMPIInt i = 1; i < PetscGlobalSize; i++) pthread_mutex_unlock(&PCMPIServerLocks[i]);
496   }
497 #endif
498   PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, MPI_COMM_WORLD));
499   /* the next MPI_Reduce() ensures every server rank has taken its lock before rank 0 takes the locks back */
500 #if defined(PETSC_HAVE_PTHREAD_MUTEX)
501   if (PCMPIServerUseShmget) {
502     PetscCallMPI(MPI_Reduce(&dummy1, &dummy2, 1, MPI_INT, MPI_SUM, 0, PC_MPI_COMM_WORLD));
503     for (PetscMPIInt i = 1; i < PetscGlobalSize; i++) pthread_mutex_lock(&PCMPIServerLocks[i]);
504   }
505 #endif
506   PetscFunctionReturn(PETSC_SUCCESS);
507 }
508 
509 /*@C
510   PCMPIServerBegin - starts a server that runs on the `rank != 0` MPI processes waiting to process requests for
511   parallel `KSP` solves and management of parallel `KSP` objects.
512 
513   Logically Collective on all MPI processes except rank 0
514 
515   Options Database Keys:
516 + -mpi_linear_solver_server                   - causes the PETSc program to start in MPI linear solver server mode where only the first MPI rank runs user code
517 . -mpi_linear_solver_server_view              - displays information about all the linear systems solved by the MPI linear solver server at the conclusion of the program
518 - -mpi_linear_solver_server_use_shared_memory - use shared memory when communicating matrices and vectors to server processes (default where supported)
519 
520   Level: developer
521 
522   Note:
523   This is normally started automatically in `PetscInitialize()` when the option `-mpi_linear_solver_server` is provided
524 
525   See `PCMPI` for information on using the solver with a `KSP` object
526 
527   Developer Notes:
528   When called on MPI rank 0 this sets `PETSC_COMM_WORLD` to `PETSC_COMM_SELF` to allow a main program
529   written with `PETSC_COMM_WORLD` to run correctly on that single rank while all the other ranks
530   (that would normally share `PETSC_COMM_WORLD`) run the solver server.
531 
532   Can this be integrated into the `PetscDevice` abstraction that is currently being developed?
533 
534   Conceivably `PCREDISTRIBUTE` could be organized in a similar manner to simplify its usage
535 
536   This could be implemented directly at the `KSP` level instead of using the `PCMPI` wrapper object
537 
538   The code could be extended to allow an MPI + OpenMP application to use the linear solver server concept across all shared-memory
539   nodes with a single MPI process per node for the user application but multiple MPI processes per node for the linear solver.
540 
541   The concept could also be extended for users' callbacks for `SNES`, `TS`, and `Tao` where, for example, the `SNESSolve()` runs on
542   all MPI processes but the user callback runs on only one MPI process per node.
543 
544   PETSc could also be extended with an MPI-less API that provides access to PETSc's solvers without any reference to MPI, essentially removing
545   the `MPI_Comm` argument from PETSc calls.
546 
547 .seealso: [](sec_pcmpi), `PCMPIServerEnd()`, `PCMPI`, `KSPCheckPCMPI()`
548 @*/
549 PetscErrorCode PCMPIServerBegin(void)
550 {
551   PetscMPIInt rank;
552 
553   PetscFunctionBegin;
554   PetscCall(PetscInfo(NULL, "Starting MPI Linear Solver Server\n"));
555   if (PetscDefined(USE_SINGLE_LIBRARY)) {
556     PetscCall(VecInitializePackage());
557     PetscCall(MatInitializePackage());
558     PetscCall(DMInitializePackage());
559     PetscCall(PCInitializePackage());
560     PetscCall(KSPInitializePackage());
561     PetscCall(SNESInitializePackage());
562     PetscCall(TSInitializePackage());
563     PetscCall(TaoInitializePackage());
564   }
565   PetscCall(PetscLogStageRegister("PCMPI", &PCMPIStage));
566   PetscCall(PetscLogEventRegister("ServerDist", PC_CLASSID, &EventServerDist));
567   PetscCall(PetscLogEventRegister("ServerDistMPI", PC_CLASSID, &EventServerDistMPI));
568 
569   if (!PetscDefined(HAVE_SHMGET)) PCMPIServerUseShmget = PETSC_FALSE;
570   PetscCall(PetscOptionsGetBool(NULL, NULL, "-mpi_linear_solver_server_use_shared_memory", &PCMPIServerUseShmget, NULL));
571 
572   PetscCallMPI(MPI_Comm_rank(PC_MPI_COMM_WORLD, &rank));
573   if (PCMPIServerUseShmget) {
574 #if defined(PETSC_HAVE_PTHREAD_MUTEX)
575     PetscMPIInt size;
576 
577     PetscCallMPI(MPI_Comm_size(PETSC_COMM_WORLD, &size));
578     if (size > 1) {
579       pthread_mutex_t *locks;
580 
581       if (rank == 0) {
582         PCMPIServerActive = PETSC_TRUE;
583         PetscCall(PetscShmgetAllocateArray(size, sizeof(pthread_mutex_t), (void **)&locks));
584       }
585       PetscCall(PetscShmgetMapAddresses(PETSC_COMM_WORLD, 1, (const void **)&locks, (void **)&PCMPIServerLocks));
586       if (rank == 0) {
587         pthread_mutexattr_t attr;
588 
589         pthread_mutexattr_init(&attr);
590         pthread_mutexattr_setpshared(&attr, PTHREAD_PROCESS_SHARED);
591 
592         for (int i = 1; i < size; i++) {
593           pthread_mutex_init(&PCMPIServerLocks[i], &attr);
594           pthread_mutex_lock(&PCMPIServerLocks[i]);
595         }
596       }
597       PetscCallMPI(MPI_Barrier(PETSC_COMM_WORLD));
598     }
599 #endif
600   }
601   if (rank == 0) {
602     PETSC_COMM_WORLD  = PETSC_COMM_SELF;
603     PCMPIServerActive = PETSC_TRUE;
604     PetscFunctionReturn(PETSC_SUCCESS);
605   }
606 
607   while (PETSC_TRUE) {
608     PCMPICommand request = PCMPI_CREATE;
609 #if defined(PETSC_HAVE_PTHREAD_MUTEX)
610     PetscMPIInt dummy1 = 1, dummy2;
611 #endif
612 
613     // TODO: can we broadcast the number of active ranks here so only the correct subset of processes waits on the later scatters?
614 #if defined(PETSC_HAVE_PTHREAD_MUTEX)
615     if (PCMPIServerUseShmget) pthread_mutex_lock(&PCMPIServerLocks[PetscGlobalRank]);
616 #endif
617     PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, PC_MPI_COMM_WORLD));
618 #if defined(PETSC_HAVE_PTHREAD_MUTEX)
619     if (PCMPIServerUseShmget) {
620       /* next line ensures PetscGlobalRank has locked before rank 0 can take the lock back */
621       PetscCallMPI(MPI_Reduce(&dummy1, &dummy2, 1, MPI_INT, MPI_SUM, 0, PC_MPI_COMM_WORLD));
622       pthread_mutex_unlock(&PCMPIServerLocks[PetscGlobalRank]);
623     }
624 #endif
625     switch (request) {
626     case PCMPI_CREATE:
627       PetscCall(PCMPICreate(NULL));
628       break;
629     case PCMPI_SET_MAT:
630       PetscCall(PCMPISetMat(NULL));
631       break;
632     case PCMPI_UPDATE_MAT_VALUES:
633       PetscCall(PCMPIUpdateMatValues(NULL));
634       break;
635     case PCMPI_VIEW:
636       // PetscCall(PCMPIView(NULL));
637       break;
638     case PCMPI_SOLVE:
639       PetscCall(PCMPISolve(NULL, NULL, NULL));
640       break;
641     case PCMPI_DESTROY:
642       PetscCall(PCMPIDestroy(NULL));
643       break;
644     case PCMPI_EXIT:
645       if (PCMPIServerUseShmget) PetscCall(PetscShmgetUnmapAddresses(1, (void **)&PCMPIServerLocks));
646       PetscCall(PetscFinalize());
647       exit(0); /* not sure if this is a good idea, but cannot return because it would run the user's main program */
648       break;
649     default:
650       break;
651     }
652   }
653   PetscFunctionReturn(PETSC_SUCCESS);
654 }
655 
656 /*@C
657   PCMPIServerEnd - ends a server that runs on the rank != 0 MPI processes waiting to process requests for
658   parallel KSP solves and management of parallel `KSP` objects.
659 
660   Logically Collective on all MPI ranks except 0
661 
662   Level: developer
663 
664   Note:
665   This is normally called automatically in `PetscFinalize()`
666 
667 .seealso: [](sec_pcmpi), `PCMPIServerBegin()`, `PCMPI`, `KSPCheckPCMPI()`
668 @*/
669 PetscErrorCode PCMPIServerEnd(void)
670 {
671   PetscFunctionBegin;
672   if (PetscGlobalRank == 0) {
673     PetscViewer       viewer = NULL;
674     PetscViewerFormat format;
675 
676     PetscCall(PetscShmgetAddressesFinalize());
677     PetscCall(PCMPIServerBroadcastRequest(PCMPI_EXIT));
678     if (PCMPIServerUseShmget) PetscCall(PetscShmgetUnmapAddresses(1, (void **)&PCMPIServerLocks));
679     PETSC_COMM_WORLD = MPI_COMM_WORLD; /* could use PC_MPI_COMM_WORLD */
680     PetscOptionsBegin(PETSC_COMM_SELF, NULL, "MPI linear solver server options", NULL);
681     PetscCall(PetscOptionsViewer("-mpi_linear_solver_server_view", "View information about system solved with the server", "PCMPI", &viewer, &format, NULL));
682     PetscOptionsEnd();
683     if (viewer) {
684       PetscBool isascii;
685 
686       PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii));
687       if (isascii) {
688         PetscMPIInt size;
689         PetscMPIInt i;
690 
691         PetscCallMPI(MPI_Comm_size(PETSC_COMM_WORLD, &size));
692         PetscCall(PetscViewerASCIIPrintf(viewer, "MPI linear solver server statistics:\n"));
693         PetscCall(PetscViewerASCIIPrintf(viewer, "    Ranks        KSPSolve()s     Mats        KSPs       Avg. Size      Avg. Its\n"));
694         if (PCMPIKSPCountsSeq) {
695           PetscCall(PetscViewerASCIIPrintf(viewer, "  Sequential         %" PetscInt_FMT "                         %" PetscInt_FMT "            %" PetscInt_FMT "           %" PetscInt_FMT "\n", PCMPISolveCountsSeq, PCMPIKSPCountsSeq, PCMPISizesSeq / PCMPISolveCountsSeq, PCMPIIterationsSeq / PCMPISolveCountsSeq));
696         }
697         for (i = 0; i < size; i++) {
698           if (PCMPIKSPCounts[i]) {
699             PetscCall(PetscViewerASCIIPrintf(viewer, "     %d               %" PetscInt_FMT "            %" PetscInt_FMT "           %" PetscInt_FMT "            %" PetscInt_FMT "            %" PetscInt_FMT "\n", i + 1, PCMPISolveCounts[i], PCMPIMatCounts[i], PCMPIKSPCounts[i], PCMPISizes[i] / PCMPISolveCounts[i], PCMPIIterations[i] / PCMPISolveCounts[i]));
700           }
701         }
702         PetscCall(PetscViewerASCIIPrintf(viewer, "MPI linear solver server %susing shared memory\n", PCMPIServerUseShmget ? "" : "not "));
703       }
704       PetscCall(PetscViewerDestroy(&viewer));
705     }
706   }
707   PetscCall(PCMPICommsDestroy());
708   PCMPIServerActive = PETSC_FALSE;
709   PetscFunctionReturn(PETSC_SUCCESS);
710 }
711 
712 /*
713     This version is used in the trivial case when the MPI parallel solver server is running on just the original MPI rank 0
714     because, for example, the problem is small. It is more efficient because it does not require copying any data
715 */
716 static PetscErrorCode PCSetUp_Seq(PC pc)
717 {
718   PC_MPI     *km = (PC_MPI *)pc->data;
719   Mat         sA;
720   const char *prefix;
721   char       *found = NULL, *cprefix;
722 
723   PetscFunctionBegin;
724   PCMPIServerInSolve = PETSC_TRUE;
725   PetscCall(PCGetOperators(pc, NULL, &sA));
726   PetscCall(PCGetOptionsPrefix(pc, &prefix));
727   PetscCall(KSPCreate(PETSC_COMM_SELF, &km->ksps[0]));
728   PetscCall(KSPSetNestLevel(km->ksps[0], 1));
729   PetscCall(PetscObjectSetTabLevel((PetscObject)km->ksps[0], 1));
730 
731   /* Created KSP gets prefix of PC minus the mpi_linear_solver_server_ portion */
732   PetscCall(PCGetOptionsPrefix(pc, &prefix));
733   PetscCheck(prefix, PETSC_COMM_SELF, PETSC_ERR_PLIB, "PCMPI missing required prefix");
734   PetscCall(PetscStrallocpy(prefix, &cprefix));
735   PetscCall(PetscStrstr(cprefix, "mpi_linear_solver_server_", &found));
736   PetscCheck(found, PETSC_COMM_SELF, PETSC_ERR_PLIB, "PCMPI missing mpi_linear_solver_server_ portion of prefix");
737   *found = 0;
738   PetscCall(KSPSetOptionsPrefix(km->ksps[0], cprefix));
739   PetscCall(PetscFree(cprefix));
740 
741   PetscCall(KSPSetOperators(km->ksps[0], sA, sA));
742   PetscCall(KSPSetFromOptions(km->ksps[0]));
743   PetscCall(KSPSetUp(km->ksps[0]));
744   PetscCall(PetscInfo((PetscObject)pc, "MPI parallel linear solver system is being solved directly on rank 0 due to its small size\n"));
745   PCMPIKSPCountsSeq++;
746   PCMPIServerInSolve = PETSC_FALSE;
747   PetscFunctionReturn(PETSC_SUCCESS);
748 }
749 
750 static PetscErrorCode PCApply_Seq(PC pc, Vec b, Vec x)
751 {
752   PC_MPI  *km = (PC_MPI *)pc->data;
753   PetscInt its, n;
754   Mat      A;
755 
756   PetscFunctionBegin;
757   PCMPIServerInSolve = PETSC_TRUE;
758   PetscCall(KSPSolve(km->ksps[0], b, x));
759   PetscCall(KSPGetIterationNumber(km->ksps[0], &its));
760   PCMPISolveCountsSeq++;
761   PCMPIIterationsSeq += its;
762   PetscCall(KSPGetOperators(km->ksps[0], NULL, &A));
763   PetscCall(MatGetSize(A, &n, NULL));
764   PCMPISizesSeq += n;
765   PCMPIServerInSolve = PETSC_FALSE;
766   /*
767     do not keep references to the previous rhs and solution since destroying them in the next KSPSolve()
768     may use PetscFree() instead of PCMPIArrayDeallocate()
769   */
770   PetscCall(VecDestroy(&km->ksps[0]->vec_rhs));
771   PetscCall(VecDestroy(&km->ksps[0]->vec_sol));
772   PetscFunctionReturn(PETSC_SUCCESS);
773 }
774 
775 static PetscErrorCode PCView_Seq(PC pc, PetscViewer viewer)
776 {
777   PC_MPI *km = (PC_MPI *)pc->data;
778 
779   PetscFunctionBegin;
780   PetscCall(PetscViewerASCIIPrintf(viewer, "Running MPI linear solver server directly on rank 0 due to its small size\n"));
781   PetscCall(PetscViewerASCIIPrintf(viewer, "Desired minimum number of matrix rows per rank for MPI parallel solve %d\n", (int)km->mincntperrank));
782   PetscCall(PetscViewerASCIIPrintf(viewer, "*** Use -mpi_linear_solver_server_view to view statistics on all the solves ***\n"));
783   PetscFunctionReturn(PETSC_SUCCESS);
784 }
785 
786 static PetscErrorCode PCDestroy_Seq(PC pc)
787 {
788   PC_MPI *km = (PC_MPI *)pc->data;
789   Mat     A, B;
790   Vec     x, b;
791 
792   PetscFunctionBegin;
793   PCMPIServerInSolve = PETSC_TRUE;
794   /* since matrices and vectors are shared with outer KSP we need to ensure they are not destroyed with PetscFree() */
795   PetscCall(KSPGetOperators(km->ksps[0], &A, &B));
796   PetscCall(PetscObjectReference((PetscObject)A));
797   PetscCall(PetscObjectReference((PetscObject)B));
798   PetscCall(KSPGetSolution(km->ksps[0], &x));
799   PetscCall(PetscObjectReference((PetscObject)x));
800   PetscCall(KSPGetRhs(km->ksps[0], &b));
801   PetscCall(PetscObjectReference((PetscObject)b));
802   PetscCall(KSPDestroy(&km->ksps[0]));
803   PetscCall(PetscFree(pc->data));
804   PCMPIServerInSolve = PETSC_FALSE;
805   PetscCall(MatDestroy(&A));
806   PetscCall(MatDestroy(&B));
807   PetscCall(VecDestroy(&x));
808   PetscCall(VecDestroy(&b));
809   PetscFunctionReturn(PETSC_SUCCESS);
810 }
811 
812 /*
813      PCSetUp_MPI - Trigger the creation of the MPI parallel PC and copy parts of the matrix and
814      right-hand side to the parallel PC
815 */
816 static PetscErrorCode PCSetUp_MPI(PC pc)
817 {
818   PC_MPI     *km = (PC_MPI *)pc->data;
819   PetscMPIInt rank, size;
820   PetscBool   newmatrix = PETSC_FALSE;
821 
822   PetscFunctionBegin;
823   PetscCallMPI(MPI_Comm_rank(MPI_COMM_WORLD, &rank));
824   PetscCheck(rank == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "PCMPI can only be used from 0th rank of MPI_COMM_WORLD. Perhaps a missing -mpi_linear_solver_server?");
825   PetscCallMPI(MPI_Comm_size(MPI_COMM_WORLD, &size));
826 
827   if (!pc->setupcalled) {
828     if (!km->alwaysuseserver) {
829       PetscInt n;
830       Mat      sA;
831       /* short circuit for small systems */
832       PetscCall(PCGetOperators(pc, &sA, &sA));
833       PetscCall(MatGetSize(sA, &n, NULL));
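      /* solve directly on rank 0 unless at least two MPI processes would each receive roughly km->mincntperrank matrix rows */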
834       if (n < 2 * km->mincntperrank - 1 || size == 1) {
835         pc->ops->setup   = NULL;
836         pc->ops->apply   = PCApply_Seq;
837         pc->ops->destroy = PCDestroy_Seq;
838         pc->ops->view    = PCView_Seq;
839         PetscCall(PCSetUp_Seq(pc));
840         PetscFunctionReturn(PETSC_SUCCESS);
841       }
842     }
843 
844     PetscCall(PCMPIServerBroadcastRequest(PCMPI_CREATE));
845     PetscCall(PCMPICreate(pc));
846     newmatrix = PETSC_TRUE;
847   }
848   if (pc->flag == DIFFERENT_NONZERO_PATTERN) newmatrix = PETSC_TRUE;
849 
850   if (newmatrix) {
851     PetscCall(PetscInfo((PetscObject)pc, "New matrix or matrix has changed nonzero structure\n"));
852     PetscCall(PCMPIServerBroadcastRequest(PCMPI_SET_MAT));
853     PetscCall(PCMPISetMat(pc));
854   } else {
855     PetscCall(PetscInfo((PetscObject)pc, "Matrix has only changed nonzero values\n"));
856     PetscCall(PCMPIServerBroadcastRequest(PCMPI_UPDATE_MAT_VALUES));
857     PetscCall(PCMPIUpdateMatValues(pc));
858   }
859   PetscFunctionReturn(PETSC_SUCCESS);
860 }
861 
862 static PetscErrorCode PCApply_MPI(PC pc, Vec b, Vec x)
863 {
864   PetscFunctionBegin;
865   PetscCall(PCMPIServerBroadcastRequest(PCMPI_SOLVE));
866   PetscCall(PCMPISolve(pc, b, x));
867   PetscFunctionReturn(PETSC_SUCCESS);
868 }
869 
870 static PetscErrorCode PCDestroy_MPI(PC pc)
871 {
872   PetscFunctionBegin;
873   PetscCall(PCMPIServerBroadcastRequest(PCMPI_DESTROY));
874   PetscCall(PCMPIDestroy(pc));
875   PetscCall(PetscFree(pc->data));
876   PetscFunctionReturn(PETSC_SUCCESS);
877 }
878 
879 /*
880      PCView_MPI - Cannot call view on the MPI parallel KSP because other ranks do not have access to the viewer; use the options database instead
881 */
882 static PetscErrorCode PCView_MPI(PC pc, PetscViewer viewer)
883 {
884   PC_MPI     *km = (PC_MPI *)pc->data;
885   MPI_Comm    comm;
886   PetscMPIInt size;
887 
888   PetscFunctionBegin;
889   PetscCall(PetscObjectGetComm((PetscObject)km->ksps[0], &comm));
890   PetscCallMPI(MPI_Comm_size(comm, &size));
891   PetscCall(PetscViewerASCIIPrintf(viewer, "Size of MPI communicator used for MPI parallel KSP solve %d\n", size));
892   PetscCall(PetscViewerASCIIPrintf(viewer, "Desired minimum number of matrix rows on each MPI process for MPI parallel solve %d\n", (int)km->mincntperrank));
893   PetscCall(PetscViewerASCIIPrintf(viewer, "*** Use -mpi_linear_solver_server_view to view statistics on all the solves ***\n"));
894   PetscFunctionReturn(PETSC_SUCCESS);
895 }
896 
897 static PetscErrorCode PCSetFromOptions_MPI(PC pc, PetscOptionItems *PetscOptionsObject)
898 {
899   PC_MPI *km = (PC_MPI *)pc->data;
900 
901   PetscFunctionBegin;
902   PetscOptionsHeadBegin(PetscOptionsObject, "MPI linear solver server options");
903   PetscCall(PetscOptionsInt("-minimum_count_per_rank", "Desired minimum number of matrix rows per rank", "None", km->mincntperrank, &km->mincntperrank, NULL));
904   PetscCall(PetscOptionsBool("-always_use_server", "Use the server even if only one rank is used for the solve (for debugging)", "None", km->alwaysuseserver, &km->alwaysuseserver, NULL));
905   PetscOptionsHeadEnd();
906   PetscFunctionReturn(PETSC_SUCCESS);
907 }
908 
909 /*MC
910      PCMPI - Calls an MPI parallel `KSP` to solve a linear system from user code running on one process
911 
912    Options Database Keys for the Server:
913 +  -mpi_linear_solver_server - causes the PETSc program to start in MPI linear solver server mode where only the first MPI rank runs user code
914 .  -mpi_linear_solver_server_view - displays information about all the linear systems solved by the MPI linear solver server
915 -  -mpi_linear_solver_server_use_shared_memory <true, false> - use shared memory to distribute the matrix and right-hand side; defaults to true
916 
917    Options Database Keys for a specific `KSP` object
918 +  -[any_ksp_prefix]_mpi_linear_solver_server_minimum_count_per_rank - sets the minimum size of the linear system per MPI rank that the solver will strive for
919 -  -[any_ksp_prefix]_mpi_linear_solver_server_always_use_server - use the server solver code even if the particular system is solved on only one process (for debugging and testing purposes)
920 
921    Level: developer
922 
923    Notes:
924    This cannot be used with vectors or matrices that are created using arrays provided by the user, such as `VecCreateSeqWithArray()` or
925    `MatCreateSeqAIJWithArrays()`
926 
927    The options database prefix for the actual solver is any prefix provided before use to the original `KSP` with `KSPSetOptionsPrefix()`; most commonly no prefix is used.
928 
929    It can be particularly useful for user code that is parallelized with OpenMP, or potentially for user GPU code.
930 
931    When the program is running with a single MPI process then it directly uses the provided matrix and right-hand side
932    and does not need to distribute the matrix and vector to the various MPI processes; thus it incurs no extra overhead over just using the `KSP` directly.
933 
934    The options for the actual solver `KSP` and `PC` must be controlled via the options database; calls that set options directly on the user-level `KSP` and `PC` have no effect
935    because they are not the actual solver objects.
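
   For example, with no prefix on the user `KSP`, the server's solver can be configured entirely from the command line with something like (the
   executable name is illustrative)
.vb
   mpiexec -n 8 ./app -mpi_linear_solver_server -ksp_type cg -pc_type gamg -mpi_linear_solver_server_view
.ve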
936 
937    When `-log_view` is used with this solver the events within the parallel solve are logged in their own stage. Some of the logging in the other
938    stages will be confusing since the event times are only recorded on the 0th MPI rank; thus the percentage of time in the events will be misleading.
939 
940    Developer Note:
941    This `PCType` is never directly selected by the user; it is set when the option `-mpi_linear_solver_server` is used and the `PC` is at the outermost nesting of
942    a `KSP`. The outermost `KSP` object is automatically set to `KSPPREONLY` and thus is not directly visible to the user.
943 
944 .seealso: [](sec_pcmpi), `KSPCreate()`, `KSPSetType()`, `KSPType`, `KSP`, `PC`, `PCMPIServerBegin()`, `PCMPIServerEnd()`, `KSPCheckPCMPI()`
945 M*/
946 PETSC_EXTERN PetscErrorCode PCCreate_MPI(PC pc)
947 {
948   PC_MPI *km;
949   char   *found = NULL;
950 
951   PetscFunctionBegin;
952   PetscCall(PetscStrstr(((PetscObject)pc)->prefix, "mpi_linear_solver_server_", &found));
953   PetscCheck(found, PETSC_COMM_SELF, PETSC_ERR_PLIB, "PCMPI object prefix does not have mpi_linear_solver_server_");
954 
955   /* material from PCSetType() */
956   PetscTryTypeMethod(pc, destroy);
957   pc->ops->destroy = NULL;
958   pc->data         = NULL;
959 
960   PetscCall(PetscFunctionListDestroy(&((PetscObject)pc)->qlist));
961   PetscCall(PetscMemzero(pc->ops, sizeof(struct _PCOps)));
962   pc->modifysubmatrices  = NULL;
963   pc->modifysubmatricesP = NULL;
964   pc->setupcalled        = 0;
965 
966   PetscCall(PetscNew(&km));
967   pc->data = (void *)km;
968 
969   km->mincntperrank = 10000;
970 
971   pc->ops->setup          = PCSetUp_MPI;
972   pc->ops->apply          = PCApply_MPI;
973   pc->ops->destroy        = PCDestroy_MPI;
974   pc->ops->view           = PCView_MPI;
975   pc->ops->setfromoptions = PCSetFromOptions_MPI;
976   PetscCall(PetscObjectChangeTypeName((PetscObject)pc, PCMPI));
977   PetscFunctionReturn(PETSC_SUCCESS);
978 }
979 
980 /*@
981   PCMPIGetKSP - Gets the `KSP` created by the `PCMPI`
982 
983   Not Collective
984 
985   Input Parameter:
986 . pc - the preconditioner context
987 
988   Output Parameter:
989 . innerksp - the inner `KSP`
990 
991   Level: advanced
992 
993 .seealso: [](ch_ksp), `KSP`, `PCMPI`, `PCREDISTRIBUTE`
994 @*/
995 PetscErrorCode PCMPIGetKSP(PC pc, KSP *innerksp)
996 {
997   PC_MPI *red = (PC_MPI *)pc->data;
998 
999   PetscFunctionBegin;
1000   PetscValidHeaderSpecific(pc, PC_CLASSID, 1);
1001   PetscAssertPointer(innerksp, 2);
1002   *innerksp = red->ksps[0];
1003   PetscFunctionReturn(PETSC_SUCCESS);
1004 }
1005