xref: /petsc/src/ksp/pc/impls/mpi/pcmpi.c (revision a29dfd43bb0c77e2653d3bfa2c953f902720a6d2)
1 /*
2     This file creates an MPI parallel KSP from a sequential PC that lives on MPI rank 0.
3     It is intended to allow using PETSc MPI parallel linear solvers from non-MPI codes.
4 
5     That program may use OpenMP to compute the right-hand side and matrix for the linear system
6 
7     The code uses MPI_COMM_WORLD below but maybe it should be PETSC_COMM_WORLD
8 
9     The resulting KSP and PC can only be controlled via the options database, though some common commands
10     could be passed through the server.
11 
12 */
13 #include <petsc/private/pcimpl.h> /*I "petscksp.h" I*/
14 #include <petsc/private/kspimpl.h>
15 #include <petscts.h>
16 #include <petsctao.h>
17 #if defined(PETSC_HAVE_PTHREAD_MUTEX)
18   #include <pthread.h>
19 #endif
20 
21 #define PC_MPI_MAX_RANKS  256
22 #define PC_MPI_COMM_WORLD MPI_COMM_WORLD
23 
24 typedef struct {
25   KSP         ksps[PC_MPI_MAX_RANKS];                               /* The addresses of the MPI parallel KSP on each process, NULL when not on a process. */
26   PetscMPIInt sendcount[PC_MPI_MAX_RANKS], displ[PC_MPI_MAX_RANKS]; /* For scatter/gather of rhs/solution */
27   PetscMPIInt NZ[PC_MPI_MAX_RANKS], NZdispl[PC_MPI_MAX_RANKS];      /* For scatter of nonzero values in matrix (and nonzero column indices initially */
28   PetscInt    mincntperrank;                                        /* minimum number of desired matrix rows per active rank in MPI parallel KSP solve */
29   PetscBool   alwaysuseserver;                                      /* for debugging use the server infrastructure even if only one MPI process is used for the solve */
30 } PC_MPI;
31 
32 typedef enum {
33   PCMPI_EXIT, /* exit the PC server loop, means the controlling sequential program is done */
34   PCMPI_CREATE,
35   PCMPI_SET_MAT,           /* set original matrix (or one with different nonzero pattern) */
36   PCMPI_UPDATE_MAT_VALUES, /* update current matrix with new nonzero values */
37   PCMPI_SOLVE,
38   PCMPI_VIEW,
39   PCMPI_DESTROY /* destroy a PC that is no longer needed */
40 } PCMPICommand;
41 
42 static MPI_Comm      PCMPIComms[PC_MPI_MAX_RANKS];
43 static PetscBool     PCMPICommSet = PETSC_FALSE;
44 static PetscInt      PCMPISolveCounts[PC_MPI_MAX_RANKS], PCMPIKSPCounts[PC_MPI_MAX_RANKS], PCMPIMatCounts[PC_MPI_MAX_RANKS], PCMPISolveCountsSeq = 0, PCMPIKSPCountsSeq = 0;
45 static PetscInt      PCMPIIterations[PC_MPI_MAX_RANKS], PCMPISizes[PC_MPI_MAX_RANKS], PCMPIIterationsSeq = 0, PCMPISizesSeq = 0;
46 static PetscLogEvent EventServerDist, EventServerDistMPI;
47 #if defined(PETSC_HAVE_PTHREAD_MUTEX)
48 static pthread_mutex_t *PCMPIServerLocks;
49 #else
50 static void *PCMPIServerLocks;
51 #endif
52 
53 static PetscErrorCode PCMPICommsCreate(void)
54 {
55   MPI_Comm    comm = PC_MPI_COMM_WORLD;
56   PetscMPIInt size, rank, i;
57 
58   PetscFunctionBegin;
59   PetscCallMPI(MPI_Comm_size(comm, &size));
60   PetscCheck(size <= PC_MPI_MAX_RANKS, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for using more than PC_MPI_MAX_RANKS MPI ranks in an MPI linear solver server solve");
61   PetscCallMPI(MPI_Comm_rank(comm, &rank));
62   /* comm for size 1 is useful only for debugging */
63   for (i = 0; i < size; i++) {
64     PetscMPIInt color = rank < i + 1 ? 0 : MPI_UNDEFINED;
65     PetscCallMPI(MPI_Comm_split(comm, color, 0, &PCMPIComms[i]));
66     PCMPISolveCounts[i] = 0;
67     PCMPIKSPCounts[i]   = 0;
68     PCMPIIterations[i]  = 0;
69     PCMPISizes[i]       = 0;
70   }
71   PCMPICommSet = PETSC_TRUE;
72   PetscFunctionReturn(PETSC_SUCCESS);
73 }
74 
75 static PetscErrorCode PCMPICommsDestroy(void)
76 {
77   MPI_Comm    comm = PC_MPI_COMM_WORLD;
78   PetscMPIInt size, rank, i;
79 
80   PetscFunctionBegin;
81   if (!PCMPICommSet) PetscFunctionReturn(PETSC_SUCCESS);
82   PetscCallMPI(MPI_Comm_size(comm, &size));
83   PetscCallMPI(MPI_Comm_rank(comm, &rank));
84   for (i = 0; i < size; i++) {
85     if (PCMPIComms[i] != MPI_COMM_NULL) PetscCallMPI(MPI_Comm_free(&PCMPIComms[i]));
86   }
87   PCMPICommSet = PETSC_FALSE;
88   PetscFunctionReturn(PETSC_SUCCESS);
89 }
90 
91 static PetscErrorCode PCMPICreate(PC pc)
92 {
93   PC_MPI     *km   = pc ? (PC_MPI *)pc->data : NULL;
94   MPI_Comm    comm = PC_MPI_COMM_WORLD;
95   KSP         ksp;
96   PetscInt    N[2], mincntperrank = 0;
97   PetscMPIInt size;
98   Mat         sA;
99   char       *cprefix = NULL;
100   PetscMPIInt len     = 0;
101 
102   PetscFunctionBegin;
103   PCMPIServerInSolve = PETSC_TRUE;
104   if (!PCMPICommSet) PetscCall(PCMPICommsCreate());
105   PetscCallMPI(MPI_Comm_size(comm, &size));
106   if (pc) {
107     if (size == 1) PetscCall(PetscPrintf(PETSC_COMM_SELF, "Warning: Running KSP type of MPI on a one rank MPI run, this will be less efficient then not using this type\n"));
108     PetscCall(PCGetOperators(pc, &sA, &sA));
109     PetscCall(MatGetSize(sA, &N[0], &N[1]));
110   }
111   PetscCallMPI(MPI_Bcast(N, 2, MPIU_INT, 0, comm));
112 
113   /* choose a suitable sized MPI_Comm for the problem to be solved on */
114   if (km) mincntperrank = km->mincntperrank;
115   PetscCallMPI(MPI_Bcast(&mincntperrank, 1, MPI_INT, 0, comm));
116   comm = PCMPIComms[PetscMin(size, PetscMax(1, N[0] / mincntperrank)) - 1];
117   if (comm == MPI_COMM_NULL) {
118     ksp                = NULL;
119     PCMPIServerInSolve = PETSC_FALSE;
120     PetscFunctionReturn(PETSC_SUCCESS);
121   }
122   PetscCall(PetscLogStagePush(PCMPIStage));
123   PetscCall(KSPCreate(comm, &ksp));
124   PetscCall(KSPSetNestLevel(ksp, 1));
125   PetscCall(PetscObjectSetTabLevel((PetscObject)ksp, 1));
126   PetscCall(PetscLogStagePop());
127   PetscCallMPI(MPI_Gather(&ksp, 1, MPI_AINT, pc ? km->ksps : NULL, 1, MPI_AINT, 0, comm));
128   if (pc) {
129     size_t      slen;
130     const char *prefix = NULL;
131     char       *found  = NULL;
132 
133     PetscCallMPI(MPI_Comm_size(comm, &size));
134     PCMPIKSPCounts[size - 1]++;
135     /* Created KSP gets prefix of PC minus the mpi_linear_solver_server_ portion */
136     PetscCall(PCGetOptionsPrefix(pc, &prefix));
137     PetscCheck(prefix, PETSC_COMM_SELF, PETSC_ERR_PLIB, "PCMPI missing required prefix");
138     PetscCall(PetscStrallocpy(prefix, &cprefix));
139     PetscCall(PetscStrstr(cprefix, "mpi_linear_solver_server_", &found));
140     PetscCheck(found, PETSC_COMM_SELF, PETSC_ERR_PLIB, "PCMPI missing mpi_linear_solver_server_ portion of prefix");
141     *found = 0;
142     PetscCall(PetscStrlen(cprefix, &slen));
143     len = (PetscMPIInt)slen;
144   }
145   PetscCallMPI(MPI_Bcast(&len, 1, MPI_INT, 0, comm));
146   if (len) {
147     if (!pc) PetscCall(PetscMalloc1(len + 1, &cprefix));
148     PetscCallMPI(MPI_Bcast(cprefix, len + 1, MPI_CHAR, 0, comm));
149     PetscCall(KSPSetOptionsPrefix(ksp, cprefix));
150   }
151   PetscCall(PetscFree(cprefix));
152   PCMPIServerInSolve = PETSC_FALSE;
153   PetscFunctionReturn(PETSC_SUCCESS);
154 }
155 
156 static PetscErrorCode PCMPISetMat(PC pc)
157 {
158   PC_MPI            *km = pc ? (PC_MPI *)pc->data : NULL;
159   Mat                A;
160   PetscInt           m, n, j, bs;
161   Mat                sA;
162   MPI_Comm           comm = PC_MPI_COMM_WORLD;
163   KSP                ksp;
164   PetscLayout        layout;
165   const PetscInt    *IA = NULL, *JA = NULL, *ia, *ja;
166   const PetscInt    *range;
167   PetscMPIInt       *NZ = NULL, sendcounti[PC_MPI_MAX_RANKS], displi[PC_MPI_MAX_RANKS], *NZdispl = NULL, nz, size, i;
168   const PetscScalar *a                = NULL, *sa;
169   PetscInt           matproperties[8] = {0}, rstart, rend;
170   char              *cprefix;
171 
172   PetscFunctionBegin;
173   PetscCallMPI(MPI_Scatter(pc ? km->ksps : NULL, 1, MPI_AINT, &ksp, 1, MPI_AINT, 0, comm));
174   if (!ksp) PetscFunctionReturn(PETSC_SUCCESS);
175   PCMPIServerInSolve = PETSC_TRUE;
176   PetscCall(PetscLogEventBegin(EventServerDist, NULL, NULL, NULL, NULL));
177   PetscCall(PetscObjectGetComm((PetscObject)ksp, &comm));
178   if (pc) {
179     PetscBool   isset, issymmetric, ishermitian, isspd, isstructurallysymmetric;
180     const char *prefix;
181     size_t      clen;
182 
183     PetscCallMPI(MPI_Comm_size(comm, &size));
184     PCMPIMatCounts[size - 1]++;
185     PetscCall(PCGetOperators(pc, &sA, &sA));
186     PetscCall(MatGetSize(sA, &matproperties[0], &matproperties[1]));
187     PetscCall(MatGetBlockSize(sA, &bs));
188     matproperties[2] = bs;
189     PetscCall(MatIsSymmetricKnown(sA, &isset, &issymmetric));
190     matproperties[3] = !isset ? 0 : (issymmetric ? 1 : 2);
191     PetscCall(MatIsHermitianKnown(sA, &isset, &ishermitian));
192     matproperties[4] = !isset ? 0 : (ishermitian ? 1 : 2);
193     PetscCall(MatIsSPDKnown(sA, &isset, &isspd));
194     matproperties[5] = !isset ? 0 : (isspd ? 1 : 2);
195     PetscCall(MatIsStructurallySymmetricKnown(sA, &isset, &isstructurallysymmetric));
196     matproperties[6] = !isset ? 0 : (isstructurallysymmetric ? 1 : 2);
197     /* Created Mat gets prefix of input Mat PLUS the mpi_linear_solver_server_ portion */
198     PetscCall(MatGetOptionsPrefix(sA, &prefix));
199     PetscCall(PetscStrallocpy(prefix, &cprefix));
200     PetscCall(PetscStrlen(cprefix, &clen));
201     matproperties[7] = (PetscInt)clen;
202   }
203   PetscCallMPI(MPI_Bcast(matproperties, PETSC_STATIC_ARRAY_LENGTH(matproperties), MPIU_INT, 0, comm));
204 
205   /* determine ownership ranges of matrix columns */
206   PetscCall(PetscLayoutCreate(comm, &layout));
207   PetscCall(PetscLayoutSetBlockSize(layout, matproperties[2]));
208   PetscCall(PetscLayoutSetSize(layout, matproperties[1]));
209   PetscCall(PetscLayoutSetUp(layout));
210   PetscCall(PetscLayoutGetLocalSize(layout, &n));
211   PetscCall(PetscLayoutDestroy(&layout));
212 
213   /* determine ownership ranges of matrix rows */
214   PetscCall(PetscLayoutCreate(comm, &layout));
215   PetscCall(PetscLayoutSetBlockSize(layout, matproperties[2]));
216   PetscCall(PetscLayoutSetSize(layout, matproperties[0]));
217   PetscCall(PetscLayoutSetUp(layout));
218   PetscCall(PetscLayoutGetLocalSize(layout, &m));
219   PetscCall(PetscLayoutGetRange(layout, &rstart, &rend));
220 
221   PetscCall(PetscLogEventBegin(EventServerDistMPI, NULL, NULL, NULL, NULL));
222   /* copy over the matrix nonzero structure and values */
223   if (pc) {
224     PetscCall(MatGetRowIJ(sA, 0, PETSC_FALSE, PETSC_FALSE, NULL, &IA, &JA, NULL));
225     if (!PCMPIServerUseShmget) {
226       NZ      = km->NZ;
227       NZdispl = km->NZdispl;
228       PetscCall(PetscLayoutGetRanges(layout, &range));
229       for (i = 0; i < size; i++) {
230         sendcounti[i] = (PetscMPIInt)(1 + range[i + 1] - range[i]);
231         NZ[i]         = (PetscMPIInt)(IA[range[i + 1]] - IA[range[i]]);
232       }
233       displi[0]  = 0;
234       NZdispl[0] = 0;
235       for (j = 1; j < size; j++) {
236         displi[j]  = displi[j - 1] + sendcounti[j - 1] - 1;
237         NZdispl[j] = NZdispl[j - 1] + NZ[j - 1];
238       }
239     }
240     PetscCall(MatSeqAIJGetArrayRead(sA, &sa));
241   }
242   PetscCall(PetscLayoutDestroy(&layout));
243 
244   PetscCall(MatCreate(comm, &A));
245   if (matproperties[7] > 0) {
246     if (!pc) PetscCall(PetscMalloc1(matproperties[7] + 1, &cprefix));
247     PetscCallMPI(MPI_Bcast(cprefix, matproperties[7] + 1, MPI_CHAR, 0, comm));
248     PetscCall(MatSetOptionsPrefix(A, cprefix));
249     PetscCall(PetscFree(cprefix));
250   }
251   PetscCall(MatAppendOptionsPrefix(A, "mpi_linear_solver_server_"));
252   PetscCall(MatSetSizes(A, m, n, matproperties[0], matproperties[1]));
253   PetscCall(MatSetType(A, MATMPIAIJ));
254 
255   if (!PCMPIServerUseShmget) {
256     PetscCallMPI(MPI_Scatter(NZ, 1, MPI_INT, &nz, 1, MPI_INT, 0, comm));
257     PetscCall(PetscMalloc3(n + 1, &ia, nz, &ja, nz, &a));
258     PetscCallMPI(MPI_Scatterv(IA, sendcounti, displi, MPIU_INT, (void *)ia, n + 1, MPIU_INT, 0, comm));
259     PetscCallMPI(MPI_Scatterv(JA, NZ, NZdispl, MPIU_INT, (void *)ja, nz, MPIU_INT, 0, comm));
260     PetscCallMPI(MPI_Scatterv(sa, NZ, NZdispl, MPIU_SCALAR, (void *)a, nz, MPIU_SCALAR, 0, comm));
261   } else {
262     const void           *addr[3] = {(const void **)IA, (const void **)JA, (const void **)sa};
263     PCMPIServerAddresses *addresses;
264 
265     PetscCall(PetscNew(&addresses));
266     addresses->n = 3;
267     PetscCall(PetscShmgetMapAddresses(comm, addresses->n, addr, addresses->addr));
268     ia = rstart + (PetscInt *)addresses->addr[0];
269     ja = ia[0] + (PetscInt *)addresses->addr[1];
270     a  = ia[0] + (PetscScalar *)addresses->addr[2];
271     PetscCall(PetscObjectContainerCompose((PetscObject)A, "PCMPIServerAddresses", (void *)addresses, (PetscErrorCode(*)(void *))PCMPIServerAddressesDestroy));
272   }
273 
274   if (pc) {
275     PetscCall(MatSeqAIJRestoreArrayRead(sA, &sa));
276     PetscCall(MatRestoreRowIJ(sA, 0, PETSC_FALSE, PETSC_FALSE, NULL, &IA, &JA, NULL));
277   }
278   PetscCall(PetscLogEventEnd(EventServerDistMPI, NULL, NULL, NULL, NULL));
279 
280   PetscCall(PetscLogStagePush(PCMPIStage));
281   PetscCall(MatMPIAIJSetPreallocationCSR(A, ia, ja, a));
282   PetscCall(MatSetBlockSize(A, matproperties[2]));
283 
284   if (matproperties[3]) PetscCall(MatSetOption(A, MAT_SYMMETRIC, matproperties[3] == 1 ? PETSC_TRUE : PETSC_FALSE));
285   if (matproperties[4]) PetscCall(MatSetOption(A, MAT_HERMITIAN, matproperties[4] == 1 ? PETSC_TRUE : PETSC_FALSE));
286   if (matproperties[5]) PetscCall(MatSetOption(A, MAT_SPD, matproperties[5] == 1 ? PETSC_TRUE : PETSC_FALSE));
287   if (matproperties[6]) PetscCall(MatSetOption(A, MAT_STRUCTURALLY_SYMMETRIC, matproperties[6] == 1 ? PETSC_TRUE : PETSC_FALSE));
288 
289   if (!PCMPIServerUseShmget) PetscCall(PetscFree3(ia, ja, a));
290   PetscCall(KSPSetOperators(ksp, A, A));
291   if (!ksp->vec_sol) PetscCall(MatCreateVecs(A, &ksp->vec_sol, &ksp->vec_rhs));
292   PetscCall(PetscLogStagePop());
293   if (pc && !PCMPIServerUseShmget) { /* needed for scatterv/gatherv of rhs and solution */
294     const PetscInt *range;
295 
296     PetscCall(VecGetOwnershipRanges(ksp->vec_sol, &range));
297     for (i = 0; i < size; i++) {
298       km->sendcount[i] = (PetscMPIInt)(range[i + 1] - range[i]);
299       km->displ[i]     = (PetscMPIInt)range[i];
300     }
301   }
302   PetscCall(MatDestroy(&A));
303   PetscCall(PetscLogEventEnd(EventServerDist, NULL, NULL, NULL, NULL));
304   PetscCall(KSPSetFromOptions(ksp));
305   PCMPIServerInSolve = PETSC_FALSE;
306   PetscFunctionReturn(PETSC_SUCCESS);
307 }
308 
309 static PetscErrorCode PCMPIUpdateMatValues(PC pc)
310 {
311   PC_MPI            *km = pc ? (PC_MPI *)pc->data : NULL;
312   KSP                ksp;
313   Mat                sA, A;
314   MPI_Comm           comm = PC_MPI_COMM_WORLD;
315   const PetscInt    *ia, *IA;
316   const PetscScalar *a;
317   PetscCount         nz;
318   const PetscScalar *sa = NULL;
319   PetscMPIInt        size;
320   PetscInt           rstart, matproperties[4] = {0, 0, 0, 0};
321 
322   PetscFunctionBegin;
323   if (pc) {
324     PetscCall(PCGetOperators(pc, &sA, &sA));
325     PetscCall(MatSeqAIJGetArrayRead(sA, &sa));
326     PetscCall(MatGetRowIJ(sA, 0, PETSC_FALSE, PETSC_FALSE, NULL, &IA, NULL, NULL));
327   }
328   PetscCallMPI(MPI_Scatter(pc ? km->ksps : NULL, 1, MPI_AINT, &ksp, 1, MPI_AINT, 0, comm));
329   if (!ksp) PetscFunctionReturn(PETSC_SUCCESS);
330   PCMPIServerInSolve = PETSC_TRUE;
331   PetscCall(PetscLogEventBegin(EventServerDist, NULL, NULL, NULL, NULL));
332   PetscCall(PetscObjectGetComm((PetscObject)ksp, &comm));
333   PetscCallMPI(MPI_Comm_size(comm, &size));
334   PCMPIMatCounts[size - 1]++;
335   PetscCall(KSPGetOperators(ksp, NULL, &A));
336   PetscCall(PetscLogEventBegin(EventServerDistMPI, NULL, NULL, NULL, NULL));
337   if (!PCMPIServerUseShmget) {
338     PetscCall(MatMPIAIJGetNumberNonzeros(A, &nz));
339     PetscCall(PetscMalloc1(nz, &a));
340     PetscCallMPI(MPI_Scatterv(sa, pc ? km->NZ : NULL, pc ? km->NZdispl : NULL, MPIU_SCALAR, (void *)a, nz, MPIU_SCALAR, 0, comm));
341   } else {
342     PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
343     PCMPIServerAddresses *addresses;
344     PetscCall(PetscObjectContainerQuery((PetscObject)A, "PCMPIServerAddresses", (void **)&addresses));
345     ia = rstart + (PetscInt *)addresses->addr[0];
346     a  = ia[0] + (PetscScalar *)addresses->addr[2];
347   }
348   PetscCall(PetscLogEventEnd(EventServerDistMPI, NULL, NULL, NULL, NULL));
349   if (pc) {
350     PetscBool isset, issymmetric, ishermitian, isspd, isstructurallysymmetric;
351 
352     PetscCall(MatSeqAIJRestoreArrayRead(sA, &sa));
353     PetscCall(MatRestoreRowIJ(sA, 0, PETSC_FALSE, PETSC_FALSE, NULL, &IA, NULL, NULL));
354 
355     PetscCall(MatIsSymmetricKnown(sA, &isset, &issymmetric));
356     matproperties[0] = !isset ? 0 : (issymmetric ? 1 : 2);
357     PetscCall(MatIsHermitianKnown(sA, &isset, &ishermitian));
358     matproperties[1] = !isset ? 0 : (ishermitian ? 1 : 2);
359     PetscCall(MatIsSPDKnown(sA, &isset, &isspd));
360     matproperties[2] = !isset ? 0 : (isspd ? 1 : 2);
361     PetscCall(MatIsStructurallySymmetricKnown(sA, &isset, &isstructurallysymmetric));
362     matproperties[3] = !isset ? 0 : (isstructurallysymmetric ? 1 : 2);
363   }
364   PetscCall(MatUpdateMPIAIJWithArray(A, a));
365   if (!PCMPIServerUseShmget) PetscCall(PetscFree(a));
366   PetscCallMPI(MPI_Bcast(matproperties, 4, MPIU_INT, 0, comm));
367   /* if any of these properties was previously set and is now not set this will result in incorrect properties in A since there is no way to unset a property */
368   if (matproperties[0]) PetscCall(MatSetOption(A, MAT_SYMMETRIC, matproperties[0] == 1 ? PETSC_TRUE : PETSC_FALSE));
369   if (matproperties[1]) PetscCall(MatSetOption(A, MAT_HERMITIAN, matproperties[1] == 1 ? PETSC_TRUE : PETSC_FALSE));
370   if (matproperties[2]) PetscCall(MatSetOption(A, MAT_SPD, matproperties[2] == 1 ? PETSC_TRUE : PETSC_FALSE));
371   if (matproperties[3]) PetscCall(MatSetOption(A, MAT_STRUCTURALLY_SYMMETRIC, matproperties[3] == 1 ? PETSC_TRUE : PETSC_FALSE));
372   PetscCall(PetscLogEventEnd(EventServerDist, NULL, NULL, NULL, NULL));
373   PCMPIServerInSolve = PETSC_FALSE;
374   PetscFunctionReturn(PETSC_SUCCESS);
375 }
376 
377 static PetscErrorCode PCMPISolve(PC pc, Vec B, Vec X)
378 {
379   PC_MPI            *km = pc ? (PC_MPI *)pc->data : NULL;
380   KSP                ksp;
381   MPI_Comm           comm = PC_MPI_COMM_WORLD;
382   const PetscScalar *sb   = NULL, *x;
383   PetscScalar       *b, *sx = NULL;
384   PetscInt           its, n;
385   PetscMPIInt        size;
386   void              *addr[2];
387 
388   PetscFunctionBegin;
389   PetscCallMPI(MPI_Scatter(pc ? km->ksps : &ksp, 1, MPI_AINT, &ksp, 1, MPI_AINT, 0, comm));
390   if (!ksp) PetscFunctionReturn(PETSC_SUCCESS);
391   PCMPIServerInSolve = PETSC_TRUE;
392   PetscCall(PetscLogEventBegin(EventServerDist, NULL, NULL, NULL, NULL));
393   PetscCall(PetscObjectGetComm((PetscObject)ksp, &comm));
394 
395   /* scatterv rhs */
396   PetscCallMPI(MPI_Comm_size(comm, &size));
397   if (pc) {
398     PetscInt N;
399 
400     PCMPISolveCounts[size - 1]++;
401     PetscCall(MatGetSize(pc->pmat, &N, NULL));
402     PCMPISizes[size - 1] += N;
403   }
404   PetscCall(VecGetLocalSize(ksp->vec_rhs, &n));
405   PetscCall(PetscLogEventBegin(EventServerDistMPI, NULL, NULL, NULL, NULL));
406   if (!PCMPIServerUseShmget) {
407     PetscCall(VecGetArray(ksp->vec_rhs, &b));
408     if (pc) PetscCall(VecGetArrayRead(B, &sb));
409     PetscCallMPI(MPI_Scatterv(sb, pc ? km->sendcount : NULL, pc ? km->displ : NULL, MPIU_SCALAR, b, n, MPIU_SCALAR, 0, comm));
410     if (pc) PetscCall(VecRestoreArrayRead(B, &sb));
411     PetscCall(VecRestoreArray(ksp->vec_rhs, &b));
412     // TODO: scatter initial guess if needed
413   } else {
414     PetscInt rstart;
415 
416     if (pc) PetscCall(VecGetArrayRead(B, &sb));
417     if (pc) PetscCall(VecGetArray(X, &sx));
418     const void *inaddr[2] = {(const void **)sb, (const void **)sx};
419     if (pc) PetscCall(VecRestoreArray(X, &sx));
420     if (pc) PetscCall(VecRestoreArrayRead(B, &sb));
421 
422     PetscCall(PetscShmgetMapAddresses(comm, 2, inaddr, addr));
423     PetscCall(VecGetOwnershipRange(ksp->vec_rhs, &rstart, NULL));
424     PetscCall(VecPlaceArray(ksp->vec_rhs, rstart + (PetscScalar *)addr[0]));
425     PetscCall(VecPlaceArray(ksp->vec_sol, rstart + (PetscScalar *)addr[1]));
426   }
427   PetscCall(PetscLogEventEnd(EventServerDistMPI, NULL, NULL, NULL, NULL));
428 
429   PetscCall(PetscLogEventEnd(EventServerDist, NULL, NULL, NULL, NULL));
430   PetscCall(PetscLogStagePush(PCMPIStage));
431   PetscCall(KSPSolve(ksp, NULL, NULL));
432   PetscCall(PetscLogStagePop());
433   PetscCall(PetscLogEventBegin(EventServerDist, NULL, NULL, NULL, NULL));
434   PetscCall(KSPGetIterationNumber(ksp, &its));
435   PCMPIIterations[size - 1] += its;
436   // TODO: send iterations up to outer KSP
437 
438   if (PCMPIServerUseShmget) PetscCall(PetscShmgetUnmapAddresses(2, addr));
439 
440   /* gather solution */
441   PetscCall(PetscLogEventBegin(EventServerDistMPI, NULL, NULL, NULL, NULL));
442   if (!PCMPIServerUseShmget) {
443     PetscCall(VecGetArrayRead(ksp->vec_sol, &x));
444     if (pc) PetscCall(VecGetArray(X, &sx));
445     PetscCallMPI(MPI_Gatherv(x, n, MPIU_SCALAR, sx, pc ? km->sendcount : NULL, pc ? km->displ : NULL, MPIU_SCALAR, 0, comm));
446     if (pc) PetscCall(VecRestoreArray(X, &sx));
447     PetscCall(VecRestoreArrayRead(ksp->vec_sol, &x));
448   } else {
449     PetscCall(VecResetArray(ksp->vec_rhs));
450     PetscCall(VecResetArray(ksp->vec_sol));
451   }
452   PetscCall(PetscLogEventEnd(EventServerDistMPI, NULL, NULL, NULL, NULL));
453   PetscCall(PetscLogEventEnd(EventServerDist, NULL, NULL, NULL, NULL));
454   PCMPIServerInSolve = PETSC_FALSE;
455   PetscFunctionReturn(PETSC_SUCCESS);
456 }
457 
458 static PetscErrorCode PCMPIDestroy(PC pc)
459 {
460   PC_MPI  *km = pc ? (PC_MPI *)pc->data : NULL;
461   KSP      ksp;
462   MPI_Comm comm = PC_MPI_COMM_WORLD;
463 
464   PetscFunctionBegin;
465   PetscCallMPI(MPI_Scatter(pc ? km->ksps : NULL, 1, MPI_AINT, &ksp, 1, MPI_AINT, 0, comm));
466   if (!ksp) PetscFunctionReturn(PETSC_SUCCESS);
467   PetscCall(PetscLogStagePush(PCMPIStage));
468   PCMPIServerInSolve = PETSC_TRUE;
469   PetscCall(KSPDestroy(&ksp));
470   PetscCall(PetscLogStagePop());
471   PCMPIServerInSolve = PETSC_FALSE;
472   PetscFunctionReturn(PETSC_SUCCESS);
473 }
474 
475 static PetscErrorCode PCMPIServerBroadcastRequest(PCMPICommand request)
476 {
477 #if defined(PETSC_HAVE_PTHREAD_MUTEX)
478   PetscMPIInt dummy1 = 1, dummy2;
479 #endif
480 
481   PetscFunctionBegin;
482 #if defined(PETSC_HAVE_PTHREAD_MUTEX)
483   if (PCMPIServerUseShmget) {
484     for (PetscMPIInt i = 1; i < PetscGlobalSize; i++) pthread_mutex_unlock(&PCMPIServerLocks[i]);
485   }
486 #endif
487   PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, MPI_COMM_WORLD));
488   /* next line ensures the sender has already taken the lock */
489 #if defined(PETSC_HAVE_PTHREAD_MUTEX)
490   if (PCMPIServerUseShmget) {
491     PetscCallMPI(MPI_Reduce(&dummy1, &dummy2, 1, MPI_INT, MPI_SUM, 0, PC_MPI_COMM_WORLD));
492     for (PetscMPIInt i = 1; i < PetscGlobalSize; i++) pthread_mutex_lock(&PCMPIServerLocks[i]);
493   }
494 #endif
495   PetscFunctionReturn(PETSC_SUCCESS);
496 }
497 
498 /*@C
499   PCMPIServerBegin - starts a server that runs on the `rank != 0` MPI processes waiting to process requests for
500   parallel `KSP` solves and management of parallel `KSP` objects.
501 
502   Logically Collective on all MPI processes except rank 0
503 
504   Options Database Keys:
505 + -mpi_linear_solver_server                   - causes the PETSc program to start in MPI linear solver server mode where only the first MPI rank runs user code
506 . -mpi_linear_solver_server_view              - displays information about all the linear systems solved by the MPI linear solver server at the conclusion of the program
507 - -mpi_linear_solver_server_use_shared_memory - use shared memory when communicating matrices and vectors to server processes (default where supported)
508 
509   Level: developer
510 
511   Note:
512   This is normally started automatically in `PetscInitialize()` when the option is provided
513 
514   See `PCMPI` for information on using the solver with a `KSP` object
515 
516   Developer Notes:
517   When called on MPI rank 0 this sets `PETSC_COMM_WORLD` to `PETSC_COMM_SELF` to allow a main program
518   written with `PETSC_COMM_WORLD` to run correctly on the single rank while all the ranks
519   (that would normally be sharing `PETSC_COMM_WORLD`) to run the solver server.
520 
521   Can this be integrated into the `PetscDevice` abstraction that is currently being developed?
522 
523   Conceivably `PCREDISTRIBUTE` could be organized in a similar manner to simplify its usage
524 
525   This could be implemented directly at the `KSP` level instead of using the `PCMPI` wrapper object
526 
527   The code could be extended to allow an MPI + OpenMP application to use the linear solver server concept across all shared-memory
528   nodes with a single MPI process per node for the user application but multiple MPI processes per node for the linear solver.
529 
530   The concept could also be extended for users's callbacks for `SNES`, `TS`, and `Tao` where the `SNESSolve()` for example, runs on
531   all MPI processes but the user callback only runs on one MPI process per node.
532 
533   PETSc could also be extended with an MPI-less API that provides access to PETSc's solvers without any reference to MPI, essentially remove
534   the `MPI_Comm` argument from PETSc calls.
535 
536 .seealso: [](sec_pcmpi), `PCMPIServerEnd()`, `PCMPI`, `KSPCheckPCMPI()`
537 @*/
538 PetscErrorCode PCMPIServerBegin(void)
539 {
540   PetscMPIInt rank;
541 
542   PetscFunctionBegin;
543   PetscCall(PetscInfo(NULL, "Starting MPI Linear Solver Server\n"));
544   if (PetscDefined(USE_SINGLE_LIBRARY)) {
545     PetscCall(VecInitializePackage());
546     PetscCall(MatInitializePackage());
547     PetscCall(DMInitializePackage());
548     PetscCall(PCInitializePackage());
549     PetscCall(KSPInitializePackage());
550     PetscCall(SNESInitializePackage());
551     PetscCall(TSInitializePackage());
552     PetscCall(TaoInitializePackage());
553   }
554   PetscCall(PetscLogStageRegister("PCMPI", &PCMPIStage));
555   PetscCall(PetscLogEventRegister("ServerDist", PC_CLASSID, &EventServerDist));
556   PetscCall(PetscLogEventRegister("ServerDistMPI", PC_CLASSID, &EventServerDistMPI));
557 
558   if (!PetscDefined(HAVE_SHMGET)) PCMPIServerUseShmget = PETSC_FALSE;
559   PetscCall(PetscOptionsGetBool(NULL, NULL, "-mpi_linear_solver_server_use_shared_memory", &PCMPIServerUseShmget, NULL));
560 
561   PetscCallMPI(MPI_Comm_rank(PC_MPI_COMM_WORLD, &rank));
562   if (PCMPIServerUseShmget) {
563 #if defined(PETSC_HAVE_PTHREAD_MUTEX)
564     PetscMPIInt size;
565 
566     PetscCallMPI(MPI_Comm_size(PETSC_COMM_WORLD, &size));
567     if (size > 1) {
568       pthread_mutex_t *locks;
569 
570       if (rank == 0) {
571         PCMPIServerActive = PETSC_TRUE;
572         PetscCall(PetscShmgetAllocateArray(size, sizeof(pthread_mutex_t), (void **)&locks));
573       }
574       PetscCall(PetscShmgetMapAddresses(PETSC_COMM_WORLD, 1, (const void **)&locks, (void **)&PCMPIServerLocks));
575       if (rank == 0) {
576         pthread_mutexattr_t attr;
577 
578         pthread_mutexattr_init(&attr);
579         pthread_mutexattr_setpshared(&attr, PTHREAD_PROCESS_SHARED);
580 
581         for (int i = 1; i < size; i++) {
582           pthread_mutex_init(&PCMPIServerLocks[i], &attr);
583           pthread_mutex_lock(&PCMPIServerLocks[i]);
584         }
585       }
586       PetscCallMPI(MPI_Barrier(PETSC_COMM_WORLD));
587     }
588 #endif
589   }
590   if (rank == 0) {
591     PETSC_COMM_WORLD  = PETSC_COMM_SELF;
592     PCMPIServerActive = PETSC_TRUE;
593     PetscFunctionReturn(PETSC_SUCCESS);
594   }
595 
596   while (PETSC_TRUE) {
597     PCMPICommand request = PCMPI_CREATE;
598 #if defined(PETSC_HAVE_PTHREAD_MUTEX)
599     PetscMPIInt dummy1 = 1, dummy2;
600 #endif
601 
602     // TODO: can we broadcast the number of active ranks here so only the correct subset of proccesses waits on the later scatters?
603 #if defined(PETSC_HAVE_PTHREAD_MUTEX)
604     if (PCMPIServerUseShmget) pthread_mutex_lock(&PCMPIServerLocks[PetscGlobalRank]);
605 #endif
606     PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, PC_MPI_COMM_WORLD));
607 #if defined(PETSC_HAVE_PTHREAD_MUTEX)
608     if (PCMPIServerUseShmget) {
609       /* next line ensures PetscGlobalRank has locked before rank 0 can take the lock back */
610       PetscCallMPI(MPI_Reduce(&dummy1, &dummy2, 1, MPI_INT, MPI_SUM, 0, PC_MPI_COMM_WORLD));
611       pthread_mutex_unlock(&PCMPIServerLocks[PetscGlobalRank]);
612     }
613 #endif
614     switch (request) {
615     case PCMPI_CREATE:
616       PetscCall(PCMPICreate(NULL));
617       break;
618     case PCMPI_SET_MAT:
619       PetscCall(PCMPISetMat(NULL));
620       break;
621     case PCMPI_UPDATE_MAT_VALUES:
622       PetscCall(PCMPIUpdateMatValues(NULL));
623       break;
624     case PCMPI_VIEW:
625       // PetscCall(PCMPIView(NULL));
626       break;
627     case PCMPI_SOLVE:
628       PetscCall(PCMPISolve(NULL, NULL, NULL));
629       break;
630     case PCMPI_DESTROY:
631       PetscCall(PCMPIDestroy(NULL));
632       break;
633     case PCMPI_EXIT:
634       if (PCMPIServerUseShmget) PetscCall(PetscShmgetUnmapAddresses(1, (void **)&PCMPIServerLocks));
635       PetscCall(PetscFinalize());
636       exit(0); /* not sure if this is a good idea, but cannot return because it will run users main program */
637       break;
638     default:
639       break;
640     }
641   }
642   PetscFunctionReturn(PETSC_SUCCESS);
643 }
644 
645 /*@C
646   PCMPIServerEnd - ends a server that runs on the rank != 0 MPI processes waiting to process requests for
647   parallel KSP solves and management of parallel `KSP` objects.
648 
649   Logically Collective on all MPI ranks except 0
650 
651   Level: developer
652 
653   Note:
654   This is normally called automatically in `PetscFinalize()`
655 
656 .seealso: [](sec_pcmpi), `PCMPIServerBegin()`, `PCMPI`, `KSPCheckPCMPI()`
657 @*/
658 PetscErrorCode PCMPIServerEnd(void)
659 {
660   PetscFunctionBegin;
661   if (PetscGlobalRank == 0) {
662     PetscViewer       viewer = NULL;
663     PetscViewerFormat format;
664 
665     PetscCall(PetscShmgetAddressesFinalize());
666     PetscCall(PCMPIServerBroadcastRequest(PCMPI_EXIT));
667     if (PCMPIServerUseShmget) PetscCall(PetscShmgetUnmapAddresses(1, (void **)&PCMPIServerLocks));
668     PETSC_COMM_WORLD = MPI_COMM_WORLD; /* could use PC_MPI_COMM_WORLD */
669     PetscOptionsBegin(PETSC_COMM_SELF, NULL, "MPI linear solver server options", NULL);
670     PetscCall(PetscOptionsViewer("-mpi_linear_solver_server_view", "View information about system solved with the server", "PCMPI", &viewer, &format, NULL));
671     PetscOptionsEnd();
672     if (viewer) {
673       PetscBool isascii;
674 
675       PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii));
676       if (isascii) {
677         PetscMPIInt size;
678         PetscMPIInt i;
679 
680         PetscCallMPI(MPI_Comm_size(PETSC_COMM_WORLD, &size));
681         PetscCall(PetscViewerASCIIPrintf(viewer, "MPI linear solver server statistics:\n"));
682         PetscCall(PetscViewerASCIIPrintf(viewer, "    Ranks        KSPSolve()s     Mats        KSPs       Avg. Size      Avg. Its\n"));
683         if (PCMPIKSPCountsSeq) {
684           PetscCall(PetscViewerASCIIPrintf(viewer, "  Sequential         %" PetscInt_FMT "                         %" PetscInt_FMT "            %" PetscInt_FMT "           %" PetscInt_FMT "\n", PCMPISolveCountsSeq, PCMPIKSPCountsSeq, PCMPISizesSeq / PCMPISolveCountsSeq, PCMPIIterationsSeq / PCMPISolveCountsSeq));
685         }
686         for (i = 0; i < size; i++) {
687           if (PCMPIKSPCounts[i]) {
688             PetscCall(PetscViewerASCIIPrintf(viewer, "     %d               %" PetscInt_FMT "            %" PetscInt_FMT "           %" PetscInt_FMT "            %" PetscInt_FMT "            %" PetscInt_FMT "\n", i + 1, PCMPISolveCounts[i], PCMPIMatCounts[i], PCMPIKSPCounts[i], PCMPISizes[i] / PCMPISolveCounts[i], PCMPIIterations[i] / PCMPISolveCounts[i]));
689           }
690         }
691         PetscCall(PetscViewerASCIIPrintf(viewer, "MPI linear solver server %susing shared memory\n", PCMPIServerUseShmget ? "" : "not "));
692       }
693       PetscCall(PetscViewerDestroy(&viewer));
694     }
695   }
696   PetscCall(PCMPICommsDestroy());
697   PCMPIServerActive = PETSC_FALSE;
698   PetscFunctionReturn(PETSC_SUCCESS);
699 }
700 
701 /*
702     This version is used in the trivial case when the MPI parallel solver server is running on just the original MPI rank 0
703     because, for example, the problem is small. This version is more efficient because it does not require copying any data
704 */
705 static PetscErrorCode PCSetUp_Seq(PC pc)
706 {
707   PC_MPI     *km = (PC_MPI *)pc->data;
708   Mat         sA;
709   const char *prefix;
710   char       *found = NULL, *cprefix;
711 
712   PetscFunctionBegin;
713   PCMPIServerInSolve = PETSC_TRUE;
714   PetscCall(PCGetOperators(pc, NULL, &sA));
715   PetscCall(PCGetOptionsPrefix(pc, &prefix));
716   PetscCall(KSPCreate(PETSC_COMM_SELF, &km->ksps[0]));
717   PetscCall(KSPSetNestLevel(km->ksps[0], 1));
718   PetscCall(PetscObjectSetTabLevel((PetscObject)km->ksps[0], 1));
719 
720   /* Created KSP gets prefix of PC minus the mpi_linear_solver_server_ portion */
721   PetscCall(PCGetOptionsPrefix(pc, &prefix));
722   PetscCheck(prefix, PETSC_COMM_SELF, PETSC_ERR_PLIB, "PCMPI missing required prefix");
723   PetscCall(PetscStrallocpy(prefix, &cprefix));
724   PetscCall(PetscStrstr(cprefix, "mpi_linear_solver_server_", &found));
725   PetscCheck(found, PETSC_COMM_SELF, PETSC_ERR_PLIB, "PCMPI missing mpi_linear_solver_server_ portion of prefix");
726   *found = 0;
727   PetscCall(KSPSetOptionsPrefix(km->ksps[0], cprefix));
728   PetscCall(PetscFree(cprefix));
729 
730   PetscCall(KSPSetOperators(km->ksps[0], sA, sA));
731   PetscCall(KSPSetFromOptions(km->ksps[0]));
732   PetscCall(KSPSetUp(km->ksps[0]));
733   PetscCall(PetscInfo((PetscObject)pc, "MPI parallel linear solver system is being solved directly on rank 0 due to its small size\n"));
734   PCMPIKSPCountsSeq++;
735   PCMPIServerInSolve = PETSC_FALSE;
736   PetscFunctionReturn(PETSC_SUCCESS);
737 }
738 
739 static PetscErrorCode PCApply_Seq(PC pc, Vec b, Vec x)
740 {
741   PC_MPI  *km = (PC_MPI *)pc->data;
742   PetscInt its, n;
743   Mat      A;
744 
745   PetscFunctionBegin;
746   PCMPIServerInSolve = PETSC_TRUE;
747   PetscCall(KSPSolve(km->ksps[0], b, x));
748   PetscCall(KSPGetIterationNumber(km->ksps[0], &its));
749   PCMPISolveCountsSeq++;
750   PCMPIIterationsSeq += its;
751   PetscCall(KSPGetOperators(km->ksps[0], NULL, &A));
752   PetscCall(MatGetSize(A, &n, NULL));
753   PCMPISizesSeq += n;
754   PCMPIServerInSolve = PETSC_FALSE;
755   /*
756     do not keep reference to previous rhs and solution since destroying them in the next KSPSolve()
757     my use PetscFree() instead of PCMPIArrayDeallocate()
758   */
759   PetscCall(VecDestroy(&km->ksps[0]->vec_rhs));
760   PetscCall(VecDestroy(&km->ksps[0]->vec_sol));
761   PetscFunctionReturn(PETSC_SUCCESS);
762 }
763 
764 static PetscErrorCode PCView_Seq(PC pc, PetscViewer viewer)
765 {
766   PC_MPI *km = (PC_MPI *)pc->data;
767 
768   PetscFunctionBegin;
769   PetscCall(PetscViewerASCIIPrintf(viewer, "Running MPI linear solver server directly on rank 0 due to its small size\n"));
770   PetscCall(PetscViewerASCIIPrintf(viewer, "Desired minimum number of nonzeros per rank for MPI parallel solve %d\n", (int)km->mincntperrank));
771   PetscCall(PetscViewerASCIIPrintf(viewer, "*** Use -mpi_linear_solver_server_view to statistics on all the solves ***\n"));
772   PetscFunctionReturn(PETSC_SUCCESS);
773 }
774 
775 static PetscErrorCode PCDestroy_Seq(PC pc)
776 {
777   PC_MPI *km = (PC_MPI *)pc->data;
778   Mat     A, B;
779   Vec     x, b;
780 
781   PetscFunctionBegin;
782   PCMPIServerInSolve = PETSC_TRUE;
783   /* since matrices and vectors are shared with outer KSP we need to ensure they are not destroyed with PetscFree() */
784   PetscCall(KSPGetOperators(km->ksps[0], &A, &B));
785   PetscCall(PetscObjectReference((PetscObject)A));
786   PetscCall(PetscObjectReference((PetscObject)B));
787   PetscCall(KSPGetSolution(km->ksps[0], &x));
788   PetscCall(PetscObjectReference((PetscObject)x));
789   PetscCall(KSPGetRhs(km->ksps[0], &b));
790   PetscCall(PetscObjectReference((PetscObject)b));
791   PetscCall(KSPDestroy(&km->ksps[0]));
792   PetscCall(PetscFree(pc->data));
793   PCMPIServerInSolve = PETSC_FALSE;
794   PetscCall(MatDestroy(&A));
795   PetscCall(MatDestroy(&B));
796   PetscCall(VecDestroy(&x));
797   PetscCall(VecDestroy(&b));
798   PetscFunctionReturn(PETSC_SUCCESS);
799 }
800 
801 /*
802      PCSetUp_MPI - Trigger the creation of the MPI parallel PC and copy parts of the matrix and
803      right-hand side to the parallel PC
804 */
805 static PetscErrorCode PCSetUp_MPI(PC pc)
806 {
807   PC_MPI     *km = (PC_MPI *)pc->data;
808   PetscMPIInt rank, size;
809   PetscBool   newmatrix = PETSC_FALSE;
810 
811   PetscFunctionBegin;
812   PetscCallMPI(MPI_Comm_rank(MPI_COMM_WORLD, &rank));
813   PetscCheck(rank == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "PCMPI can only be used from 0th rank of MPI_COMM_WORLD. Perhaps a missing -mpi_linear_solver_server?");
814   PetscCallMPI(MPI_Comm_size(MPI_COMM_WORLD, &size));
815 
816   if (!pc->setupcalled) {
817     if (!km->alwaysuseserver) {
818       PetscInt n;
819       Mat      sA;
820       /* short circuit for small systems */
821       PetscCall(PCGetOperators(pc, &sA, &sA));
822       PetscCall(MatGetSize(sA, &n, NULL));
823       if (n < 2 * km->mincntperrank - 1 || size == 1) {
824         pc->ops->setup   = NULL;
825         pc->ops->apply   = PCApply_Seq;
826         pc->ops->destroy = PCDestroy_Seq;
827         pc->ops->view    = PCView_Seq;
828         PetscCall(PCSetUp_Seq(pc));
829         PetscFunctionReturn(PETSC_SUCCESS);
830       }
831     }
832 
833     PetscCall(PCMPIServerBroadcastRequest(PCMPI_CREATE));
834     PetscCall(PCMPICreate(pc));
835     newmatrix = PETSC_TRUE;
836   }
837   if (pc->flag == DIFFERENT_NONZERO_PATTERN) newmatrix = PETSC_TRUE;
838 
839   if (newmatrix) {
840     PetscCall(PetscInfo((PetscObject)pc, "New matrix or matrix has changed nonzero structure\n"));
841     PetscCall(PCMPIServerBroadcastRequest(PCMPI_SET_MAT));
842     PetscCall(PCMPISetMat(pc));
843   } else {
844     PetscCall(PetscInfo((PetscObject)pc, "Matrix has only changed nonzero values\n"));
845     PetscCall(PCMPIServerBroadcastRequest(PCMPI_UPDATE_MAT_VALUES));
846     PetscCall(PCMPIUpdateMatValues(pc));
847   }
848   PetscFunctionReturn(PETSC_SUCCESS);
849 }
850 
851 static PetscErrorCode PCApply_MPI(PC pc, Vec b, Vec x)
852 {
853   PetscFunctionBegin;
854   PetscCall(PCMPIServerBroadcastRequest(PCMPI_SOLVE));
855   PetscCall(PCMPISolve(pc, b, x));
856   PetscFunctionReturn(PETSC_SUCCESS);
857 }
858 
859 static PetscErrorCode PCDestroy_MPI(PC pc)
860 {
861   PetscFunctionBegin;
862   PetscCall(PCMPIServerBroadcastRequest(PCMPI_DESTROY));
863   PetscCall(PCMPIDestroy(pc));
864   PetscCall(PetscFree(pc->data));
865   PetscFunctionReturn(PETSC_SUCCESS);
866 }
867 
868 /*
869      PCView_MPI - Cannot call view on the MPI parallel KSP because other ranks do not have access to the viewer, use options database
870 */
871 static PetscErrorCode PCView_MPI(PC pc, PetscViewer viewer)
872 {
873   PC_MPI     *km = (PC_MPI *)pc->data;
874   MPI_Comm    comm;
875   PetscMPIInt size;
876 
877   PetscFunctionBegin;
878   PetscCall(PetscObjectGetComm((PetscObject)km->ksps[0], &comm));
879   PetscCallMPI(MPI_Comm_size(comm, &size));
880   PetscCall(PetscViewerASCIIPrintf(viewer, "Size of MPI communicator used for MPI parallel KSP solve %d\n", size));
881   PetscCall(PetscViewerASCIIPrintf(viewer, "Desired minimum number of matrix rows on each MPI process for MPI parallel solve %d\n", (int)km->mincntperrank));
882   PetscCall(PetscViewerASCIIPrintf(viewer, "*** Use -mpi_linear_solver_server_view to view statistics on all the solves ***\n"));
883   PetscFunctionReturn(PETSC_SUCCESS);
884 }
885 
886 static PetscErrorCode PCSetFromOptions_MPI(PC pc, PetscOptionItems *PetscOptionsObject)
887 {
888   PC_MPI *km = (PC_MPI *)pc->data;
889 
890   PetscFunctionBegin;
891   PetscOptionsHeadBegin(PetscOptionsObject, "MPI linear solver server options");
892   PetscCall(PetscOptionsInt("-minimum_count_per_rank", "Desired minimum number of nonzeros per rank", "None", km->mincntperrank, &km->mincntperrank, NULL));
893   PetscCall(PetscOptionsBool("-always_use_server", "Use the server even if only one rank is used for the solve (for debugging)", "None", km->alwaysuseserver, &km->alwaysuseserver, NULL));
894   PetscOptionsHeadEnd();
895   PetscFunctionReturn(PETSC_SUCCESS);
896 }
897 
898 /*MC
899      PCMPI - Calls an MPI parallel `KSP` to solve a linear system from user code running on one process
900 
901    Options Database Keys for the Server:
902 +  -mpi_linear_solver_server - causes the PETSc program to start in MPI linear solver server mode where only the first MPI rank runs user code
903 .  -mpi_linear_solver_server_view - displays information about all the linear systems solved by the MPI linear solver server
904 -  -mpi_linear_solver_server_use_shared_memory <true, false> - use shared memory to distribute matrix and right hand side, defaults to true
905 
906    Options Database Keys for a specific `KSP` object
907 +  -[any_ksp_prefix]_mpi_linear_solver_server_minimum_count_per_rank - sets the minimum size of the linear system per MPI rank that the solver will strive for
908 -  -[any_ksp_prefix]_mpi_linear_solver_server_always_use_server - use the server solver code even if the particular system is only solved on the process (for debugging and testing purposes)
909 
910    Level: developer
911 
912    Notes:
913    This cannot be used with vectors or matrices that are created using arrays provided by the user, such as `VecCreateWithArray()` or
914    `MatCreateSeqAIJWithArrays()`
915 
916    The options database prefix for the actual solver is any prefix provided before use to the original `KSP` with `KSPSetOptionsPrefix()`, mostly commonly no prefix is used.
917 
918    It can be particularly useful for user OpenMP code or potentially user GPU code.
919 
920    When the program is running with a single MPI process then it directly uses the provided matrix and right-hand side
921    and does not need to distribute the matrix and vector to the various MPI processes; thus it incurs no extra overhead over just using the `KSP` directly.
922 
923    The solver options for actual solving `KSP` and `PC` must be controlled via the options database, calls to set options directly on the user level `KSP` and `PC` have no effect
924    because they are not the actual solver objects.
925 
926    When `-log_view` is used with this solver the events within the parallel solve are logging in their own stage. Some of the logging in the other
927    stages will be confusing since the event times are only recorded on the 0th MPI rank, thus the percent of time in the events will be misleading.
928 
929    Developer Note:
930    This `PCType` is never directly selected by the user, it is set when the option `-mpi_linear_solver_server` is used and the `PC` is at the outer most nesting of
931    a `KSP`. The outer most `KSP` object is automatically set to `KSPPREONLY` and thus is not directly visible to the user.
932 
933 .seealso: [](sec_pcmpi), `KSPCreate()`, `KSPSetType()`, `KSPType`, `KSP`, `PC`, `PCMPIServerBegin()`, `PCMPIServerEnd()`, `KSPCheckPCMPI()`
934 M*/
935 PETSC_EXTERN PetscErrorCode PCCreate_MPI(PC pc)
936 {
937   PC_MPI *km;
938   char   *found = NULL;
939 
940   PetscFunctionBegin;
941   PetscCall(PetscStrstr(((PetscObject)pc)->prefix, "mpi_linear_solver_server_", &found));
942   PetscCheck(found, PETSC_COMM_SELF, PETSC_ERR_PLIB, "PCMPI object prefix does not have mpi_linear_solver_server_");
943 
944   /* material from PCSetType() */
945   PetscTryTypeMethod(pc, destroy);
946   pc->ops->destroy = NULL;
947   pc->data         = NULL;
948 
949   PetscCall(PetscFunctionListDestroy(&((PetscObject)pc)->qlist));
950   PetscCall(PetscMemzero(pc->ops, sizeof(struct _PCOps)));
951   pc->modifysubmatrices  = NULL;
952   pc->modifysubmatricesP = NULL;
953   pc->setupcalled        = 0;
954 
955   PetscCall(PetscNew(&km));
956   pc->data = (void *)km;
957 
958   km->mincntperrank = 10000;
959 
960   pc->ops->setup          = PCSetUp_MPI;
961   pc->ops->apply          = PCApply_MPI;
962   pc->ops->destroy        = PCDestroy_MPI;
963   pc->ops->view           = PCView_MPI;
964   pc->ops->setfromoptions = PCSetFromOptions_MPI;
965   PetscCall(PetscObjectChangeTypeName((PetscObject)pc, PCMPI));
966   PetscFunctionReturn(PETSC_SUCCESS);
967 }
968 
969 /*@
970   PCMPIGetKSP - Gets the `KSP` created by the `PCMPI`
971 
972   Not Collective
973 
974   Input Parameter:
975 . pc - the preconditioner context
976 
977   Output Parameter:
978 . innerksp - the inner `KSP`
979 
980   Level: advanced
981 
982 .seealso: [](ch_ksp), `KSP`, `PCMPI`, `PCREDISTRIBUTE`
983 @*/
984 PetscErrorCode PCMPIGetKSP(PC pc, KSP *innerksp)
985 {
986   PC_MPI *red = (PC_MPI *)pc->data;
987 
988   PetscFunctionBegin;
989   PetscValidHeaderSpecific(pc, PC_CLASSID, 1);
990   PetscAssertPointer(innerksp, 2);
991   *innerksp = red->ksps[0];
992   PetscFunctionReturn(PETSC_SUCCESS);
993 }
994