xref: /petsc/src/ksp/pc/impls/mpi/pcmpi.c (revision df4cd43f92eaa320656440c40edb1046daee8f75)
1 /*
2     This file creates an MPI parallel KSP from a sequential PC that lives on MPI rank 0.
3     It is intended to allow using PETSc MPI parallel linear solvers from non-MPI codes.
4 
5     That program may use OpenMP to compute the right hand side and matrix for the linear system
6 
7     The code uses MPI_COMM_WORLD below but maybe it should be PETSC_COMM_WORLD
8 
9     The resulting KSP and PC can only be controlled via the options database, though some common commands
10     could be passed through the server.
11 
12 */
13 #include <petsc/private/pcimpl.h>
14 #include <petsc/private/kspimpl.h>
15 #include <petsc.h>
16 
17 #define PC_MPI_MAX_RANKS  256
18 #define PC_MPI_COMM_WORLD MPI_COMM_WORLD
19 
20 typedef struct {
21   KSP         ksps[PC_MPI_MAX_RANKS];                               /* The addresses of the MPI parallel KSP on each rank, NULL when not on a rank. */
22   PetscMPIInt sendcount[PC_MPI_MAX_RANKS], displ[PC_MPI_MAX_RANKS]; /* For scatter/gather of rhs/solution */
23   PetscMPIInt NZ[PC_MPI_MAX_RANKS], NZdispl[PC_MPI_MAX_RANKS];      /* For scatter of nonzero values in matrix (and nonzero column indices initially */
24   PetscInt    mincntperrank;                                        /* minimum number of desired nonzeros per active rank in MPI parallel KSP solve */
25   PetscBool   alwaysuseserver;                                      /* for debugging use the server infrastructure even if only one MPI rank is used for the solve */
26 } PC_MPI;
27 
28 typedef enum {
29   PCMPI_EXIT, /* exit the PC server loop, means the controlling sequential program is done */
30   PCMPI_CREATE,
31   PCMPI_SET_MAT,           /* set original matrix (or one with different nonzero pattern) */
32   PCMPI_UPDATE_MAT_VALUES, /* update current matrix with new nonzero values */
33   PCMPI_SOLVE,
34   PCMPI_VIEW,
35   PCMPI_DESTROY /* destroy a KSP that is no longer needed */
36 } PCMPICommand;
37 
38 static MPI_Comm  PCMPIComms[PC_MPI_MAX_RANKS];
39 static PetscBool PCMPICommSet = PETSC_FALSE;
40 static PetscInt  PCMPISolveCounts[PC_MPI_MAX_RANKS], PCMPIKSPCounts[PC_MPI_MAX_RANKS], PCMPIMatCounts[PC_MPI_MAX_RANKS], PCMPISolveCountsSeq = 0, PCMPIKSPCountsSeq = 0;
41 static PetscInt  PCMPIIterations[PC_MPI_MAX_RANKS], PCMPISizes[PC_MPI_MAX_RANKS], PCMPIIterationsSeq = 0, PCMPISizesSeq = 0;
42 
43 static PetscErrorCode PCMPICommsCreate(void)
44 {
45   MPI_Comm    comm = PC_MPI_COMM_WORLD;
46   PetscMPIInt size, rank, i;
47 
48   PetscFunctionBegin;
49   PetscCallMPI(MPI_Comm_size(comm, &size));
50   PetscCheck(size <= PC_MPI_MAX_RANKS, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for using more than PC_MPI_MAX_RANKS MPI ranks in an MPI linear solver server solve");
51   PetscCallMPI(MPI_Comm_rank(comm, &rank));
52   /* comm for size 1 is useful only for debugging */
53   for (i = 0; i < size; i++) {
54     PetscMPIInt color = rank < i + 1 ? 0 : MPI_UNDEFINED;
55     PetscCallMPI(MPI_Comm_split(comm, color, 0, &PCMPIComms[i]));
56     PCMPISolveCounts[i] = 0;
57     PCMPIKSPCounts[i]   = 0;
58     PCMPIIterations[i]  = 0;
59     PCMPISizes[i]       = 0;
60   }
61   PCMPICommSet = PETSC_TRUE;
62   PetscFunctionReturn(PETSC_SUCCESS);
63 }
64 
65 PetscErrorCode PCMPICommsDestroy(void)
66 {
67   MPI_Comm    comm = PC_MPI_COMM_WORLD;
68   PetscMPIInt size, rank, i;
69 
70   PetscFunctionBegin;
71   if (!PCMPICommSet) PetscFunctionReturn(PETSC_SUCCESS);
72   PetscCallMPI(MPI_Comm_size(comm, &size));
73   PetscCallMPI(MPI_Comm_rank(comm, &rank));
74   for (i = 0; i < size; i++) {
75     if (PCMPIComms[i] != MPI_COMM_NULL) PetscCallMPI(MPI_Comm_free(&PCMPIComms[i]));
76   }
77   PCMPICommSet = PETSC_FALSE;
78   PetscFunctionReturn(PETSC_SUCCESS);
79 }
80 
81 static PetscErrorCode PCMPICreate(PC pc)
82 {
83   PC_MPI     *km   = pc ? (PC_MPI *)pc->data : NULL;
84   MPI_Comm    comm = PC_MPI_COMM_WORLD;
85   KSP         ksp;
86   PetscInt    N[2], mincntperrank = 0;
87   PetscMPIInt size;
88   Mat         sA;
89   char       *prefix;
90   PetscMPIInt len = 0;
91 
92   PetscFunctionBegin;
93   if (!PCMPICommSet) PetscCall(PCMPICommsCreate());
94   PetscCallMPI(MPI_Comm_size(comm, &size));
95   if (pc) {
96     if (size == 1) PetscCall(PetscPrintf(PETSC_COMM_SELF, "Warning: Running KSP type of MPI on a one rank MPI run, this will be less efficient then not using this type\n"));
97     PetscCall(PCGetOperators(pc, &sA, &sA));
98     PetscCall(MatGetSize(sA, &N[0], &N[1]));
99   }
100   PetscCallMPI(MPI_Bcast(N, 2, MPIU_INT, 0, comm));
101 
102   /* choose a suitable sized MPI_Comm for the problem to be solved on */
103   if (km) mincntperrank = km->mincntperrank;
104   PetscCallMPI(MPI_Bcast(&mincntperrank, 1, MPI_INT, 0, comm));
105   comm = PCMPIComms[PetscMin(size, PetscMax(1, N[0] / mincntperrank)) - 1];
106   if (comm == MPI_COMM_NULL) {
107     ksp = NULL;
108     PetscFunctionReturn(PETSC_SUCCESS);
109   }
110   PetscCall(PetscLogStagePush(PCMPIStage));
111   PetscCall(KSPCreate(comm, &ksp));
112   PetscCall(PetscLogStagePop());
113   PetscCallMPI(MPI_Gather(&ksp, 1, MPI_AINT, pc ? km->ksps : NULL, 1, MPI_AINT, 0, comm));
114   if (pc) {
115     size_t slen;
116 
117     PetscCallMPI(MPI_Comm_size(comm, &size));
118     PCMPIKSPCounts[size - 1]++;
119     PetscCall(PCGetOptionsPrefix(pc, (const char **)&prefix));
120     PetscCall(PetscStrlen(prefix, &slen));
121     len = (PetscMPIInt)slen;
122   }
123   PetscCallMPI(MPI_Bcast(&len, 1, MPI_INT, 0, comm));
124   if (len) {
125     if (!pc) PetscCall(PetscMalloc1(len + 1, &prefix));
126     PetscCallMPI(MPI_Bcast(prefix, len + 1, MPI_CHAR, 0, comm));
127     PetscCall(KSPSetOptionsPrefix(ksp, prefix));
128   }
129   PetscCall(KSPAppendOptionsPrefix(ksp, "mpi_"));
130   PetscFunctionReturn(PETSC_SUCCESS);
131 }
132 
133 static PetscErrorCode PCMPISetMat(PC pc)
134 {
135   PC_MPI            *km = pc ? (PC_MPI *)pc->data : NULL;
136   Mat                A;
137   PetscInt           m, n, *ia, *ja, j, bs;
138   Mat                sA;
139   MPI_Comm           comm = PC_MPI_COMM_WORLD;
140   KSP                ksp;
141   PetscLayout        layout;
142   const PetscInt    *IA = NULL, *JA = NULL;
143   const PetscInt    *range;
144   PetscMPIInt       *NZ = NULL, sendcounti[PC_MPI_MAX_RANKS], displi[PC_MPI_MAX_RANKS], *NZdispl = NULL, nz, size, i;
145   PetscScalar       *a;
146   const PetscScalar *sa               = NULL;
147   PetscInt           matproperties[7] = {0, 0, 0, 0, 0, 0, 0};
148 
149   PetscFunctionBegin;
150   PetscCallMPI(MPI_Scatter(pc ? km->ksps : NULL, 1, MPI_AINT, &ksp, 1, MPI_AINT, 0, comm));
151   if (!ksp) PetscFunctionReturn(PETSC_SUCCESS);
152   PetscCall(PetscObjectGetComm((PetscObject)ksp, &comm));
153   if (pc) {
154     PetscBool isset, issymmetric, ishermitian, isspd, isstructurallysymmetric;
155 
156     PetscCallMPI(MPI_Comm_size(comm, &size));
157     PCMPIMatCounts[size - 1]++;
158     PetscCall(PCGetOperators(pc, &sA, &sA));
159     PetscCall(MatGetSize(sA, &matproperties[0], &matproperties[1]));
160     PetscCall(MatGetBlockSize(sA, &bs));
161     matproperties[2] = bs;
162     PetscCall(MatIsSymmetricKnown(sA, &isset, &issymmetric));
163     matproperties[3] = !isset ? 0 : (issymmetric ? 1 : 2);
164     PetscCall(MatIsHermitianKnown(sA, &isset, &ishermitian));
165     matproperties[4] = !isset ? 0 : (ishermitian ? 1 : 2);
166     PetscCall(MatIsSPDKnown(sA, &isset, &isspd));
167     matproperties[5] = !isset ? 0 : (isspd ? 1 : 2);
168     PetscCall(MatIsStructurallySymmetricKnown(sA, &isset, &isstructurallysymmetric));
169     matproperties[6] = !isset ? 0 : (isstructurallysymmetric ? 1 : 2);
170   }
171   PetscCallMPI(MPI_Bcast(matproperties, 7, MPIU_INT, 0, comm));
172 
173   /* determine ownership ranges of matrix columns */
174   PetscCall(PetscLayoutCreate(comm, &layout));
175   PetscCall(PetscLayoutSetBlockSize(layout, matproperties[2]));
176   PetscCall(PetscLayoutSetSize(layout, matproperties[1]));
177   PetscCall(PetscLayoutSetUp(layout));
178   PetscCall(PetscLayoutGetLocalSize(layout, &n));
179   PetscCall(PetscLayoutDestroy(&layout));
180 
181   /* determine ownership ranges of matrix rows */
182   PetscCall(PetscLayoutCreate(comm, &layout));
183   PetscCall(PetscLayoutSetBlockSize(layout, matproperties[2]));
184   PetscCall(PetscLayoutSetSize(layout, matproperties[0]));
185   PetscCall(PetscLayoutSetUp(layout));
186   PetscCall(PetscLayoutGetLocalSize(layout, &m));
187 
188   /* copy over the matrix nonzero structure and values */
189   if (pc) {
190     NZ      = km->NZ;
191     NZdispl = km->NZdispl;
192     PetscCall(PetscLayoutGetRanges(layout, &range));
193     PetscCall(MatGetRowIJ(sA, 0, PETSC_FALSE, PETSC_FALSE, NULL, &IA, &JA, NULL));
194     for (i = 0; i < size; i++) {
195       sendcounti[i] = (PetscMPIInt)(1 + range[i + 1] - range[i]);
196       NZ[i]         = (PetscMPIInt)(IA[range[i + 1]] - IA[range[i]]);
197     }
198     displi[0]  = 0;
199     NZdispl[0] = 0;
200     for (j = 1; j < size; j++) {
201       displi[j]  = displi[j - 1] + sendcounti[j - 1] - 1;
202       NZdispl[j] = NZdispl[j - 1] + NZ[j - 1];
203     }
204     PetscCall(MatSeqAIJGetArrayRead(sA, &sa));
205   }
206   PetscCall(PetscLayoutDestroy(&layout));
207   PetscCallMPI(MPI_Scatter(NZ, 1, MPI_INT, &nz, 1, MPI_INT, 0, comm));
208 
209   PetscCall(PetscMalloc3(n + 1, &ia, nz, &ja, nz, &a));
210   PetscCallMPI(MPI_Scatterv(IA, sendcounti, displi, MPIU_INT, ia, n + 1, MPIU_INT, 0, comm));
211   PetscCallMPI(MPI_Scatterv(JA, NZ, NZdispl, MPIU_INT, ja, nz, MPIU_INT, 0, comm));
212   PetscCallMPI(MPI_Scatterv(sa, NZ, NZdispl, MPIU_SCALAR, a, nz, MPIU_SCALAR, 0, comm));
213 
214   if (pc) {
215     PetscCall(MatSeqAIJRestoreArrayRead(sA, &sa));
216     PetscCall(MatRestoreRowIJ(sA, 0, PETSC_FALSE, PETSC_FALSE, NULL, &IA, &JA, NULL));
217   }
218 
219   for (j = 1; j < n + 1; j++) ia[j] -= ia[0];
220   ia[0] = 0;
221   PetscCall(PetscLogStagePush(PCMPIStage));
222   PetscCall(MatCreateMPIAIJWithArrays(comm, m, n, matproperties[0], matproperties[1], ia, ja, a, &A));
223   PetscCall(MatSetBlockSize(A, matproperties[2]));
224   PetscCall(MatSetOptionsPrefix(A, "mpi_"));
225   if (matproperties[3]) PetscCall(MatSetOption(A, MAT_SYMMETRIC, matproperties[3] == 1 ? PETSC_TRUE : PETSC_FALSE));
226   if (matproperties[4]) PetscCall(MatSetOption(A, MAT_HERMITIAN, matproperties[4] == 1 ? PETSC_TRUE : PETSC_FALSE));
227   if (matproperties[5]) PetscCall(MatSetOption(A, MAT_SPD, matproperties[5] == 1 ? PETSC_TRUE : PETSC_FALSE));
228   if (matproperties[6]) PetscCall(MatSetOption(A, MAT_STRUCTURALLY_SYMMETRIC, matproperties[6] == 1 ? PETSC_TRUE : PETSC_FALSE));
229 
230   PetscCall(PetscFree3(ia, ja, a));
231   PetscCall(KSPSetOperators(ksp, A, A));
232   if (!ksp->vec_sol) PetscCall(MatCreateVecs(A, &ksp->vec_sol, &ksp->vec_rhs));
233   PetscCall(PetscLogStagePop());
234   if (pc) { /* needed for scatterv/gatherv of rhs and solution */
235     const PetscInt *range;
236 
237     PetscCall(VecGetOwnershipRanges(ksp->vec_sol, &range));
238     for (i = 0; i < size; i++) {
239       km->sendcount[i] = (PetscMPIInt)(range[i + 1] - range[i]);
240       km->displ[i]     = (PetscMPIInt)range[i];
241     }
242   }
243   PetscCall(MatDestroy(&A));
244   PetscCall(KSPSetFromOptions(ksp));
245   PetscFunctionReturn(PETSC_SUCCESS);
246 }
247 
248 static PetscErrorCode PCMPIUpdateMatValues(PC pc)
249 {
250   PC_MPI            *km = pc ? (PC_MPI *)pc->data : NULL;
251   KSP                ksp;
252   Mat                sA, A;
253   MPI_Comm           comm = PC_MPI_COMM_WORLD;
254   PetscScalar       *a;
255   PetscCount         nz;
256   const PetscScalar *sa = NULL;
257   PetscMPIInt        size;
258   PetscInt           matproperties[4] = {0, 0, 0, 0};
259 
260   PetscFunctionBegin;
261   if (pc) {
262     PetscCall(PCGetOperators(pc, &sA, &sA));
263     PetscCall(MatSeqAIJGetArrayRead(sA, &sa));
264   }
265   PetscCallMPI(MPI_Scatter(pc ? km->ksps : NULL, 1, MPI_AINT, &ksp, 1, MPI_AINT, 0, comm));
266   if (!ksp) PetscFunctionReturn(PETSC_SUCCESS);
267   PetscCall(PetscObjectGetComm((PetscObject)ksp, &comm));
268   PetscCallMPI(MPI_Comm_size(comm, &size));
269   PCMPIMatCounts[size - 1]++;
270   PetscCall(KSPGetOperators(ksp, NULL, &A));
271   PetscCall(MatMPIAIJGetNumberNonzeros(A, &nz));
272   PetscCall(PetscMalloc1(nz, &a));
273   PetscCallMPI(MPI_Scatterv(sa, pc ? km->NZ : NULL, pc ? km->NZdispl : NULL, MPIU_SCALAR, a, nz, MPIU_SCALAR, 0, comm));
274   if (pc) {
275     PetscBool isset, issymmetric, ishermitian, isspd, isstructurallysymmetric;
276 
277     PetscCall(MatSeqAIJRestoreArrayRead(sA, &sa));
278 
279     PetscCall(MatIsSymmetricKnown(sA, &isset, &issymmetric));
280     matproperties[0] = !isset ? 0 : (issymmetric ? 1 : 2);
281     PetscCall(MatIsHermitianKnown(sA, &isset, &ishermitian));
282     matproperties[1] = !isset ? 0 : (ishermitian ? 1 : 2);
283     PetscCall(MatIsSPDKnown(sA, &isset, &isspd));
284     matproperties[2] = !isset ? 0 : (isspd ? 1 : 2);
285     PetscCall(MatIsStructurallySymmetricKnown(sA, &isset, &isstructurallysymmetric));
286     matproperties[3] = !isset ? 0 : (isstructurallysymmetric ? 1 : 2);
287   }
288   PetscCall(MatUpdateMPIAIJWithArray(A, a));
289   PetscCall(PetscFree(a));
290   PetscCallMPI(MPI_Bcast(matproperties, 4, MPIU_INT, 0, comm));
291   /* if any of these properties was previously set and is now not set this will result in incorrect properties in A since there is no way to unset a property */
292   if (matproperties[0]) PetscCall(MatSetOption(A, MAT_SYMMETRIC, matproperties[0] == 1 ? PETSC_TRUE : PETSC_FALSE));
293   if (matproperties[1]) PetscCall(MatSetOption(A, MAT_HERMITIAN, matproperties[1] == 1 ? PETSC_TRUE : PETSC_FALSE));
294   if (matproperties[2]) PetscCall(MatSetOption(A, MAT_SPD, matproperties[2] == 1 ? PETSC_TRUE : PETSC_FALSE));
295   if (matproperties[3]) PetscCall(MatSetOption(A, MAT_STRUCTURALLY_SYMMETRIC, matproperties[3] == 1 ? PETSC_TRUE : PETSC_FALSE));
296   PetscFunctionReturn(PETSC_SUCCESS);
297 }
298 
299 static PetscErrorCode PCMPISolve(PC pc, Vec B, Vec X)
300 {
301   PC_MPI            *km = pc ? (PC_MPI *)pc->data : NULL;
302   KSP                ksp;
303   MPI_Comm           comm = PC_MPI_COMM_WORLD;
304   const PetscScalar *sb   = NULL, *x;
305   PetscScalar       *b, *sx = NULL;
306   PetscInt           its, n;
307   PetscMPIInt        size;
308 
309   PetscFunctionBegin;
310   PetscCallMPI(MPI_Scatter(pc ? km->ksps : &ksp, 1, MPI_AINT, &ksp, 1, MPI_AINT, 0, comm));
311   if (!ksp) PetscFunctionReturn(PETSC_SUCCESS);
312   PetscCall(PetscObjectGetComm((PetscObject)ksp, &comm));
313 
314   /* TODO: optimize code to not require building counts/displ every time */
315 
316   /* scatterv rhs */
317   PetscCallMPI(MPI_Comm_size(comm, &size));
318   if (pc) {
319     PetscInt N;
320 
321     PCMPISolveCounts[size - 1]++;
322     PetscCall(MatGetSize(pc->pmat, &N, NULL));
323     ;
324     PCMPISizes[size - 1] += N;
325     PetscCall(VecGetArrayRead(B, &sb));
326   }
327   PetscCall(VecGetLocalSize(ksp->vec_rhs, &n));
328   PetscCall(VecGetArray(ksp->vec_rhs, &b));
329   PetscCallMPI(MPI_Scatterv(sb, pc ? km->sendcount : NULL, pc ? km->displ : NULL, MPIU_SCALAR, b, n, MPIU_SCALAR, 0, comm));
330   PetscCall(VecRestoreArray(ksp->vec_rhs, &b));
331   if (pc) PetscCall(VecRestoreArrayRead(B, &sb));
332 
333   PetscCall(PetscLogStagePush(PCMPIStage));
334   PetscCall(KSPSolve(ksp, NULL, NULL));
335   PetscCall(PetscLogStagePop());
336   PetscCall(KSPGetIterationNumber(ksp, &its));
337   PCMPIIterations[size - 1] += its;
338 
339   /* gather solution */
340   PetscCall(VecGetArrayRead(ksp->vec_sol, &x));
341   if (pc) PetscCall(VecGetArray(X, &sx));
342   PetscCallMPI(MPI_Gatherv(x, n, MPIU_SCALAR, sx, pc ? km->sendcount : NULL, pc ? km->displ : NULL, MPIU_SCALAR, 0, comm));
343   if (pc) PetscCall(VecRestoreArray(X, &sx));
344   PetscCall(VecRestoreArrayRead(ksp->vec_sol, &x));
345   PetscFunctionReturn(PETSC_SUCCESS);
346 }
347 
348 static PetscErrorCode PCMPIDestroy(PC pc)
349 {
350   PC_MPI  *km = pc ? (PC_MPI *)pc->data : NULL;
351   KSP      ksp;
352   MPI_Comm comm = PC_MPI_COMM_WORLD;
353 
354   PetscFunctionBegin;
355   PetscCallMPI(MPI_Scatter(pc ? km->ksps : NULL, 1, MPI_AINT, &ksp, 1, MPI_AINT, 0, comm));
356   if (!ksp) PetscFunctionReturn(PETSC_SUCCESS);
357   PetscCall(KSPDestroy(&ksp));
358   PetscFunctionReturn(PETSC_SUCCESS);
359 }
360 
361 /*@C
362      PCMPIServerBegin - starts a server that runs on the rank != 0 MPI processes waiting to process requests for
363      parallel `KSP` solves and management of parallel `KSP` objects.
364 
365      Logically collective on all MPI ranks except 0
366 
367    Options Database Keys:
368 +   -mpi_linear_solver_server - causes the PETSc program to start in MPI linear solver server mode where only the first MPI rank runs user code
369 -   -mpi_linear_solver_server_view - displays information about the linear systems solved by the MPI linear solver server
370 
371      Note:
372       This is normally started automatically in `PetscInitialize()` when the option is provided
373 
374      Developer Notes:
375        When called on rank zero this sets `PETSC_COMM_WORLD` to `PETSC_COMM_SELF` to allow a main program
376        written with `PETSC_COMM_WORLD` to run correctly on the single rank while all the ranks
377        (that would normally be sharing `PETSC_COMM_WORLD`) to run the solver server.
378 
379        Can this be integrated into the `PetscDevice` abstraction that is currently being developed?
380 
381      Level: developer
382 
383 .seealso: `PCMPIServerEnd()`, `PCMPI`
384 @*/
385 PetscErrorCode PCMPIServerBegin(void)
386 {
387   PetscMPIInt rank;
388 
389   PetscFunctionBegin;
390   PetscCall(PetscInfo(NULL, "Starting MPI Linear Solver Server"));
391   if (PetscDefined(USE_SINGLE_LIBRARY)) {
392     PetscCall(VecInitializePackage());
393     PetscCall(MatInitializePackage());
394     PetscCall(DMInitializePackage());
395     PetscCall(PCInitializePackage());
396     PetscCall(KSPInitializePackage());
397     PetscCall(SNESInitializePackage());
398     PetscCall(TSInitializePackage());
399     PetscCall(TaoInitializePackage());
400   }
401 
402   PetscCallMPI(MPI_Comm_rank(PC_MPI_COMM_WORLD, &rank));
403   if (rank == 0) {
404     PETSC_COMM_WORLD = PETSC_COMM_SELF;
405     PetscFunctionReturn(PETSC_SUCCESS);
406   }
407 
408   while (PETSC_TRUE) {
409     PCMPICommand request = PCMPI_CREATE;
410     PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, PC_MPI_COMM_WORLD));
411     switch (request) {
412     case PCMPI_CREATE:
413       PetscCall(PCMPICreate(NULL));
414       break;
415     case PCMPI_SET_MAT:
416       PetscCall(PCMPISetMat(NULL));
417       break;
418     case PCMPI_UPDATE_MAT_VALUES:
419       PetscCall(PCMPIUpdateMatValues(NULL));
420       break;
421     case PCMPI_VIEW:
422       // PetscCall(PCMPIView(NULL));
423       break;
424     case PCMPI_SOLVE:
425       PetscCall(PCMPISolve(NULL, NULL, NULL));
426       break;
427     case PCMPI_DESTROY:
428       PetscCall(PCMPIDestroy(NULL));
429       break;
430     case PCMPI_EXIT:
431       PetscCall(PetscFinalize());
432       exit(0); /* not sure if this is a good idea, but cannot return because it will run users main program */
433       break;
434     default:
435       break;
436     }
437   }
438   PetscFunctionReturn(PETSC_SUCCESS);
439 }
440 
441 /*@C
442      PCMPIServerEnd - ends a server that runs on the rank != 0 MPI processes waiting to process requests for
443      parallel KSP solves and management of parallel `KSP` objects.
444 
445      Logically collective on all MPI ranks except 0
446 
447      Note:
448       This is normally ended automatically in `PetscFinalize()` when the option is provided
449 
450      Level: developer
451 
452 .seealso: `PCMPIServerBegin()`, `PCMPI`
453 @*/
454 PetscErrorCode PCMPIServerEnd(void)
455 {
456   PCMPICommand request = PCMPI_EXIT;
457 
458   PetscFunctionBegin;
459   if (PetscGlobalRank == 0) {
460     PetscViewer       viewer = NULL;
461     PetscViewerFormat format;
462 
463     PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, PC_MPI_COMM_WORLD));
464     PETSC_COMM_WORLD = MPI_COMM_WORLD; /* could use PC_MPI_COMM_WORLD */
465     PetscOptionsBegin(PETSC_COMM_SELF, NULL, "MPI linear solver server options", NULL);
466     PetscCall(PetscOptionsViewer("-mpi_linear_solver_server_view", "View information about system solved with the server", "PCMPI", &viewer, &format, NULL));
467     PetscOptionsEnd();
468     if (viewer) {
469       PetscBool isascii;
470 
471       PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii));
472       if (isascii) {
473         PetscMPIInt size;
474         PetscMPIInt i;
475 
476         PetscCallMPI(MPI_Comm_size(PETSC_COMM_WORLD, &size));
477         PetscCall(PetscViewerASCIIPrintf(viewer, "MPI linear solver server statistics:\n"));
478         PetscCall(PetscViewerASCIIPrintf(viewer, "    Ranks        KSPSolve()s     Mats        KSPs       Avg. Size      Avg. Its\n"));
479         if (PCMPIKSPCountsSeq) {
480           PetscCall(PetscViewerASCIIPrintf(viewer, "  Sequential         %" PetscInt_FMT "                         %" PetscInt_FMT "            %" PetscInt_FMT "           %" PetscInt_FMT "\n", PCMPISolveCountsSeq, PCMPIKSPCountsSeq, PCMPISizesSeq / PCMPISolveCountsSeq, PCMPIIterationsSeq / PCMPISolveCountsSeq));
481         }
482         for (i = 0; i < size; i++) {
483           if (PCMPIKSPCounts[i]) {
484             PetscCall(PetscViewerASCIIPrintf(viewer, "     %d               %" PetscInt_FMT "            %" PetscInt_FMT "           %" PetscInt_FMT "            %" PetscInt_FMT "            %" PetscInt_FMT "\n", i + 1, PCMPISolveCounts[i], PCMPIMatCounts[i], PCMPIKSPCounts[i], PCMPISizes[i] / PCMPISolveCounts[i], PCMPIIterations[i] / PCMPISolveCounts[i]));
485           }
486         }
487       }
488       PetscCall(PetscViewerDestroy(&viewer));
489     }
490   }
491   PetscCall(PCMPICommsDestroy());
492   PetscFunctionReturn(PETSC_SUCCESS);
493 }
494 
495 /*
496     This version is used in the trivial case when the MPI parallel solver server is running on just the original MPI rank 0
497     because, for example, the problem is small. This version is more efficient because it does not require copying any data
498 */
499 static PetscErrorCode PCSetUp_Seq(PC pc)
500 {
501   PC_MPI     *km = (PC_MPI *)pc->data;
502   Mat         sA;
503   const char *prefix;
504 
505   PetscFunctionBegin;
506   PetscCall(PCGetOperators(pc, NULL, &sA));
507   PetscCall(PCGetOptionsPrefix(pc, &prefix));
508   PetscCall(KSPCreate(PETSC_COMM_SELF, &km->ksps[0]));
509   PetscCall(KSPSetOptionsPrefix(km->ksps[0], prefix));
510   PetscCall(KSPAppendOptionsPrefix(km->ksps[0], "mpi_"));
511   PetscCall(KSPSetOperators(km->ksps[0], sA, sA));
512   PetscCall(KSPSetFromOptions(km->ksps[0]));
513   PetscCall(KSPSetUp(km->ksps[0]));
514   PetscCall(PetscInfo((PetscObject)pc, "MPI parallel linear solver system is being solved directly on rank 0 due to its small size\n"));
515   PCMPIKSPCountsSeq++;
516   PetscFunctionReturn(PETSC_SUCCESS);
517 }
518 
519 static PetscErrorCode PCApply_Seq(PC pc, Vec b, Vec x)
520 {
521   PC_MPI  *km = (PC_MPI *)pc->data;
522   PetscInt its, n;
523   Mat      A;
524 
525   PetscFunctionBegin;
526   PetscCall(KSPSolve(km->ksps[0], b, x));
527   PetscCall(KSPGetIterationNumber(km->ksps[0], &its));
528   PCMPISolveCountsSeq++;
529   PCMPIIterationsSeq += its;
530   PetscCall(KSPGetOperators(km->ksps[0], NULL, &A));
531   PetscCall(MatGetSize(A, &n, NULL));
532   PCMPISizesSeq += n;
533   PetscFunctionReturn(PETSC_SUCCESS);
534 }
535 
536 static PetscErrorCode PCView_Seq(PC pc, PetscViewer viewer)
537 {
538   PC_MPI     *km = (PC_MPI *)pc->data;
539   const char *prefix;
540 
541   PetscFunctionBegin;
542   PetscCall(PetscViewerASCIIPrintf(viewer, "Running MPI linear solver server directly on rank 0 due to its small size\n"));
543   PetscCall(PetscViewerASCIIPrintf(viewer, "Desired minimum number of nonzeros per rank for MPI parallel solve %d\n", (int)km->mincntperrank));
544   PetscCall(PCGetOptionsPrefix(pc, &prefix));
545   if (prefix) PetscCall(PetscViewerASCIIPrintf(viewer, "*** Use -%smpi_ksp_view to see the MPI KSP parameters ***\n", prefix));
546   else PetscCall(PetscViewerASCIIPrintf(viewer, "*** Use -mpi_ksp_view to see the MPI KSP parameters ***\n"));
547   PetscCall(PetscViewerASCIIPrintf(viewer, "*** Use -mpi_linear_solver_server_view to statistics on all the solves ***\n"));
548   PetscFunctionReturn(PETSC_SUCCESS);
549 }
550 
551 static PetscErrorCode PCDestroy_Seq(PC pc)
552 {
553   PC_MPI *km = (PC_MPI *)pc->data;
554 
555   PetscFunctionBegin;
556   PetscCall(KSPDestroy(&km->ksps[0]));
557   PetscCall(PetscFree(pc->data));
558   PetscFunctionReturn(PETSC_SUCCESS);
559 }
560 
561 /*
562      PCSetUp_MPI - Trigger the creation of the MPI parallel PC and copy parts of the matrix and
563      right hand side to the parallel PC
564 */
565 static PetscErrorCode PCSetUp_MPI(PC pc)
566 {
567   PC_MPI      *km = (PC_MPI *)pc->data;
568   PetscMPIInt  rank, size;
569   PCMPICommand request;
570   PetscBool    newmatrix = PETSC_FALSE;
571 
572   PetscFunctionBegin;
573   PetscCallMPI(MPI_Comm_rank(MPI_COMM_WORLD, &rank));
574   PetscCheck(rank == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "PCMPI can only be used from 0th rank of MPI_COMM_WORLD. Perhaps a missing -mpi_linear_solver_server?");
575   PetscCallMPI(MPI_Comm_size(MPI_COMM_WORLD, &size));
576 
577   if (!pc->setupcalled) {
578     if (!km->alwaysuseserver) {
579       PetscInt n;
580       Mat      sA;
581       /* short circuit for small systems */
582       PetscCall(PCGetOperators(pc, &sA, &sA));
583       PetscCall(MatGetSize(sA, &n, NULL));
584       if (n < 2 * km->mincntperrank - 1 || size == 1) {
585         pc->ops->setup   = NULL;
586         pc->ops->apply   = PCApply_Seq;
587         pc->ops->destroy = PCDestroy_Seq;
588         pc->ops->view    = PCView_Seq;
589         PetscCall(PCSetUp_Seq(pc));
590         PetscFunctionReturn(PETSC_SUCCESS);
591       }
592     }
593 
594     request = PCMPI_CREATE;
595     PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, MPI_COMM_WORLD));
596     PetscCall(PCMPICreate(pc));
597     newmatrix = PETSC_TRUE;
598   }
599   if (pc->flag == DIFFERENT_NONZERO_PATTERN) newmatrix = PETSC_TRUE;
600 
601   if (newmatrix) {
602     PetscCall(PetscInfo((PetscObject)pc, "New matrix or matrix has changed nonzero structure\n"));
603     request = PCMPI_SET_MAT;
604     PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, MPI_COMM_WORLD));
605     PetscCall(PCMPISetMat(pc));
606   } else {
607     PetscCall(PetscInfo((PetscObject)pc, "Matrix has only changed nozero values\n"));
608     request = PCMPI_UPDATE_MAT_VALUES;
609     PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, MPI_COMM_WORLD));
610     PetscCall(PCMPIUpdateMatValues(pc));
611   }
612   PetscFunctionReturn(PETSC_SUCCESS);
613 }
614 
615 static PetscErrorCode PCApply_MPI(PC pc, Vec b, Vec x)
616 {
617   PCMPICommand request = PCMPI_SOLVE;
618 
619   PetscFunctionBegin;
620   PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, MPI_COMM_WORLD));
621   PetscCall(PCMPISolve(pc, b, x));
622   PetscFunctionReturn(PETSC_SUCCESS);
623 }
624 
625 PetscErrorCode PCDestroy_MPI(PC pc)
626 {
627   PCMPICommand request = PCMPI_DESTROY;
628 
629   PetscFunctionBegin;
630   PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, MPI_COMM_WORLD));
631   PetscCall(PCMPIDestroy(pc));
632   PetscCall(PetscFree(pc->data));
633   PetscFunctionReturn(PETSC_SUCCESS);
634 }
635 
636 /*
637      PCView_MPI - Cannot call view on the MPI parallel KSP because other ranks do not have access to the viewer
638 */
639 PetscErrorCode PCView_MPI(PC pc, PetscViewer viewer)
640 {
641   PC_MPI     *km = (PC_MPI *)pc->data;
642   MPI_Comm    comm;
643   PetscMPIInt size;
644   const char *prefix;
645 
646   PetscFunctionBegin;
647   PetscCall(PetscObjectGetComm((PetscObject)km->ksps[0], &comm));
648   PetscCallMPI(MPI_Comm_size(comm, &size));
649   PetscCall(PetscViewerASCIIPrintf(viewer, "Size of MPI communicator used for MPI parallel KSP solve %d\n", size));
650   PetscCall(PetscViewerASCIIPrintf(viewer, "Desired minimum number of nonzeros per rank for MPI parallel solve %d\n", (int)km->mincntperrank));
651   PetscCall(PCGetOptionsPrefix(pc, &prefix));
652   if (prefix) PetscCall(PetscViewerASCIIPrintf(viewer, "*** Use -%smpi_ksp_view to see the MPI KSP parameters ***\n", prefix));
653   else PetscCall(PetscViewerASCIIPrintf(viewer, "*** Use -mpi_ksp_view to see the MPI KSP parameters ***\n"));
654   PetscCall(PetscViewerASCIIPrintf(viewer, "*** Use -mpi_linear_solver_server_view to statistics on all the solves ***\n"));
655   PetscFunctionReturn(PETSC_SUCCESS);
656 }
657 
658 PetscErrorCode PCSetFromOptions_MPI(PC pc, PetscOptionItems *PetscOptionsObject)
659 {
660   PC_MPI *km = (PC_MPI *)pc->data;
661 
662   PetscFunctionBegin;
663   PetscOptionsHeadBegin(PetscOptionsObject, "MPI linear solver server options");
664   PetscCall(PetscOptionsInt("-pc_mpi_minimum_count_per_rank", "Desired minimum number of nonzeros per rank", "None", km->mincntperrank, &km->mincntperrank, NULL));
665   PetscCall(PetscOptionsBool("-pc_mpi_always_use_server", "Use the server even if only one rank is used for the solve (for debugging)", "None", km->alwaysuseserver, &km->alwaysuseserver, NULL));
666   PetscOptionsHeadEnd();
667   PetscFunctionReturn(PETSC_SUCCESS);
668 }
669 
670 /*MC
671      PCMPI - Calls an MPI parallel `KSP` to solve a linear system from user code running on one process
672 
673    Level: beginner
674 
675    Options Database Keys:
676 +  -mpi_linear_solver_server - causes the PETSc program to start in MPI linear solver server mode where only the first MPI rank runs user code
677 .  -mpi_linear_solver_server_view - displays information about the linear systems solved by the MPI linear solver server
678 .  -pc_mpi_minimum_count_per_rank - sets the minimum size of the linear system per MPI rank that the solver will strive for
679 -  -pc_mpi_always_use_server - use the server solver code even if the particular system is only solved on the process, this option is only for debugging and testing purposes
680 
681    Notes:
682    The options database prefix for the MPI parallel `KSP` and `PC` is -mpi_
683 
684    The MPI linear solver server will not support scaling user code to utilize extremely large numbers of MPI ranks but should give reasonable speedup for
685    potentially 4 to 8 MPI ranks depending on the linear system being solved, solver algorithm, and the hardware.
686 
687    It can be particularly useful for user OpenMP code or potentially user GPU code.
688 
689    When the program is running with a single MPI rank then this directly uses the provided matrix and right hand side (still in a `KSP` with the options prefix of -mpi)
690    and does not need to distribute the matrix and vector to the various MPI ranks; thus it incurs no extra overhead over just using the `KSP` directly.
691 
692    The solver options for `KSP` and `PC` must be controlled via the options database, calls to set options directly on the user level `KSP` and `PC` have no effect
693    because they are not the actual solver objects.
694 
695    When `-log_view` is used with this solver the events within the parallel solve are logging in their own stage. Some of the logging in the other
696    stages will be confusing since the event times are only recorded on the 0th MPI rank, thus the percent of time in the events will be misleading.
697 
698 .seealso: `KSPCreate()`, `KSPSetType()`, `KSPType`, `KSP`, `PC`, `PCMPIServerBegin()`, `PCMPIServerEnd()`
699 M*/
700 PETSC_EXTERN PetscErrorCode PCCreate_MPI(PC pc)
701 {
702   PC_MPI *km;
703 
704   PetscFunctionBegin;
705   PetscCall(PetscNew(&km));
706   pc->data = (void *)km;
707 
708   km->mincntperrank = 10000;
709 
710   pc->ops->setup          = PCSetUp_MPI;
711   pc->ops->apply          = PCApply_MPI;
712   pc->ops->destroy        = PCDestroy_MPI;
713   pc->ops->view           = PCView_MPI;
714   pc->ops->setfromoptions = PCSetFromOptions_MPI;
715   PetscFunctionReturn(PETSC_SUCCESS);
716 }
717