xref: /petsc/src/ksp/pc/impls/mpi/pcmpi.c (revision bcd3bd92eda2d5998e2f14c4bbfb33bd936bdc3e)
1 /*
2     This file creates an MPI parallel KSP from a sequential PC that lives on MPI rank 0.
3     It is intended to allow using PETSc MPI parallel linear solvers from non-MPI codes.
4 
5     The user's program may use OpenMP to compute the right hand side and matrix for the linear system.
6 
7     The code uses MPI_COMM_WORLD below but maybe it should be PETSC_COMM_WORLD
8 
9     The resulting KSP and PC can only be controlled via the options database, though some common commands
10     could be passed through the server.
11 
12 */
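
/*
    A minimal sketch (not part of this file) of how an otherwise sequential user program might drive the MPI linear solver server.
    The executable name and solver options are placeholders; one plausible launch line is
        mpiexec -n 4 ./app -mpi_linear_solver_server -ksp_type preonly -pc_type mpi
    The matrix assembly below is only indicated by a comment.

      #include <petscksp.h>

      int main(int argc, char **argv)
      {
        Mat A;
        Vec b, x;
        KSP ksp;

        PetscCall(PetscInitialize(&argc, &argv, NULL, NULL)); // with -mpi_linear_solver_server this starts the server on ranks != 0
        // ... assemble the MATSEQAIJ matrix A and right hand side b as usual; only rank 0 executes this code ...
        PetscCall(KSPCreate(PETSC_COMM_WORLD, &ksp)); // PETSC_COMM_WORLD is PETSC_COMM_SELF on rank 0 in server mode
        PetscCall(KSPSetOperators(ksp, A, A));
        PetscCall(KSPSetFromOptions(ksp));
        PetscCall(VecDuplicate(b, &x));
        PetscCall(KSPSolve(ksp, b, x)); // the actual solve runs in parallel inside PCMPI
        PetscCall(KSPDestroy(&ksp));
        PetscCall(MatDestroy(&A));
        PetscCall(VecDestroy(&b));
        PetscCall(VecDestroy(&x));
        PetscCall(PetscFinalize()); // tells the server ranks to finalize and exit
        return 0;
      }
*/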
13 #include <petsc/private/pcimpl.h>
14 #include <petsc/private/kspimpl.h>
15 #include <petsc.h>
16 
17 #define PC_MPI_MAX_RANKS  256
18 #define PC_MPI_COMM_WORLD MPI_COMM_WORLD
19 
20 typedef struct {
21   KSP         ksps[PC_MPI_MAX_RANKS];                               /* The addresses of the MPI parallel KSP on each rank, NULL when not on a rank. */
22   PetscMPIInt sendcount[PC_MPI_MAX_RANKS], displ[PC_MPI_MAX_RANKS]; /* For scatter/gather of rhs/solution */
23   PetscMPIInt NZ[PC_MPI_MAX_RANKS], NZdispl[PC_MPI_MAX_RANKS];      /* For scatter of nonzero values in matrix (and the nonzero column indices initially) */
24   PetscInt    mincntperrank;                                        /* minimum number of desired nonzeros per active rank in MPI parallel KSP solve */
25   PetscBool   alwaysuseserver;                                      /* for debugging use the server infrastructure even if only one MPI rank is used for the solve */
26 } PC_MPI;
27 
28 typedef enum {
29   PCMPI_EXIT, /* exit the PC server loop; the controlling sequential program is done */
30   PCMPI_CREATE,
31   PCMPI_SET_MAT,           /* set original matrix (or one with different nonzero pattern) */
32   PCMPI_UPDATE_MAT_VALUES, /* update current matrix with new nonzero values */
33   PCMPI_SOLVE,
34   PCMPI_VIEW,
35   PCMPI_DESTROY /* destroy a KSP that is no longer needed */
36 } PCMPICommand;
37 
38 static MPI_Comm  PCMPIComms[PC_MPI_MAX_RANKS];
39 static PetscBool PCMPICommSet = PETSC_FALSE;
40 static PetscInt  PCMPISolveCounts[PC_MPI_MAX_RANKS], PCMPIKSPCounts[PC_MPI_MAX_RANKS], PCMPIMatCounts[PC_MPI_MAX_RANKS], PCMPISolveCountsSeq = 0, PCMPIKSPCountsSeq = 0;
41 static PetscInt  PCMPIIterations[PC_MPI_MAX_RANKS], PCMPISizes[PC_MPI_MAX_RANKS], PCMPIIterationsSeq = 0, PCMPISizesSeq = 0;
42 
43 static PetscErrorCode PCMPICommsCreate(void)
44 {
45   MPI_Comm    comm = PC_MPI_COMM_WORLD;
46   PetscMPIInt size, rank, i;
47 
48   PetscFunctionBegin;
49   PetscCallMPI(MPI_Comm_size(comm, &size));
50   PetscCheck(size <= PC_MPI_MAX_RANKS, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for using more than PC_MPI_MAX_RANKS MPI ranks in an MPI linear solver server solve");
51   PetscCallMPI(MPI_Comm_rank(comm, &rank));
52   /* comm for size 1 is useful only for debugging */
53   for (i = 0; i < size; i++) {
54     PetscMPIInt color = rank < i + 1 ? 0 : MPI_UNDEFINED;
55     PetscCallMPI(MPI_Comm_split(comm, color, 0, &PCMPIComms[i]));
56     PCMPISolveCounts[i] = 0;
57     PCMPIKSPCounts[i]   = 0;
58     PCMPIIterations[i]  = 0;
59     PCMPISizes[i]       = 0;
60   }
61   PCMPICommSet = PETSC_TRUE;
62   PetscFunctionReturn(PETSC_SUCCESS);
63 }
64 
65 PetscErrorCode PCMPICommsDestroy(void)
66 {
67   MPI_Comm    comm = PC_MPI_COMM_WORLD;
68   PetscMPIInt size, rank, i;
69 
70   PetscFunctionBegin;
71   if (!PCMPICommSet) PetscFunctionReturn(PETSC_SUCCESS);
72   PetscCallMPI(MPI_Comm_size(comm, &size));
73   PetscCallMPI(MPI_Comm_rank(comm, &rank));
74   for (i = 0; i < size; i++) {
75     if (PCMPIComms[i] != MPI_COMM_NULL) PetscCallMPI(MPI_Comm_free(&PCMPIComms[i]));
76   }
77   PCMPICommSet = PETSC_FALSE;
78   PetscFunctionReturn(PETSC_SUCCESS);
79 }
80 
81 static PetscErrorCode PCMPICreate(PC pc)
82 {
83   PC_MPI     *km   = pc ? (PC_MPI *)pc->data : NULL;
84   MPI_Comm    comm = PC_MPI_COMM_WORLD;
85   KSP         ksp;
86   PetscInt    N[2], mincntperrank = 0;
87   PetscMPIInt size;
88   Mat         sA;
89   char       *prefix;
90   PetscMPIInt len = 0;
91 
92   PetscFunctionBegin;
93   if (!PCMPICommSet) PetscCall(PCMPICommsCreate());
94   PetscCallMPI(MPI_Comm_size(comm, &size));
95   if (pc) {
96     if (size == 1) PetscCall(PetscPrintf(PETSC_COMM_SELF, "Warning: Running KSP type of MPI on a one rank MPI run, this will be less efficient than not using this type\n"));
97     PetscCall(PCGetOperators(pc, &sA, &sA));
98     PetscCall(MatGetSize(sA, &N[0], &N[1]));
99   }
100   PetscCallMPI(MPI_Bcast(N, 2, MPIU_INT, 0, comm));
101 
102   /* choose a suitable sized MPI_Comm for the problem to be solved on */
103   if (km) mincntperrank = km->mincntperrank;
104   PetscCallMPI(MPI_Bcast(&mincntperrank, 1, MPI_INT, 0, comm));
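  /* PCMPIComms[i] contains ranks 0..i; use roughly N[0]/mincntperrank ranks for the solve, but at least 1 and at most all the ranks of comm */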
105   comm = PCMPIComms[PetscMin(size, PetscMax(1, N[0] / mincntperrank)) - 1];
106   if (comm == MPI_COMM_NULL) {
107     ksp = NULL;
108     PetscFunctionReturn(PETSC_SUCCESS);
109   }
110   PetscCall(PetscLogStagePush(PCMPIStage));
111   PetscCall(KSPCreate(comm, &ksp));
112   PetscCall(PetscLogStagePop());
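  /* gather each rank's KSP address into km->ksps[] on rank 0; the addresses are scattered back in the other PCMPIXXX() routines so each server rank can locate its own KSP */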
113   PetscCallMPI(MPI_Gather(&ksp, 1, MPI_AINT, pc ? km->ksps : NULL, 1, MPI_AINT, 0, comm));
114   if (pc) {
115     size_t slen;
116 
117     PetscCallMPI(MPI_Comm_size(comm, &size));
118     PCMPIKSPCounts[size - 1]++;
119     PetscCall(PCGetOptionsPrefix(pc, (const char **)&prefix));
120     PetscCall(PetscStrlen(prefix, &slen));
121     len = (PetscMPIInt)slen;
122   }
123   PetscCallMPI(MPI_Bcast(&len, 1, MPI_INT, 0, comm));
124   if (len) {
125     if (!pc) PetscCall(PetscMalloc1(len + 1, &prefix));
126     PetscCallMPI(MPI_Bcast(prefix, len + 1, MPI_CHAR, 0, comm));
127     PetscCall(KSPSetOptionsPrefix(ksp, prefix));
128   }
129   PetscCall(KSPAppendOptionsPrefix(ksp, "mpi_"));
130   PetscFunctionReturn(PETSC_SUCCESS);
131 }
132 
133 static PetscErrorCode PCMPISetMat(PC pc)
134 {
135   PC_MPI            *km = pc ? (PC_MPI *)pc->data : NULL;
136   Mat                A;
137   PetscInt           m, n, *ia, *ja, j, bs;
138   Mat                sA;
139   MPI_Comm           comm = PC_MPI_COMM_WORLD;
140   KSP                ksp;
141   PetscLayout        layout;
142   const PetscInt    *IA = NULL, *JA = NULL;
143   const PetscInt    *range;
144   PetscMPIInt       *NZ = NULL, sendcounti[PC_MPI_MAX_RANKS], displi[PC_MPI_MAX_RANKS], *NZdispl = NULL, nz, size, i;
145   PetscScalar       *a;
146   const PetscScalar *sa               = NULL;
147   PetscInt           matproperties[7] = {0, 0, 0, 0, 0, 0, 0};
148 
149   PetscFunctionBegin;
150   PetscCallMPI(MPI_Scatter(pc ? km->ksps : NULL, 1, MPI_AINT, &ksp, 1, MPI_AINT, 0, comm));
151   if (!ksp) PetscFunctionReturn(PETSC_SUCCESS);
152   PetscCall(PetscObjectGetComm((PetscObject)ksp, &comm));
153   if (pc) {
154     PetscBool isset, issymmetric, ishermitian, isspd, isstructurallysymmetric;
155 
156     PetscCallMPI(MPI_Comm_size(comm, &size));
157     PCMPIMatCounts[size - 1]++;
158     PetscCall(PCGetOperators(pc, &sA, &sA));
159     PetscCall(MatGetSize(sA, &matproperties[0], &matproperties[1]));
160     PetscCall(MatGetBlockSize(sA, &bs));
161     matproperties[2] = bs;
162     PetscCall(MatIsSymmetricKnown(sA, &isset, &issymmetric));
163     matproperties[3] = !isset ? 0 : (issymmetric ? 1 : 2);
164     PetscCall(MatIsHermitianKnown(sA, &isset, &ishermitian));
165     matproperties[4] = !isset ? 0 : (ishermitian ? 1 : 2);
166     PetscCall(MatIsSPDKnown(sA, &isset, &isspd));
167     matproperties[5] = !isset ? 0 : (isspd ? 1 : 2);
168     PetscCall(MatIsStructurallySymmetricKnown(sA, &isset, &isstructurallysymmetric));
169     matproperties[6] = !isset ? 0 : (isstructurallysymmetric ? 1 : 2);
170   }
171   PetscCallMPI(MPI_Bcast(matproperties, 7, MPIU_INT, 0, comm));
172 
173   /* determine ownership ranges of matrix columns */
174   PetscCall(PetscLayoutCreate(comm, &layout));
175   PetscCall(PetscLayoutSetBlockSize(layout, matproperties[2]));
176   PetscCall(PetscLayoutSetSize(layout, matproperties[1]));
177   PetscCall(PetscLayoutSetUp(layout));
178   PetscCall(PetscLayoutGetLocalSize(layout, &n));
179   PetscCall(PetscLayoutDestroy(&layout));
180 
181   /* determine ownership ranges of matrix rows */
182   PetscCall(PetscLayoutCreate(comm, &layout));
183   PetscCall(PetscLayoutSetBlockSize(layout, matproperties[2]));
184   PetscCall(PetscLayoutSetSize(layout, matproperties[0]));
185   PetscCall(PetscLayoutSetUp(layout));
186   PetscCall(PetscLayoutGetLocalSize(layout, &m));
187 
188   /* copy over the matrix nonzero structure and values */
189   if (pc) {
190     NZ      = km->NZ;
191     NZdispl = km->NZdispl;
192     PetscCall(PetscLayoutGetRanges(layout, &range));
193     PetscCall(MatGetRowIJ(sA, 0, PETSC_FALSE, PETSC_FALSE, NULL, &IA, &JA, NULL));
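    /* sendcounti[i]/displi[i] scatter rank i's slice of the row pointer array IA (slices of adjacent ranks overlap by one entry);
       NZ[i]/NZdispl[i] scatter the corresponding column indices and, later, the numerical values */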
194     for (i = 0; i < size; i++) {
195       sendcounti[i] = (PetscMPIInt)(1 + range[i + 1] - range[i]);
196       NZ[i]         = (PetscMPIInt)(IA[range[i + 1]] - IA[range[i]]);
197     }
198     displi[0]  = 0;
199     NZdispl[0] = 0;
200     for (j = 1; j < size; j++) {
201       displi[j]  = displi[j - 1] + sendcounti[j - 1] - 1;
202       NZdispl[j] = NZdispl[j - 1] + NZ[j - 1];
203     }
204     PetscCall(MatSeqAIJGetArrayRead(sA, &sa));
205   }
206   PetscCall(PetscLayoutDestroy(&layout));
207   PetscCallMPI(MPI_Scatter(NZ, 1, MPI_INT, &nz, 1, MPI_INT, 0, comm));
208 
209   PetscCall(PetscMalloc3(n + 1, &ia, nz, &ja, nz, &a));
210   PetscCallMPI(MPI_Scatterv(IA, sendcounti, displi, MPIU_INT, ia, n + 1, MPIU_INT, 0, comm));
211   PetscCallMPI(MPI_Scatterv(JA, NZ, NZdispl, MPIU_INT, ja, nz, MPIU_INT, 0, comm));
212   PetscCallMPI(MPI_Scatterv(sa, NZ, NZdispl, MPIU_SCALAR, a, nz, MPIU_SCALAR, 0, comm));
213 
214   if (pc) {
215     PetscCall(MatSeqAIJRestoreArrayRead(sA, &sa));
216     PetscCall(MatRestoreRowIJ(sA, 0, PETSC_FALSE, PETSC_FALSE, NULL, &IA, &JA, NULL));
217   }
218 
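  /* rebase the local row pointers to start at zero, as required by MatCreateMPIAIJWithArrays() below */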
219   for (j = 1; j < n + 1; j++) ia[j] -= ia[0];
220   ia[0] = 0;
221   PetscCall(PetscLogStagePush(PCMPIStage));
222   PetscCall(MatCreateMPIAIJWithArrays(comm, m, n, matproperties[0], matproperties[1], ia, ja, a, &A));
223   PetscCall(MatSetBlockSize(A, matproperties[2]));
224   PetscCall(MatSetOptionsPrefix(A, "mpi_"));
225   if (matproperties[3]) PetscCall(MatSetOption(A, MAT_SYMMETRIC, matproperties[3] == 1 ? PETSC_TRUE : PETSC_FALSE));
226   if (matproperties[4]) PetscCall(MatSetOption(A, MAT_HERMITIAN, matproperties[4] == 1 ? PETSC_TRUE : PETSC_FALSE));
227   if (matproperties[5]) PetscCall(MatSetOption(A, MAT_SPD, matproperties[5] == 1 ? PETSC_TRUE : PETSC_FALSE));
228   if (matproperties[6]) PetscCall(MatSetOption(A, MAT_STRUCTURALLY_SYMMETRIC, matproperties[6] == 1 ? PETSC_TRUE : PETSC_FALSE));
229 
230   PetscCall(PetscFree3(ia, ja, a));
231   PetscCall(KSPSetOperators(ksp, A, A));
232   if (!ksp->vec_sol) PetscCall(MatCreateVecs(A, &ksp->vec_sol, &ksp->vec_rhs));
233   PetscCall(PetscLogStagePop());
234   if (pc) { /* needed for scatterv/gatherv of rhs and solution */
235     const PetscInt *range;
236 
237     PetscCall(VecGetOwnershipRanges(ksp->vec_sol, &range));
238     for (i = 0; i < size; i++) {
239       km->sendcount[i] = (PetscMPIInt)(range[i + 1] - range[i]);
240       km->displ[i]     = (PetscMPIInt)range[i];
241     }
242   }
243   PetscCall(MatDestroy(&A));
244   PetscCall(KSPSetFromOptions(ksp));
245   PetscFunctionReturn(PETSC_SUCCESS);
246 }
247 
248 static PetscErrorCode PCMPIUpdateMatValues(PC pc)
249 {
250   PC_MPI            *km = pc ? (PC_MPI *)pc->data : NULL;
251   KSP                ksp;
252   Mat                sA, A;
253   MPI_Comm           comm = PC_MPI_COMM_WORLD;
254   PetscScalar       *a;
255   PetscCount         nz;
256   const PetscScalar *sa = NULL;
257   PetscMPIInt        size;
258   PetscInt           matproperties[4] = {0, 0, 0, 0};
259 
260   PetscFunctionBegin;
261   if (pc) {
262     PetscCall(PCGetOperators(pc, &sA, &sA));
263     PetscCall(MatSeqAIJGetArrayRead(sA, &sa));
264   }
265   PetscCallMPI(MPI_Scatter(pc ? km->ksps : NULL, 1, MPI_AINT, &ksp, 1, MPI_AINT, 0, comm));
266   if (!ksp) PetscFunctionReturn(PETSC_SUCCESS);
267   PetscCall(PetscObjectGetComm((PetscObject)ksp, &comm));
268   PetscCallMPI(MPI_Comm_size(comm, &size));
269   PCMPIMatCounts[size - 1]++;
270   PetscCall(KSPGetOperators(ksp, NULL, &A));
271   PetscCall(MatMPIAIJGetNumberNonzeros(A, &nz));
272   PetscCall(PetscMalloc1(nz, &a));
273   PetscCallMPI(MPI_Scatterv(sa, pc ? km->NZ : NULL, pc ? km->NZdispl : NULL, MPIU_SCALAR, a, nz, MPIU_SCALAR, 0, comm));
274   if (pc) {
275     PetscBool isset, issymmetric, ishermitian, isspd, isstructurallysymmetric;
276 
277     PetscCall(MatSeqAIJRestoreArrayRead(sA, &sa));
278 
279     PetscCall(MatIsSymmetricKnown(sA, &isset, &issymmetric));
280     matproperties[0] = !isset ? 0 : (issymmetric ? 1 : 2);
281     PetscCall(MatIsHermitianKnown(sA, &isset, &ishermitian));
282     matproperties[1] = !isset ? 0 : (ishermitian ? 1 : 2);
283     PetscCall(MatIsSPDKnown(sA, &isset, &isspd));
284     matproperties[2] = !isset ? 0 : (isspd ? 1 : 2);
285     PetscCall(MatIsStructurallySymmetricKnown(sA, &isset, &isstructurallysymmetric));
286     matproperties[3] = !isset ? 0 : (isstructurallysymmetric ? 1 : 2);
287   }
288   PetscCall(MatUpdateMPIAIJWithArray(A, a));
289   PetscCall(PetscFree(a));
290   PetscCallMPI(MPI_Bcast(matproperties, 4, MPIU_INT, 0, comm));
291   /* if any of these properties was previously set and is now no longer set, A will be left with incorrect properties since there is no way to unset a property */
292   if (matproperties[0]) PetscCall(MatSetOption(A, MAT_SYMMETRIC, matproperties[0] == 1 ? PETSC_TRUE : PETSC_FALSE));
293   if (matproperties[1]) PetscCall(MatSetOption(A, MAT_HERMITIAN, matproperties[1] == 1 ? PETSC_TRUE : PETSC_FALSE));
294   if (matproperties[2]) PetscCall(MatSetOption(A, MAT_SPD, matproperties[2] == 1 ? PETSC_TRUE : PETSC_FALSE));
295   if (matproperties[3]) PetscCall(MatSetOption(A, MAT_STRUCTURALLY_SYMMETRIC, matproperties[3] == 1 ? PETSC_TRUE : PETSC_FALSE));
296   PetscFunctionReturn(PETSC_SUCCESS);
297 }
298 
299 static PetscErrorCode PCMPISolve(PC pc, Vec B, Vec X)
300 {
301   PC_MPI            *km = pc ? (PC_MPI *)pc->data : NULL;
302   KSP                ksp;
303   MPI_Comm           comm = PC_MPI_COMM_WORLD;
304   const PetscScalar *sb   = NULL, *x;
305   PetscScalar       *b, *sx = NULL;
306   PetscInt           its, n;
307   PetscMPIInt        size;
308 
309   PetscFunctionBegin;
310   PetscCallMPI(MPI_Scatter(pc ? km->ksps : &ksp, 1, MPI_AINT, &ksp, 1, MPI_AINT, 0, comm));
311   if (!ksp) PetscFunctionReturn(PETSC_SUCCESS);
312   PetscCall(PetscObjectGetComm((PetscObject)ksp, &comm));
313 
314   /* TODO: optimize code to not require building counts/displ every time */
315 
316   /* scatterv rhs */
317   PetscCallMPI(MPI_Comm_size(comm, &size));
318   if (pc) {
319     PetscInt N;
320 
321     PCMPISolveCounts[size - 1]++;
322     PetscCall(MatGetSize(pc->pmat, &N, NULL));
323     PCMPISizes[size - 1] += N;
324     PetscCall(VecGetArrayRead(B, &sb));
325   }
326   PetscCall(VecGetLocalSize(ksp->vec_rhs, &n));
327   PetscCall(VecGetArray(ksp->vec_rhs, &b));
328   PetscCallMPI(MPI_Scatterv(sb, pc ? km->sendcount : NULL, pc ? km->displ : NULL, MPIU_SCALAR, b, n, MPIU_SCALAR, 0, comm));
329   PetscCall(VecRestoreArray(ksp->vec_rhs, &b));
330   if (pc) PetscCall(VecRestoreArrayRead(B, &sb));
331 
332   PetscCall(PetscLogStagePush(PCMPIStage));
333   PetscCall(KSPSolve(ksp, NULL, NULL));
334   PetscCall(PetscLogStagePop());
335   PetscCall(KSPGetIterationNumber(ksp, &its));
336   PCMPIIterations[size - 1] += its;
337 
338   /* gather solution */
339   PetscCall(VecGetArrayRead(ksp->vec_sol, &x));
340   if (pc) PetscCall(VecGetArray(X, &sx));
341   PetscCallMPI(MPI_Gatherv(x, n, MPIU_SCALAR, sx, pc ? km->sendcount : NULL, pc ? km->displ : NULL, MPIU_SCALAR, 0, comm));
342   if (pc) PetscCall(VecRestoreArray(X, &sx));
343   PetscCall(VecRestoreArrayRead(ksp->vec_sol, &x));
344   PetscFunctionReturn(PETSC_SUCCESS);
345 }
346 
347 static PetscErrorCode PCMPIDestroy(PC pc)
348 {
349   PC_MPI  *km = pc ? (PC_MPI *)pc->data : NULL;
350   KSP      ksp;
351   MPI_Comm comm = PC_MPI_COMM_WORLD;
352 
353   PetscFunctionBegin;
354   PetscCallMPI(MPI_Scatter(pc ? km->ksps : NULL, 1, MPI_AINT, &ksp, 1, MPI_AINT, 0, comm));
355   if (!ksp) PetscFunctionReturn(PETSC_SUCCESS);
356   PetscCall(KSPDestroy(&ksp));
357   PetscFunctionReturn(PETSC_SUCCESS);
358 }
359 
360 /*@C
361   PCMPIServerBegin - starts a server on the MPI processes with rank != 0 that waits to process requests for
362   parallel `KSP` solves and the management of parallel `KSP` objects.
363 
364   Logically Collective on all MPI ranks except 0
365 
366   Options Database Keys:
367 + -mpi_linear_solver_server      - causes the PETSc program to start in MPI linear solver server mode where only the first MPI rank runs user code
368 - -mpi_linear_solver_server_view - displays information about the linear systems solved by the MPI linear solver server
369 
370   Level: developer
371 
372   Note:
373   This is normally started automatically in `PetscInitialize()` when the option `-mpi_linear_solver_server` is provided
374 
375   Developer Notes:
376   When called on rank zero this sets `PETSC_COMM_WORLD` to `PETSC_COMM_SELF` to allow a main program
377   written with `PETSC_COMM_WORLD` to run correctly on the single rank while all the other ranks
378   (that would normally be sharing `PETSC_COMM_WORLD`) run the solver server.
379 
380   Can this be integrated into the `PetscDevice` abstraction that is currently being developed?
381 
382 .seealso: `PCMPIServerEnd()`, `PCMPI`
383 @*/
384 PetscErrorCode PCMPIServerBegin(void)
385 {
386   PetscMPIInt rank;
387 
388   PetscFunctionBegin;
389   PetscCall(PetscInfo(NULL, "Starting MPI Linear Solver Server"));
390   if (PetscDefined(USE_SINGLE_LIBRARY)) {
391     PetscCall(VecInitializePackage());
392     PetscCall(MatInitializePackage());
393     PetscCall(DMInitializePackage());
394     PetscCall(PCInitializePackage());
395     PetscCall(KSPInitializePackage());
396     PetscCall(SNESInitializePackage());
397     PetscCall(TSInitializePackage());
398     PetscCall(TaoInitializePackage());
399   }
400 
401   PetscCallMPI(MPI_Comm_rank(PC_MPI_COMM_WORLD, &rank));
402   if (rank == 0) {
403     PETSC_COMM_WORLD = PETSC_COMM_SELF;
404     PetscFunctionReturn(PETSC_SUCCESS);
405   }
406 
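  /* the server loop: rank 0 broadcasts a PCMPICommand from PCSetUp_MPI(), PCApply_MPI(), or PCDestroy_MPI(), and every other
     rank dispatches to the matching PCMPIXXX(NULL) routine below, until PCMPI_EXIT is received */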
407   while (PETSC_TRUE) {
408     PCMPICommand request = PCMPI_CREATE;
409     PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, PC_MPI_COMM_WORLD));
410     switch (request) {
411     case PCMPI_CREATE:
412       PetscCall(PCMPICreate(NULL));
413       break;
414     case PCMPI_SET_MAT:
415       PetscCall(PCMPISetMat(NULL));
416       break;
417     case PCMPI_UPDATE_MAT_VALUES:
418       PetscCall(PCMPIUpdateMatValues(NULL));
419       break;
420     case PCMPI_VIEW:
421       // PetscCall(PCMPIView(NULL));
422       break;
423     case PCMPI_SOLVE:
424       PetscCall(PCMPISolve(NULL, NULL, NULL));
425       break;
426     case PCMPI_DESTROY:
427       PetscCall(PCMPIDestroy(NULL));
428       break;
429     case PCMPI_EXIT:
430       PetscCall(PetscFinalize());
431       exit(0); /* not sure if this is a good idea, but cannot return because it would run the user's main program */
432       break;
433     default:
434       break;
435     }
436   }
437   PetscFunctionReturn(PETSC_SUCCESS);
438 }
439 
440 /*@C
441   PCMPIServerEnd - ends the server on the MPI processes with rank != 0 that waits to process requests for
442   parallel `KSP` solves and the management of parallel `KSP` objects.
443 
444   Logically Collective on all MPI ranks except 0
445 
446   Level: developer
447 
448   Note:
449   This is normally ended automatically in `PetscFinalize()` when the option `-mpi_linear_solver_server` is provided
450 
451 .seealso: `PCMPIServerBegin()`, `PCMPI`
452 @*/
453 PetscErrorCode PCMPIServerEnd(void)
454 {
455   PCMPICommand request = PCMPI_EXIT;
456 
457   PetscFunctionBegin;
458   if (PetscGlobalRank == 0) {
459     PetscViewer       viewer = NULL;
460     PetscViewerFormat format;
461 
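    /* release the server ranks blocked in PCMPIServerBegin(); they will call PetscFinalize() and exit */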
462     PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, PC_MPI_COMM_WORLD));
463     PETSC_COMM_WORLD = MPI_COMM_WORLD; /* could use PC_MPI_COMM_WORLD */
464     PetscOptionsBegin(PETSC_COMM_SELF, NULL, "MPI linear solver server options", NULL);
465     PetscCall(PetscOptionsViewer("-mpi_linear_solver_server_view", "View information about system solved with the server", "PCMPI", &viewer, &format, NULL));
466     PetscOptionsEnd();
467     if (viewer) {
468       PetscBool isascii;
469 
470       PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii));
471       if (isascii) {
472         PetscMPIInt size;
473         PetscMPIInt i;
474 
475         PetscCallMPI(MPI_Comm_size(PETSC_COMM_WORLD, &size));
476         PetscCall(PetscViewerASCIIPrintf(viewer, "MPI linear solver server statistics:\n"));
477         PetscCall(PetscViewerASCIIPrintf(viewer, "    Ranks        KSPSolve()s     Mats        KSPs       Avg. Size      Avg. Its\n"));
478         if (PCMPIKSPCountsSeq) {
479           PetscCall(PetscViewerASCIIPrintf(viewer, "  Sequential         %" PetscInt_FMT "                         %" PetscInt_FMT "            %" PetscInt_FMT "           %" PetscInt_FMT "\n", PCMPISolveCountsSeq, PCMPIKSPCountsSeq, PCMPISizesSeq / PCMPISolveCountsSeq, PCMPIIterationsSeq / PCMPISolveCountsSeq));
480         }
481         for (i = 0; i < size; i++) {
482           if (PCMPIKSPCounts[i]) {
483             PetscCall(PetscViewerASCIIPrintf(viewer, "     %d               %" PetscInt_FMT "            %" PetscInt_FMT "           %" PetscInt_FMT "            %" PetscInt_FMT "            %" PetscInt_FMT "\n", i + 1, PCMPISolveCounts[i], PCMPIMatCounts[i], PCMPIKSPCounts[i], PCMPISizes[i] / PCMPISolveCounts[i], PCMPIIterations[i] / PCMPISolveCounts[i]));
484           }
485         }
486       }
487       PetscCall(PetscViewerDestroy(&viewer));
488     }
489   }
490   PetscCall(PCMPICommsDestroy());
491   PetscFunctionReturn(PETSC_SUCCESS);
492 }
493 
494 /*
495     This version is used in the trivial case when the MPI parallel solver server is running on just the original MPI rank 0
496     because, for example, the problem is small. It is more efficient because it does not require copying any data.
497 */
498 static PetscErrorCode PCSetUp_Seq(PC pc)
499 {
500   PC_MPI     *km = (PC_MPI *)pc->data;
501   Mat         sA;
502   const char *prefix;
503 
504   PetscFunctionBegin;
505   PetscCall(PCGetOperators(pc, NULL, &sA));
506   PetscCall(PCGetOptionsPrefix(pc, &prefix));
507   PetscCall(KSPCreate(PETSC_COMM_SELF, &km->ksps[0]));
508   PetscCall(KSPSetOptionsPrefix(km->ksps[0], prefix));
509   PetscCall(KSPAppendOptionsPrefix(km->ksps[0], "mpi_"));
510   PetscCall(KSPSetOperators(km->ksps[0], sA, sA));
511   PetscCall(KSPSetFromOptions(km->ksps[0]));
512   PetscCall(KSPSetUp(km->ksps[0]));
513   PetscCall(PetscInfo((PetscObject)pc, "MPI parallel linear solver system is being solved directly on rank 0 due to its small size\n"));
514   PCMPIKSPCountsSeq++;
515   PetscFunctionReturn(PETSC_SUCCESS);
516 }
517 
518 static PetscErrorCode PCApply_Seq(PC pc, Vec b, Vec x)
519 {
520   PC_MPI  *km = (PC_MPI *)pc->data;
521   PetscInt its, n;
522   Mat      A;
523 
524   PetscFunctionBegin;
525   PetscCall(KSPSolve(km->ksps[0], b, x));
526   PetscCall(KSPGetIterationNumber(km->ksps[0], &its));
527   PCMPISolveCountsSeq++;
528   PCMPIIterationsSeq += its;
529   PetscCall(KSPGetOperators(km->ksps[0], NULL, &A));
530   PetscCall(MatGetSize(A, &n, NULL));
531   PCMPISizesSeq += n;
532   PetscFunctionReturn(PETSC_SUCCESS);
533 }
534 
535 static PetscErrorCode PCView_Seq(PC pc, PetscViewer viewer)
536 {
537   PC_MPI     *km = (PC_MPI *)pc->data;
538   const char *prefix;
539 
540   PetscFunctionBegin;
541   PetscCall(PetscViewerASCIIPrintf(viewer, "Running MPI linear solver server directly on rank 0 due to its small size\n"));
542   PetscCall(PetscViewerASCIIPrintf(viewer, "Desired minimum number of nonzeros per rank for MPI parallel solve %d\n", (int)km->mincntperrank));
543   PetscCall(PCGetOptionsPrefix(pc, &prefix));
544   if (prefix) PetscCall(PetscViewerASCIIPrintf(viewer, "*** Use -%smpi_ksp_view to see the MPI KSP parameters ***\n", prefix));
545   else PetscCall(PetscViewerASCIIPrintf(viewer, "*** Use -mpi_ksp_view to see the MPI KSP parameters ***\n"));
546   PetscCall(PetscViewerASCIIPrintf(viewer, "*** Use -mpi_linear_solver_server_view to see statistics on all the solves ***\n"));
547   PetscFunctionReturn(PETSC_SUCCESS);
548 }
549 
550 static PetscErrorCode PCDestroy_Seq(PC pc)
551 {
552   PC_MPI *km = (PC_MPI *)pc->data;
553 
554   PetscFunctionBegin;
555   PetscCall(KSPDestroy(&km->ksps[0]));
556   PetscCall(PetscFree(pc->data));
557   PetscFunctionReturn(PETSC_SUCCESS);
558 }
559 
560 /*
561      PCSetUp_MPI - Triggers the creation of the MPI parallel PC and copies parts of the matrix and
562      right hand side to the parallel PC
563 */
564 static PetscErrorCode PCSetUp_MPI(PC pc)
565 {
566   PC_MPI      *km = (PC_MPI *)pc->data;
567   PetscMPIInt  rank, size;
568   PCMPICommand request;
569   PetscBool    newmatrix = PETSC_FALSE;
570 
571   PetscFunctionBegin;
572   PetscCallMPI(MPI_Comm_rank(MPI_COMM_WORLD, &rank));
573   PetscCheck(rank == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "PCMPI can only be used from 0th rank of MPI_COMM_WORLD. Perhaps a missing -mpi_linear_solver_server?");
574   PetscCallMPI(MPI_Comm_size(MPI_COMM_WORLD, &size));
575 
576   if (!pc->setupcalled) {
577     if (!km->alwaysuseserver) {
578       PetscInt n;
579       Mat      sA;
580       /* short circuit for small systems */
581       PetscCall(PCGetOperators(pc, &sA, &sA));
582       PetscCall(MatGetSize(sA, &n, NULL));
583       if (n < 2 * km->mincntperrank - 1 || size == 1) {
584         pc->ops->setup   = NULL;
585         pc->ops->apply   = PCApply_Seq;
586         pc->ops->destroy = PCDestroy_Seq;
587         pc->ops->view    = PCView_Seq;
588         PetscCall(PCSetUp_Seq(pc));
589         PetscFunctionReturn(PETSC_SUCCESS);
590       }
591     }
592 
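    /* wake the server ranks, which are blocked in an MPI_Bcast() inside PCMPIServerBegin(), and have them run the matching PCMPIXXX(NULL) routine */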
593     request = PCMPI_CREATE;
594     PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, MPI_COMM_WORLD));
595     PetscCall(PCMPICreate(pc));
596     newmatrix = PETSC_TRUE;
597   }
598   if (pc->flag == DIFFERENT_NONZERO_PATTERN) newmatrix = PETSC_TRUE;
599 
600   if (newmatrix) {
601     PetscCall(PetscInfo((PetscObject)pc, "New matrix or matrix has changed nonzero structure\n"));
602     request = PCMPI_SET_MAT;
603     PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, MPI_COMM_WORLD));
604     PetscCall(PCMPISetMat(pc));
605   } else {
606     PetscCall(PetscInfo((PetscObject)pc, "Matrix has only changed nonzero values\n"));
607     request = PCMPI_UPDATE_MAT_VALUES;
608     PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, MPI_COMM_WORLD));
609     PetscCall(PCMPIUpdateMatValues(pc));
610   }
611   PetscFunctionReturn(PETSC_SUCCESS);
612 }
613 
614 static PetscErrorCode PCApply_MPI(PC pc, Vec b, Vec x)
615 {
616   PCMPICommand request = PCMPI_SOLVE;
617 
618   PetscFunctionBegin;
619   PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, MPI_COMM_WORLD));
620   PetscCall(PCMPISolve(pc, b, x));
621   PetscFunctionReturn(PETSC_SUCCESS);
622 }
623 
624 PetscErrorCode PCDestroy_MPI(PC pc)
625 {
626   PCMPICommand request = PCMPI_DESTROY;
627 
628   PetscFunctionBegin;
629   PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, MPI_COMM_WORLD));
630   PetscCall(PCMPIDestroy(pc));
631   PetscCall(PetscFree(pc->data));
632   PetscFunctionReturn(PETSC_SUCCESS);
633 }
634 
635 /*
636      PCView_MPI - Cannot call KSPView() on the MPI parallel KSP because the other ranks do not have access to the viewer
637 */
638 PetscErrorCode PCView_MPI(PC pc, PetscViewer viewer)
639 {
640   PC_MPI     *km = (PC_MPI *)pc->data;
641   MPI_Comm    comm;
642   PetscMPIInt size;
643   const char *prefix;
644 
645   PetscFunctionBegin;
646   PetscCall(PetscObjectGetComm((PetscObject)km->ksps[0], &comm));
647   PetscCallMPI(MPI_Comm_size(comm, &size));
648   PetscCall(PetscViewerASCIIPrintf(viewer, "Size of MPI communicator used for MPI parallel KSP solve %d\n", size));
649   PetscCall(PetscViewerASCIIPrintf(viewer, "Desired minimum number of nonzeros per rank for MPI parallel solve %d\n", (int)km->mincntperrank));
650   PetscCall(PCGetOptionsPrefix(pc, &prefix));
651   if (prefix) PetscCall(PetscViewerASCIIPrintf(viewer, "*** Use -%smpi_ksp_view to see the MPI KSP parameters ***\n", prefix));
652   else PetscCall(PetscViewerASCIIPrintf(viewer, "*** Use -mpi_ksp_view to see the MPI KSP parameters ***\n"));
653   PetscCall(PetscViewerASCIIPrintf(viewer, "*** Use -mpi_linear_solver_server_view to see statistics on all the solves ***\n"));
654   PetscFunctionReturn(PETSC_SUCCESS);
655 }
656 
657 PetscErrorCode PCSetFromOptions_MPI(PC pc, PetscOptionItems *PetscOptionsObject)
658 {
659   PC_MPI *km = (PC_MPI *)pc->data;
660 
661   PetscFunctionBegin;
662   PetscOptionsHeadBegin(PetscOptionsObject, "MPI linear solver server options");
663   PetscCall(PetscOptionsInt("-pc_mpi_minimum_count_per_rank", "Desired minimum number of nonzeros per rank", "None", km->mincntperrank, &km->mincntperrank, NULL));
664   PetscCall(PetscOptionsBool("-pc_mpi_always_use_server", "Use the server even if only one rank is used for the solve (for debugging)", "None", km->alwaysuseserver, &km->alwaysuseserver, NULL));
665   PetscOptionsHeadEnd();
666   PetscFunctionReturn(PETSC_SUCCESS);
667 }
668 
669 /*MC
670      PCMPI - Calls an MPI parallel `KSP` to solve a linear system from user code running on one process
671 
672    Options Database Keys:
673 +  -mpi_linear_solver_server - causes the PETSc program to start in MPI linear solver server mode where only the first MPI rank runs user code
674 .  -mpi_linear_solver_server_view - displays information about the linear systems solved by the MPI linear solver server
675 .  -pc_mpi_minimum_count_per_rank - sets the minimum size of the linear system per MPI rank that the solver will strive for
676 -  -pc_mpi_always_use_server - use the server solver code even if the particular system is only solved on one process; this option is only for debugging and testing purposes
677 
678    Level: beginner
679 
680    Notes:
681    The options database prefix for the MPI parallel `KSP` and `PC` is -mpi_
682 
683    The MPI linear solver server will not support scaling user code to utilize extremely large numbers of MPI ranks but should give a reasonable speedup for
684    potentially 4 to 8 MPI ranks, depending on the linear system being solved, the solver algorithm, and the hardware.
685 
686    It can be particularly useful for user OpenMP code or potentially user GPU code.
687 
688    When the program is running with a single MPI rank, this directly uses the provided matrix and right hand side (still in a `KSP` with the options prefix -mpi_)
689    and does not need to distribute the matrix and vector to the various MPI ranks; thus it incurs no extra overhead over just using the `KSP` directly.
690 
691    The solver options for the `KSP` and `PC` must be controlled via the options database; calls that set options directly on the user-level `KSP` and `PC` have no effect
692    because they are not the actual solver objects.
693 
694    When `-log_view` is used with this solver, the events within the parallel solve are logged in their own stage. Some of the logging in the other
695    stages will be confusing since the event times are only recorded on the 0th MPI rank; thus the percentage of time spent in the events will be misleading.
696 
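   Example Usage:
   One plausible launch line (the executable name and solver choices are placeholders); -pc_type mpi selects this `PC` for the user-level `KSP`,
   and the -mpi_ prefixed options control the server-side solver
.vb
   mpiexec -n 4 ./app -mpi_linear_solver_server -ksp_type preonly -pc_type mpi -mpi_ksp_type cg -mpi_pc_type gamg -mpi_linear_solver_server_view
.ve
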
697 .seealso: `KSPCreate()`, `KSPSetType()`, `KSPType`, `KSP`, `PC`, `PCMPIServerBegin()`, `PCMPIServerEnd()`
698 M*/
699 PETSC_EXTERN PetscErrorCode PCCreate_MPI(PC pc)
700 {
701   PC_MPI *km;
702 
703   PetscFunctionBegin;
704   PetscCall(PetscNew(&km));
705   pc->data = (void *)km;
706 
707   km->mincntperrank = 10000;
708 
709   pc->ops->setup          = PCSetUp_MPI;
710   pc->ops->apply          = PCApply_MPI;
711   pc->ops->destroy        = PCDestroy_MPI;
712   pc->ops->view           = PCView_MPI;
713   pc->ops->setfromoptions = PCSetFromOptions_MPI;
714   PetscFunctionReturn(PETSC_SUCCESS);
715 }
716