xref: /petsc/src/ksp/pc/impls/mpi/pcmpi.c (revision 1511cd715a1f0c8d257549c5ebe5cee9c6feed4d)
1 /*
2     This file creates an MPI parallel KSP from a sequential PC that lives on MPI rank 0.
3     It is intended to allow using PETSc MPI parallel linear solvers from non-MPI codes.
4 
5     That program may use OpenMP to compute the right hand side and matrix for the linear system
6 
7     The code uses MPI_COMM_WORLD below but maybe it should be PETSC_COMM_WORLD
8 
9     The resulting KSP and PC can only be controlled via the options database, though some common commands
10     could be passed through the server.
11 
12 */
13 #include <petsc/private/pcimpl.h>
14 #include <petsc/private/kspimpl.h>
15 
16 #define PC_MPI_MAX_RANKS  256
17 #define PC_MPI_COMM_WORLD MPI_COMM_WORLD
18 
/* Private data for PCMPI; the arrays below are populated and used only on rank 0, which orchestrates the server ranks */
typedef struct {
  KSP         ksps[PC_MPI_MAX_RANKS];                               /* The addresses of the MPI parallel KSP on each rank, NULL when not on a rank. */
  PetscMPIInt sendcount[PC_MPI_MAX_RANKS], displ[PC_MPI_MAX_RANKS]; /* For scatter/gather of rhs/solution */
  PetscMPIInt NZ[PC_MPI_MAX_RANKS], NZdispl[PC_MPI_MAX_RANKS];      /* For scatter of nonzero values in matrix (and nonzero column indices initially) */
  PetscInt    mincntperrank;                                        /* minimum number of desired nonzeros per active rank in MPI parallel KSP solve */
  PetscBool   alwaysuseserver;                                      /* for debugging use the server infrastructure even if only one MPI rank is used for the solve */
} PC_MPI;
26 
/* Commands broadcast from rank 0 to the server ranks blocked in PCMPIServerBegin() */
typedef enum {
  PCMPI_EXIT, /* exit the PC server loop, means the controlling sequential program is done */
  PCMPI_CREATE,
  PCMPI_SET_MAT,           /* set original matrix (or one with different nonzero pattern) */
  PCMPI_UPDATE_MAT_VALUES, /* update current matrix with new nonzero values */
  PCMPI_SOLVE,
  PCMPI_VIEW,
  PCMPI_DESTROY /* destroy a KSP that is no longer needed */
} PCMPICommand;
36 
/* Nested subcommunicators created by PCMPICommsCreate(): PCMPIComms[i] contains ranks 0..i of PC_MPI_COMM_WORLD (MPI_COMM_NULL elsewhere) */
static MPI_Comm  PCMPIComms[PC_MPI_MAX_RANKS];
static PetscBool PCMPICommSet = PETSC_FALSE;
/* Statistics reported by -mpi_linear_solver_server_view. The parallel counters are incremented at the index equal to the
   subcommunicator size (see PCMPICreate()); NOTE(review): an index equal to PC_MPI_MAX_RANKS would write one past these arrays -- confirm */
static PetscInt  PCMPISolveCounts[PC_MPI_MAX_RANKS], PCMPIKSPCounts[PC_MPI_MAX_RANKS], PCMPIMatCounts[PC_MPI_MAX_RANKS], PCMPISolveCountsSeq = 0, PCMPIKSPCountsSeq = 0;
40 
41 static PetscErrorCode PCMPICommsCreate(void) {
42   MPI_Comm    comm = PC_MPI_COMM_WORLD;
43   PetscMPIInt size, rank, i;
44 
45   PetscFunctionBegin;
46   PetscCallMPI(MPI_Comm_size(comm, &size));
47   PetscCheck(size <= PC_MPI_MAX_RANKS, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for using more than PC_MPI_MAX_RANKS MPI ranks in an MPI linear solver server solve");
48   PetscCallMPI(MPI_Comm_rank(comm, &rank));
49   /* comm for size 1 is useful only for debugging */
50   for (i = 0; i < size; i++) {
51     PetscMPIInt color = rank < i + 1 ? 0 : MPI_UNDEFINED;
52     PetscCallMPI(MPI_Comm_split(comm, color, 0, &PCMPIComms[i]));
53     PCMPISolveCounts[i] = 0;
54     PCMPIKSPCounts[i]   = 0;
55   }
56   PCMPICommSet = PETSC_TRUE;
57   PetscFunctionReturn(0);
58 }
59 
60 PetscErrorCode PCMPICommsDestroy(void) {
61   MPI_Comm    comm = PC_MPI_COMM_WORLD;
62   PetscMPIInt size, rank, i;
63 
64   PetscFunctionBegin;
65   if (!PCMPICommSet) PetscFunctionReturn(0);
66   PetscCallMPI(MPI_Comm_size(comm, &size));
67   PetscCallMPI(MPI_Comm_rank(comm, &rank));
68   for (i = 0; i < size; i++) {
69     if (PCMPIComms[i] != MPI_COMM_NULL) PetscCallMPI(MPI_Comm_free(&PCMPIComms[i]));
70   }
71   PCMPICommSet = PETSC_FALSE;
72   PetscFunctionReturn(0);
73 }
74 
75 static PetscErrorCode PCMPICreate(PC pc) {
76   PC_MPI     *km   = pc ? (PC_MPI *)pc->data : NULL;
77   MPI_Comm    comm = PC_MPI_COMM_WORLD;
78   KSP         ksp;
79   PetscInt    N[2], mincntperrank = 0;
80   PetscMPIInt size;
81   Mat         sA;
82   char       *prefix;
83   PetscMPIInt len = 0;
84 
85   PetscFunctionBegin;
86   if (!PCMPICommSet) PetscCall(PCMPICommsCreate());
87   PetscCallMPI(MPI_Comm_size(comm, &size));
88   if (pc) {
89     if (size == 1) PetscCall(PetscPrintf(PETSC_COMM_SELF, "Warning: Running KSP type of MPI on a one rank MPI run, this will be less efficient then not using this type\n"));
90     PetscCall(PCGetOperators(pc, &sA, &sA));
91     PetscCall(MatGetSize(sA, &N[0], &N[1]));
92   }
93   PetscCallMPI(MPI_Bcast(N, 2, MPIU_INT, 0, comm));
94 
95   /* choose a suitable sized MPI_Comm for the problem to be solved on */
96   if (km) mincntperrank = km->mincntperrank;
97   PetscCallMPI(MPI_Bcast(&mincntperrank, 1, MPI_INT, 0, comm));
98   comm = PCMPIComms[PetscMin(size, PetscMax(1, N[0] / mincntperrank)) - 1];
99   if (comm == MPI_COMM_NULL) {
100     ksp = NULL;
101     PetscFunctionReturn(0);
102   }
103   PetscCall(KSPCreate(comm, &ksp));
104   PetscCallMPI(MPI_Gather(&ksp, 1, MPI_AINT, pc ? km->ksps : NULL, 1, MPI_AINT, 0, comm));
105   if (pc) {
106     size_t slen;
107 
108     PetscCallMPI(MPI_Comm_size(comm, &size));
109     PCMPIKSPCounts[size]++;
110     PetscCall(PCGetOptionsPrefix(pc, (const char **)&prefix));
111     PetscCall(PetscStrlen(prefix, &slen));
112     len = (PetscMPIInt)slen;
113   }
114   PetscCallMPI(MPI_Bcast(&len, 1, MPI_INT, 0, comm));
115   if (len) {
116     if (!pc) PetscCall(PetscMalloc1(len + 1, &prefix));
117     PetscCallMPI(MPI_Bcast(prefix, len + 1, MPI_CHAR, 0, comm));
118     PetscCall(KSPSetOptionsPrefix(ksp, prefix));
119   }
120   PetscCall(KSPAppendOptionsPrefix(ksp, "mpi_"));
121   PetscFunctionReturn(0);
122 }
123 
/*
   PCMPISetMat - distributes the rank 0 sequential AIJ matrix to the subcommunicator as an MPIAIJ matrix

   pc is non-NULL only on rank 0; server ranks call this with pc == NULL. Rank 0 scatters the CSR row
   pointers, column indices, and values; each participating rank assembles its local rows.
*/
static PetscErrorCode PCMPISetMat(PC pc) {
  PC_MPI            *km = pc ? (PC_MPI *)pc->data : NULL;
  Mat                A;
  PetscInt           N[2], n, *ia, *ja, j;
  Mat                sA;
  MPI_Comm           comm = PC_MPI_COMM_WORLD;
  KSP                ksp;
  PetscLayout        layout;
  const PetscInt    *IA = NULL, *JA = NULL;
  const PetscInt    *range;
  PetscMPIInt       *NZ = NULL, sendcounti[PC_MPI_MAX_RANKS], displi[PC_MPI_MAX_RANKS], *NZdispl = NULL, nz, size, i;
  PetscScalar       *a;
  const PetscScalar *sa = NULL;

  PetscFunctionBegin;
  /* each rank learns the address of its piece of the parallel KSP; NULL means this rank does not participate */
  PetscCallMPI(MPI_Scatter(pc ? km->ksps : NULL, 1, MPI_AINT, &ksp, 1, MPI_AINT, 0, comm));
  if (!ksp) PetscFunctionReturn(0);
  PetscCall(PetscObjectGetComm((PetscObject)ksp, &comm));
  if (pc) {
    PetscCallMPI(MPI_Comm_size(comm, &size));
    PCMPIMatCounts[size]++;
    PetscCall(PCGetOperators(pc, &sA, &sA));
    PetscCall(MatGetSize(sA, &N[0], &N[1]));
  }
  PetscCallMPI(MPI_Bcast(N, 2, MPIU_INT, 0, comm));

  /* determine ownership ranges of matrix */
  PetscCall(PetscLayoutCreate(comm, &layout));
  PetscCall(PetscLayoutSetBlockSize(layout, 1));
  PetscCall(PetscLayoutSetSize(layout, N[0]));
  PetscCall(PetscLayoutSetUp(layout));
  PetscCall(PetscLayoutGetLocalSize(layout, &n));

  /* copy over the matrix nonzero structure and values */
  if (pc) {
    NZ      = km->NZ;
    NZdispl = km->NZdispl;
    PetscCall(PetscLayoutGetRanges(layout, &range));
    PetscCall(MatGetRowIJ(sA, 0, PETSC_FALSE, PETSC_FALSE, NULL, &IA, &JA, NULL));
    for (i = 0; i < size; i++) {
      /* rank i receives range[i+1]-range[i]+1 row-pointer entries; adjacent ranks share one boundary entry */
      /* NOTE(review): these PetscMPIInt casts silently truncate if a rank holds more than INT_MAX nonzeros -- confirm acceptable */
      sendcounti[i] = (PetscMPIInt)(1 + range[i + 1] - range[i]);
      NZ[i]         = (PetscMPIInt)(IA[range[i + 1]] - IA[range[i]]);
    }
    /* displacements overlap by one entry for the shared row pointers */
    displi[0]  = 0;
    NZdispl[0] = 0;
    for (j = 1; j < size; j++) {
      displi[j]  = displi[j - 1] + sendcounti[j - 1] - 1;
      NZdispl[j] = NZdispl[j - 1] + NZ[j - 1];
    }
    PetscCall(MatSeqAIJGetArrayRead(sA, &sa));
  }
  PetscCall(PetscLayoutDestroy(&layout));
  PetscCallMPI(MPI_Scatter(NZ, 1, MPI_INT, &nz, 1, MPI_INT, 0, comm));

  PetscCall(PetscMalloc3(n + 1, &ia, nz, &ja, nz, &a));
  PetscCallMPI(MPI_Scatterv(IA, sendcounti, displi, MPIU_INT, ia, n + 1, MPIU_INT, 0, comm));
  PetscCallMPI(MPI_Scatterv(JA, NZ, NZdispl, MPIU_INT, ja, nz, MPIU_INT, 0, comm));
  PetscCallMPI(MPI_Scatterv(sa, NZ, NZdispl, MPIU_SCALAR, a, nz, MPIU_SCALAR, 0, comm));

  if (pc) {
    PetscCall(MatSeqAIJRestoreArrayRead(sA, &sa));
    PetscCall(MatRestoreRowIJ(sA, 0, PETSC_FALSE, PETSC_FALSE, NULL, &IA, &JA, NULL));
  }

  /* the received row pointers are global offsets; shift them so the local block starts at zero */
  for (j = 1; j < n + 1; j++) ia[j] -= ia[0];
  ia[0] = 0;
  PetscCall(MatCreateMPIAIJWithArrays(comm, n, n, N[0], N[0], ia, ja, a, &A));
  PetscCall(MatSetOptionsPrefix(A, "mpi_"));

  /* the receive buffers are freed immediately, so MatCreateMPIAIJWithArrays() must have copied the data */
  PetscCall(PetscFree3(ia, ja, a));
  PetscCall(KSPSetOperators(ksp, A, A));
  if (!ksp->vec_sol) PetscCall(MatCreateVecs(A, &ksp->vec_sol, &ksp->vec_rhs));
  if (pc) { /* needed for scatterv/gatherv of rhs and solution */
    const PetscInt *range;

    PetscCall(VecGetOwnershipRanges(ksp->vec_sol, &range));
    for (i = 0; i < size; i++) {
      km->sendcount[i] = (PetscMPIInt)(range[i + 1] - range[i]);
      km->displ[i]     = (PetscMPIInt)range[i];
    }
  }
  PetscCall(MatDestroy(&A));
  PetscCall(KSPSetFromOptions(ksp));
  PetscFunctionReturn(0);
}
209 
/*
   PCMPIUpdateMatValues - scatters new numerical values from the rank 0 sequential matrix into the existing
   parallel matrix; assumes the nonzero pattern is unchanged since the last PCMPISetMat()

   pc is non-NULL only on rank 0; server ranks call this with pc == NULL.
*/
static PetscErrorCode PCMPIUpdateMatValues(PC pc) {
  PC_MPI            *km = pc ? (PC_MPI *)pc->data : NULL;
  KSP                ksp;
  Mat                sA, A;
  MPI_Comm           comm = PC_MPI_COMM_WORLD;
  PetscScalar       *a;
  PetscCount         nz;
  const PetscScalar *sa = NULL;

  PetscFunctionBegin;
  if (pc) {
    PetscMPIInt size;

    PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)pc), &size));
    PCMPIMatCounts[size]++;
    PetscCall(PCGetOperators(pc, &sA, &sA));
    PetscCall(MatSeqAIJGetArrayRead(sA, &sa));
  }
  /* each rank learns the address of its piece of the parallel KSP; NULL means this rank does not participate */
  PetscCallMPI(MPI_Scatter(pc ? km->ksps : NULL, 1, MPI_AINT, &ksp, 1, MPI_AINT, 0, comm));
  if (!ksp) PetscFunctionReturn(0);
  PetscCall(PetscObjectGetComm((PetscObject)ksp, &comm));
  PetscCall(KSPGetOperators(ksp, NULL, &A));
  PetscCall(MatMPIAIJGetNumberNonzeros(A, &nz));
  PetscCall(PetscMalloc1(nz, &a));
  /* reuse the counts/displacements built in PCMPISetMat(); NOTE(review): nz is a PetscCount implicitly
     narrowed to the int recvcount -- confirm local nonzero counts stay below INT_MAX */
  PetscCallMPI(MPI_Scatterv(sa, pc ? km->NZ : NULL, pc ? km->NZdispl : NULL, MPIU_SCALAR, a, nz, MPIU_SCALAR, 0, comm));
  if (pc) PetscCall(MatSeqAIJRestoreArrayRead(sA, &sa));
  PetscCall(MatUpdateMPIAIJWithArray(A, a));
  PetscCall(PetscFree(a));
  PetscFunctionReturn(0);
}
240 
/*
   PCMPISolve - scatters the rank 0 right-hand side, solves in parallel, and gathers the solution back to rank 0

   pc, B, and X are non-NULL only on rank 0; the server ranks pass NULL for all three.
*/
static PetscErrorCode PCMPISolve(PC pc, Vec B, Vec X) {
  PC_MPI            *km = pc ? (PC_MPI *)pc->data : NULL;
  KSP                ksp;
  MPI_Comm           comm = PC_MPI_COMM_WORLD;
  const PetscScalar *sb   = NULL, *x;
  PetscScalar       *b, *sx = NULL;
  PetscInt           n;

  PetscFunctionBegin;
  /* the send buffer is only significant at the root; the &ksp placeholder on other ranks is harmless
     (other commands in this file pass NULL instead) */
  PetscCallMPI(MPI_Scatter(pc ? km->ksps : &ksp, 1, MPI_AINT, &ksp, 1, MPI_AINT, 0, comm));
  if (!ksp) PetscFunctionReturn(0);
  PetscCall(PetscObjectGetComm((PetscObject)ksp, &comm));

  /* TODO: optimize code to not require building counts/displ everytime */

  /* scatterv rhs */
  if (pc) {
    PetscMPIInt size;

    PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)pc), &size));
    PCMPISolveCounts[size]++;
    PetscCall(VecGetArrayRead(B, &sb));
  }
  PetscCall(VecGetLocalSize(ksp->vec_rhs, &n));
  PetscCall(VecGetArray(ksp->vec_rhs, &b));
  PetscCallMPI(MPI_Scatterv(sb, pc ? km->sendcount : NULL, pc ? km->displ : NULL, MPIU_SCALAR, b, n, MPIU_SCALAR, 0, comm));
  PetscCall(VecRestoreArray(ksp->vec_rhs, &b));
  if (pc) PetscCall(VecRestoreArrayRead(B, &sb));

  /* the rhs was placed directly into ksp->vec_rhs above and the result is read from ksp->vec_sol below */
  PetscCall(KSPSolve(ksp, NULL, NULL));

  /* gather solution */
  PetscCall(VecGetArrayRead(ksp->vec_sol, &x));
  if (pc) PetscCall(VecGetArray(X, &sx));
  PetscCallMPI(MPI_Gatherv(x, n, MPIU_SCALAR, sx, pc ? km->sendcount : NULL, pc ? km->displ : NULL, MPIU_SCALAR, 0, comm));
  if (pc) PetscCall(VecRestoreArray(X, &sx));
  PetscCall(VecRestoreArrayRead(ksp->vec_sol, &x));
  PetscFunctionReturn(0);
}
280 
281 static PetscErrorCode PCMPIDestroy(PC pc) {
282   PC_MPI  *km = pc ? (PC_MPI *)pc->data : NULL;
283   KSP      ksp;
284   MPI_Comm comm = PC_MPI_COMM_WORLD;
285 
286   PetscFunctionBegin;
287   PetscCallMPI(MPI_Scatter(pc ? km->ksps : NULL, 1, MPI_AINT, &ksp, 1, MPI_AINT, 0, comm));
288   if (!ksp) PetscFunctionReturn(0);
289   PetscCall(KSPDestroy(&ksp));
290   PetscFunctionReturn(0);
291 }
292 
293 /*@C
294      PCMPIServerBegin - starts a server that runs on the rank != 0 MPI processes waiting to process requests for
295      parallel KSP solves and management of parallel KSP objects.
296 
297      Logically collective on all MPI ranks except 0
298 
299    Options Database:
300 +   -mpi_linear_solver_server - causes the PETSc program to start in MPI linear solver server mode where only the first MPI rank runs user code
301 -   -mpi_linear_solver_server_view - displays information about the linear systems solved by the MPI linear solver server
302 
303      Notes:
304       This is normally started automatically in `PetscInitialize()` when the option is provided
305 
306      Developer Notes:
       When called on rank zero this sets PETSC_COMM_WORLD to PETSC_COMM_SELF to allow a main program
       written with PETSC_COMM_WORLD to run correctly on the single rank while all the other ranks
       (that would normally be sharing PETSC_COMM_WORLD) run the solver server.
310 
311        Can this be integrated into the PetscDevice abstraction that is currently being developed?
312 
313      Level: developer
314 
315 .seealso: PCMPIServerEnd(), PCMPI
316 @*/
PetscErrorCode PCMPIServerBegin(void) {
  PetscMPIInt rank;

  PetscFunctionBegin;
  PetscCall(PetscInfo(NULL, "Starting MPI Linear Solver Server"));
  PetscCallMPI(MPI_Comm_rank(PC_MPI_COMM_WORLD, &rank));
  if (rank == 0) {
    /* rank 0 runs the user's sequential program; making PETSC_COMM_WORLD self lets that program work unchanged */
    PETSC_COMM_WORLD = PETSC_COMM_SELF;
    PetscFunctionReturn(0);
  }

  /* every other rank blocks here for the rest of the run, servicing commands broadcast from rank 0 */
  while (PETSC_TRUE) {
    PCMPICommand request = PCMPI_CREATE;
    PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, PC_MPI_COMM_WORLD));
    switch (request) {
    case PCMPI_CREATE: PetscCall(PCMPICreate(NULL)); break;
    case PCMPI_SET_MAT: PetscCall(PCMPISetMat(NULL)); break;
    case PCMPI_UPDATE_MAT_VALUES: PetscCall(PCMPIUpdateMatValues(NULL)); break;
    case PCMPI_VIEW:
      // PetscCall(PCMPIView(NULL));
      break;
    case PCMPI_SOLVE: PetscCall(PCMPISolve(NULL, NULL, NULL)); break;
    case PCMPI_DESTROY: PetscCall(PCMPIDestroy(NULL)); break;
    case PCMPI_EXIT:
      /* the controlling sequential program is done; finalize and terminate this server process */
      PetscCall(PetscFinalize());
      exit(0); /* not sure if this is a good idea, but cannot return because it will run users main program */
      break;
    default: break;
    }
  }
  PetscFunctionReturn(0);
}
349 
350 /*@C
351      PCMPIServerEnd - ends a server that runs on the rank != 0 MPI processes waiting to process requests for
352      parallel KSP solves and management of parallel KSP objects.
353 
354      Logically collective on all MPI ranks except 0
355 
356      Notes:
357       This is normally ended automatically in `PetscFinalize()` when the option is provided
358 
359      Level: developer
360 
361 .seealso: PCMPIServerBegin(), PCMPI
362 @*/
PetscErrorCode PCMPIServerEnd(void) {
  PCMPICommand request = PCMPI_EXIT;

  PetscFunctionBegin;
  if (PetscGlobalRank == 0) {
    PetscViewer       viewer = NULL;
    PetscViewerFormat format;

    /* wake the server ranks blocked in PCMPIServerBegin() so they can finalize and exit */
    PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, PC_MPI_COMM_WORLD));
    PETSC_COMM_WORLD = MPI_COMM_WORLD; /* could use PC_MPI_COMM_WORLD */
    PetscOptionsBegin(PETSC_COMM_SELF, NULL, "MPI linear solver server options", NULL);
    PetscCall(PetscOptionsViewer("-mpi_linear_solver_server_view", "View information about system solved with the server", "PCMPI", &viewer, &format, NULL));
    PetscOptionsEnd();
    if (viewer) {
      PetscBool isascii;

      PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii));
      if (isascii) {
        PetscMPIInt size;
        PetscInt    i, ksp = 0, mat = 0, solve = 0;

        PetscCallMPI(MPI_Comm_size(PETSC_COMM_WORLD, &size));
        /* sum the per-subcommunicator-size counters; NOTE(review): the counters are incremented at the index equal
           to the subcommunicator size, so work done on all `size` ranks is recorded at index `size` and skipped by
           this i < size loop -- confirm intended */
        for (i = 1; i < size; i++) {
          ksp += PCMPIKSPCounts[i];
          mat += PCMPIMatCounts[i];
          solve += PCMPISolveCounts[i];
        }
        PetscCall(PetscViewerASCIIPrintf(viewer, "MPI linear solver server:\n"));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  Parallel KSPs  %" PetscInt_FMT " Mats  %" PetscInt_FMT " Solves %" PetscInt_FMT "\n", ksp, mat, solve));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  Sequential KSPs  %" PetscInt_FMT " Solves %" PetscInt_FMT "\n", PCMPIKSPCountsSeq, PCMPISolveCountsSeq));
      }
      PetscCall(PetscViewerDestroy(&viewer));
    }
  }
  PetscCall(PCMPICommsDestroy());
  PetscFunctionReturn(0);
}
400 
401 /*
402     This version is used in the trivial case when the MPI parallel solver server is running on just the original MPI rank 0
403     because, for example, the problem is small. This version is more efficient because it does not require copying any data
404 */
405 static PetscErrorCode PCSetUp_Seq(PC pc) {
406   PC_MPI     *km = (PC_MPI *)pc->data;
407   Mat         sA;
408   const char *prefix;
409 
410   PetscFunctionBegin;
411   PetscCall(PCGetOperators(pc, NULL, &sA));
412   PetscCall(PCGetOptionsPrefix(pc, &prefix));
413   PetscCall(KSPCreate(PETSC_COMM_SELF, &km->ksps[0]));
414   PetscCall(KSPSetOptionsPrefix(km->ksps[0], prefix));
415   PetscCall(KSPAppendOptionsPrefix(km->ksps[0], "mpi_"));
416   PetscCall(KSPSetOperators(km->ksps[0], sA, sA));
417   PetscCall(KSPSetFromOptions(km->ksps[0]));
418   PetscCall(KSPSetUp(km->ksps[0]));
419   PetscInfo((PetscObject)pc, "MPI parallel linear solver system is being solved directly on rank 0 due to its small size\n");
420   PCMPIKSPCountsSeq++;
421   PetscFunctionReturn(0);
422 }
423 
424 static PetscErrorCode PCApply_Seq(PC pc, Vec b, Vec x) {
425   PC_MPI *km = (PC_MPI *)pc->data;
426 
427   PetscFunctionBegin;
428   PetscCall(KSPSolve(km->ksps[0], b, x));
429   PCMPISolveCountsSeq++;
430   PetscFunctionReturn(0);
431 }
432 
433 static PetscErrorCode PCView_Seq(PC pc, PetscViewer viewer) {
434   PC_MPI     *km = (PC_MPI *)pc->data;
435   const char *prefix;
436 
437   PetscFunctionBegin;
438   PetscCall(PetscViewerASCIIPrintf(viewer, "Running MPI linear solver server directly on rank 0 due to its small size\n"));
439   PetscCall(PetscViewerASCIIPrintf(viewer, "Desired minimum number of nonzeros per rank for MPI parallel solve %d\n", (int)km->mincntperrank));
440   PetscCall(PCGetOptionsPrefix(pc, &prefix));
441   if (prefix) PetscCall(PetscViewerASCIIPrintf(viewer, "*** Use -%smpi_ksp_view to see the MPI KSP parameters***\n", prefix));
442   else PetscCall(PetscViewerASCIIPrintf(viewer, "*** Use -mpi_ksp_view to see the MPI KSP parameters***\n"));
443   PetscFunctionReturn(0);
444 }
445 
446 static PetscErrorCode PCDestroy_Seq(PC pc) {
447   PC_MPI *km = (PC_MPI *)pc->data;
448 
449   PetscFunctionBegin;
450   PetscCall(KSPDestroy(&km->ksps[0]));
451   PetscCall(PetscFree(pc->data));
452   PetscFunctionReturn(0);
453 }
454 
455 /*
456      PCSetUp_MPI - Trigger the creation of the MPI parallel PC and copy parts of the matrix and
457      right hand side to the parallel PC
458 */
459 static PetscErrorCode PCSetUp_MPI(PC pc) {
460   PC_MPI      *km = (PC_MPI *)pc->data;
461   PetscMPIInt  rank, size;
462   PCMPICommand request;
463   PetscBool    newmatrix = PETSC_FALSE;
464 
465   PetscFunctionBegin;
466   PetscCallMPI(MPI_Comm_rank(MPI_COMM_WORLD, &rank));
467   PetscCheck(rank == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "PCMPI can only be used from 0th rank of MPI_COMM_WORLD. Perhaps a missing -mpi_linear_solver_server?");
468   PetscCallMPI(MPI_Comm_size(MPI_COMM_WORLD, &size));
469 
470   if (!pc->setupcalled) {
471     if (!km->alwaysuseserver) {
472       PetscInt n;
473       Mat      sA;
474       /* short circuit for small systems */
475       PetscCall(PCGetOperators(pc, &sA, &sA));
476       PetscCall(MatGetSize(sA, &n, NULL));
477       if (n < 2 * km->mincntperrank - 1 || size == 1) {
478         pc->ops->setup   = NULL;
479         pc->ops->apply   = PCApply_Seq;
480         pc->ops->destroy = PCDestroy_Seq;
481         pc->ops->view    = PCView_Seq;
482         PetscCall(PCSetUp_Seq(pc));
483         PetscFunctionReturn(0);
484       }
485     }
486 
487     request = PCMPI_CREATE;
488     PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, MPI_COMM_WORLD));
489     PetscCall(PCMPICreate(pc));
490     newmatrix = PETSC_TRUE;
491   }
492   if (pc->flag == DIFFERENT_NONZERO_PATTERN) newmatrix = PETSC_TRUE;
493 
494   if (newmatrix) {
495     PetscInfo((PetscObject)pc, "New matrix or matrix has changed nonzero structure\n");
496     request = PCMPI_SET_MAT;
497     PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, MPI_COMM_WORLD));
498     PetscCall(PCMPISetMat(pc));
499   } else {
500     PetscInfo((PetscObject)pc, "Matrix has only changed nozero values\n");
501     request = PCMPI_UPDATE_MAT_VALUES;
502     PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, MPI_COMM_WORLD));
503     PetscCall(PCMPIUpdateMatValues(pc));
504   }
505   PetscFunctionReturn(0);
506 }
507 
508 static PetscErrorCode PCApply_MPI(PC pc, Vec b, Vec x) {
509   PCMPICommand request = PCMPI_SOLVE;
510 
511   PetscFunctionBegin;
512   PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, MPI_COMM_WORLD));
513   PetscCall(PCMPISolve(pc, b, x));
514   PetscFunctionReturn(0);
515 }
516 
517 PetscErrorCode PCDestroy_MPI(PC pc) {
518   PCMPICommand request = PCMPI_DESTROY;
519 
520   PetscFunctionBegin;
521   PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, MPI_COMM_WORLD));
522   PetscCall(PCMPIDestroy(pc));
523   PetscCall(PetscFree(pc->data));
524   PetscFunctionReturn(0);
525 }
526 
527 /*
528      PCView_MPI - Cannot call view on the MPI parallel KSP because other ranks do not have access to the viewer
529 */
530 PetscErrorCode PCView_MPI(PC pc, PetscViewer viewer) {
531   PC_MPI     *km = (PC_MPI *)pc->data;
532   MPI_Comm    comm;
533   PetscMPIInt size;
534   const char *prefix;
535 
536   PetscFunctionBegin;
537   PetscCall(PetscObjectGetComm((PetscObject)km->ksps[0], &comm));
538   PetscCallMPI(MPI_Comm_size(comm, &size));
539   PetscCall(PetscViewerASCIIPrintf(viewer, "Size of MPI communicator used for MPI parallel KSP solve %d\n", size));
540   PetscCall(PetscViewerASCIIPrintf(viewer, "Desired minimum number of nonzeros per rank for MPI parallel solve %d\n", (int)km->mincntperrank));
541   PetscCall(PCGetOptionsPrefix(pc, &prefix));
542   if (prefix) PetscCall(PetscViewerASCIIPrintf(viewer, "*** Use -%smpi_ksp_view to see the MPI KSP parameters***\n", prefix));
543   else PetscCall(PetscViewerASCIIPrintf(viewer, "*** Use -mpi_ksp_view to see the MPI KSP parameters***\n"));
544   PetscFunctionReturn(0);
545 }
546 
/* Process the options database entries controlling when and how the MPI linear solver server is used */
PetscErrorCode PCSetFromOptions_MPI(PC pc, PetscOptionItems *PetscOptionsObject) {
  PC_MPI *km = (PC_MPI *)pc->data;

  PetscFunctionBegin;
  PetscOptionsHeadBegin(PetscOptionsObject, "MPI linear solver server options");
  PetscCall(PetscOptionsInt("-pc_mpi_minimum_count_per_rank", "Desired minimum number of nonzeros per rank", "None", km->mincntperrank, &km->mincntperrank, NULL));
  PetscCall(PetscOptionsBool("-pc_mpi_always_use_server", "Use the server even if only one rank is used for the solve (for debugging)", "None", km->alwaysuseserver, &km->alwaysuseserver, NULL));
  PetscOptionsHeadEnd();
  PetscFunctionReturn(0);
}
557 
558 /*MC
559      PCMPI - Calls an MPI parallel KSP to solve a linear system from user code running on one process
560 
561    Level: beginner
562 
563    Options Database:
564 +  -mpi_linear_solver_server - causes the PETSc program to start in MPI linear solver server mode where only the first MPI rank runs user code
565 .  -mpi_linear_solver_server_view - displays information about the linear systems solved by the MPI linear solver server
566 .  -pc_mpi_minimum_count_per_rank - sets the minimum size of the linear system per MPI rank that the solver will strive for
567 -  -pc_mpi_always_use_server - use the server solver code even if the particular system is only solved on the process, this option is only for debugging and testing purposes
568 
569    Notes:
570    The options database prefix for the MPI parallel KSP and PC is -mpi_
571 
572    The MPI linear solver server will not support scaling user code to utilize extremely large numbers of MPI ranks but should give reasonable speedup for
573    potentially 4 to 8 MPI ranks depending on the linear system being solved, solver algorithm, and the hardware.
574 
575    It can be particularly useful for user OpenMP code or potentially user GPU code.
576 
   When the program is running with a single MPI rank then this directly uses the provided matrix and right hand side (still in a KSP with the options prefix of -mpi_)
578    and does not need to distribute the matrix and vector to the various MPI ranks; thus it incurs no extra overhead over just using the KSP directly.
579 
580 .seealso: `KSPCreate()`, `KSPSetType()`, `KSPType`, `KSP`, `PC`, `PCMPIServerBegin()`, `PCMPIServerEnd()`
581 
582 M*/
/* Constructor for PCMPI: allocates the private data and installs the function table */
PETSC_EXTERN PetscErrorCode PCCreate_MPI(PC pc) {
  PC_MPI *km;

  PetscFunctionBegin;
  PetscCall(PetscNewLog(pc, &km));
  pc->data = (void *)km;

  /* default minimum nonzeros per active rank; changeable with -pc_mpi_minimum_count_per_rank */
  km->mincntperrank = 10000;

  pc->ops->setup          = PCSetUp_MPI;
  pc->ops->apply          = PCApply_MPI;
  pc->ops->destroy        = PCDestroy_MPI;
  pc->ops->view           = PCView_MPI;
  pc->ops->setfromoptions = PCSetFromOptions_MPI;
  PetscFunctionReturn(0);
}
599