xref: /petsc/src/ksp/pc/impls/mpi/pcmpi.c (revision e0f5bfbec699682fa3e8b8532b1176849ea4e12a)
1 /*
2     This file creates an MPI parallel KSP from a sequential PC that lives on MPI rank 0.
3     It is intended to allow using PETSc MPI parallel linear solvers from non-MPI codes.
4 
5     That program may use OpenMP to compute the right hand side and matrix for the linear system
6 
7     The code uses MPI_COMM_WORLD below but maybe it should be PETSC_COMM_WORLD
8 
9     The resulting KSP and PC can only be controlled via the options database, though some common commands
10     could be passed through the server.
11 
12 */
13 #include <petsc/private/pcimpl.h>
14 #include <petsc/private/kspimpl.h>
15 #include <petsc.h>
16 
17 #define PC_MPI_MAX_RANKS  256
18 #define PC_MPI_COMM_WORLD MPI_COMM_WORLD
19 
20 typedef struct {
21   KSP         ksps[PC_MPI_MAX_RANKS];                               /* The addresses of the MPI parallel KSP on each rank, NULL when not on a rank. */
22   PetscMPIInt sendcount[PC_MPI_MAX_RANKS], displ[PC_MPI_MAX_RANKS]; /* For scatter/gather of rhs/solution */
23   PetscMPIInt NZ[PC_MPI_MAX_RANKS], NZdispl[PC_MPI_MAX_RANKS];      /* For scatter of nonzero values in matrix (and nonzero column indices initially */
24   PetscInt    mincntperrank;                                        /* minimum number of desired nonzeros per active rank in MPI parallel KSP solve */
25   PetscBool   alwaysuseserver;                                      /* for debugging use the server infrastructure even if only one MPI rank is used for the solve */
26 } PC_MPI;
27 
28 typedef enum {
29   PCMPI_EXIT, /* exit the PC server loop, means the controlling sequential program is done */
30   PCMPI_CREATE,
31   PCMPI_SET_MAT,           /* set original matrix (or one with different nonzero pattern) */
32   PCMPI_UPDATE_MAT_VALUES, /* update current matrix with new nonzero values */
33   PCMPI_SOLVE,
34   PCMPI_VIEW,
35   PCMPI_DESTROY /* destroy a KSP that is no longer needed */
36 } PCMPICommand;
37 
38 static MPI_Comm  PCMPIComms[PC_MPI_MAX_RANKS];
39 static PetscBool PCMPICommSet = PETSC_FALSE;
40 static PetscInt  PCMPISolveCounts[PC_MPI_MAX_RANKS], PCMPIKSPCounts[PC_MPI_MAX_RANKS], PCMPIMatCounts[PC_MPI_MAX_RANKS], PCMPISolveCountsSeq = 0, PCMPIKSPCountsSeq = 0;
41 
42 static PetscErrorCode PCMPICommsCreate(void) {
43   MPI_Comm    comm = PC_MPI_COMM_WORLD;
44   PetscMPIInt size, rank, i;
45 
46   PetscFunctionBegin;
47   PetscCallMPI(MPI_Comm_size(comm, &size));
48   PetscCheck(size <= PC_MPI_MAX_RANKS, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for using more than PC_MPI_MAX_RANKS MPI ranks in an MPI linear solver server solve");
49   PetscCallMPI(MPI_Comm_rank(comm, &rank));
50   /* comm for size 1 is useful only for debugging */
51   for (i = 0; i < size; i++) {
52     PetscMPIInt color = rank < i + 1 ? 0 : MPI_UNDEFINED;
53     PetscCallMPI(MPI_Comm_split(comm, color, 0, &PCMPIComms[i]));
54     PCMPISolveCounts[i] = 0;
55     PCMPIKSPCounts[i]   = 0;
56   }
57   PCMPICommSet = PETSC_TRUE;
58   PetscFunctionReturn(0);
59 }
60 
61 PetscErrorCode PCMPICommsDestroy(void) {
62   MPI_Comm    comm = PC_MPI_COMM_WORLD;
63   PetscMPIInt size, rank, i;
64 
65   PetscFunctionBegin;
66   if (!PCMPICommSet) PetscFunctionReturn(0);
67   PetscCallMPI(MPI_Comm_size(comm, &size));
68   PetscCallMPI(MPI_Comm_rank(comm, &rank));
69   for (i = 0; i < size; i++) {
70     if (PCMPIComms[i] != MPI_COMM_NULL) PetscCallMPI(MPI_Comm_free(&PCMPIComms[i]));
71   }
72   PCMPICommSet = PETSC_FALSE;
73   PetscFunctionReturn(0);
74 }
75 
76 static PetscErrorCode PCMPICreate(PC pc) {
77   PC_MPI     *km   = pc ? (PC_MPI *)pc->data : NULL;
78   MPI_Comm    comm = PC_MPI_COMM_WORLD;
79   KSP         ksp;
80   PetscInt    N[2], mincntperrank = 0;
81   PetscMPIInt size;
82   Mat         sA;
83   char       *prefix;
84   PetscMPIInt len = 0;
85 
86   PetscFunctionBegin;
87   if (!PCMPICommSet) PetscCall(PCMPICommsCreate());
88   PetscCallMPI(MPI_Comm_size(comm, &size));
89   if (pc) {
90     if (size == 1) PetscCall(PetscPrintf(PETSC_COMM_SELF, "Warning: Running KSP type of MPI on a one rank MPI run, this will be less efficient then not using this type\n"));
91     PetscCall(PCGetOperators(pc, &sA, &sA));
92     PetscCall(MatGetSize(sA, &N[0], &N[1]));
93   }
94   PetscCallMPI(MPI_Bcast(N, 2, MPIU_INT, 0, comm));
95 
96   /* choose a suitable sized MPI_Comm for the problem to be solved on */
97   if (km) mincntperrank = km->mincntperrank;
98   PetscCallMPI(MPI_Bcast(&mincntperrank, 1, MPI_INT, 0, comm));
99   comm = PCMPIComms[PetscMin(size, PetscMax(1, N[0] / mincntperrank)) - 1];
100   if (comm == MPI_COMM_NULL) {
101     ksp = NULL;
102     PetscFunctionReturn(0);
103   }
104   PetscCall(KSPCreate(comm, &ksp));
105   PetscCallMPI(MPI_Gather(&ksp, 1, MPI_AINT, pc ? km->ksps : NULL, 1, MPI_AINT, 0, comm));
106   if (pc) {
107     size_t slen;
108 
109     PetscCallMPI(MPI_Comm_size(comm, &size));
110     PCMPIKSPCounts[size - 1]++;
111     PetscCall(PCGetOptionsPrefix(pc, (const char **)&prefix));
112     PetscCall(PetscStrlen(prefix, &slen));
113     len = (PetscMPIInt)slen;
114   }
115   PetscCallMPI(MPI_Bcast(&len, 1, MPI_INT, 0, comm));
116   if (len) {
117     if (!pc) PetscCall(PetscMalloc1(len + 1, &prefix));
118     PetscCallMPI(MPI_Bcast(prefix, len + 1, MPI_CHAR, 0, comm));
119     PetscCall(KSPSetOptionsPrefix(ksp, prefix));
120   }
121   PetscCall(KSPAppendOptionsPrefix(ksp, "mpi_"));
122   PetscFunctionReturn(0);
123 }
124 
/*
   PCMPISetMat - distributes the SeqAIJ matrix held by the PC on rank 0 across the server ranks
   as an MPIAIJ matrix and attaches it to the parallel KSP.

   Called with pc == NULL on the server ranks. The MPI collectives below must stay in this exact
   order on all ranks; client and server execute the same sequence.

   Distribution scheme: a PetscLayout determines the row ownership ranges; the CSR row pointers
   are scattered with one-row overlap (each rank receives n+1 entries so it has both ends of its
   range), while column indices and values are scattered with per-rank nonzero counts/displacements
   that are cached in km->NZ/km->NZdispl for later PCMPIUpdateMatValues() calls.
*/
static PetscErrorCode PCMPISetMat(PC pc) {
  PC_MPI            *km = pc ? (PC_MPI *)pc->data : NULL;
  Mat                A;
  PetscInt           N[2], n, *ia, *ja, j, bs;
  Mat                sA;
  MPI_Comm           comm = PC_MPI_COMM_WORLD;
  KSP                ksp;
  PetscLayout        layout;
  const PetscInt    *IA = NULL, *JA = NULL;
  const PetscInt    *range;
  PetscMPIInt       *NZ = NULL, sendcounti[PC_MPI_MAX_RANKS], displi[PC_MPI_MAX_RANKS], *NZdispl = NULL, nz, size, i;
  PetscScalar       *a;
  const PetscScalar *sa = NULL;

  PetscFunctionBegin;
  /* each participating rank receives the address of its previously created KSP; NULL means not participating */
  PetscCallMPI(MPI_Scatter(pc ? km->ksps : NULL, 1, MPI_AINT, &ksp, 1, MPI_AINT, 0, comm));
  if (!ksp) PetscFunctionReturn(0);
  /* switch from the world communicator to the (possibly smaller) communicator of the parallel KSP */
  PetscCall(PetscObjectGetComm((PetscObject)ksp, &comm));
  if (pc) {
    PetscCallMPI(MPI_Comm_size(comm, &size));
    PCMPIMatCounts[size - 1]++;
    PetscCall(PCGetOperators(pc, &sA, &sA));
    PetscCall(MatGetSize(sA, &N[0], &N[1]));
    PetscCall(MatGetBlockSize(sA, &bs));
    /* need to broadcast symmetry flags etc if set */
  }
  PetscCallMPI(MPI_Bcast(N, 2, MPIU_INT, 0, comm));
  PetscCallMPI(MPI_Bcast(&bs, 1, MPIU_INT, 0, comm));

  /* determine ownership ranges of matrix */
  PetscCall(PetscLayoutCreate(comm, &layout));
  PetscCall(PetscLayoutSetBlockSize(layout, bs));
  PetscCall(PetscLayoutSetSize(layout, N[0]));
  PetscCall(PetscLayoutSetUp(layout));
  PetscCall(PetscLayoutGetLocalSize(layout, &n));

  /* copy over the matrix nonzero structure and values */
  if (pc) {
    NZ      = km->NZ;
    NZdispl = km->NZdispl;
    PetscCall(PetscLayoutGetRanges(layout, &range));
    PetscCall(MatGetRowIJ(sA, 0, PETSC_FALSE, PETSC_FALSE, NULL, &IA, &JA, NULL));
    for (i = 0; i < size; i++) {
      /* +1: each rank gets n_i + 1 row pointers so consecutive ranks' windows overlap by one entry */
      sendcounti[i] = (PetscMPIInt)(1 + range[i + 1] - range[i]);
      NZ[i]         = (PetscMPIInt)(IA[range[i + 1]] - IA[range[i]]);
    }
    displi[0]  = 0;
    NZdispl[0] = 0;
    for (j = 1; j < size; j++) {
      /* -1 accounts for the one-entry overlap of the row-pointer windows */
      displi[j]  = displi[j - 1] + sendcounti[j - 1] - 1;
      NZdispl[j] = NZdispl[j - 1] + NZ[j - 1];
    }
    PetscCall(MatSeqAIJGetArrayRead(sA, &sa));
  }
  PetscCall(PetscLayoutDestroy(&layout));
  PetscCallMPI(MPI_Scatter(NZ, 1, MPI_INT, &nz, 1, MPI_INT, 0, comm));

  PetscCall(PetscMalloc3(n + 1, &ia, nz, &ja, nz, &a));
  PetscCallMPI(MPI_Scatterv(IA, sendcounti, displi, MPIU_INT, ia, n + 1, MPIU_INT, 0, comm));
  PetscCallMPI(MPI_Scatterv(JA, NZ, NZdispl, MPIU_INT, ja, nz, MPIU_INT, 0, comm));
  PetscCallMPI(MPI_Scatterv(sa, NZ, NZdispl, MPIU_SCALAR, a, nz, MPIU_SCALAR, 0, comm));

  if (pc) {
    PetscCall(MatSeqAIJRestoreArrayRead(sA, &sa));
    PetscCall(MatRestoreRowIJ(sA, 0, PETSC_FALSE, PETSC_FALSE, NULL, &IA, &JA, NULL));
  }

  /* rebase the received row pointers so each rank's CSR starts at zero */
  for (j = 1; j < n + 1; j++) ia[j] -= ia[0];
  ia[0] = 0;
  PetscCall(MatCreateMPIAIJWithArrays(comm, n, n, N[0], N[0], ia, ja, a, &A));
  PetscCall(MatSetBlockSize(A, bs));
  PetscCall(MatSetOptionsPrefix(A, "mpi_"));

  /* MatCreateMPIAIJWithArrays() copied the data, so the staging buffers can be freed */
  PetscCall(PetscFree3(ia, ja, a));
  PetscCall(KSPSetOperators(ksp, A, A));
  if (!ksp->vec_sol) PetscCall(MatCreateVecs(A, &ksp->vec_sol, &ksp->vec_rhs));
  if (pc) { /* needed for scatterv/gatherv of rhs and solution */
    const PetscInt *range;

    PetscCall(VecGetOwnershipRanges(ksp->vec_sol, &range));
    for (i = 0; i < size; i++) {
      km->sendcount[i] = (PetscMPIInt)(range[i + 1] - range[i]);
      km->displ[i]     = (PetscMPIInt)range[i];
    }
  }
  /* the KSP holds its own reference to A */
  PetscCall(MatDestroy(&A));
  PetscCall(KSPSetFromOptions(ksp));
  PetscFunctionReturn(0);
}
214 
/*
   PCMPIUpdateMatValues - re-scatters only the numerical values of the matrix (the nonzero
   pattern is unchanged) to the server ranks and updates the parallel KSP's operator in place.

   Called with pc == NULL on the server ranks. Reuses the counts/displacements cached in
   km->NZ/km->NZdispl by the most recent PCMPISetMat().
*/
static PetscErrorCode PCMPIUpdateMatValues(PC pc) {
  PC_MPI            *km = pc ? (PC_MPI *)pc->data : NULL;
  KSP                ksp;
  Mat                sA, A;
  MPI_Comm           comm = PC_MPI_COMM_WORLD;
  PetscScalar       *a;
  PetscCount         nz;
  const PetscScalar *sa = NULL;
  PetscMPIInt        size;

  PetscFunctionBegin;
  if (pc) {
    PetscCall(PCGetOperators(pc, &sA, &sA));
    PetscCall(MatSeqAIJGetArrayRead(sA, &sa));
  }
  /* each participating rank receives its KSP address; NULL means not participating */
  PetscCallMPI(MPI_Scatter(pc ? km->ksps : NULL, 1, MPI_AINT, &ksp, 1, MPI_AINT, 0, comm));
  if (!ksp) PetscFunctionReturn(0); /* rank 0 always participates, so sa is never left unrestored */
  PetscCall(PetscObjectGetComm((PetscObject)ksp, &comm));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PCMPIMatCounts[size - 1]++;
  PetscCall(KSPGetOperators(ksp, NULL, &A));
  PetscCall(MatMPIAIJGetNumberNonzeros(A, &nz));
  PetscCall(PetscMalloc1(nz, &a));
  PetscCallMPI(MPI_Scatterv(sa, pc ? km->NZ : NULL, pc ? km->NZdispl : NULL, MPIU_SCALAR, a, nz, MPIU_SCALAR, 0, comm));
  if (pc) PetscCall(MatSeqAIJRestoreArrayRead(sA, &sa));
  PetscCall(MatUpdateMPIAIJWithArray(A, a));
  PetscCall(PetscFree(a));
  PetscFunctionReturn(0);
}
244 
245 static PetscErrorCode PCMPISolve(PC pc, Vec B, Vec X) {
246   PC_MPI            *km = pc ? (PC_MPI *)pc->data : NULL;
247   KSP                ksp;
248   MPI_Comm           comm = PC_MPI_COMM_WORLD;
249   const PetscScalar *sb   = NULL, *x;
250   PetscScalar       *b, *sx = NULL;
251   PetscInt           n;
252 
253   PetscFunctionBegin;
254   PetscCallMPI(MPI_Scatter(pc ? km->ksps : &ksp, 1, MPI_AINT, &ksp, 1, MPI_AINT, 0, comm));
255   if (!ksp) PetscFunctionReturn(0);
256   PetscCall(PetscObjectGetComm((PetscObject)ksp, &comm));
257 
258   /* TODO: optimize code to not require building counts/displ everytime */
259 
260   /* scatterv rhs */
261   if (pc) {
262     PetscMPIInt size;
263 
264     PetscCallMPI(MPI_Comm_size(comm, &size));
265     PCMPISolveCounts[size - 1]++;
266     PetscCall(VecGetArrayRead(B, &sb));
267   }
268   PetscCall(VecGetLocalSize(ksp->vec_rhs, &n));
269   PetscCall(VecGetArray(ksp->vec_rhs, &b));
270   PetscCallMPI(MPI_Scatterv(sb, pc ? km->sendcount : NULL, pc ? km->displ : NULL, MPIU_SCALAR, b, n, MPIU_SCALAR, 0, comm));
271   PetscCall(VecRestoreArray(ksp->vec_rhs, &b));
272   if (pc) PetscCall(VecRestoreArrayRead(B, &sb));
273 
274   PetscCall(KSPSolve(ksp, NULL, NULL));
275 
276   /* gather solution */
277   PetscCall(VecGetArrayRead(ksp->vec_sol, &x));
278   if (pc) PetscCall(VecGetArray(X, &sx));
279   PetscCallMPI(MPI_Gatherv(x, n, MPIU_SCALAR, sx, pc ? km->sendcount : NULL, pc ? km->displ : NULL, MPIU_SCALAR, 0, comm));
280   if (pc) PetscCall(VecRestoreArray(X, &sx));
281   PetscCall(VecRestoreArrayRead(ksp->vec_sol, &x));
282   PetscFunctionReturn(0);
283 }
284 
/*
   PCMPIDestroy - collectively destroys the MPI parallel KSP on all participating ranks.

   Called with pc == NULL on the server ranks; paired with the PCMPI_DESTROY broadcast
   issued by PCDestroy_MPI() on rank 0.
*/
static PetscErrorCode PCMPIDestroy(PC pc) {
  PC_MPI  *km = pc ? (PC_MPI *)pc->data : NULL;
  KSP      ksp;
  MPI_Comm comm = PC_MPI_COMM_WORLD;

  PetscFunctionBegin;
  /* each participating rank receives its KSP address; NULL means it never joined this solve */
  PetscCallMPI(MPI_Scatter(pc ? km->ksps : NULL, 1, MPI_AINT, &ksp, 1, MPI_AINT, 0, comm));
  if (!ksp) PetscFunctionReturn(0);
  PetscCall(KSPDestroy(&ksp));
  PetscFunctionReturn(0);
}
296 
297 /*@C
298      PCMPIServerBegin - starts a server that runs on the rank != 0 MPI processes waiting to process requests for
299      parallel `KSP` solves and management of parallel `KSP` objects.
300 
301      Logically collective on all MPI ranks except 0
302 
303    Options Database Keys:
304 +   -mpi_linear_solver_server - causes the PETSc program to start in MPI linear solver server mode where only the first MPI rank runs user code
305 -   -mpi_linear_solver_server_view - displays information about the linear systems solved by the MPI linear solver server
306 
307      Note:
308       This is normally started automatically in `PetscInitialize()` when the option is provided
309 
310      Developer Notes:
311        When called on rank zero this sets `PETSC_COMM_WORLD` to `PETSC_COMM_SELF` to allow a main program
312        written with `PETSC_COMM_WORLD` to run correctly on the single rank while all the ranks
313        (that would normally be sharing `PETSC_COMM_WORLD`) to run the solver server.
314 
315        Can this be integrated into the `PetscDevice` abstraction that is currently being developed?
316 
317      Level: developer
318 
319 .seealso: `PCMPIServerEnd()`, `PCMPI`
320 @*/
PetscErrorCode PCMPIServerBegin(void) {
  PetscMPIInt rank;

  PetscFunctionBegin;
  PetscCall(PetscInfo(NULL, "Starting MPI Linear Solver Server"));
  /* with a single combined library the sub-package registration routines must be run explicitly
     because the server ranks never execute the user code that would normally trigger them */
  if (PetscDefined(USE_SINGLE_LIBRARY)) {
    PetscCall(VecInitializePackage());
    PetscCall(MatInitializePackage());
    PetscCall(DMInitializePackage());
    PetscCall(PCInitializePackage());
    PetscCall(KSPInitializePackage());
    PetscCall(SNESInitializePackage());
    PetscCall(TSInitializePackage());
    PetscCall(TaoInitializePackage());
  }

  PetscCallMPI(MPI_Comm_rank(PC_MPI_COMM_WORLD, &rank));
  if (rank == 0) {
    /* rank 0 runs the user's sequential program; shrink PETSC_COMM_WORLD so code written
       against it behaves as a one-rank program (see Developer Notes above) */
    PETSC_COMM_WORLD = PETSC_COMM_SELF;
    PetscFunctionReturn(0);
  }

  /* server event loop: block on a broadcast command from rank 0, dispatch, repeat until PCMPI_EXIT */
  while (PETSC_TRUE) {
    PCMPICommand request = PCMPI_CREATE;
    PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, PC_MPI_COMM_WORLD));
    switch (request) {
    case PCMPI_CREATE: PetscCall(PCMPICreate(NULL)); break;
    case PCMPI_SET_MAT: PetscCall(PCMPISetMat(NULL)); break;
    case PCMPI_UPDATE_MAT_VALUES: PetscCall(PCMPIUpdateMatValues(NULL)); break;
    case PCMPI_VIEW:
      // PetscCall(PCMPIView(NULL));
      break;
    case PCMPI_SOLVE: PetscCall(PCMPISolve(NULL, NULL, NULL)); break;
    case PCMPI_DESTROY: PetscCall(PCMPIDestroy(NULL)); break;
    case PCMPI_EXIT:
      PetscCall(PetscFinalize());
      exit(0); /* not sure if this is a good idea, but cannot return because it will run users main program */
      break;
    default: break;
    }
  }
  PetscFunctionReturn(0);
}
364 
365 /*@C
366      PCMPIServerEnd - ends a server that runs on the rank != 0 MPI processes waiting to process requests for
367      parallel KSP solves and management of parallel `KSP` objects.
368 
369      Logically collective on all MPI ranks except 0
370 
371      Note:
372       This is normally ended automatically in `PetscFinalize()` when the option is provided
373 
374      Level: developer
375 
376 .seealso: `PCMPIServerBegin()`, `PCMPI`
377 @*/
PetscErrorCode PCMPIServerEnd(void) {
  PCMPICommand request = PCMPI_EXIT;

  PetscFunctionBegin;
  /* only rank 0 reaches here from user code; the server ranks are blocked in the
     PCMPIServerBegin() loop waiting for this broadcast */
  if (PetscGlobalRank == 0) {
    PetscViewer       viewer = NULL;
    PetscViewerFormat format;

    /* tell the server ranks to finalize and exit */
    PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, PC_MPI_COMM_WORLD));
    /* restore the world communicator that PCMPIServerBegin() replaced with PETSC_COMM_SELF */
    PETSC_COMM_WORLD = MPI_COMM_WORLD; /* could use PC_MPI_COMM_WORLD */
    PetscOptionsBegin(PETSC_COMM_SELF, NULL, "MPI linear solver server options", NULL);
    PetscCall(PetscOptionsViewer("-mpi_linear_solver_server_view", "View information about system solved with the server", "PCMPI", &viewer, &format, NULL));
    PetscOptionsEnd();
    if (viewer) {
      PetscBool isascii;

      PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii));
      if (isascii) {
        PetscMPIInt size;
        PetscInt    i, ksp = 0, mat = 0, solve = 0;

        PetscCallMPI(MPI_Comm_size(PETSC_COMM_WORLD, &size));
        /* total the usage counters that are kept per subcommunicator size */
        for (i = 0; i < size; i++) {
          ksp += PCMPIKSPCounts[i];
          mat += PCMPIMatCounts[i];
          solve += PCMPISolveCounts[i];
        }
        PetscCall(PetscViewerASCIIPrintf(viewer, "MPI linear solver server:\n"));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  Parallel KSPs  %" PetscInt_FMT " Mats  %" PetscInt_FMT " Solves %" PetscInt_FMT "\n", ksp, mat, solve));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  Sequential KSPs  %" PetscInt_FMT " Solves %" PetscInt_FMT "\n", PCMPIKSPCountsSeq, PCMPISolveCountsSeq));
      }
      PetscCall(PetscViewerDestroy(&viewer));
    }
  }
  PetscCall(PCMPICommsDestroy());
  PetscFunctionReturn(0);
}
415 
416 /*
417     This version is used in the trivial case when the MPI parallel solver server is running on just the original MPI rank 0
418     because, for example, the problem is small. This version is more efficient because it does not require copying any data
419 */
420 static PetscErrorCode PCSetUp_Seq(PC pc) {
421   PC_MPI     *km = (PC_MPI *)pc->data;
422   Mat         sA;
423   const char *prefix;
424 
425   PetscFunctionBegin;
426   PetscCall(PCGetOperators(pc, NULL, &sA));
427   PetscCall(PCGetOptionsPrefix(pc, &prefix));
428   PetscCall(KSPCreate(PETSC_COMM_SELF, &km->ksps[0]));
429   PetscCall(KSPSetOptionsPrefix(km->ksps[0], prefix));
430   PetscCall(KSPAppendOptionsPrefix(km->ksps[0], "mpi_"));
431   PetscCall(KSPSetOperators(km->ksps[0], sA, sA));
432   PetscCall(KSPSetFromOptions(km->ksps[0]));
433   PetscCall(KSPSetUp(km->ksps[0]));
434   PetscInfo((PetscObject)pc, "MPI parallel linear solver system is being solved directly on rank 0 due to its small size\n");
435   PCMPIKSPCountsSeq++;
436   PetscFunctionReturn(0);
437 }
438 
439 static PetscErrorCode PCApply_Seq(PC pc, Vec b, Vec x) {
440   PC_MPI *km = (PC_MPI *)pc->data;
441 
442   PetscFunctionBegin;
443   PetscCall(KSPSolve(km->ksps[0], b, x));
444   PCMPISolveCountsSeq++;
445   PetscFunctionReturn(0);
446 }
447 
448 static PetscErrorCode PCView_Seq(PC pc, PetscViewer viewer) {
449   PC_MPI     *km = (PC_MPI *)pc->data;
450   const char *prefix;
451 
452   PetscFunctionBegin;
453   PetscCall(PetscViewerASCIIPrintf(viewer, "Running MPI linear solver server directly on rank 0 due to its small size\n"));
454   PetscCall(PetscViewerASCIIPrintf(viewer, "Desired minimum number of nonzeros per rank for MPI parallel solve %d\n", (int)km->mincntperrank));
455   PetscCall(PCGetOptionsPrefix(pc, &prefix));
456   if (prefix) PetscCall(PetscViewerASCIIPrintf(viewer, "*** Use -%smpi_ksp_view to see the MPI KSP parameters***\n", prefix));
457   else PetscCall(PetscViewerASCIIPrintf(viewer, "*** Use -mpi_ksp_view to see the MPI KSP parameters***\n"));
458   PetscFunctionReturn(0);
459 }
460 
461 static PetscErrorCode PCDestroy_Seq(PC pc) {
462   PC_MPI *km = (PC_MPI *)pc->data;
463 
464   PetscFunctionBegin;
465   PetscCall(KSPDestroy(&km->ksps[0]));
466   PetscCall(PetscFree(pc->data));
467   PetscFunctionReturn(0);
468 }
469 
470 /*
471      PCSetUp_MPI - Trigger the creation of the MPI parallel PC and copy parts of the matrix and
472      right hand side to the parallel PC
473 */
474 static PetscErrorCode PCSetUp_MPI(PC pc) {
475   PC_MPI      *km = (PC_MPI *)pc->data;
476   PetscMPIInt  rank, size;
477   PCMPICommand request;
478   PetscBool    newmatrix = PETSC_FALSE;
479 
480   PetscFunctionBegin;
481   PetscCallMPI(MPI_Comm_rank(MPI_COMM_WORLD, &rank));
482   PetscCheck(rank == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "PCMPI can only be used from 0th rank of MPI_COMM_WORLD. Perhaps a missing -mpi_linear_solver_server?");
483   PetscCallMPI(MPI_Comm_size(MPI_COMM_WORLD, &size));
484 
485   if (!pc->setupcalled) {
486     if (!km->alwaysuseserver) {
487       PetscInt n;
488       Mat      sA;
489       /* short circuit for small systems */
490       PetscCall(PCGetOperators(pc, &sA, &sA));
491       PetscCall(MatGetSize(sA, &n, NULL));
492       if (n < 2 * km->mincntperrank - 1 || size == 1) {
493         pc->ops->setup   = NULL;
494         pc->ops->apply   = PCApply_Seq;
495         pc->ops->destroy = PCDestroy_Seq;
496         pc->ops->view    = PCView_Seq;
497         PetscCall(PCSetUp_Seq(pc));
498         PetscFunctionReturn(0);
499       }
500     }
501 
502     request = PCMPI_CREATE;
503     PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, MPI_COMM_WORLD));
504     PetscCall(PCMPICreate(pc));
505     newmatrix = PETSC_TRUE;
506   }
507   if (pc->flag == DIFFERENT_NONZERO_PATTERN) newmatrix = PETSC_TRUE;
508 
509   if (newmatrix) {
510     PetscInfo((PetscObject)pc, "New matrix or matrix has changed nonzero structure\n");
511     request = PCMPI_SET_MAT;
512     PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, MPI_COMM_WORLD));
513     PetscCall(PCMPISetMat(pc));
514   } else {
515     PetscInfo((PetscObject)pc, "Matrix has only changed nozero values\n");
516     request = PCMPI_UPDATE_MAT_VALUES;
517     PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, MPI_COMM_WORLD));
518     PetscCall(PCMPIUpdateMatValues(pc));
519   }
520   PetscFunctionReturn(0);
521 }
522 
523 static PetscErrorCode PCApply_MPI(PC pc, Vec b, Vec x) {
524   PCMPICommand request = PCMPI_SOLVE;
525 
526   PetscFunctionBegin;
527   PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, MPI_COMM_WORLD));
528   PetscCall(PCMPISolve(pc, b, x));
529   PetscFunctionReturn(0);
530 }
531 
532 PetscErrorCode PCDestroy_MPI(PC pc) {
533   PCMPICommand request = PCMPI_DESTROY;
534 
535   PetscFunctionBegin;
536   PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, MPI_COMM_WORLD));
537   PetscCall(PCMPIDestroy(pc));
538   PetscCall(PetscFree(pc->data));
539   PetscFunctionReturn(0);
540 }
541 
542 /*
543      PCView_MPI - Cannot call view on the MPI parallel KSP because other ranks do not have access to the viewer
544 */
545 PetscErrorCode PCView_MPI(PC pc, PetscViewer viewer) {
546   PC_MPI     *km = (PC_MPI *)pc->data;
547   MPI_Comm    comm;
548   PetscMPIInt size;
549   const char *prefix;
550 
551   PetscFunctionBegin;
552   PetscCall(PetscObjectGetComm((PetscObject)km->ksps[0], &comm));
553   PetscCallMPI(MPI_Comm_size(comm, &size));
554   PetscCall(PetscViewerASCIIPrintf(viewer, "Size of MPI communicator used for MPI parallel KSP solve %d\n", size));
555   PetscCall(PetscViewerASCIIPrintf(viewer, "Desired minimum number of nonzeros per rank for MPI parallel solve %d\n", (int)km->mincntperrank));
556   PetscCall(PCGetOptionsPrefix(pc, &prefix));
557   if (prefix) PetscCall(PetscViewerASCIIPrintf(viewer, "*** Use -%smpi_ksp_view to see the MPI KSP parameters***\n", prefix));
558   else PetscCall(PetscViewerASCIIPrintf(viewer, "*** Use -mpi_ksp_view to see the MPI KSP parameters***\n"));
559   PetscFunctionReturn(0);
560 }
561 
562 PetscErrorCode PCSetFromOptions_MPI(PC pc, PetscOptionItems *PetscOptionsObject) {
563   PC_MPI *km = (PC_MPI *)pc->data;
564 
565   PetscFunctionBegin;
566   PetscOptionsHeadBegin(PetscOptionsObject, "MPI linear solver server options");
567   PetscCall(PetscOptionsInt("-pc_mpi_minimum_count_per_rank", "Desired minimum number of nonzeros per rank", "None", km->mincntperrank, &km->mincntperrank, NULL));
568   PetscCall(PetscOptionsBool("-pc_mpi_always_use_server", "Use the server even if only one rank is used for the solve (for debugging)", "None", km->alwaysuseserver, &km->alwaysuseserver, NULL));
569   PetscOptionsHeadEnd();
570   PetscFunctionReturn(0);
571 }
572 
573 /*MC
574      PCMPI - Calls an MPI parallel `KSP` to solve a linear system from user code running on one process
575 
576    Level: beginner
577 
578    Options Database Keys:
579 +  -mpi_linear_solver_server - causes the PETSc program to start in MPI linear solver server mode where only the first MPI rank runs user code
580 .  -mpi_linear_solver_server_view - displays information about the linear systems solved by the MPI linear solver server
581 .  -pc_mpi_minimum_count_per_rank - sets the minimum size of the linear system per MPI rank that the solver will strive for
582 -  -pc_mpi_always_use_server - use the server solver code even if the particular system is only solved on the process, this option is only for debugging and testing purposes
583 
584    Notes:
585    The options database prefix for the MPI parallel `KSP` and `PC` is -mpi_
586 
587    The MPI linear solver server will not support scaling user code to utilize extremely large numbers of MPI ranks but should give reasonable speedup for
588    potentially 4 to 8 MPI ranks depending on the linear system being solved, solver algorithm, and the hardware.
589 
590    It can be particularly useful for user OpenMP code or potentially user GPU code.
591 
592    When the program is running with a single MPI rank then this directly uses the provided matrix and right hand side (still in a `KSP` with the options prefix of -mpi)
593    and does not need to distribute the matrix and vector to the various MPI ranks; thus it incurs no extra overhead over just using the `KSP` directly.
594 
595 .seealso: `KSPCreate()`, `KSPSetType()`, `KSPType`, `KSP`, `PC`, `PCMPIServerBegin()`, `PCMPIServerEnd()`
596 M*/
597 PETSC_EXTERN PetscErrorCode PCCreate_MPI(PC pc) {
598   PC_MPI *km;
599 
600   PetscFunctionBegin;
601   PetscCall(PetscNew(&km));
602   pc->data = (void *)km;
603 
604   km->mincntperrank = 10000;
605 
606   pc->ops->setup          = PCSetUp_MPI;
607   pc->ops->apply          = PCApply_MPI;
608   pc->ops->destroy        = PCDestroy_MPI;
609   pc->ops->view           = PCView_MPI;
610   pc->ops->setfromoptions = PCSetFromOptions_MPI;
611   PetscFunctionReturn(0);
612 }
613