xref: /petsc/src/ksp/pc/impls/mpi/pcmpi.c (revision cf906205f1df435953e2a75894dc7795d3ff0aed)
1 /*
2     This file creates an MPI parallel KSP from a sequential PC that lives on MPI rank 0.
3     It is intended to allow using PETSc MPI parallel linear solvers from non-MPI codes.
4 
5     That program may use OpenMP to compute the right hand side and matrix for the linear system
6 
7     The code uses MPI_COMM_WORLD below but maybe it should be PETSC_COMM_WORLD
8 
9     The resulting KSP and PC can only be controlled via the options database, though some common commands
10     could be passed through the server.
11 
12 */
13 #include <petsc/private/pcimpl.h>
14 #include <petsc/private/kspimpl.h>
15 
#define PC_MPI_MAX_RANKS  256            /* hard upper bound on MPI ranks the linear solver server supports; static tables below are sized by this */
#define PC_MPI_COMM_WORLD MPI_COMM_WORLD /* communicator the server listens on; see the note at the top of this file about PETSC_COMM_WORLD */
18 
/* Private context for a PCMPI preconditioner; meaningful only on MPI rank 0, which orchestrates the server ranks */
typedef struct {
  KSP         ksps[PC_MPI_MAX_RANKS];                               /* The addresses of the MPI parallel KSP on each rank, NULL when not on a rank. */
  PetscMPIInt sendcount[PC_MPI_MAX_RANKS], displ[PC_MPI_MAX_RANKS]; /* For scatter/gather of rhs/solution */
  PetscMPIInt NZ[PC_MPI_MAX_RANKS], NZdispl[PC_MPI_MAX_RANKS];      /* For scatter of nonzero values in matrix (and nonzero column indices initially) */
  PetscInt    mincntperrank;                                        /* minimum number of desired nonzeros per active rank in MPI parallel KSP solve */
  PetscBool   alwaysuseserver;                                      /* for debugging use the server infrastructure even if only one MPI rank is used for the solve */
} PC_MPI;
26 
/* Commands broadcast from rank 0 to the server ranks; the enum values are part of the on-the-wire
   protocol (sent with MPIU_ENUM), so their order must not change */
typedef enum {
  PCMPI_EXIT, /* exit the PC server loop, means the controlling sequential program is done */
  PCMPI_CREATE,            /* create a new MPI parallel KSP on a suitably sized subcommunicator */
  PCMPI_SET_MAT,           /* set original matrix (or one with different nonzero pattern) */
  PCMPI_UPDATE_MAT_VALUES, /* update current matrix with new nonzero values */
  PCMPI_SOLVE,             /* perform one parallel KSP solve */
  PCMPI_VIEW,              /* view the parallel KSP (currently unimplemented on the server side) */
  PCMPI_DESTROY /* destroy a KSP that is no longer needed */
} PCMPICommand;
36 
/* PCMPIComms[i] is a subcommunicator containing ranks 0..i of PC_MPI_COMM_WORLD (MPI_COMM_NULL on ranks > i) */
static MPI_Comm  PCMPIComms[PC_MPI_MAX_RANKS];
static PetscBool PCMPICommSet = PETSC_FALSE; /* set once PCMPICommsCreate() has populated PCMPIComms[] */
/* usage statistics indexed by (number of ranks used for the solve) - 1; the *Seq counters track the rank-0-only short-circuit path */
static PetscInt  PCMPISolveCounts[PC_MPI_MAX_RANKS], PCMPIKSPCounts[PC_MPI_MAX_RANKS], PCMPIMatCounts[PC_MPI_MAX_RANKS], PCMPISolveCountsSeq = 0, PCMPIKSPCountsSeq = 0;
40 
41 static PetscErrorCode PCMPICommsCreate(void) {
42   MPI_Comm    comm = PC_MPI_COMM_WORLD;
43   PetscMPIInt size, rank, i;
44 
45   PetscFunctionBegin;
46   PetscCallMPI(MPI_Comm_size(comm, &size));
47   PetscCheck(size <= PC_MPI_MAX_RANKS, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for using more than PC_MPI_MAX_RANKS MPI ranks in an MPI linear solver server solve");
48   PetscCallMPI(MPI_Comm_rank(comm, &rank));
49   /* comm for size 1 is useful only for debugging */
50   for (i = 0; i < size; i++) {
51     PetscMPIInt color = rank < i + 1 ? 0 : MPI_UNDEFINED;
52     PetscCallMPI(MPI_Comm_split(comm, color, 0, &PCMPIComms[i]));
53     PCMPISolveCounts[i] = 0;
54     PCMPIKSPCounts[i]   = 0;
55   }
56   PCMPICommSet = PETSC_TRUE;
57   PetscFunctionReturn(0);
58 }
59 
60 PetscErrorCode PCMPICommsDestroy(void) {
61   MPI_Comm    comm = PC_MPI_COMM_WORLD;
62   PetscMPIInt size, rank, i;
63 
64   PetscFunctionBegin;
65   if (!PCMPICommSet) PetscFunctionReturn(0);
66   PetscCallMPI(MPI_Comm_size(comm, &size));
67   PetscCallMPI(MPI_Comm_rank(comm, &rank));
68   for (i = 0; i < size; i++) {
69     if (PCMPIComms[i] != MPI_COMM_NULL) PetscCallMPI(MPI_Comm_free(&PCMPIComms[i]));
70   }
71   PCMPICommSet = PETSC_FALSE;
72   PetscFunctionReturn(0);
73 }
74 
75 static PetscErrorCode PCMPICreate(PC pc) {
76   PC_MPI     *km   = pc ? (PC_MPI *)pc->data : NULL;
77   MPI_Comm    comm = PC_MPI_COMM_WORLD;
78   KSP         ksp;
79   PetscInt    N[2], mincntperrank = 0;
80   PetscMPIInt size;
81   Mat         sA;
82   char       *prefix;
83   PetscMPIInt len = 0;
84 
85   PetscFunctionBegin;
86   if (!PCMPICommSet) PetscCall(PCMPICommsCreate());
87   PetscCallMPI(MPI_Comm_size(comm, &size));
88   if (pc) {
89     if (size == 1) PetscCall(PetscPrintf(PETSC_COMM_SELF, "Warning: Running KSP type of MPI on a one rank MPI run, this will be less efficient then not using this type\n"));
90     PetscCall(PCGetOperators(pc, &sA, &sA));
91     PetscCall(MatGetSize(sA, &N[0], &N[1]));
92   }
93   PetscCallMPI(MPI_Bcast(N, 2, MPIU_INT, 0, comm));
94 
95   /* choose a suitable sized MPI_Comm for the problem to be solved on */
96   if (km) mincntperrank = km->mincntperrank;
97   PetscCallMPI(MPI_Bcast(&mincntperrank, 1, MPI_INT, 0, comm));
98   comm = PCMPIComms[PetscMin(size, PetscMax(1, N[0] / mincntperrank)) - 1];
99   if (comm == MPI_COMM_NULL) {
100     ksp = NULL;
101     PetscFunctionReturn(0);
102   }
103   PetscCall(KSPCreate(comm, &ksp));
104   PetscCallMPI(MPI_Gather(&ksp, 1, MPI_AINT, pc ? km->ksps : NULL, 1, MPI_AINT, 0, comm));
105   if (pc) {
106     size_t slen;
107 
108     PetscCallMPI(MPI_Comm_size(comm, &size));
109     PCMPIKSPCounts[size - 1]++;
110     PetscCall(PCGetOptionsPrefix(pc, (const char **)&prefix));
111     PetscCall(PetscStrlen(prefix, &slen));
112     len = (PetscMPIInt)slen;
113   }
114   PetscCallMPI(MPI_Bcast(&len, 1, MPI_INT, 0, comm));
115   if (len) {
116     if (!pc) PetscCall(PetscMalloc1(len + 1, &prefix));
117     PetscCallMPI(MPI_Bcast(prefix, len + 1, MPI_CHAR, 0, comm));
118     PetscCall(KSPSetOptionsPrefix(ksp, prefix));
119   }
120   PetscCall(KSPAppendOptionsPrefix(ksp, "mpi_"));
121   PetscFunctionReturn(0);
122 }
123 
/*
    PCMPISetMat - Distributes the sequential AIJ matrix held by the PC on rank 0 across the
    subcommunicator's ranks and sets it as the operator of the parallel KSP.

    pc is non-NULL only on rank 0; server ranks pass NULL. The Scatter/Bcast/Scatterv calls must be
    matched on all ranks of PC_MPI_COMM_WORLD (the first Scatter) and then of the sub-communicator.
*/
static PetscErrorCode PCMPISetMat(PC pc) {
  PC_MPI            *km = pc ? (PC_MPI *)pc->data : NULL;
  Mat                A;
  PetscInt           N[2], n, *ia, *ja, j;
  Mat                sA;
  MPI_Comm           comm = PC_MPI_COMM_WORLD;
  KSP                ksp;
  PetscLayout        layout;
  const PetscInt    *IA = NULL, *JA = NULL;
  const PetscInt    *range;
  PetscMPIInt       *NZ = NULL, sendcounti[PC_MPI_MAX_RANKS], displi[PC_MPI_MAX_RANKS], *NZdispl = NULL, nz, size, i;
  PetscScalar       *a;
  const PetscScalar *sa = NULL;

  PetscFunctionBegin;
  /* each rank learns which (if any) parallel KSP this command refers to; NULL means it does not participate */
  PetscCallMPI(MPI_Scatter(pc ? km->ksps : NULL, 1, MPI_AINT, &ksp, 1, MPI_AINT, 0, comm));
  if (!ksp) PetscFunctionReturn(0);
  PetscCall(PetscObjectGetComm((PetscObject)ksp, &comm));
  if (pc) {
    PetscCallMPI(MPI_Comm_size(comm, &size));
    PCMPIMatCounts[size - 1]++;
    PetscCall(PCGetOperators(pc, &sA, &sA));
    PetscCall(MatGetSize(sA, &N[0], &N[1]));
  }
  PetscCallMPI(MPI_Bcast(N, 2, MPIU_INT, 0, comm));

  /* determine ownership ranges of matrix */
  PetscCall(PetscLayoutCreate(comm, &layout));
  PetscCall(PetscLayoutSetBlockSize(layout, 1));
  PetscCall(PetscLayoutSetSize(layout, N[0]));
  PetscCall(PetscLayoutSetUp(layout));
  PetscCall(PetscLayoutGetLocalSize(layout, &n));

  /* copy over the matrix nonzero structure and values */
  if (pc) {
    NZ      = km->NZ;
    NZdispl = km->NZdispl;
    PetscCall(PetscLayoutGetRanges(layout, &range));
    PetscCall(MatGetRowIJ(sA, 0, PETSC_FALSE, PETSC_FALSE, NULL, &IA, &JA, NULL));
    for (i = 0; i < size; i++) {
      /* each rank receives its local rows of the CSR row-pointer array (overlapping by one entry, hence the +1) */
      sendcounti[i] = (PetscMPIInt)(1 + range[i + 1] - range[i]);
      NZ[i]         = (PetscMPIInt)(IA[range[i + 1]] - IA[range[i]]);
    }
    displi[0]  = 0;
    NZdispl[0] = 0;
    for (j = 1; j < size; j++) {
      displi[j]  = displi[j - 1] + sendcounti[j - 1] - 1;
      NZdispl[j] = NZdispl[j - 1] + NZ[j - 1];
    }
    PetscCall(MatSeqAIJGetArrayRead(sA, &sa));
  }
  PetscCall(PetscLayoutDestroy(&layout));
  PetscCallMPI(MPI_Scatter(NZ, 1, MPI_INT, &nz, 1, MPI_INT, 0, comm));

  PetscCall(PetscMalloc3(n + 1, &ia, nz, &ja, nz, &a));
  PetscCallMPI(MPI_Scatterv(IA, sendcounti, displi, MPIU_INT, ia, n + 1, MPIU_INT, 0, comm));
  PetscCallMPI(MPI_Scatterv(JA, NZ, NZdispl, MPIU_INT, ja, nz, MPIU_INT, 0, comm));
  PetscCallMPI(MPI_Scatterv(sa, NZ, NZdispl, MPIU_SCALAR, a, nz, MPIU_SCALAR, 0, comm));

  if (pc) {
    PetscCall(MatSeqAIJRestoreArrayRead(sA, &sa));
    PetscCall(MatRestoreRowIJ(sA, 0, PETSC_FALSE, PETSC_FALSE, NULL, &IA, &JA, NULL));
  }

  /* rebase the received row pointers so the local CSR starts at zero */
  for (j = 1; j < n + 1; j++) ia[j] -= ia[0];
  ia[0] = 0;
  PetscCall(MatCreateMPIAIJWithArrays(comm, n, n, N[0], N[0], ia, ja, a, &A));
  PetscCall(MatSetOptionsPrefix(A, "mpi_"));

  /* MatCreateMPIAIJWithArrays() copied the data, so the staging buffers can be freed */
  PetscCall(PetscFree3(ia, ja, a));
  PetscCall(KSPSetOperators(ksp, A, A));
  if (!ksp->vec_sol) PetscCall(MatCreateVecs(A, &ksp->vec_sol, &ksp->vec_rhs));
  if (pc) { /* needed for scatterv/gatherv of rhs and solution */
    const PetscInt *range;

    PetscCall(VecGetOwnershipRanges(ksp->vec_sol, &range));
    for (i = 0; i < size; i++) {
      km->sendcount[i] = (PetscMPIInt)(range[i + 1] - range[i]);
      km->displ[i]     = (PetscMPIInt)range[i];
    }
  }
  PetscCall(MatDestroy(&A));
  PetscCall(KSPSetFromOptions(ksp));
  PetscFunctionReturn(0);
}
209 
/*
    PCMPIUpdateMatValues - Redistributes only the numerical values of the matrix (same nonzero
    pattern as before) using the counts/displacements recorded by PCMPISetMat().

    pc is non-NULL only on rank 0; server ranks pass NULL and must match the collectives.
*/
static PetscErrorCode PCMPIUpdateMatValues(PC pc) {
  PC_MPI            *km = pc ? (PC_MPI *)pc->data : NULL;
  KSP                ksp;
  Mat                sA, A;
  MPI_Comm           comm = PC_MPI_COMM_WORLD;
  PetscScalar       *a;
  PetscCount         nz;
  const PetscScalar *sa = NULL;
  PetscMPIInt        size;

  PetscFunctionBegin;
  if (pc) {
    PetscCall(PCGetOperators(pc, &sA, &sA));
    PetscCall(MatSeqAIJGetArrayRead(sA, &sa));
  }
  /* each rank learns which (if any) parallel KSP this command refers to; NULL means it does not participate */
  PetscCallMPI(MPI_Scatter(pc ? km->ksps : NULL, 1, MPI_AINT, &ksp, 1, MPI_AINT, 0, comm));
  if (!ksp) PetscFunctionReturn(0);
  PetscCall(PetscObjectGetComm((PetscObject)ksp, &comm));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PCMPIMatCounts[size - 1]++;
  PetscCall(KSPGetOperators(ksp, NULL, &A));
  PetscCall(MatMPIAIJGetNumberNonzeros(A, &nz));
  PetscCall(PetscMalloc1(nz, &a));
  /* counts/displacements were computed in PCMPISetMat(); only valid on rank 0 */
  PetscCallMPI(MPI_Scatterv(sa, pc ? km->NZ : NULL, pc ? km->NZdispl : NULL, MPIU_SCALAR, a, nz, MPIU_SCALAR, 0, comm));
  if (pc) PetscCall(MatSeqAIJRestoreArrayRead(sA, &sa));
  PetscCall(MatUpdateMPIAIJWithArray(A, a));
  PetscCall(PetscFree(a));
  PetscFunctionReturn(0);
}
239 
/*
    PCMPISolve - Scatters the sequential right-hand side B from rank 0, runs the parallel
    KSPSolve(), and gathers the distributed solution back into X on rank 0.

    pc, B, X are non-NULL only on rank 0; server ranks pass NULLs and must match the collectives.
*/
static PetscErrorCode PCMPISolve(PC pc, Vec B, Vec X) {
  PC_MPI            *km = pc ? (PC_MPI *)pc->data : NULL;
  KSP                ksp;
  MPI_Comm           comm = PC_MPI_COMM_WORLD;
  const PetscScalar *sb   = NULL, *x;
  PetscScalar       *b, *sx = NULL;
  PetscInt           n;

  PetscFunctionBegin;
  /* each rank learns which (if any) parallel KSP this command refers to; NULL means it does not participate */
  PetscCallMPI(MPI_Scatter(pc ? km->ksps : &ksp, 1, MPI_AINT, &ksp, 1, MPI_AINT, 0, comm));
  if (!ksp) PetscFunctionReturn(0);
  PetscCall(PetscObjectGetComm((PetscObject)ksp, &comm));

  /* TODO: optimize code to not require building counts/displ everytime */

  /* scatterv rhs */
  if (pc) {
    PetscMPIInt size;

    PetscCallMPI(MPI_Comm_size(comm, &size));
    PCMPISolveCounts[size - 1]++;
    PetscCall(VecGetArrayRead(B, &sb));
  }
  PetscCall(VecGetLocalSize(ksp->vec_rhs, &n));
  PetscCall(VecGetArray(ksp->vec_rhs, &b));
  PetscCallMPI(MPI_Scatterv(sb, pc ? km->sendcount : NULL, pc ? km->displ : NULL, MPIU_SCALAR, b, n, MPIU_SCALAR, 0, comm));
  PetscCall(VecRestoreArray(ksp->vec_rhs, &b));
  if (pc) PetscCall(VecRestoreArrayRead(B, &sb));

  /* vec_rhs/vec_sol were attached to the KSP by PCMPISetMat(), so no vectors are passed here */
  PetscCall(KSPSolve(ksp, NULL, NULL));

  /* gather solution */
  PetscCall(VecGetArrayRead(ksp->vec_sol, &x));
  if (pc) PetscCall(VecGetArray(X, &sx));
  PetscCallMPI(MPI_Gatherv(x, n, MPIU_SCALAR, sx, pc ? km->sendcount : NULL, pc ? km->displ : NULL, MPIU_SCALAR, 0, comm));
  if (pc) PetscCall(VecRestoreArray(X, &sx));
  PetscCall(VecRestoreArrayRead(ksp->vec_sol, &x));
  PetscFunctionReturn(0);
}
279 
280 static PetscErrorCode PCMPIDestroy(PC pc) {
281   PC_MPI  *km = pc ? (PC_MPI *)pc->data : NULL;
282   KSP      ksp;
283   MPI_Comm comm = PC_MPI_COMM_WORLD;
284 
285   PetscFunctionBegin;
286   PetscCallMPI(MPI_Scatter(pc ? km->ksps : NULL, 1, MPI_AINT, &ksp, 1, MPI_AINT, 0, comm));
287   if (!ksp) PetscFunctionReturn(0);
288   PetscCall(KSPDestroy(&ksp));
289   PetscFunctionReturn(0);
290 }
291 
292 /*@C
293      PCMPIServerBegin - starts a server that runs on the rank != 0 MPI processes waiting to process requests for
294      parallel KSP solves and management of parallel KSP objects.
295 
296      Logically collective on all MPI ranks except 0
297 
298    Options Database:
299 +   -mpi_linear_solver_server - causes the PETSc program to start in MPI linear solver server mode where only the first MPI rank runs user code
300 -   -mpi_linear_solver_server_view - displays information about the linear systems solved by the MPI linear solver server
301 
302      Notes:
303       This is normally started automatically in `PetscInitialize()` when the option is provided
304 
305      Developer Notes:
306        When called on rank zero this sets PETSC_COMM_WORLD to PETSC_COMM_SELF to allow a main program
307        written with PETSC_COMM_WORLD to run correctly on the single rank while all the ranks
308        (that would normally be sharing PETSC_COMM_WORLD) to run the solver server.
309 
310        Can this be integrated into the PetscDevice abstraction that is currently being developed?
311 
312      Level: developer
313 
314 .seealso: PCMPIServerEnd(), PCMPI
315 @*/
PetscErrorCode PCMPIServerBegin(void) {
  PetscMPIInt rank;

  PetscFunctionBegin;
  PetscCall(PetscInfo(NULL, "Starting MPI Linear Solver Server"));
  PetscCallMPI(MPI_Comm_rank(PC_MPI_COMM_WORLD, &rank));
  if (rank == 0) {
    /* rank 0 runs the user's sequential program on a world of its own; see Developer Notes above */
    PETSC_COMM_WORLD = PETSC_COMM_SELF;
    PetscFunctionReturn(0);
  }

  /* all other ranks block here for the life of the program, servicing commands broadcast from rank 0 */
  while (PETSC_TRUE) {
    PCMPICommand request = PCMPI_CREATE;
    PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, PC_MPI_COMM_WORLD));
    switch (request) {
    case PCMPI_CREATE: PetscCall(PCMPICreate(NULL)); break;
    case PCMPI_SET_MAT: PetscCall(PCMPISetMat(NULL)); break;
    case PCMPI_UPDATE_MAT_VALUES: PetscCall(PCMPIUpdateMatValues(NULL)); break;
    case PCMPI_VIEW:
      /* viewing on the server ranks is not implemented; see PCView_MPI */
      // PetscCall(PCMPIView(NULL));
      break;
    case PCMPI_SOLVE: PetscCall(PCMPISolve(NULL, NULL, NULL)); break;
    case PCMPI_DESTROY: PetscCall(PCMPIDestroy(NULL)); break;
    case PCMPI_EXIT:
      PetscCall(PetscFinalize());
      exit(0); /* not sure if this is a good idea, but cannot return because it will run users main program */
      break;
    default: break;
    }
  }
  PetscFunctionReturn(0);
}
348 
349 /*@C
350      PCMPIServerEnd - ends a server that runs on the rank != 0 MPI processes waiting to process requests for
351      parallel KSP solves and management of parallel KSP objects.
352 
353      Logically collective on all MPI ranks except 0
354 
355      Notes:
356       This is normally ended automatically in `PetscFinalize()` when the option is provided
357 
358      Level: developer
359 
360 .seealso: PCMPIServerBegin(), PCMPI
361 @*/
PetscErrorCode PCMPIServerEnd(void) {
  PCMPICommand request = PCMPI_EXIT;

  PetscFunctionBegin;
  if (PetscGlobalRank == 0) {
    PetscViewer       viewer = NULL;
    PetscViewerFormat format;

    /* tell the server ranks (blocked in PCMPIServerBegin) to finalize and exit */
    PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, PC_MPI_COMM_WORLD));
    PETSC_COMM_WORLD = MPI_COMM_WORLD; /* could use PC_MPI_COMM_WORLD */
    PetscOptionsBegin(PETSC_COMM_SELF, NULL, "MPI linear solver server options", NULL);
    PetscCall(PetscOptionsViewer("-mpi_linear_solver_server_view", "View information about system solved with the server", "PCMPI", &viewer, &format, NULL));
    PetscOptionsEnd();
    if (viewer) {
      PetscBool isascii;

      PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii));
      if (isascii) {
        PetscMPIInt size;
        PetscInt    i, ksp = 0, mat = 0, solve = 0;

        PetscCallMPI(MPI_Comm_size(PETSC_COMM_WORLD, &size));
        /* counters are indexed by (subcommunicator size - 1); sum over all sizes used */
        for (i = 0; i < size; i++) {
          ksp += PCMPIKSPCounts[i];
          mat += PCMPIMatCounts[i];
          solve += PCMPISolveCounts[i];
        }
        PetscCall(PetscViewerASCIIPrintf(viewer, "MPI linear solver server:\n"));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  Parallel KSPs  %" PetscInt_FMT " Mats  %" PetscInt_FMT " Solves %" PetscInt_FMT "\n", ksp, mat, solve));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  Sequential KSPs  %" PetscInt_FMT " Solves %" PetscInt_FMT "\n", PCMPIKSPCountsSeq, PCMPISolveCountsSeq));
      }
      PetscCall(PetscViewerDestroy(&viewer));
    }
  }
  PetscCall(PCMPICommsDestroy());
  PetscFunctionReturn(0);
}
399 
400 /*
401     This version is used in the trivial case when the MPI parallel solver server is running on just the original MPI rank 0
402     because, for example, the problem is small. This version is more efficient because it does not require copying any data
403 */
404 static PetscErrorCode PCSetUp_Seq(PC pc) {
405   PC_MPI     *km = (PC_MPI *)pc->data;
406   Mat         sA;
407   const char *prefix;
408 
409   PetscFunctionBegin;
410   PetscCall(PCGetOperators(pc, NULL, &sA));
411   PetscCall(PCGetOptionsPrefix(pc, &prefix));
412   PetscCall(KSPCreate(PETSC_COMM_SELF, &km->ksps[0]));
413   PetscCall(KSPSetOptionsPrefix(km->ksps[0], prefix));
414   PetscCall(KSPAppendOptionsPrefix(km->ksps[0], "mpi_"));
415   PetscCall(KSPSetOperators(km->ksps[0], sA, sA));
416   PetscCall(KSPSetFromOptions(km->ksps[0]));
417   PetscCall(KSPSetUp(km->ksps[0]));
418   PetscInfo((PetscObject)pc, "MPI parallel linear solver system is being solved directly on rank 0 due to its small size\n");
419   PCMPIKSPCountsSeq++;
420   PetscFunctionReturn(0);
421 }
422 
423 static PetscErrorCode PCApply_Seq(PC pc, Vec b, Vec x) {
424   PC_MPI *km = (PC_MPI *)pc->data;
425 
426   PetscFunctionBegin;
427   PetscCall(KSPSolve(km->ksps[0], b, x));
428   PCMPISolveCountsSeq++;
429   PetscFunctionReturn(0);
430 }
431 
432 static PetscErrorCode PCView_Seq(PC pc, PetscViewer viewer) {
433   PC_MPI     *km = (PC_MPI *)pc->data;
434   const char *prefix;
435 
436   PetscFunctionBegin;
437   PetscCall(PetscViewerASCIIPrintf(viewer, "Running MPI linear solver server directly on rank 0 due to its small size\n"));
438   PetscCall(PetscViewerASCIIPrintf(viewer, "Desired minimum number of nonzeros per rank for MPI parallel solve %d\n", (int)km->mincntperrank));
439   PetscCall(PCGetOptionsPrefix(pc, &prefix));
440   if (prefix) PetscCall(PetscViewerASCIIPrintf(viewer, "*** Use -%smpi_ksp_view to see the MPI KSP parameters***\n", prefix));
441   else PetscCall(PetscViewerASCIIPrintf(viewer, "*** Use -mpi_ksp_view to see the MPI KSP parameters***\n"));
442   PetscFunctionReturn(0);
443 }
444 
445 static PetscErrorCode PCDestroy_Seq(PC pc) {
446   PC_MPI *km = (PC_MPI *)pc->data;
447 
448   PetscFunctionBegin;
449   PetscCall(KSPDestroy(&km->ksps[0]));
450   PetscCall(PetscFree(pc->data));
451   PetscFunctionReturn(0);
452 }
453 
454 /*
455      PCSetUp_MPI - Trigger the creation of the MPI parallel PC and copy parts of the matrix and
456      right hand side to the parallel PC
457 */
458 static PetscErrorCode PCSetUp_MPI(PC pc) {
459   PC_MPI      *km = (PC_MPI *)pc->data;
460   PetscMPIInt  rank, size;
461   PCMPICommand request;
462   PetscBool    newmatrix = PETSC_FALSE;
463 
464   PetscFunctionBegin;
465   PetscCallMPI(MPI_Comm_rank(MPI_COMM_WORLD, &rank));
466   PetscCheck(rank == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "PCMPI can only be used from 0th rank of MPI_COMM_WORLD. Perhaps a missing -mpi_linear_solver_server?");
467   PetscCallMPI(MPI_Comm_size(MPI_COMM_WORLD, &size));
468 
469   if (!pc->setupcalled) {
470     if (!km->alwaysuseserver) {
471       PetscInt n;
472       Mat      sA;
473       /* short circuit for small systems */
474       PetscCall(PCGetOperators(pc, &sA, &sA));
475       PetscCall(MatGetSize(sA, &n, NULL));
476       if (n < 2 * km->mincntperrank - 1 || size == 1) {
477         pc->ops->setup   = NULL;
478         pc->ops->apply   = PCApply_Seq;
479         pc->ops->destroy = PCDestroy_Seq;
480         pc->ops->view    = PCView_Seq;
481         PetscCall(PCSetUp_Seq(pc));
482         PetscFunctionReturn(0);
483       }
484     }
485 
486     request = PCMPI_CREATE;
487     PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, MPI_COMM_WORLD));
488     PetscCall(PCMPICreate(pc));
489     newmatrix = PETSC_TRUE;
490   }
491   if (pc->flag == DIFFERENT_NONZERO_PATTERN) newmatrix = PETSC_TRUE;
492 
493   if (newmatrix) {
494     PetscInfo((PetscObject)pc, "New matrix or matrix has changed nonzero structure\n");
495     request = PCMPI_SET_MAT;
496     PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, MPI_COMM_WORLD));
497     PetscCall(PCMPISetMat(pc));
498   } else {
499     PetscInfo((PetscObject)pc, "Matrix has only changed nozero values\n");
500     request = PCMPI_UPDATE_MAT_VALUES;
501     PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, MPI_COMM_WORLD));
502     PetscCall(PCMPIUpdateMatValues(pc));
503   }
504   PetscFunctionReturn(0);
505 }
506 
507 static PetscErrorCode PCApply_MPI(PC pc, Vec b, Vec x) {
508   PCMPICommand request = PCMPI_SOLVE;
509 
510   PetscFunctionBegin;
511   PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, MPI_COMM_WORLD));
512   PetscCall(PCMPISolve(pc, b, x));
513   PetscFunctionReturn(0);
514 }
515 
516 PetscErrorCode PCDestroy_MPI(PC pc) {
517   PCMPICommand request = PCMPI_DESTROY;
518 
519   PetscFunctionBegin;
520   PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, MPI_COMM_WORLD));
521   PetscCall(PCMPIDestroy(pc));
522   PetscCall(PetscFree(pc->data));
523   PetscFunctionReturn(0);
524 }
525 
526 /*
527      PCView_MPI - Cannot call view on the MPI parallel KSP because other ranks do not have access to the viewer
528 */
529 PetscErrorCode PCView_MPI(PC pc, PetscViewer viewer) {
530   PC_MPI     *km = (PC_MPI *)pc->data;
531   MPI_Comm    comm;
532   PetscMPIInt size;
533   const char *prefix;
534 
535   PetscFunctionBegin;
536   PetscCall(PetscObjectGetComm((PetscObject)km->ksps[0], &comm));
537   PetscCallMPI(MPI_Comm_size(comm, &size));
538   PetscCall(PetscViewerASCIIPrintf(viewer, "Size of MPI communicator used for MPI parallel KSP solve %d\n", size));
539   PetscCall(PetscViewerASCIIPrintf(viewer, "Desired minimum number of nonzeros per rank for MPI parallel solve %d\n", (int)km->mincntperrank));
540   PetscCall(PCGetOptionsPrefix(pc, &prefix));
541   if (prefix) PetscCall(PetscViewerASCIIPrintf(viewer, "*** Use -%smpi_ksp_view to see the MPI KSP parameters***\n", prefix));
542   else PetscCall(PetscViewerASCIIPrintf(viewer, "*** Use -mpi_ksp_view to see the MPI KSP parameters***\n"));
543   PetscFunctionReturn(0);
544 }
545 
546 PetscErrorCode PCSetFromOptions_MPI(PC pc, PetscOptionItems *PetscOptionsObject) {
547   PC_MPI *km = (PC_MPI *)pc->data;
548 
549   PetscFunctionBegin;
550   PetscOptionsHeadBegin(PetscOptionsObject, "MPI linear solver server options");
551   PetscCall(PetscOptionsInt("-pc_mpi_minimum_count_per_rank", "Desired minimum number of nonzeros per rank", "None", km->mincntperrank, &km->mincntperrank, NULL));
552   PetscCall(PetscOptionsBool("-pc_mpi_always_use_server", "Use the server even if only one rank is used for the solve (for debugging)", "None", km->alwaysuseserver, &km->alwaysuseserver, NULL));
553   PetscOptionsHeadEnd();
554   PetscFunctionReturn(0);
555 }
556 
557 /*MC
558      PCMPI - Calls an MPI parallel KSP to solve a linear system from user code running on one process
559 
560    Level: beginner
561 
562    Options Database:
563 +  -mpi_linear_solver_server - causes the PETSc program to start in MPI linear solver server mode where only the first MPI rank runs user code
564 .  -mpi_linear_solver_server_view - displays information about the linear systems solved by the MPI linear solver server
565 .  -pc_mpi_minimum_count_per_rank - sets the minimum size of the linear system per MPI rank that the solver will strive for
-  -pc_mpi_always_use_server - use the server solver code even if the particular system is only solved on one process, this option is only for debugging and testing purposes
567 
568    Notes:
569    The options database prefix for the MPI parallel KSP and PC is -mpi_
570 
571    The MPI linear solver server will not support scaling user code to utilize extremely large numbers of MPI ranks but should give reasonable speedup for
572    potentially 4 to 8 MPI ranks depending on the linear system being solved, solver algorithm, and the hardware.
573 
574    It can be particularly useful for user OpenMP code or potentially user GPU code.
575 
576    When the program is running with a single MPI rank then this directly uses the provided matrix and right hand side (still in a KSP with the options prefix of -mpi)
577    and does not need to distribute the matrix and vector to the various MPI ranks; thus it incurs no extra overhead over just using the KSP directly.
578 
579 .seealso: `KSPCreate()`, `KSPSetType()`, `KSPType`, `KSP`, `PC`, `PCMPIServerBegin()`, `PCMPIServerEnd()`
580 
581 M*/
582 PETSC_EXTERN PetscErrorCode PCCreate_MPI(PC pc) {
583   PC_MPI *km;
584 
585   PetscFunctionBegin;
586   PetscCall(PetscNewLog(pc, &km));
587   pc->data = (void *)km;
588 
589   km->mincntperrank = 10000;
590 
591   pc->ops->setup          = PCSetUp_MPI;
592   pc->ops->apply          = PCApply_MPI;
593   pc->ops->destroy        = PCDestroy_MPI;
594   pc->ops->view           = PCView_MPI;
595   pc->ops->setfromoptions = PCSetFromOptions_MPI;
596   PetscFunctionReturn(0);
597 }
598