xref: /petsc/src/ksp/pc/impls/mpi/pcmpi.c (revision 66af8762ec03dbef0e079729eb2a1734a35ed7ff)
1 /*
2     This file creates an MPI parallel KSP from a sequential PC that lives on MPI rank 0.
3     It is intended to allow using PETSc MPI parallel linear solvers from non-MPI codes.
4 
5     That program may use OpenMP to compute the right hand side and matrix for the linear system
6 
7     The code uses MPI_COMM_WORLD below but maybe it should be PETSC_COMM_WORLD
8 
9     The resulting KSP and PC can only be controlled via the options database, though some common commands
10     could be passed through the server.
11 
12 */
13 #include <petsc/private/pcimpl.h>
14 #include <petsc/private/kspimpl.h>
15 #include <petsc.h>
16 
17 #define PC_MPI_MAX_RANKS  256
18 #define PC_MPI_COMM_WORLD MPI_COMM_WORLD
19 
/* Private context for PCMPI; lives only on MPI rank 0 (the client); server ranks pass NULL pc */
typedef struct {
  KSP         ksps[PC_MPI_MAX_RANKS];                               /* The addresses of the MPI parallel KSP on each rank, NULL when not on a rank. */
  PetscMPIInt sendcount[PC_MPI_MAX_RANKS], displ[PC_MPI_MAX_RANKS]; /* For scatter/gather of rhs/solution */
  PetscMPIInt NZ[PC_MPI_MAX_RANKS], NZdispl[PC_MPI_MAX_RANKS];      /* For scatter of nonzero values in matrix (and nonzero column indices initially) */
  PetscInt    mincntperrank;                                        /* minimum number of desired nonzeros per active rank in MPI parallel KSP solve */
  PetscBool   alwaysuseserver;                                      /* for debugging use the server infrastructure even if only one MPI rank is used for the solve */
} PC_MPI;
27 
/* Commands broadcast from rank 0 to the server ranks parked in PCMPIServerBegin() */
typedef enum {
  PCMPI_EXIT, /* exit the PC server loop, means the controlling sequential program is done */
  PCMPI_CREATE,            /* create a new MPI parallel KSP on an appropriately sized subcommunicator */
  PCMPI_SET_MAT,           /* set original matrix (or one with different nonzero pattern) */
  PCMPI_UPDATE_MAT_VALUES, /* update current matrix with new nonzero values */
  PCMPI_SOLVE,             /* scatter rhs, run KSPSolve() in parallel, gather solution */
  PCMPI_VIEW,              /* view the parallel KSP (currently unimplemented in the server loop) */
  PCMPI_DESTROY /* destroy a KSP that is no longer needed */
} PCMPICommand;
37 
38 static MPI_Comm  PCMPIComms[PC_MPI_MAX_RANKS];
39 static PetscBool PCMPICommSet = PETSC_FALSE;
40 static PetscInt  PCMPISolveCounts[PC_MPI_MAX_RANKS], PCMPIKSPCounts[PC_MPI_MAX_RANKS], PCMPIMatCounts[PC_MPI_MAX_RANKS], PCMPISolveCountsSeq = 0, PCMPIKSPCountsSeq = 0;
41 static PetscInt  PCMPIIterations[PC_MPI_MAX_RANKS], PCMPISizes[PC_MPI_MAX_RANKS], PCMPIIterationsSeq = 0, PCMPISizesSeq = 0;
42 
43 static PetscErrorCode PCMPICommsCreate(void)
44 {
45   MPI_Comm    comm = PC_MPI_COMM_WORLD;
46   PetscMPIInt size, rank, i;
47 
48   PetscFunctionBegin;
49   PetscCallMPI(MPI_Comm_size(comm, &size));
50   PetscCheck(size <= PC_MPI_MAX_RANKS, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for using more than PC_MPI_MAX_RANKS MPI ranks in an MPI linear solver server solve");
51   PetscCallMPI(MPI_Comm_rank(comm, &rank));
52   /* comm for size 1 is useful only for debugging */
53   for (i = 0; i < size; i++) {
54     PetscMPIInt color = rank < i + 1 ? 0 : MPI_UNDEFINED;
55     PetscCallMPI(MPI_Comm_split(comm, color, 0, &PCMPIComms[i]));
56     PCMPISolveCounts[i] = 0;
57     PCMPIKSPCounts[i]   = 0;
58     PCMPIIterations[i]  = 0;
59     PCMPISizes[i]       = 0;
60   }
61   PCMPICommSet = PETSC_TRUE;
62   PetscFunctionReturn(PETSC_SUCCESS);
63 }
64 
65 PetscErrorCode PCMPICommsDestroy(void)
66 {
67   MPI_Comm    comm = PC_MPI_COMM_WORLD;
68   PetscMPIInt size, rank, i;
69 
70   PetscFunctionBegin;
71   if (!PCMPICommSet) PetscFunctionReturn(PETSC_SUCCESS);
72   PetscCallMPI(MPI_Comm_size(comm, &size));
73   PetscCallMPI(MPI_Comm_rank(comm, &rank));
74   for (i = 0; i < size; i++) {
75     if (PCMPIComms[i] != MPI_COMM_NULL) PetscCallMPI(MPI_Comm_free(&PCMPIComms[i]));
76   }
77   PCMPICommSet = PETSC_FALSE;
78   PetscFunctionReturn(PETSC_SUCCESS);
79 }
80 
81 static PetscErrorCode PCMPICreate(PC pc)
82 {
83   PC_MPI     *km   = pc ? (PC_MPI *)pc->data : NULL;
84   MPI_Comm    comm = PC_MPI_COMM_WORLD;
85   KSP         ksp;
86   PetscInt    N[2], mincntperrank = 0;
87   PetscMPIInt size;
88   Mat         sA;
89   char       *cprefix = NULL;
90   PetscMPIInt len     = 0;
91 
92   PetscFunctionBegin;
93   if (!PCMPICommSet) PetscCall(PCMPICommsCreate());
94   PetscCallMPI(MPI_Comm_size(comm, &size));
95   if (pc) {
96     if (size == 1) PetscCall(PetscPrintf(PETSC_COMM_SELF, "Warning: Running KSP type of MPI on a one rank MPI run, this will be less efficient then not using this type\n"));
97     PetscCall(PCGetOperators(pc, &sA, &sA));
98     PetscCall(MatGetSize(sA, &N[0], &N[1]));
99   }
100   PetscCallMPI(MPI_Bcast(N, 2, MPIU_INT, 0, comm));
101 
102   /* choose a suitable sized MPI_Comm for the problem to be solved on */
103   if (km) mincntperrank = km->mincntperrank;
104   PetscCallMPI(MPI_Bcast(&mincntperrank, 1, MPI_INT, 0, comm));
105   comm = PCMPIComms[PetscMin(size, PetscMax(1, N[0] / mincntperrank)) - 1];
106   if (comm == MPI_COMM_NULL) {
107     ksp = NULL;
108     PetscFunctionReturn(PETSC_SUCCESS);
109   }
110   PetscCall(PetscLogStagePush(PCMPIStage));
111   PetscCall(KSPCreate(comm, &ksp));
112   PetscCall(KSPSetNestLevel(ksp, 1));
113   PetscCall(PetscObjectSetTabLevel((PetscObject)ksp, 1));
114   PetscCall(PetscLogStagePop());
115   PetscCallMPI(MPI_Gather(&ksp, 1, MPI_AINT, pc ? km->ksps : NULL, 1, MPI_AINT, 0, comm));
116   if (pc) {
117     size_t      slen;
118     const char *prefix = NULL;
119     char       *found  = NULL;
120 
121     PetscCallMPI(MPI_Comm_size(comm, &size));
122     PCMPIKSPCounts[size - 1]++;
123     /* Created KSP gets prefix of PC minus the mpi_linear_solver_server_ portion */
124     PetscCall(PCGetOptionsPrefix(pc, &prefix));
125     PetscCheck(prefix, PETSC_COMM_SELF, PETSC_ERR_PLIB, "PCMPI missing required prefix");
126     PetscCall(PetscStrallocpy(prefix, &cprefix));
127     PetscCall(PetscStrstr(cprefix, "mpi_linear_solver_server_", &found));
128     PetscCheck(found, PETSC_COMM_SELF, PETSC_ERR_PLIB, "PCMPI missing mpi_linear_solver_server_ portion of prefix");
129     *found = 0;
130     PetscCall(PetscStrlen(cprefix, &slen));
131     len = (PetscMPIInt)slen;
132   }
133   PetscCallMPI(MPI_Bcast(&len, 1, MPI_INT, 0, comm));
134   if (len) {
135     if (!pc) PetscCall(PetscMalloc1(len + 1, &cprefix));
136     PetscCallMPI(MPI_Bcast(cprefix, len + 1, MPI_CHAR, 0, comm));
137     PetscCall(KSPSetOptionsPrefix(ksp, cprefix));
138   }
139   PetscCall(PetscFree(cprefix));
140   PetscFunctionReturn(PETSC_SUCCESS);
141 }
142 
/*
    PCMPISetMat - distributes the sequential AIJ matrix on MPI rank 0 to a new parallel MPIAIJ
    matrix owned by the server KSP, carrying along the matrix options prefix and any known
    symmetry/Hermitian/SPD/structural-symmetry properties.

    Collective on PC_MPI_COMM_WORLD; pc is non-NULL only on rank 0. Ranks that received a NULL
    KSP from PCMPICreate() return immediately after the initial scatter.
*/
static PetscErrorCode PCMPISetMat(PC pc)
{
  PC_MPI            *km = pc ? (PC_MPI *)pc->data : NULL;
  Mat                A; /* the parallel matrix being assembled on the solve communicator */
  PetscInt           m, n, *ia, *ja, j, bs;
  Mat                sA; /* the sequential matrix on rank 0 */
  MPI_Comm           comm = PC_MPI_COMM_WORLD;
  KSP                ksp;
  PetscLayout        layout;
  const PetscInt    *IA = NULL, *JA = NULL; /* CSR row pointers / column indices of sA (rank 0 only) */
  const PetscInt    *range;
  PetscMPIInt       *NZ = NULL, sendcounti[PC_MPI_MAX_RANKS], displi[PC_MPI_MAX_RANKS], *NZdispl = NULL, nz, size, i;
  PetscScalar       *a;
  const PetscScalar *sa               = NULL;
  PetscInt           matproperties[8] = {0, 0, 0, 0, 0, 0, 0, 0}; /* [0-1] global sizes, [2] block size, [3-6] property flags (0 unknown/1 true/2 false), [7] prefix length */
  char              *cprefix; /* NOTE(review): assigned only when pc is set or matproperties[7] > 0; unused (but uninitialized) on other paths -- confirm */

  PetscFunctionBegin;
  /* learn which (possibly NULL) server KSP this rank participates in */
  PetscCallMPI(MPI_Scatter(pc ? km->ksps : NULL, 1, MPI_AINT, &ksp, 1, MPI_AINT, 0, comm));
  if (!ksp) PetscFunctionReturn(PETSC_SUCCESS);
  PetscCall(PetscObjectGetComm((PetscObject)ksp, &comm)); /* from here on work on the (smaller) solve communicator */
  if (pc) {
    PetscBool   isset, issymmetric, ishermitian, isspd, isstructurallysymmetric;
    const char *prefix;
    size_t      clen;

    PetscCallMPI(MPI_Comm_size(comm, &size));
    PCMPIMatCounts[size - 1]++;
    PetscCall(PCGetOperators(pc, &sA, &sA));
    PetscCall(MatGetSize(sA, &matproperties[0], &matproperties[1]));
    PetscCall(MatGetBlockSize(sA, &bs));
    matproperties[2] = bs;
    /* encode each known matrix property as an integer so all can be broadcast in one message */
    PetscCall(MatIsSymmetricKnown(sA, &isset, &issymmetric));
    matproperties[3] = !isset ? 0 : (issymmetric ? 1 : 2);
    PetscCall(MatIsHermitianKnown(sA, &isset, &ishermitian));
    matproperties[4] = !isset ? 0 : (ishermitian ? 1 : 2);
    PetscCall(MatIsSPDKnown(sA, &isset, &isspd));
    matproperties[5] = !isset ? 0 : (isspd ? 1 : 2);
    PetscCall(MatIsStructurallySymmetricKnown(sA, &isset, &isstructurallysymmetric));
    matproperties[6] = !isset ? 0 : (isstructurallysymmetric ? 1 : 2);
    /* Created Mat gets prefix of input Mat PLUS the mpi_linear_solver_server_ portion */
    PetscCall(MatGetOptionsPrefix(sA, &prefix));
    PetscCall(PetscStrallocpy(prefix, &cprefix));
    PetscCall(PetscStrlen(cprefix, &clen));
    matproperties[7] = (PetscInt)clen;
  }
  PetscCallMPI(MPI_Bcast(matproperties, PETSC_STATIC_ARRAY_LENGTH(matproperties), MPIU_INT, 0, comm));

  /* determine ownership ranges of matrix columns */
  PetscCall(PetscLayoutCreate(comm, &layout));
  PetscCall(PetscLayoutSetBlockSize(layout, matproperties[2]));
  PetscCall(PetscLayoutSetSize(layout, matproperties[1]));
  PetscCall(PetscLayoutSetUp(layout));
  PetscCall(PetscLayoutGetLocalSize(layout, &n));
  PetscCall(PetscLayoutDestroy(&layout));

  /* determine ownership ranges of matrix rows */
  PetscCall(PetscLayoutCreate(comm, &layout));
  PetscCall(PetscLayoutSetBlockSize(layout, matproperties[2]));
  PetscCall(PetscLayoutSetSize(layout, matproperties[0]));
  PetscCall(PetscLayoutSetUp(layout));
  PetscCall(PetscLayoutGetLocalSize(layout, &m));

  /* copy over the matrix nonzero structure and values */
  if (pc) {
    NZ      = km->NZ;
    NZdispl = km->NZdispl;
    PetscCall(PetscLayoutGetRanges(layout, &range));
    PetscCall(MatGetRowIJ(sA, 0, PETSC_FALSE, PETSC_FALSE, NULL, &IA, &JA, NULL));
    for (i = 0; i < size; i++) {
      /* each rank receives its m_i+1 row pointers (hence the +1) and its slice of nonzeros */
      sendcounti[i] = (PetscMPIInt)(1 + range[i + 1] - range[i]);
      NZ[i]         = (PetscMPIInt)(IA[range[i + 1]] - IA[range[i]]);
    }
    displi[0]  = 0;
    NZdispl[0] = 0;
    for (j = 1; j < size; j++) {
      /* row-pointer slices overlap by one entry, so subtract 1 when accumulating displacements */
      displi[j]  = displi[j - 1] + sendcounti[j - 1] - 1;
      NZdispl[j] = NZdispl[j - 1] + NZ[j - 1];
    }
    PetscCall(MatSeqAIJGetArrayRead(sA, &sa));
  }
  PetscCall(PetscLayoutDestroy(&layout));
  PetscCallMPI(MPI_Scatter(NZ, 1, MPI_INT, &nz, 1, MPI_INT, 0, comm));

  PetscCall(PetscMalloc3(n + 1, &ia, nz, &ja, nz, &a));
  PetscCallMPI(MPI_Scatterv(IA, sendcounti, displi, MPIU_INT, ia, n + 1, MPIU_INT, 0, comm));
  PetscCallMPI(MPI_Scatterv(JA, NZ, NZdispl, MPIU_INT, ja, nz, MPIU_INT, 0, comm));
  PetscCallMPI(MPI_Scatterv(sa, NZ, NZdispl, MPIU_SCALAR, a, nz, MPIU_SCALAR, 0, comm));

  if (pc) {
    PetscCall(MatSeqAIJRestoreArrayRead(sA, &sa));
    PetscCall(MatRestoreRowIJ(sA, 0, PETSC_FALSE, PETSC_FALSE, NULL, &IA, &JA, NULL));
  }

  /* the received row pointers are global offsets; shift them so each rank's CSR starts at 0 */
  for (j = 1; j < n + 1; j++) ia[j] -= ia[0];
  ia[0] = 0;
  PetscCall(PetscLogStagePush(PCMPIStage));
  PetscCall(MatCreate(comm, &A));
  if (matproperties[7] > 0) {
    /* propagate the original matrix's options prefix to all ranks */
    if (!pc) PetscCall(PetscMalloc1(matproperties[7] + 1, &cprefix));
    PetscCallMPI(MPI_Bcast(cprefix, matproperties[7] + 1, MPI_CHAR, 0, comm));
    PetscCall(MatSetOptionsPrefix(A, cprefix));
    PetscCall(PetscFree(cprefix));
  }
  PetscCall(MatAppendOptionsPrefix(A, "mpi_linear_solver_server_"));
  PetscCall(MatSetSizes(A, m, n, matproperties[0], matproperties[1]));
  PetscCall(MatSetType(A, MATMPIAIJ));
  PetscCall(MatMPIAIJSetPreallocationCSR(A, ia, ja, a));
  PetscCall(MatSetBlockSize(A, matproperties[2]));

  if (matproperties[3]) PetscCall(MatSetOption(A, MAT_SYMMETRIC, matproperties[3] == 1 ? PETSC_TRUE : PETSC_FALSE));
  if (matproperties[4]) PetscCall(MatSetOption(A, MAT_HERMITIAN, matproperties[4] == 1 ? PETSC_TRUE : PETSC_FALSE));
  if (matproperties[5]) PetscCall(MatSetOption(A, MAT_SPD, matproperties[5] == 1 ? PETSC_TRUE : PETSC_FALSE));
  if (matproperties[6]) PetscCall(MatSetOption(A, MAT_STRUCTURALLY_SYMMETRIC, matproperties[6] == 1 ? PETSC_TRUE : PETSC_FALSE));

  /* MatMPIAIJSetPreallocationCSR() copied the data, so the staging buffers can be released */
  PetscCall(PetscFree3(ia, ja, a));
  PetscCall(KSPSetOperators(ksp, A, A));
  if (!ksp->vec_sol) PetscCall(MatCreateVecs(A, &ksp->vec_sol, &ksp->vec_rhs));
  PetscCall(PetscLogStagePop());
  if (pc) { /* needed for scatterv/gatherv of rhs and solution */
    const PetscInt *range;

    PetscCall(VecGetOwnershipRanges(ksp->vec_sol, &range));
    for (i = 0; i < size; i++) {
      km->sendcount[i] = (PetscMPIInt)(range[i + 1] - range[i]);
      km->displ[i]     = (PetscMPIInt)range[i];
    }
  }
  PetscCall(MatDestroy(&A)); /* KSP holds its own reference */
  PetscCall(KSPSetFromOptions(ksp));
  PetscFunctionReturn(PETSC_SUCCESS);
}
275 
/*
    PCMPIUpdateMatValues - scatters new numerical values from the sequential matrix on MPI rank 0
    into the existing parallel matrix (nonzero structure unchanged) and refreshes its known
    symmetry/Hermitian/SPD/structural-symmetry options.

    Collective on PC_MPI_COMM_WORLD; pc is non-NULL only on rank 0. Ranks with a NULL server KSP
    return immediately after the initial scatter.
*/
static PetscErrorCode PCMPIUpdateMatValues(PC pc)
{
  PC_MPI            *km = pc ? (PC_MPI *)pc->data : NULL;
  KSP                ksp;
  Mat                sA, A;
  MPI_Comm           comm = PC_MPI_COMM_WORLD;
  PetscScalar       *a;
  PetscCount         nz;
  const PetscScalar *sa = NULL;
  PetscMPIInt        size;
  PetscInt           matproperties[4] = {0, 0, 0, 0}; /* symmetric, Hermitian, SPD, structurally symmetric: 0 unknown, 1 true, 2 false */

  PetscFunctionBegin;
  if (pc) {
    PetscCall(PCGetOperators(pc, &sA, &sA));
    PetscCall(MatSeqAIJGetArrayRead(sA, &sa)); /* the sequential values, already in the CSR order used by PCMPISetMat() */
  }
  /* learn which (possibly NULL) server KSP this rank participates in */
  PetscCallMPI(MPI_Scatter(pc ? km->ksps : NULL, 1, MPI_AINT, &ksp, 1, MPI_AINT, 0, comm));
  if (!ksp) PetscFunctionReturn(PETSC_SUCCESS);
  PetscCall(PetscObjectGetComm((PetscObject)ksp, &comm)); /* switch to the (smaller) solve communicator */
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PCMPIMatCounts[size - 1]++;
  PetscCall(KSPGetOperators(ksp, NULL, &A));
  PetscCall(MatMPIAIJGetNumberNonzeros(A, &nz)); /* this rank's share of nonzeros, fixed at PCMPISetMat() time */
  PetscCall(PetscMalloc1(nz, &a));
  PetscCallMPI(MPI_Scatterv(sa, pc ? km->NZ : NULL, pc ? km->NZdispl : NULL, MPIU_SCALAR, a, nz, MPIU_SCALAR, 0, comm));
  if (pc) {
    PetscBool isset, issymmetric, ishermitian, isspd, isstructurallysymmetric;

    PetscCall(MatSeqAIJRestoreArrayRead(sA, &sa));

    /* encode each known matrix property as an integer so all can be broadcast in one message */
    PetscCall(MatIsSymmetricKnown(sA, &isset, &issymmetric));
    matproperties[0] = !isset ? 0 : (issymmetric ? 1 : 2);
    PetscCall(MatIsHermitianKnown(sA, &isset, &ishermitian));
    matproperties[1] = !isset ? 0 : (ishermitian ? 1 : 2);
    PetscCall(MatIsSPDKnown(sA, &isset, &isspd));
    matproperties[2] = !isset ? 0 : (isspd ? 1 : 2);
    PetscCall(MatIsStructurallySymmetricKnown(sA, &isset, &isstructurallysymmetric));
    matproperties[3] = !isset ? 0 : (isstructurallysymmetric ? 1 : 2);
  }
  PetscCall(MatUpdateMPIAIJWithArray(A, a));
  PetscCall(PetscFree(a));
  PetscCallMPI(MPI_Bcast(matproperties, 4, MPIU_INT, 0, comm));
  /* if any of these properties was previously set and is now not set this will result in incorrect properties in A since there is no way to unset a property */
  if (matproperties[0]) PetscCall(MatSetOption(A, MAT_SYMMETRIC, matproperties[0] == 1 ? PETSC_TRUE : PETSC_FALSE));
  if (matproperties[1]) PetscCall(MatSetOption(A, MAT_HERMITIAN, matproperties[1] == 1 ? PETSC_TRUE : PETSC_FALSE));
  if (matproperties[2]) PetscCall(MatSetOption(A, MAT_SPD, matproperties[2] == 1 ? PETSC_TRUE : PETSC_FALSE));
  if (matproperties[3]) PetscCall(MatSetOption(A, MAT_STRUCTURALLY_SYMMETRIC, matproperties[3] == 1 ? PETSC_TRUE : PETSC_FALSE));
  PetscFunctionReturn(PETSC_SUCCESS);
}
326 
/*
    PCMPISolve - scatters the right-hand side from rank 0, runs the parallel KSPSolve(), and
    gathers the solution back to rank 0.

    Collective on PC_MPI_COMM_WORLD; pc (and B, X) are non-NULL only on rank 0. Ranks with a
    NULL server KSP return immediately after the initial scatter.
*/
static PetscErrorCode PCMPISolve(PC pc, Vec B, Vec X)
{
  PC_MPI            *km = pc ? (PC_MPI *)pc->data : NULL;
  KSP                ksp;
  MPI_Comm           comm = PC_MPI_COMM_WORLD;
  const PetscScalar *sb   = NULL, *x;
  PetscScalar       *b, *sx = NULL;
  PetscInt           its, n;
  PetscMPIInt        size;

  PetscFunctionBegin;
  /* learn which (possibly NULL) server KSP this rank participates in; the non-root send buffer
     (&ksp here) is ignored by MPI_Scatter, only the root's km->ksps matters */
  PetscCallMPI(MPI_Scatter(pc ? km->ksps : &ksp, 1, MPI_AINT, &ksp, 1, MPI_AINT, 0, comm));
  if (!ksp) PetscFunctionReturn(PETSC_SUCCESS);
  PetscCall(PetscObjectGetComm((PetscObject)ksp, &comm)); /* switch to the (smaller) solve communicator */

  /* TODO: optimize code to not require building counts/displ every time */

  /* scatterv rhs */
  PetscCallMPI(MPI_Comm_size(comm, &size));
  if (pc) {
    PetscInt N;

    /* accumulate statistics reported by -mpi_linear_solver_server_view */
    PCMPISolveCounts[size - 1]++;
    PetscCall(MatGetSize(pc->pmat, &N, NULL));
    PCMPISizes[size - 1] += N;
    PetscCall(VecGetArrayRead(B, &sb));
  }
  PetscCall(VecGetLocalSize(ksp->vec_rhs, &n));
  PetscCall(VecGetArray(ksp->vec_rhs, &b));
  PetscCallMPI(MPI_Scatterv(sb, pc ? km->sendcount : NULL, pc ? km->displ : NULL, MPIU_SCALAR, b, n, MPIU_SCALAR, 0, comm));
  PetscCall(VecRestoreArray(ksp->vec_rhs, &b));
  if (pc) PetscCall(VecRestoreArrayRead(B, &sb));

  /* solve in place using the KSP's own rhs/solution vectors filled above */
  PetscCall(PetscLogStagePush(PCMPIStage));
  PetscCall(KSPSolve(ksp, NULL, NULL));
  PetscCall(PetscLogStagePop());
  PetscCall(KSPGetIterationNumber(ksp, &its));
  PCMPIIterations[size - 1] += its;

  /* gather solution */
  PetscCall(VecGetArrayRead(ksp->vec_sol, &x));
  if (pc) PetscCall(VecGetArray(X, &sx));
  PetscCallMPI(MPI_Gatherv(x, n, MPIU_SCALAR, sx, pc ? km->sendcount : NULL, pc ? km->displ : NULL, MPIU_SCALAR, 0, comm));
  if (pc) PetscCall(VecRestoreArray(X, &sx));
  PetscCall(VecRestoreArrayRead(ksp->vec_sol, &x));
  PetscFunctionReturn(PETSC_SUCCESS);
}
374 
375 static PetscErrorCode PCMPIDestroy(PC pc)
376 {
377   PC_MPI  *km = pc ? (PC_MPI *)pc->data : NULL;
378   KSP      ksp;
379   MPI_Comm comm = PC_MPI_COMM_WORLD;
380 
381   PetscFunctionBegin;
382   PetscCallMPI(MPI_Scatter(pc ? km->ksps : NULL, 1, MPI_AINT, &ksp, 1, MPI_AINT, 0, comm));
383   if (!ksp) PetscFunctionReturn(PETSC_SUCCESS);
384   PetscCall(KSPDestroy(&ksp));
385   PetscFunctionReturn(PETSC_SUCCESS);
386 }
387 
388 PetscBool PCMPIServerActive = PETSC_FALSE;
389 
390 /*@C
391   PCMPIServerBegin - starts a server that runs on the `rank != 0` MPI processes waiting to process requests for
392   parallel `KSP` solves and management of parallel `KSP` objects.
393 
394   Logically Collective on all MPI processes except rank 0
395 
396   Options Database Keys:
397 + -mpi_linear_solver_server      - causes the PETSc program to start in MPI linear solver server mode where only the first MPI rank runs user code
398 - -mpi_linear_solver_server_view - displays information about all the linear systems solved by the MPI linear solver server at the conclusion of the program
399 
400   Level: developer
401 
402   Note:
403   This is normally started automatically in `PetscInitialize()` when the option is provided
404 
405   See `PCMPI` for information on using the solver with a `KSP` object
406 
407   Developer Notes:
408   When called on MPI rank 0 this sets `PETSC_COMM_WORLD` to `PETSC_COMM_SELF` to allow a main program
409   written with `PETSC_COMM_WORLD` to run correctly on the single rank while all the ranks
410   (that would normally be sharing `PETSC_COMM_WORLD`) to run the solver server.
411 
412   Can this be integrated into the `PetscDevice` abstraction that is currently being developed?
413 
414   Conceivably `PCREDISTRIBUTE` could be organized in a similar manner to simplify its usage
415 
416   This could be implemented directly at the `KSP` level instead of using the `PCMPI` wrapper object
417 
418   The code could be extended to allow an MPI + OpenMP application to use the linear solver server concept across all shared-memory
419   nodes with a single MPI process per node for the user application but multiple MPI processes per node for the linear solver.
420 
   The concept could also be extended for users' callbacks for `SNES`, `TS`, and `Tao` where, for example, the `SNESSolve()` runs on
422   all MPI processes but the user callback only runs on one MPI process per node.
423 
424   PETSc could also be extended with an MPI-less API that provides access to PETSc's solvers without any reference to MPI, essentially remove
425   the `MPI_Comm` argument from PETSc calls.
426 
427 .seealso: [](sec_pcmpi), `PCMPIServerEnd()`, `PCMPI`, `KSPCheckPCMPI()`
428 @*/
PetscErrorCode PCMPIServerBegin(void)
{
  PetscMPIInt rank;

  PetscFunctionBegin;
  PetscCall(PetscInfo(NULL, "Starting MPI Linear Solver Server\n"));
  /* with a single combined library the sub-packages are not registered automatically, so do it here
     before the server ranks stop executing normal initialization paths */
  if (PetscDefined(USE_SINGLE_LIBRARY)) {
    PetscCall(VecInitializePackage());
    PetscCall(MatInitializePackage());
    PetscCall(DMInitializePackage());
    PetscCall(PCInitializePackage());
    PetscCall(KSPInitializePackage());
    PetscCall(SNESInitializePackage());
    PetscCall(TSInitializePackage());
    PetscCall(TaoInitializePackage());
  }

  PetscCallMPI(MPI_Comm_rank(PC_MPI_COMM_WORLD, &rank));
  if (rank == 0) {
    /* rank 0 is the client: it returns to run the user's sequential program on a self communicator */
    PETSC_COMM_WORLD  = PETSC_COMM_SELF;
    PCMPIServerActive = PETSC_TRUE;
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  /* all other ranks park here, serving commands broadcast from rank 0 until PCMPI_EXIT arrives;
     every command handler is called with a NULL pc since only rank 0 owns the PC object */
  while (PETSC_TRUE) {
    PCMPICommand request = PCMPI_CREATE;
    PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, PC_MPI_COMM_WORLD));
    switch (request) {
    case PCMPI_CREATE:
      PetscCall(PCMPICreate(NULL));
      break;
    case PCMPI_SET_MAT:
      PetscCall(PCMPISetMat(NULL));
      break;
    case PCMPI_UPDATE_MAT_VALUES:
      PetscCall(PCMPIUpdateMatValues(NULL));
      break;
    case PCMPI_VIEW:
      // PetscCall(PCMPIView(NULL));
      break;
    case PCMPI_SOLVE:
      PetscCall(PCMPISolve(NULL, NULL, NULL));
      break;
    case PCMPI_DESTROY:
      PetscCall(PCMPIDestroy(NULL));
      break;
    case PCMPI_EXIT:
      PetscCall(PetscFinalize());
      exit(0); /* not sure if this is a good idea, but cannot return because it will run users main program */
      break;
    default:
      break;
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
485 
486 /*@C
487   PCMPIServerEnd - ends a server that runs on the rank != 0 MPI processes waiting to process requests for
488   parallel KSP solves and management of parallel `KSP` objects.
489 
490   Logically Collective on all MPI ranks except 0
491 
492   Level: developer
493 
494   Note:
495   This is normally ended automatically in `PetscFinalize()` when the option is provided
496 
497 .seealso: [](sec_pcmpi), `PCMPIServerBegin()`, `PCMPI`, `KSPCheckPCMPI()`
498 @*/
499 PetscErrorCode PCMPIServerEnd(void)
500 {
501   PCMPICommand request = PCMPI_EXIT;
502 
503   PetscFunctionBegin;
504   if (PetscGlobalRank == 0) {
505     PetscViewer       viewer = NULL;
506     PetscViewerFormat format;
507 
508     PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, PC_MPI_COMM_WORLD));
509     PETSC_COMM_WORLD = MPI_COMM_WORLD; /* could use PC_MPI_COMM_WORLD */
510     PetscOptionsBegin(PETSC_COMM_SELF, NULL, "MPI linear solver server options", NULL);
511     PetscCall(PetscOptionsViewer("-mpi_linear_solver_server_view", "View information about system solved with the server", "PCMPI", &viewer, &format, NULL));
512     PetscOptionsEnd();
513     if (viewer) {
514       PetscBool isascii;
515 
516       PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii));
517       if (isascii) {
518         PetscMPIInt size;
519         PetscMPIInt i;
520 
521         PetscCallMPI(MPI_Comm_size(PETSC_COMM_WORLD, &size));
522         PetscCall(PetscViewerASCIIPrintf(viewer, "MPI linear solver server statistics:\n"));
523         PetscCall(PetscViewerASCIIPrintf(viewer, "    Ranks        KSPSolve()s     Mats        KSPs       Avg. Size      Avg. Its\n"));
524         if (PCMPIKSPCountsSeq) {
525           PetscCall(PetscViewerASCIIPrintf(viewer, "  Sequential         %" PetscInt_FMT "                         %" PetscInt_FMT "            %" PetscInt_FMT "           %" PetscInt_FMT "\n", PCMPISolveCountsSeq, PCMPIKSPCountsSeq, PCMPISizesSeq / PCMPISolveCountsSeq, PCMPIIterationsSeq / PCMPISolveCountsSeq));
526         }
527         for (i = 0; i < size; i++) {
528           if (PCMPIKSPCounts[i]) {
529             PetscCall(PetscViewerASCIIPrintf(viewer, "     %d               %" PetscInt_FMT "            %" PetscInt_FMT "           %" PetscInt_FMT "            %" PetscInt_FMT "            %" PetscInt_FMT "\n", i + 1, PCMPISolveCounts[i], PCMPIMatCounts[i], PCMPIKSPCounts[i], PCMPISizes[i] / PCMPISolveCounts[i], PCMPIIterations[i] / PCMPISolveCounts[i]));
530           }
531         }
532       }
533       PetscCall(PetscViewerDestroy(&viewer));
534     }
535   }
536   PetscCall(PCMPICommsDestroy());
537   PCMPIServerActive = PETSC_FALSE;
538   PetscFunctionReturn(PETSC_SUCCESS);
539 }
540 
541 /*
542     This version is used in the trivial case when the MPI parallel solver server is running on just the original MPI rank 0
543     because, for example, the problem is small. This version is more efficient because it does not require copying any data
544 */
545 static PetscErrorCode PCSetUp_Seq(PC pc)
546 {
547   PC_MPI     *km = (PC_MPI *)pc->data;
548   Mat         sA;
549   const char *prefix;
550   char       *found = NULL, *cprefix;
551 
552   PetscFunctionBegin;
553   PetscCall(PCGetOperators(pc, NULL, &sA));
554   PetscCall(PCGetOptionsPrefix(pc, &prefix));
555   PetscCall(KSPCreate(PETSC_COMM_SELF, &km->ksps[0]));
556   PetscCall(KSPSetNestLevel(km->ksps[0], 1));
557   PetscCall(PetscObjectSetTabLevel((PetscObject)km->ksps[0], 1));
558 
559   /* Created KSP gets prefix of PC minus the mpi_linear_solver_server_ portion */
560   PetscCall(PCGetOptionsPrefix(pc, &prefix));
561   PetscCheck(prefix, PETSC_COMM_SELF, PETSC_ERR_PLIB, "PCMPI missing required prefix");
562   PetscCall(PetscStrallocpy(prefix, &cprefix));
563   PetscCall(PetscStrstr(cprefix, "mpi_linear_solver_server_", &found));
564   PetscCheck(found, PETSC_COMM_SELF, PETSC_ERR_PLIB, "PCMPI missing mpi_linear_solver_server_ portion of prefix");
565   *found = 0;
566   PetscCall(KSPSetOptionsPrefix(km->ksps[0], cprefix));
567   PetscCall(PetscFree(cprefix));
568 
569   PetscCall(KSPSetOperators(km->ksps[0], sA, sA));
570   PetscCall(KSPSetFromOptions(km->ksps[0]));
571   PetscCall(KSPSetUp(km->ksps[0]));
572   PetscCall(PetscInfo((PetscObject)pc, "MPI parallel linear solver system is being solved directly on rank 0 due to its small size\n"));
573   PCMPIKSPCountsSeq++;
574   PetscFunctionReturn(PETSC_SUCCESS);
575 }
576 
577 static PetscErrorCode PCApply_Seq(PC pc, Vec b, Vec x)
578 {
579   PC_MPI  *km = (PC_MPI *)pc->data;
580   PetscInt its, n;
581   Mat      A;
582 
583   PetscFunctionBegin;
584   PetscCall(KSPSolve(km->ksps[0], b, x));
585   PetscCall(KSPGetIterationNumber(km->ksps[0], &its));
586   PCMPISolveCountsSeq++;
587   PCMPIIterationsSeq += its;
588   PetscCall(KSPGetOperators(km->ksps[0], NULL, &A));
589   PetscCall(MatGetSize(A, &n, NULL));
590   PCMPISizesSeq += n;
591   PetscFunctionReturn(PETSC_SUCCESS);
592 }
593 
594 static PetscErrorCode PCView_Seq(PC pc, PetscViewer viewer)
595 {
596   PC_MPI *km = (PC_MPI *)pc->data;
597 
598   PetscFunctionBegin;
599   PetscCall(PetscViewerASCIIPrintf(viewer, "Running MPI linear solver server directly on rank 0 due to its small size\n"));
600   PetscCall(PetscViewerASCIIPrintf(viewer, "Desired minimum number of nonzeros per rank for MPI parallel solve %d\n", (int)km->mincntperrank));
601   PetscCall(PetscViewerASCIIPrintf(viewer, "*** Use -mpi_linear_solver_server_view to statistics on all the solves ***\n"));
602   PetscFunctionReturn(PETSC_SUCCESS);
603 }
604 
605 static PetscErrorCode PCDestroy_Seq(PC pc)
606 {
607   PC_MPI *km = (PC_MPI *)pc->data;
608 
609   PetscFunctionBegin;
610   PetscCall(KSPDestroy(&km->ksps[0]));
611   PetscCall(PetscFree(pc->data));
612   PetscFunctionReturn(PETSC_SUCCESS);
613 }
614 
/*
     PCSetUp_MPI - Trigger the creation of the MPI parallel PC and copy parts of the matrix and
     right hand side to the parallel PC

     Runs on MPI rank 0 only; each broadcast command must be matched by the corresponding
     PCMPIxxx(NULL) call in the server loop of PCMPIServerBegin(), so the command/call pairs
     below must stay in lockstep with that switch statement.
*/
static PetscErrorCode PCSetUp_MPI(PC pc)
{
  PC_MPI      *km = (PC_MPI *)pc->data;
  PetscMPIInt  rank, size;
  PCMPICommand request;
  PetscBool    newmatrix = PETSC_FALSE;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_rank(MPI_COMM_WORLD, &rank));
  PetscCheck(rank == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "PCMPI can only be used from 0th rank of MPI_COMM_WORLD. Perhaps a missing -mpi_linear_solver_server?");
  PetscCallMPI(MPI_Comm_size(MPI_COMM_WORLD, &size));

  if (!pc->setupcalled) {
    if (!km->alwaysuseserver) {
      PetscInt n;
      Mat      sA;
      /* short circuit for small systems: solve sequentially on rank 0 without the server */
      PetscCall(PCGetOperators(pc, &sA, &sA));
      PetscCall(MatGetSize(sA, &n, NULL));
      if (n < 2 * km->mincntperrank - 1 || size == 1) {
        /* permanently switch this PC to the sequential implementation */
        pc->ops->setup   = NULL;
        pc->ops->apply   = PCApply_Seq;
        pc->ops->destroy = PCDestroy_Seq;
        pc->ops->view    = PCView_Seq;
        PetscCall(PCSetUp_Seq(pc));
        PetscFunctionReturn(PETSC_SUCCESS);
      }
    }

    request = PCMPI_CREATE;
    PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, MPI_COMM_WORLD));
    PetscCall(PCMPICreate(pc));
    newmatrix = PETSC_TRUE;
  }
  /* a changed nonzero pattern requires a full redistribution, not just a value update */
  if (pc->flag == DIFFERENT_NONZERO_PATTERN) newmatrix = PETSC_TRUE;

  if (newmatrix) {
    PetscCall(PetscInfo((PetscObject)pc, "New matrix or matrix has changed nonzero structure\n"));
    request = PCMPI_SET_MAT;
    PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, MPI_COMM_WORLD));
    PetscCall(PCMPISetMat(pc));
  } else {
    PetscCall(PetscInfo((PetscObject)pc, "Matrix has only changed nonzero values\n"));
    request = PCMPI_UPDATE_MAT_VALUES;
    PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, MPI_COMM_WORLD));
    PetscCall(PCMPIUpdateMatValues(pc));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
668 
669 static PetscErrorCode PCApply_MPI(PC pc, Vec b, Vec x)
670 {
671   PCMPICommand request = PCMPI_SOLVE;
672 
673   PetscFunctionBegin;
674   PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, MPI_COMM_WORLD));
675   PetscCall(PCMPISolve(pc, b, x));
676   PetscFunctionReturn(PETSC_SUCCESS);
677 }
678 
679 static PetscErrorCode PCDestroy_MPI(PC pc)
680 {
681   PCMPICommand request = PCMPI_DESTROY;
682 
683   PetscFunctionBegin;
684   PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, MPI_COMM_WORLD));
685   PetscCall(PCMPIDestroy(pc));
686   PetscCall(PetscFree(pc->data));
687   PetscFunctionReturn(PETSC_SUCCESS);
688 }
689 
690 /*
691      PCView_MPI - Cannot call view on the MPI parallel KSP because other ranks do not have access to the viewer
692 */
693 static PetscErrorCode PCView_MPI(PC pc, PetscViewer viewer)
694 {
695   PC_MPI     *km = (PC_MPI *)pc->data;
696   MPI_Comm    comm;
697   PetscMPIInt size;
698 
699   PetscFunctionBegin;
700   PetscCall(PetscObjectGetComm((PetscObject)km->ksps[0], &comm));
701   PetscCallMPI(MPI_Comm_size(comm, &size));
702   PetscCall(PetscViewerASCIIPrintf(viewer, "Size of MPI communicator used for MPI parallel KSP solve %d\n", size));
703   PetscCall(PetscViewerASCIIPrintf(viewer, "Desired minimum number of nonzeros per rank for MPI parallel solve %d\n", (int)km->mincntperrank));
704   PetscCall(PetscViewerASCIIPrintf(viewer, "*** Use -mpi_linear_solver_server_view to statistics on all the solves ***\n"));
705   PetscFunctionReturn(PETSC_SUCCESS);
706 }
707 
/*
    PCSetFromOptions_MPI - processes the PCMPI-specific options; the full option names get the
    PC's prefix, e.g. -[prefix]mpi_linear_solver_server_minimum_count_per_rank.
*/
static PetscErrorCode PCSetFromOptions_MPI(PC pc, PetscOptionItems *PetscOptionsObject)
{
  PC_MPI *km = (PC_MPI *)pc->data;

  PetscFunctionBegin;
  PetscOptionsHeadBegin(PetscOptionsObject, "MPI linear solver server options");
  /* threshold below which the system is solved sequentially on rank 0 (see PCSetUp_MPI()) */
  PetscCall(PetscOptionsInt("-minimum_count_per_rank", "Desired minimum number of nonzeros per rank", "None", km->mincntperrank, &km->mincntperrank, NULL));
  PetscCall(PetscOptionsBool("-always_use_server", "Use the server even if only one rank is used for the solve (for debugging)", "None", km->alwaysuseserver, &km->alwaysuseserver, NULL));
  PetscOptionsHeadEnd();
  PetscFunctionReturn(PETSC_SUCCESS);
}
719 
720 /*MC
721      PCMPI - Calls an MPI parallel `KSP` to solve a linear system from user code running on one process
722 
723    Options Database Keys for the Server:
724 +  -mpi_linear_solver_server - causes the PETSc program to start in MPI linear solver server mode where only the first MPI rank runs user code
725 -  -mpi_linear_solver_server_view - displays information about all the linear systems solved by the MPI linear solver server
726 
727    Options Database Keys for a specific `KSP` object
728 +  -[any_ksp_prefix]_mpi_linear_solver_server_minimum_count_per_rank - sets the minimum size of the linear system per MPI rank that the solver will strive for
729 -  -[any_ksp_prefix]_mpi_linear_solver_server_always_use_server - use the server solver code even if the particular system is only solved on the process (for debugging and testing purposes)
730 
731    Level: developer
732 
733    Notes:
   The options database prefix for the actual solver is any prefix provided before use to the original `KSP` with `KSPSetOptionsPrefix()`; most commonly no prefix is used.
735 
736    It can be particularly useful for user OpenMP code or potentially user GPU code.
737 
738    When the program is running with a single MPI process then it directly uses the provided matrix and right hand side
739    and does not need to distribute the matrix and vector to the various MPI processes; thus it incurs no extra overhead over just using the `KSP` directly.
740 
741    The solver options for actual solving `KSP` and `PC` must be controlled via the options database, calls to set options directly on the user level `KSP` and `PC` have no effect
742    because they are not the actual solver objects.
743 
744    When `-log_view` is used with this solver the events within the parallel solve are logging in their own stage. Some of the logging in the other
745    stages will be confusing since the event times are only recorded on the 0th MPI rank, thus the percent of time in the events will be misleading.
746 
747    Developer Note:
748    This `PCType` is never directly selected by the user, it is set when the option `-mpi_linear_solver_server` is used and the `PC` is at the outer most nesting of
749    a `KSP`. The outer most `KSP` object is automatically set to `KSPPREONLY` and thus is not directly visible to the user.
750 
751 .seealso: [](sec_pcmpi), `KSPCreate()`, `KSPSetType()`, `KSPType`, `KSP`, `PC`, `PCMPIServerBegin()`, `PCMPIServerEnd()`, `KSPCheckPCMPI()`
752 M*/
PETSC_EXTERN PetscErrorCode PCCreate_MPI(PC pc)
{
  PC_MPI *km;
  char   *found = NULL;

  PetscFunctionBegin;
  /* this type is only installed by the server machinery, which always supplies this prefix */
  PetscCall(PetscStrstr(((PetscObject)pc)->prefix, "mpi_linear_solver_server_", &found));
  PetscCheck(found, PETSC_COMM_SELF, PETSC_ERR_PLIB, "PCMPI object prefix does not have mpi_linear_solver_server_");

  /* material from PCSetType(): tear down whatever implementation the PC previously had */
  PetscTryTypeMethod(pc, destroy);
  pc->ops->destroy = NULL;
  pc->data         = NULL;

  PetscCall(PetscFunctionListDestroy(&((PetscObject)pc)->qlist));
  PetscCall(PetscMemzero(pc->ops, sizeof(struct _PCOps)));
  pc->modifysubmatrices  = NULL;
  pc->modifysubmatricesP = NULL;
  pc->setupcalled        = 0;

  PetscCall(PetscNew(&km));
  pc->data = (void *)km;

  /* default threshold for running the solve sequentially on rank 0; see PCSetUp_MPI() */
  km->mincntperrank = 10000;

  pc->ops->setup          = PCSetUp_MPI;
  pc->ops->apply          = PCApply_MPI;
  pc->ops->destroy        = PCDestroy_MPI;
  pc->ops->view           = PCView_MPI;
  pc->ops->setfromoptions = PCSetFromOptions_MPI;
  PetscCall(PetscObjectChangeTypeName((PetscObject)pc, PCMPI));
  PetscFunctionReturn(PETSC_SUCCESS);
}
786