xref: /petsc/src/ksp/pc/impls/mpi/pcmpi.c (revision d9acb416d05abeed0a33bde3a81aeb2ea0364f6a)
1 /*
2     This file creates an MPI parallel KSP from a sequential PC that lives on MPI rank 0.
3     It is intended to allow using PETSc MPI parallel linear solvers from non-MPI codes.
4 
5     The user's program may use OpenMP to compute the right-hand side and matrix for the linear system.
6 
7     The code uses MPI_COMM_WORLD below, but perhaps it should be PETSC_COMM_WORLD.
8 
9     The resulting KSP and PC can only be controlled via the options database, though some common commands
10     could be passed through the server.
11 
12 */
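/*
    A minimal sketch of typical usage (the program name and variables below are illustrative, not part of this file):
    the application is written as if it ran on a single MPI process and is launched with additional ranks that act
    as solver workers, e.g.

       mpiexec -n 8 ./myapp -mpi_linear_solver_server -mpi_linear_solver_server_view

    with application code on rank 0 along the lines of

       Mat A;      // sequential AIJ matrix assembled by the application
       Vec b, x;   // sequential right-hand side and solution
       KSP ksp;

       KSPCreate(PETSC_COMM_SELF, &ksp);   // or PETSC_COMM_WORLD, which the server remaps to PETSC_COMM_SELF on rank 0
       KSPSetOperators(ksp, A, A);
       KSPSetFromOptions(ksp);
       KSPSolve(ksp, b, x);                // forwarded to the parallel server when the system is large enough

    See PCMPI and PCMPIServerBegin() below for details.
*/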
13 #include <petsc/private/pcimpl.h>
14 #include <petsc/private/kspimpl.h>
15 #include <petsc.h>
16 
17 #define PC_MPI_MAX_RANKS  256
18 #define PC_MPI_COMM_WORLD MPI_COMM_WORLD
19 
20 typedef struct {
21   KSP         ksps[PC_MPI_MAX_RANKS];                               /* The addresses of the MPI parallel KSP on each rank, NULL when not on a rank. */
22   PetscMPIInt sendcount[PC_MPI_MAX_RANKS], displ[PC_MPI_MAX_RANKS]; /* For scatter/gather of rhs/solution */
23   PetscMPIInt NZ[PC_MPI_MAX_RANKS], NZdispl[PC_MPI_MAX_RANKS];      /* For scatter of nonzero values in matrix (and nonzero column indices initially) */
24   PetscInt    mincntperrank;                                        /* minimum number of desired nonzeros per active rank in MPI parallel KSP solve */
25   PetscBool   alwaysuseserver;                                      /* for debugging use the server infrastructure even if only one MPI rank is used for the solve */
26 } PC_MPI;
27 
28 typedef enum {
29   PCMPI_EXIT, /* exit the PC server loop; the controlling sequential program is done */
30   PCMPI_CREATE,
31   PCMPI_SET_MAT,           /* set original matrix (or one with different nonzero pattern) */
32   PCMPI_UPDATE_MAT_VALUES, /* update current matrix with new nonzero values */
33   PCMPI_SOLVE,
34   PCMPI_VIEW,
35   PCMPI_DESTROY /* destroy a KSP that is no longer needed */
36 } PCMPICommand;
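/*
    Each operation is initiated on MPI rank 0 by broadcasting one of these commands to the server ranks
    (see PCMPIServerBegin()); every rank then enters the matching PCMPIXxx() routine collectively, with
    rank 0 passing its PC and the server ranks passing NULL.
*/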
37 
38 static MPI_Comm  PCMPIComms[PC_MPI_MAX_RANKS];
39 static PetscBool PCMPICommSet = PETSC_FALSE;
40 static PetscInt  PCMPISolveCounts[PC_MPI_MAX_RANKS], PCMPIKSPCounts[PC_MPI_MAX_RANKS], PCMPIMatCounts[PC_MPI_MAX_RANKS], PCMPISolveCountsSeq = 0, PCMPIKSPCountsSeq = 0;
41 static PetscInt  PCMPIIterations[PC_MPI_MAX_RANKS], PCMPISizes[PC_MPI_MAX_RANKS], PCMPIIterationsSeq = 0, PCMPISizesSeq = 0;
42 
43 static PetscErrorCode PCMPICommsCreate(void)
44 {
45   MPI_Comm    comm = PC_MPI_COMM_WORLD;
46   PetscMPIInt size, rank, i;
47 
48   PetscFunctionBegin;
49   PetscCallMPI(MPI_Comm_size(comm, &size));
50   PetscCheck(size <= PC_MPI_MAX_RANKS, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for using more than PC_MPI_MAX_RANKS MPI ranks in an MPI linear solver server solve");
51   PetscCallMPI(MPI_Comm_rank(comm, &rank));
52   /* comm for size 1 is useful only for debugging */
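  /* PCMPIComms[i] contains the first i+1 ranks of comm; ranks outside that group receive MPI_COMM_NULL.
     For example, with 4 ranks: PCMPIComms[0] = {0}, PCMPIComms[1] = {0,1}, PCMPIComms[2] = {0,1,2}, PCMPIComms[3] = {0,1,2,3}. */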
53   for (i = 0; i < size; i++) {
54     PetscMPIInt color = rank < i + 1 ? 0 : MPI_UNDEFINED;
55     PetscCallMPI(MPI_Comm_split(comm, color, 0, &PCMPIComms[i]));
56     PCMPISolveCounts[i] = 0;
57     PCMPIKSPCounts[i]   = 0;
58     PCMPIIterations[i]  = 0;
59     PCMPISizes[i]       = 0;
60   }
61   PCMPICommSet = PETSC_TRUE;
62   PetscFunctionReturn(PETSC_SUCCESS);
63 }
64 
65 PetscErrorCode PCMPICommsDestroy(void)
66 {
67   MPI_Comm    comm = PC_MPI_COMM_WORLD;
68   PetscMPIInt size, rank, i;
69 
70   PetscFunctionBegin;
71   if (!PCMPICommSet) PetscFunctionReturn(PETSC_SUCCESS);
72   PetscCallMPI(MPI_Comm_size(comm, &size));
73   PetscCallMPI(MPI_Comm_rank(comm, &rank));
74   for (i = 0; i < size; i++) {
75     if (PCMPIComms[i] != MPI_COMM_NULL) PetscCallMPI(MPI_Comm_free(&PCMPIComms[i]));
76   }
77   PCMPICommSet = PETSC_FALSE;
78   PetscFunctionReturn(PETSC_SUCCESS);
79 }
80 
81 static PetscErrorCode PCMPICreate(PC pc)
82 {
83   PC_MPI     *km   = pc ? (PC_MPI *)pc->data : NULL;
84   MPI_Comm    comm = PC_MPI_COMM_WORLD;
85   KSP         ksp;
86   PetscInt    N[2], mincntperrank = 0;
87   PetscMPIInt size;
88   Mat         sA;
89   char       *cprefix = NULL;
90   PetscMPIInt len     = 0;
91 
92   PetscFunctionBegin;
93   if (!PCMPICommSet) PetscCall(PCMPICommsCreate());
94   PetscCallMPI(MPI_Comm_size(comm, &size));
95   if (pc) {
96     if (size == 1) PetscCall(PetscPrintf(PETSC_COMM_SELF, "Warning: Running KSP type of MPI on a one rank MPI run, this will be less efficient than not using this type\n"));
97     PetscCall(PCGetOperators(pc, &sA, &sA));
98     PetscCall(MatGetSize(sA, &N[0], &N[1]));
99   }
100   PetscCallMPI(MPI_Bcast(N, 2, MPIU_INT, 0, comm));
101 
102   /* choose a suitable sized MPI_Comm for the problem to be solved on */
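  /* The formula below selects the smallest subcommunicator that gives each active rank at least
     mincntperrank rows (capped at the full size). A sketch: with N[0] = 100000 rows, mincntperrank = 10000,
     and 16 total ranks, N[0]/mincntperrank = 10, so PCMPIComms[9] (ranks 0 through 9) is used and
     ranks 10 through 15 receive MPI_COMM_NULL and skip this system. */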
103   if (km) mincntperrank = km->mincntperrank;
104   PetscCallMPI(MPI_Bcast(&mincntperrank, 1, MPI_INT, 0, comm));
105   comm = PCMPIComms[PetscMin(size, PetscMax(1, N[0] / mincntperrank)) - 1];
106   if (comm == MPI_COMM_NULL) {
107     ksp = NULL;
108     PetscFunctionReturn(PETSC_SUCCESS);
109   }
110   PetscCall(PetscLogStagePush(PCMPIStage));
111   PetscCall(KSPCreate(comm, &ksp));
112   PetscCall(KSPSetNestLevel(ksp, 1));
113   PetscCall(PetscObjectSetTabLevel((PetscObject)ksp, 1));
114   PetscCall(PetscLogStagePop());
115   PetscCallMPI(MPI_Gather(&ksp, 1, MPI_AINT, pc ? km->ksps : NULL, 1, MPI_AINT, 0, comm));
116   if (pc) {
117     size_t      slen;
118     const char *prefix = NULL;
119     char       *found  = NULL;
120 
121     PetscCallMPI(MPI_Comm_size(comm, &size));
122     PCMPIKSPCounts[size - 1]++;
123     /* Created KSP gets prefix of PC minus the mpi_linear_solver_server_ portion */
124     PetscCall(PCGetOptionsPrefix(pc, &prefix));
125     PetscCheck(prefix, PETSC_COMM_SELF, PETSC_ERR_PLIB, "PCMPI missing required prefix");
126     PetscCall(PetscStrallocpy(prefix, &cprefix));
127     PetscCall(PetscStrstr(cprefix, "mpi_linear_solver_server_", &found));
128     PetscCheck(found, PETSC_COMM_SELF, PETSC_ERR_PLIB, "PCMPI missing mpi_linear_solver_server_ portion of prefix");
129     *found = 0;
130     PetscCall(PetscStrlen(cprefix, &slen));
131     len = (PetscMPIInt)slen;
132   }
133   PetscCallMPI(MPI_Bcast(&len, 1, MPI_INT, 0, comm));
134   if (len) {
135     if (!pc) PetscCall(PetscMalloc1(len + 1, &cprefix));
136     PetscCallMPI(MPI_Bcast(cprefix, len + 1, MPI_CHAR, 0, comm));
137     PetscCall(KSPSetOptionsPrefix(ksp, cprefix));
138   }
139   PetscCall(PetscFree(cprefix));
140   PetscFunctionReturn(PETSC_SUCCESS);
141 }
142 
143 static PetscErrorCode PCMPISetMat(PC pc)
144 {
145   PC_MPI            *km = pc ? (PC_MPI *)pc->data : NULL;
146   Mat                A;
147   PetscInt           m, n, *ia, *ja, j, bs;
148   Mat                sA;
149   MPI_Comm           comm = PC_MPI_COMM_WORLD;
150   KSP                ksp;
151   PetscLayout        layout;
152   const PetscInt    *IA = NULL, *JA = NULL;
153   const PetscInt    *range;
154   PetscMPIInt       *NZ = NULL, sendcounti[PC_MPI_MAX_RANKS], displi[PC_MPI_MAX_RANKS], *NZdispl = NULL, nz, size, i;
155   PetscScalar       *a;
156   const PetscScalar *sa               = NULL;
157   PetscInt           matproperties[8] = {0, 0, 0, 0, 0, 0, 0, 0};
158   char              *cprefix;
159 
160   PetscFunctionBegin;
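  /* Rank 0 holds the addresses of all participating ranks' KSPs (gathered in PCMPICreate()); this scatter
     hands each server rank the address of its own KSP, which is NULL on ranks not participating in this
     system's solve, so they return immediately. */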
161   PetscCallMPI(MPI_Scatter(pc ? km->ksps : NULL, 1, MPI_AINT, &ksp, 1, MPI_AINT, 0, comm));
162   if (!ksp) PetscFunctionReturn(PETSC_SUCCESS);
163   PetscCall(PetscObjectGetComm((PetscObject)ksp, &comm));
164   if (pc) {
165     PetscBool   isset, issymmetric, ishermitian, isspd, isstructurallysymmetric;
166     const char *prefix;
167     size_t      clen;
168 
169     PetscCallMPI(MPI_Comm_size(comm, &size));
170     PCMPIMatCounts[size - 1]++;
171     PetscCall(PCGetOperators(pc, &sA, &sA));
172     PetscCall(MatGetSize(sA, &matproperties[0], &matproperties[1]));
173     PetscCall(MatGetBlockSize(sA, &bs));
174     matproperties[2] = bs;
175     PetscCall(MatIsSymmetricKnown(sA, &isset, &issymmetric));
176     matproperties[3] = !isset ? 0 : (issymmetric ? 1 : 2);
177     PetscCall(MatIsHermitianKnown(sA, &isset, &ishermitian));
178     matproperties[4] = !isset ? 0 : (ishermitian ? 1 : 2);
179     PetscCall(MatIsSPDKnown(sA, &isset, &isspd));
180     matproperties[5] = !isset ? 0 : (isspd ? 1 : 2);
181     PetscCall(MatIsStructurallySymmetricKnown(sA, &isset, &isstructurallysymmetric));
182     matproperties[6] = !isset ? 0 : (isstructurallysymmetric ? 1 : 2);
183     /* Created Mat gets prefix of input Mat PLUS the mpi_linear_solver_server_ portion */
184     PetscCall(MatGetOptionsPrefix(sA, &prefix));
185     PetscCall(PetscStrallocpy(prefix, &cprefix));
186     PetscCall(PetscStrlen(cprefix, &clen));
187     matproperties[7] = (PetscInt)clen;
188   }
189   PetscCallMPI(MPI_Bcast(matproperties, PETSC_STATIC_ARRAY_LENGTH(matproperties), MPIU_INT, 0, comm));
190 
191   /* determine ownership ranges of matrix columns */
192   PetscCall(PetscLayoutCreate(comm, &layout));
193   PetscCall(PetscLayoutSetBlockSize(layout, matproperties[2]));
194   PetscCall(PetscLayoutSetSize(layout, matproperties[1]));
195   PetscCall(PetscLayoutSetUp(layout));
196   PetscCall(PetscLayoutGetLocalSize(layout, &n));
197   PetscCall(PetscLayoutDestroy(&layout));
198 
199   /* determine ownership ranges of matrix rows */
200   PetscCall(PetscLayoutCreate(comm, &layout));
201   PetscCall(PetscLayoutSetBlockSize(layout, matproperties[2]));
202   PetscCall(PetscLayoutSetSize(layout, matproperties[0]));
203   PetscCall(PetscLayoutSetUp(layout));
204   PetscCall(PetscLayoutGetLocalSize(layout, &m));
205 
206   /* copy over the matrix nonzero structure and values */
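  /* A small worked example of the split: a 4x4 matrix with row pointers IA = {0,2,5,7,9} divided over
     2 ranks owning rows {0,1} and {2,3} (range = {0,2,4}) gives
        sendcounti = {3,3}   (each rank receives its slice of IA including the shared endpoint)
        displi     = {0,2}
        NZ         = {5,4}   (nonzeros owned by each rank)
        NZdispl    = {0,5}
     so rank 1 receives ia = {5,7,9}, which the shift applied after the scatters (ia[j] -= ia[0]) turns into
     the local row pointers {0,2,4}. */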
207   if (pc) {
208     NZ      = km->NZ;
209     NZdispl = km->NZdispl;
210     PetscCall(PetscLayoutGetRanges(layout, &range));
211     PetscCall(MatGetRowIJ(sA, 0, PETSC_FALSE, PETSC_FALSE, NULL, &IA, &JA, NULL));
212     for (i = 0; i < size; i++) {
213       sendcounti[i] = (PetscMPIInt)(1 + range[i + 1] - range[i]);
214       NZ[i]         = (PetscMPIInt)(IA[range[i + 1]] - IA[range[i]]);
215     }
216     displi[0]  = 0;
217     NZdispl[0] = 0;
218     for (j = 1; j < size; j++) {
219       displi[j]  = displi[j - 1] + sendcounti[j - 1] - 1;
220       NZdispl[j] = NZdispl[j - 1] + NZ[j - 1];
221     }
222     PetscCall(MatSeqAIJGetArrayRead(sA, &sa));
223   }
224   PetscCall(PetscLayoutDestroy(&layout));
225   PetscCallMPI(MPI_Scatter(NZ, 1, MPI_INT, &nz, 1, MPI_INT, 0, comm));
226 
227   PetscCall(PetscMalloc3(n + 1, &ia, nz, &ja, nz, &a));
228   PetscCallMPI(MPI_Scatterv(IA, sendcounti, displi, MPIU_INT, ia, n + 1, MPIU_INT, 0, comm));
229   PetscCallMPI(MPI_Scatterv(JA, NZ, NZdispl, MPIU_INT, ja, nz, MPIU_INT, 0, comm));
230   PetscCallMPI(MPI_Scatterv(sa, NZ, NZdispl, MPIU_SCALAR, a, nz, MPIU_SCALAR, 0, comm));
231 
232   if (pc) {
233     PetscCall(MatSeqAIJRestoreArrayRead(sA, &sa));
234     PetscCall(MatRestoreRowIJ(sA, 0, PETSC_FALSE, PETSC_FALSE, NULL, &IA, &JA, NULL));
235   }
236 
237   for (j = 1; j < n + 1; j++) ia[j] -= ia[0];
238   ia[0] = 0;
239   PetscCall(PetscLogStagePush(PCMPIStage));
240   PetscCall(MatCreate(comm, &A));
241   if (matproperties[7] > 0) {
242     if (!pc) PetscCall(PetscMalloc1(matproperties[7] + 1, &cprefix));
243     PetscCallMPI(MPI_Bcast(cprefix, matproperties[7] + 1, MPI_CHAR, 0, comm));
244     PetscCall(MatSetOptionsPrefix(A, cprefix));
245     PetscCall(PetscFree(cprefix));
246   }
247   PetscCall(MatAppendOptionsPrefix(A, "mpi_linear_solver_server_"));
248   PetscCall(MatSetSizes(A, m, n, matproperties[0], matproperties[1]));
249   PetscCall(MatSetType(A, MATMPIAIJ));
250   PetscCall(MatMPIAIJSetPreallocationCSR(A, ia, ja, a));
251   PetscCall(MatSetBlockSize(A, matproperties[2]));
252 
253   if (matproperties[3]) PetscCall(MatSetOption(A, MAT_SYMMETRIC, matproperties[3] == 1 ? PETSC_TRUE : PETSC_FALSE));
254   if (matproperties[4]) PetscCall(MatSetOption(A, MAT_HERMITIAN, matproperties[4] == 1 ? PETSC_TRUE : PETSC_FALSE));
255   if (matproperties[5]) PetscCall(MatSetOption(A, MAT_SPD, matproperties[5] == 1 ? PETSC_TRUE : PETSC_FALSE));
256   if (matproperties[6]) PetscCall(MatSetOption(A, MAT_STRUCTURALLY_SYMMETRIC, matproperties[6] == 1 ? PETSC_TRUE : PETSC_FALSE));
257 
258   PetscCall(PetscFree3(ia, ja, a));
259   PetscCall(KSPSetOperators(ksp, A, A));
260   if (!ksp->vec_sol) PetscCall(MatCreateVecs(A, &ksp->vec_sol, &ksp->vec_rhs));
261   PetscCall(PetscLogStagePop());
262   if (pc) { /* needed for scatterv/gatherv of rhs and solution */
263     const PetscInt *range;
264 
265     PetscCall(VecGetOwnershipRanges(ksp->vec_sol, &range));
266     for (i = 0; i < size; i++) {
267       km->sendcount[i] = (PetscMPIInt)(range[i + 1] - range[i]);
268       km->displ[i]     = (PetscMPIInt)range[i];
269     }
270   }
271   PetscCall(MatDestroy(&A));
272   PetscCall(KSPSetFromOptions(ksp));
273   PetscFunctionReturn(PETSC_SUCCESS);
274 }
275 
276 static PetscErrorCode PCMPIUpdateMatValues(PC pc)
277 {
278   PC_MPI            *km = pc ? (PC_MPI *)pc->data : NULL;
279   KSP                ksp;
280   Mat                sA, A;
281   MPI_Comm           comm = PC_MPI_COMM_WORLD;
282   PetscScalar       *a;
283   PetscCount         nz;
284   const PetscScalar *sa = NULL;
285   PetscMPIInt        size;
286   PetscInt           matproperties[4] = {0, 0, 0, 0};
287 
288   PetscFunctionBegin;
289   if (pc) {
290     PetscCall(PCGetOperators(pc, &sA, &sA));
291     PetscCall(MatSeqAIJGetArrayRead(sA, &sa));
292   }
293   PetscCallMPI(MPI_Scatter(pc ? km->ksps : NULL, 1, MPI_AINT, &ksp, 1, MPI_AINT, 0, comm));
294   if (!ksp) PetscFunctionReturn(PETSC_SUCCESS);
295   PetscCall(PetscObjectGetComm((PetscObject)ksp, &comm));
296   PetscCallMPI(MPI_Comm_size(comm, &size));
297   PCMPIMatCounts[size - 1]++;
298   PetscCall(KSPGetOperators(ksp, NULL, &A));
299   PetscCall(MatMPIAIJGetNumberNonzeros(A, &nz));
300   PetscCall(PetscMalloc1(nz, &a));
301   PetscCallMPI(MPI_Scatterv(sa, pc ? km->NZ : NULL, pc ? km->NZdispl : NULL, MPIU_SCALAR, a, nz, MPIU_SCALAR, 0, comm));
302   if (pc) {
303     PetscBool isset, issymmetric, ishermitian, isspd, isstructurallysymmetric;
304 
305     PetscCall(MatSeqAIJRestoreArrayRead(sA, &sa));
306 
307     PetscCall(MatIsSymmetricKnown(sA, &isset, &issymmetric));
308     matproperties[0] = !isset ? 0 : (issymmetric ? 1 : 2);
309     PetscCall(MatIsHermitianKnown(sA, &isset, &ishermitian));
310     matproperties[1] = !isset ? 0 : (ishermitian ? 1 : 2);
311     PetscCall(MatIsSPDKnown(sA, &isset, &isspd));
312     matproperties[2] = !isset ? 0 : (isspd ? 1 : 2);
313     PetscCall(MatIsStructurallySymmetricKnown(sA, &isset, &isstructurallysymmetric));
314     matproperties[3] = !isset ? 0 : (isstructurallysymmetric ? 1 : 2);
315   }
316   PetscCall(MatUpdateMPIAIJWithArray(A, a));
317   PetscCall(PetscFree(a));
318   PetscCallMPI(MPI_Bcast(matproperties, 4, MPIU_INT, 0, comm));
319   /* if any of these properties was previously set and is now not set this will result in incorrect properties in A since there is no way to unset a property */
320   if (matproperties[0]) PetscCall(MatSetOption(A, MAT_SYMMETRIC, matproperties[0] == 1 ? PETSC_TRUE : PETSC_FALSE));
321   if (matproperties[1]) PetscCall(MatSetOption(A, MAT_HERMITIAN, matproperties[1] == 1 ? PETSC_TRUE : PETSC_FALSE));
322   if (matproperties[2]) PetscCall(MatSetOption(A, MAT_SPD, matproperties[2] == 1 ? PETSC_TRUE : PETSC_FALSE));
323   if (matproperties[3]) PetscCall(MatSetOption(A, MAT_STRUCTURALLY_SYMMETRIC, matproperties[3] == 1 ? PETSC_TRUE : PETSC_FALSE));
324   PetscFunctionReturn(PETSC_SUCCESS);
325 }
326 
327 static PetscErrorCode PCMPISolve(PC pc, Vec B, Vec X)
328 {
329   PC_MPI            *km = pc ? (PC_MPI *)pc->data : NULL;
330   KSP                ksp;
331   MPI_Comm           comm = PC_MPI_COMM_WORLD;
332   const PetscScalar *sb   = NULL, *x;
333   PetscScalar       *b, *sx = NULL;
334   PetscInt           its, n;
335   PetscMPIInt        size;
336 
337   PetscFunctionBegin;
338   PetscCallMPI(MPI_Scatter(pc ? km->ksps : &ksp, 1, MPI_AINT, &ksp, 1, MPI_AINT, 0, comm));
339   if (!ksp) PetscFunctionReturn(PETSC_SUCCESS);
340   PetscCall(PetscObjectGetComm((PetscObject)ksp, &comm));
341 
342   /* TODO: optimize code to not require building counts/displ every time */
343 
344   /* scatterv rhs */
345   PetscCallMPI(MPI_Comm_size(comm, &size));
346   if (pc) {
347     PetscInt N;
348 
349     PCMPISolveCounts[size - 1]++;
350     PetscCall(MatGetSize(pc->pmat, &N, NULL));
351     PCMPISizes[size - 1] += N;
352     PetscCall(VecGetArrayRead(B, &sb));
353   }
354   PetscCall(VecGetLocalSize(ksp->vec_rhs, &n));
355   PetscCall(VecGetArray(ksp->vec_rhs, &b));
356   PetscCallMPI(MPI_Scatterv(sb, pc ? km->sendcount : NULL, pc ? km->displ : NULL, MPIU_SCALAR, b, n, MPIU_SCALAR, 0, comm));
357   PetscCall(VecRestoreArray(ksp->vec_rhs, &b));
358   if (pc) PetscCall(VecRestoreArrayRead(B, &sb));
359 
360   PetscCall(PetscLogStagePush(PCMPIStage));
361   PetscCall(KSPSolve(ksp, NULL, NULL));
362   PetscCall(PetscLogStagePop());
363   PetscCall(KSPGetIterationNumber(ksp, &its));
364   PCMPIIterations[size - 1] += its;
365 
366   /* gather solution */
367   PetscCall(VecGetArrayRead(ksp->vec_sol, &x));
368   if (pc) PetscCall(VecGetArray(X, &sx));
369   PetscCallMPI(MPI_Gatherv(x, n, MPIU_SCALAR, sx, pc ? km->sendcount : NULL, pc ? km->displ : NULL, MPIU_SCALAR, 0, comm));
370   if (pc) PetscCall(VecRestoreArray(X, &sx));
371   PetscCall(VecRestoreArrayRead(ksp->vec_sol, &x));
372   PetscFunctionReturn(PETSC_SUCCESS);
373 }
374 
375 static PetscErrorCode PCMPIDestroy(PC pc)
376 {
377   PC_MPI  *km = pc ? (PC_MPI *)pc->data : NULL;
378   KSP      ksp;
379   MPI_Comm comm = PC_MPI_COMM_WORLD;
380 
381   PetscFunctionBegin;
382   PetscCallMPI(MPI_Scatter(pc ? km->ksps : NULL, 1, MPI_AINT, &ksp, 1, MPI_AINT, 0, comm));
383   if (!ksp) PetscFunctionReturn(PETSC_SUCCESS);
384   PetscCall(PetscLogStagePush(PCMPIStage));
385   PetscCall(KSPDestroy(&ksp));
386   PetscCall(PetscLogStagePop());
387   PetscFunctionReturn(PETSC_SUCCESS);
388 }
389 
390 PetscBool PCMPIServerActive = PETSC_FALSE;
391 
392 /*@C
393   PCMPIServerBegin - starts a server that runs on the `rank != 0` MPI processes waiting to process requests for
394   parallel `KSP` solves and management of parallel `KSP` objects.
395 
396   Logically Collective on all MPI processes except rank 0
397 
398   Options Database Keys:
399 + -mpi_linear_solver_server      - causes the PETSc program to start in MPI linear solver server mode where only the first MPI rank runs user code
400 - -mpi_linear_solver_server_view - displays information about all the linear systems solved by the MPI linear solver server at the conclusion of the program
401 
402   Level: developer
403 
404   Note:
405   This is normally started automatically in `PetscInitialize()` when the option `-mpi_linear_solver_server` is provided
406 
407   See `PCMPI` for information on using the solver with a `KSP` object
408 
409   Developer Notes:
410   When called on MPI rank 0 this sets `PETSC_COMM_WORLD` to `PETSC_COMM_SELF` to allow a main program
411   written with `PETSC_COMM_WORLD` to run correctly on the single rank while all the other ranks
412   (that would normally be sharing `PETSC_COMM_WORLD`) run the solver server.
413 
414   Can this be integrated into the `PetscDevice` abstraction that is currently being developed?
415 
416   Conceivably `PCREDISTRIBUTE` could be organized in a similar manner to simplify its usage
417 
418   This could be implemented directly at the `KSP` level instead of using the `PCMPI` wrapper object
419 
420   The code could be extended to allow an MPI + OpenMP application to use the linear solver server concept across all shared-memory
421   nodes with a single MPI process per node for the user application but multiple MPI processes per node for the linear solver.
422 
423   The concept could also be extended for users' callbacks for `SNES`, `TS`, and `Tao` where, for example, the `SNESSolve()` runs on
424   all MPI processes but the user callback only runs on one MPI process per node.
425 
426   PETSc could also be extended with an MPI-less API that provides access to PETSc's solvers without any reference to MPI, essentially removing
427   the `MPI_Comm` argument from PETSc calls.
428 
429 .seealso: [](sec_pcmpi), `PCMPIServerEnd()`, `PCMPI`, `KSPCheckPCMPI()`
430 @*/
431 PetscErrorCode PCMPIServerBegin(void)
432 {
433   PetscMPIInt rank;
434 
435   PetscFunctionBegin;
436   PetscCall(PetscInfo(NULL, "Starting MPI Linear Solver Server\n"));
437   if (PetscDefined(USE_SINGLE_LIBRARY)) {
438     PetscCall(VecInitializePackage());
439     PetscCall(MatInitializePackage());
440     PetscCall(DMInitializePackage());
441     PetscCall(PCInitializePackage());
442     PetscCall(KSPInitializePackage());
443     PetscCall(SNESInitializePackage());
444     PetscCall(TSInitializePackage());
445     PetscCall(TaoInitializePackage());
446   }
447   PetscCall(PetscLogStageRegister("PCMPI", &PCMPIStage));
448 
449   PetscCallMPI(MPI_Comm_rank(PC_MPI_COMM_WORLD, &rank));
450   if (rank == 0) {
451     PETSC_COMM_WORLD  = PETSC_COMM_SELF;
452     PCMPIServerActive = PETSC_TRUE;
453     PetscFunctionReturn(PETSC_SUCCESS);
454   }
455 
456   while (PETSC_TRUE) {
457     PCMPICommand request = PCMPI_CREATE;
458     PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, PC_MPI_COMM_WORLD));
459     switch (request) {
460     case PCMPI_CREATE:
461       PetscCall(PCMPICreate(NULL));
462       break;
463     case PCMPI_SET_MAT:
464       PetscCall(PCMPISetMat(NULL));
465       break;
466     case PCMPI_UPDATE_MAT_VALUES:
467       PetscCall(PCMPIUpdateMatValues(NULL));
468       break;
469     case PCMPI_VIEW:
470       // PetscCall(PCMPIView(NULL));
471       break;
472     case PCMPI_SOLVE:
473       PetscCall(PCMPISolve(NULL, NULL, NULL));
474       break;
475     case PCMPI_DESTROY:
476       PetscCall(PCMPIDestroy(NULL));
477       break;
478     case PCMPI_EXIT:
479       PetscCall(PetscFinalize());
480       exit(0); /* not sure if this is a good idea, but cannot return because it would run the user's main program */
481       break;
482     default:
483       break;
484     }
485   }
486   PetscFunctionReturn(PETSC_SUCCESS);
487 }
488 
489 /*@C
490   PCMPIServerEnd - ends a server that runs on the `rank != 0` MPI processes waiting to process requests for
491   parallel `KSP` solves and management of parallel `KSP` objects.
492 
493   Logically Collective on all MPI ranks except 0
494 
495   Level: developer
496 
497   Note:
498   This is normally ended automatically in `PetscFinalize()` when the option `-mpi_linear_solver_server` is provided
499 
500 .seealso: [](sec_pcmpi), `PCMPIServerBegin()`, `PCMPI`, `KSPCheckPCMPI()`
501 @*/
502 PetscErrorCode PCMPIServerEnd(void)
503 {
504   PCMPICommand request = PCMPI_EXIT;
505 
506   PetscFunctionBegin;
507   if (PetscGlobalRank == 0) {
508     PetscViewer       viewer = NULL;
509     PetscViewerFormat format;
510 
511     PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, PC_MPI_COMM_WORLD));
512     PETSC_COMM_WORLD = MPI_COMM_WORLD; /* could use PC_MPI_COMM_WORLD */
513     PetscOptionsBegin(PETSC_COMM_SELF, NULL, "MPI linear solver server options", NULL);
514     PetscCall(PetscOptionsViewer("-mpi_linear_solver_server_view", "View information about system solved with the server", "PCMPI", &viewer, &format, NULL));
515     PetscOptionsEnd();
516     if (viewer) {
517       PetscBool isascii;
518 
519       PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii));
520       if (isascii) {
521         PetscMPIInt size;
522         PetscMPIInt i;
523 
524         PetscCallMPI(MPI_Comm_size(PETSC_COMM_WORLD, &size));
525         PetscCall(PetscViewerASCIIPrintf(viewer, "MPI linear solver server statistics:\n"));
526         PetscCall(PetscViewerASCIIPrintf(viewer, "    Ranks        KSPSolve()s     Mats        KSPs       Avg. Size      Avg. Its\n"));
527         if (PCMPIKSPCountsSeq) {
528           PetscCall(PetscViewerASCIIPrintf(viewer, "  Sequential         %" PetscInt_FMT "                         %" PetscInt_FMT "            %" PetscInt_FMT "           %" PetscInt_FMT "\n", PCMPISolveCountsSeq, PCMPIKSPCountsSeq, PCMPISizesSeq / PCMPISolveCountsSeq, PCMPIIterationsSeq / PCMPISolveCountsSeq));
529         }
530         for (i = 0; i < size; i++) {
531           if (PCMPIKSPCounts[i]) {
532             PetscCall(PetscViewerASCIIPrintf(viewer, "     %d               %" PetscInt_FMT "            %" PetscInt_FMT "           %" PetscInt_FMT "            %" PetscInt_FMT "            %" PetscInt_FMT "\n", i + 1, PCMPISolveCounts[i], PCMPIMatCounts[i], PCMPIKSPCounts[i], PCMPISizes[i] / PCMPISolveCounts[i], PCMPIIterations[i] / PCMPISolveCounts[i]));
533           }
534         }
535       }
536       PetscCall(PetscOptionsRestoreViewer(&viewer));
537     }
538   }
539   PetscCall(PCMPICommsDestroy());
540   PCMPIServerActive = PETSC_FALSE;
541   PetscFunctionReturn(PETSC_SUCCESS);
542 }
543 
544 /*
545     This version is used in the trivial case when the MPI parallel solver server is running on just the original MPI rank 0
546     because, for example, the problem is small. It is more efficient because it does not require copying any data.
547 */
548 static PetscErrorCode PCSetUp_Seq(PC pc)
549 {
550   PC_MPI     *km = (PC_MPI *)pc->data;
551   Mat         sA;
552   const char *prefix;
553   char       *found = NULL, *cprefix;
554 
555   PetscFunctionBegin;
556   PetscCall(PCGetOperators(pc, NULL, &sA));
557   PetscCall(PCGetOptionsPrefix(pc, &prefix));
558   PetscCall(KSPCreate(PETSC_COMM_SELF, &km->ksps[0]));
559   PetscCall(KSPSetNestLevel(km->ksps[0], 1));
560   PetscCall(PetscObjectSetTabLevel((PetscObject)km->ksps[0], 1));
561 
562   /* Created KSP gets prefix of PC minus the mpi_linear_solver_server_ portion */
563   PetscCall(PCGetOptionsPrefix(pc, &prefix));
564   PetscCheck(prefix, PETSC_COMM_SELF, PETSC_ERR_PLIB, "PCMPI missing required prefix");
565   PetscCall(PetscStrallocpy(prefix, &cprefix));
566   PetscCall(PetscStrstr(cprefix, "mpi_linear_solver_server_", &found));
567   PetscCheck(found, PETSC_COMM_SELF, PETSC_ERR_PLIB, "PCMPI missing mpi_linear_solver_server_ portion of prefix");
568   *found = 0;
569   PetscCall(KSPSetOptionsPrefix(km->ksps[0], cprefix));
570   PetscCall(PetscFree(cprefix));
571 
572   PetscCall(KSPSetOperators(km->ksps[0], sA, sA));
573   PetscCall(KSPSetFromOptions(km->ksps[0]));
574   PetscCall(KSPSetUp(km->ksps[0]));
575   PetscCall(PetscInfo((PetscObject)pc, "MPI parallel linear solver system is being solved directly on rank 0 due to its small size\n"));
576   PCMPIKSPCountsSeq++;
577   PetscFunctionReturn(PETSC_SUCCESS);
578 }
579 
580 static PetscErrorCode PCApply_Seq(PC pc, Vec b, Vec x)
581 {
582   PC_MPI  *km = (PC_MPI *)pc->data;
583   PetscInt its, n;
584   Mat      A;
585 
586   PetscFunctionBegin;
587   PetscCall(KSPSolve(km->ksps[0], b, x));
588   PetscCall(KSPGetIterationNumber(km->ksps[0], &its));
589   PCMPISolveCountsSeq++;
590   PCMPIIterationsSeq += its;
591   PetscCall(KSPGetOperators(km->ksps[0], NULL, &A));
592   PetscCall(MatGetSize(A, &n, NULL));
593   PCMPISizesSeq += n;
594   PetscFunctionReturn(PETSC_SUCCESS);
595 }
596 
597 static PetscErrorCode PCView_Seq(PC pc, PetscViewer viewer)
598 {
599   PC_MPI *km = (PC_MPI *)pc->data;
600 
601   PetscFunctionBegin;
602   PetscCall(PetscViewerASCIIPrintf(viewer, "Running MPI linear solver server directly on rank 0 due to its small size\n"));
603   PetscCall(PetscViewerASCIIPrintf(viewer, "Desired minimum number of nonzeros per rank for MPI parallel solve %d\n", (int)km->mincntperrank));
604   PetscCall(PetscViewerASCIIPrintf(viewer, "*** Use -mpi_linear_solver_server_view to display statistics on all the solves ***\n"));
605   PetscFunctionReturn(PETSC_SUCCESS);
606 }
607 
608 static PetscErrorCode PCDestroy_Seq(PC pc)
609 {
610   PC_MPI *km = (PC_MPI *)pc->data;
611 
612   PetscFunctionBegin;
613   PetscCall(KSPDestroy(&km->ksps[0]));
614   PetscCall(PetscFree(pc->data));
615   PetscFunctionReturn(PETSC_SUCCESS);
616 }
617 
618 /*
619      PCSetUp_MPI - Triggers the creation of the MPI parallel PC and copies parts of the matrix and
620      right-hand side to the parallel PC
621 */
622 static PetscErrorCode PCSetUp_MPI(PC pc)
623 {
624   PC_MPI      *km = (PC_MPI *)pc->data;
625   PetscMPIInt  rank, size;
626   PCMPICommand request;
627   PetscBool    newmatrix = PETSC_FALSE;
628 
629   PetscFunctionBegin;
630   PetscCallMPI(MPI_Comm_rank(MPI_COMM_WORLD, &rank));
631   PetscCheck(rank == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "PCMPI can only be used from 0th rank of MPI_COMM_WORLD. Perhaps a missing -mpi_linear_solver_server?");
632   PetscCallMPI(MPI_Comm_size(MPI_COMM_WORLD, &size));
633 
634   if (!pc->setupcalled) {
635     if (!km->alwaysuseserver) {
636       PetscInt n;
637       Mat      sA;
638       /* short circuit for small systems */
639       PetscCall(PCGetOperators(pc, &sA, &sA));
640       PetscCall(MatGetSize(sA, &n, NULL));
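      /* if the matrix has fewer rows than roughly twice the per-rank minimum (or there is only one rank),
         at most one rank would be used anyway, so solve sequentially on this rank instead of using the server */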
641       if (n < 2 * km->mincntperrank - 1 || size == 1) {
642         pc->ops->setup   = NULL;
643         pc->ops->apply   = PCApply_Seq;
644         pc->ops->destroy = PCDestroy_Seq;
645         pc->ops->view    = PCView_Seq;
646         PetscCall(PCSetUp_Seq(pc));
647         PetscFunctionReturn(PETSC_SUCCESS);
648       }
649     }
650 
651     request = PCMPI_CREATE;
652     PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, MPI_COMM_WORLD));
653     PetscCall(PCMPICreate(pc));
654     newmatrix = PETSC_TRUE;
655   }
656   if (pc->flag == DIFFERENT_NONZERO_PATTERN) newmatrix = PETSC_TRUE;
657 
658   if (newmatrix) {
659     PetscCall(PetscInfo((PetscObject)pc, "New matrix or matrix has changed nonzero structure\n"));
660     request = PCMPI_SET_MAT;
661     PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, MPI_COMM_WORLD));
662     PetscCall(PCMPISetMat(pc));
663   } else {
664     PetscCall(PetscInfo((PetscObject)pc, "Matrix has only changed nonzero values\n"));
665     request = PCMPI_UPDATE_MAT_VALUES;
666     PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, MPI_COMM_WORLD));
667     PetscCall(PCMPIUpdateMatValues(pc));
668   }
669   PetscFunctionReturn(PETSC_SUCCESS);
670 }
671 
672 static PetscErrorCode PCApply_MPI(PC pc, Vec b, Vec x)
673 {
674   PCMPICommand request = PCMPI_SOLVE;
675 
676   PetscFunctionBegin;
677   PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, MPI_COMM_WORLD));
678   PetscCall(PCMPISolve(pc, b, x));
679   PetscFunctionReturn(PETSC_SUCCESS);
680 }
681 
682 static PetscErrorCode PCDestroy_MPI(PC pc)
683 {
684   PCMPICommand request = PCMPI_DESTROY;
685 
686   PetscFunctionBegin;
687   PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, MPI_COMM_WORLD));
688   PetscCall(PCMPIDestroy(pc));
689   PetscCall(PetscFree(pc->data));
690   PetscFunctionReturn(PETSC_SUCCESS);
691 }
692 
693 /*
694      PCView_MPI - Cannot call view on the MPI parallel KSP because other ranks do not have access to the viewer
695 */
696 static PetscErrorCode PCView_MPI(PC pc, PetscViewer viewer)
697 {
698   PC_MPI     *km = (PC_MPI *)pc->data;
699   MPI_Comm    comm;
700   PetscMPIInt size;
701 
702   PetscFunctionBegin;
703   PetscCall(PetscObjectGetComm((PetscObject)km->ksps[0], &comm));
704   PetscCallMPI(MPI_Comm_size(comm, &size));
705   PetscCall(PetscViewerASCIIPrintf(viewer, "Size of MPI communicator used for MPI parallel KSP solve %d\n", size));
706   PetscCall(PetscViewerASCIIPrintf(viewer, "Desired minimum number of nonzeros per rank for MPI parallel solve %d\n", (int)km->mincntperrank));
707   PetscCall(PetscViewerASCIIPrintf(viewer, "*** Use -mpi_linear_solver_server_view to display statistics on all the solves ***\n"));
708   PetscFunctionReturn(PETSC_SUCCESS);
709 }
710 
711 static PetscErrorCode PCSetFromOptions_MPI(PC pc, PetscOptionItems *PetscOptionsObject)
712 {
713   PC_MPI *km = (PC_MPI *)pc->data;
714 
715   PetscFunctionBegin;
716   PetscOptionsHeadBegin(PetscOptionsObject, "MPI linear solver server options");
717   PetscCall(PetscOptionsInt("-minimum_count_per_rank", "Desired minimum number of nonzeros per rank", "None", km->mincntperrank, &km->mincntperrank, NULL));
718   PetscCall(PetscOptionsBool("-always_use_server", "Use the server even if only one rank is used for the solve (for debugging)", "None", km->alwaysuseserver, &km->alwaysuseserver, NULL));
719   PetscOptionsHeadEnd();
720   PetscFunctionReturn(PETSC_SUCCESS);
721 }
722 
723 /*MC
724      PCMPI - Calls an MPI parallel `KSP` to solve a linear system from user code running on one process
725 
726    Options Database Keys for the Server:
727 +  -mpi_linear_solver_server - causes the PETSc program to start in MPI linear solver server mode where only the first MPI rank runs user code
728 -  -mpi_linear_solver_server_view - displays information about all the linear systems solved by the MPI linear solver server
729 
730    Options Database Keys for a specific `KSP` object:
731 +  -[any_ksp_prefix]_mpi_linear_solver_server_minimum_count_per_rank - sets the minimum size of the linear system per MPI rank that the solver will strive for
732 -  -[any_ksp_prefix]_mpi_linear_solver_server_always_use_server - use the server solver code even if the particular system is only solved on one process (for debugging and testing purposes)
733 
734    Level: developer
735 
736    Notes:
737    The options database prefix for the actual solver is any prefix provided before use to the original `KSP` with `KSPSetOptionsPrefix()`; most commonly no prefix is used.
738 
739    It can be particularly useful for user OpenMP code or potentially user GPU code.
740 
741    When the program is run with a single MPI process, it directly uses the provided matrix and right-hand side
742    and does not need to distribute the matrix and vector to the various MPI processes; thus it incurs no extra overhead over just using the `KSP` directly.
743 
744    The solver options for the actual solving `KSP` and `PC` must be controlled via the options database; calls to set options directly on the user-level `KSP` and `PC` have no effect
745    because they are not the actual solver objects.
746 
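   For example (a sketch; the solver choices are arbitrary and `myapp` stands for the user's program), an unprefixed `KSP` might be run with
.vb
   mpiexec -n 16 ./myapp -mpi_linear_solver_server -ksp_type cg -pc_type gamg -ksp_monitor
.ve
   where the `KSP` and `PC` options control the parallel solve performed by the server.
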
747    When `-log_view` is used with this solver, the events within the parallel solve are logged in their own stage. Some of the logging in the other
748    stages will be confusing since the event times are only recorded on the 0th MPI rank, so the percentages of time reported for those events will be misleading.
749 
750    Developer Note:
751    This `PCType` is never directly selected by the user; it is set when the option `-mpi_linear_solver_server` is used and the `PC` is at the outermost nesting of
752    a `KSP`. The outermost `KSP` object is automatically set to `KSPPREONLY` and thus is not directly visible to the user.
753 
754 .seealso: [](sec_pcmpi), `KSPCreate()`, `KSPSetType()`, `KSPType`, `KSP`, `PC`, `PCMPIServerBegin()`, `PCMPIServerEnd()`, `KSPCheckPCMPI()`
755 M*/
756 PETSC_EXTERN PetscErrorCode PCCreate_MPI(PC pc)
757 {
758   PC_MPI *km;
759   char   *found = NULL;
760 
761   PetscFunctionBegin;
762   PetscCall(PetscStrstr(((PetscObject)pc)->prefix, "mpi_linear_solver_server_", &found));
763   PetscCheck(found, PETSC_COMM_SELF, PETSC_ERR_PLIB, "PCMPI object prefix does not have mpi_linear_solver_server_");
764 
765   /* material from PCSetType() */
766   PetscTryTypeMethod(pc, destroy);
767   pc->ops->destroy = NULL;
768   pc->data         = NULL;
769 
770   PetscCall(PetscFunctionListDestroy(&((PetscObject)pc)->qlist));
771   PetscCall(PetscMemzero(pc->ops, sizeof(struct _PCOps)));
772   pc->modifysubmatrices  = NULL;
773   pc->modifysubmatricesP = NULL;
774   pc->setupcalled        = 0;
775 
776   PetscCall(PetscNew(&km));
777   pc->data = (void *)km;
778 
779   km->mincntperrank = 10000;
780 
781   pc->ops->setup          = PCSetUp_MPI;
782   pc->ops->apply          = PCApply_MPI;
783   pc->ops->destroy        = PCDestroy_MPI;
784   pc->ops->view           = PCView_MPI;
785   pc->ops->setfromoptions = PCSetFromOptions_MPI;
786   PetscCall(PetscObjectChangeTypeName((PetscObject)pc, PCMPI));
787   PetscFunctionReturn(PETSC_SUCCESS);
788 }
789