/*
    This file creates an MPI parallel KSP from a sequential PC that lives on MPI rank 0.
    It is intended to allow using PETSc MPI parallel linear solvers from non-MPI codes.

    The user's program may use OpenMP to compute the right-hand side and matrix for the linear system.

    The code uses MPI_COMM_WORLD below, but perhaps it should use PETSC_COMM_WORLD.

    The resulting KSP and PC can only be controlled via the options database, though some common commands
    could be passed through the server.
*/
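
/*
   A minimal usage sketch (illustrative only; the program name and option set are examples): the user's
   otherwise sequential code is written against PETSc as usual and the server is enabled at launch,

       mpiexec -n 8 ./myapp -mpi_linear_solver_server -mpi_linear_solver_server_view

   with user code such as

       KSPCreate(PETSC_COMM_WORLD, &ksp);  // PETSC_COMM_WORLD is PETSC_COMM_SELF on rank 0 under the server
       KSPSetOperators(ksp, A, A);         // A is a sequential AIJ matrix assembled on rank 0
       KSPSolve(ksp, b, x);

   Only MPI rank 0 runs the user's main program; the remaining ranks execute PCMPIServerBegin() and wait
   for requests.
*/
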
#include <petsc/private/pcimpl.h>
#include <petsc/private/kspimpl.h>
#include <petscts.h>
#include <petsctao.h>

#define PC_MPI_MAX_RANKS  256
#define PC_MPI_COMM_WORLD MPI_COMM_WORLD

typedef struct {
  KSP         ksps[PC_MPI_MAX_RANKS];                               /* The addresses of the MPI parallel KSP on each rank, NULL when not on a rank. */
  PetscMPIInt sendcount[PC_MPI_MAX_RANKS], displ[PC_MPI_MAX_RANKS]; /* For scatter/gather of rhs/solution */
  PetscMPIInt NZ[PC_MPI_MAX_RANKS], NZdispl[PC_MPI_MAX_RANKS];      /* For scatter of nonzero values in matrix (and nonzero column indices initially) */
  PetscInt    mincntperrank;                                        /* minimum number of desired nonzeros per active rank in MPI parallel KSP solve */
  PetscBool   alwaysuseserver;                                      /* for debugging use the server infrastructure even if only one MPI rank is used for the solve */
} PC_MPI;

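/*
   Note: ksps[] holds the raw addresses of the server-side KSP objects; they are gathered to rank 0 with
   MPI_Gather(..., MPI_AINT, ...) at creation and scattered back out with MPI_Scatter() on each request,
   so that every server rank can recover the KSP it participates in (see PCMPICreate() and PCMPISetMat()).
*/
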
typedef enum {
  PCMPI_EXIT, /* exit the PC server loop, means the controlling sequential program is done */
  PCMPI_CREATE,
  PCMPI_SET_MAT,           /* set original matrix (or one with different nonzero pattern) */
  PCMPI_UPDATE_MAT_VALUES, /* update current matrix with new nonzero values */
  PCMPI_SOLVE,
  PCMPI_VIEW,
  PCMPI_DESTROY /* destroy a KSP that is no longer needed */
} PCMPICommand;
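
/*
   Request protocol: every operation is initiated by MPI rank 0 broadcasting a single PCMPICommand on
   PC_MPI_COMM_WORLD; the server ranks dispatch on it (see the loop in PCMPIServerBegin()) and then join
   the matching collective calls. For example, the client side of a solve (see PCApply_MPI() below) is

       PCMPICommand request = PCMPI_SOLVE;
       MPI_Bcast(&request, 1, MPIU_ENUM, 0, MPI_COMM_WORLD);
       PCMPISolve(pc, b, x);  // the server ranks make the matching call with NULL arguments
*/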

static MPI_Comm  PCMPIComms[PC_MPI_MAX_RANKS];
static PetscBool PCMPICommSet = PETSC_FALSE;
static PetscInt  PCMPISolveCounts[PC_MPI_MAX_RANKS], PCMPIKSPCounts[PC_MPI_MAX_RANKS], PCMPIMatCounts[PC_MPI_MAX_RANKS], PCMPISolveCountsSeq = 0, PCMPIKSPCountsSeq = 0;
static PetscInt  PCMPIIterations[PC_MPI_MAX_RANKS], PCMPISizes[PC_MPI_MAX_RANKS], PCMPIIterationsSeq = 0, PCMPISizesSeq = 0;

static PetscErrorCode PCMPICommsCreate(void)
{
  MPI_Comm    comm = PC_MPI_COMM_WORLD;
  PetscMPIInt size, rank, i;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCheck(size <= PC_MPI_MAX_RANKS, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for using more than PC_MPI_MAX_RANKS MPI ranks in an MPI linear solver server solve");
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  /* comm for size 1 is useful only for debugging */
  for (i = 0; i < size; i++) {
    PetscMPIInt color = rank < i + 1 ? 0 : MPI_UNDEFINED;
    PetscCallMPI(MPI_Comm_split(comm, color, 0, &PCMPIComms[i]));
    PCMPISolveCounts[i] = 0;
    PCMPIKSPCounts[i]   = 0;
    PCMPIIterations[i]  = 0;
    PCMPISizes[i]       = 0;
  }
  PCMPICommSet = PETSC_TRUE;
  PetscFunctionReturn(PETSC_SUCCESS);
}
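
/*
   For example, with four ranks in PC_MPI_COMM_WORLD the loop above produces
       PCMPIComms[0] = {0}          (a size-one communicator, useful only for debugging)
       PCMPIComms[1] = {0, 1}
       PCMPIComms[2] = {0, 1, 2}
       PCMPIComms[3] = {0, 1, 2, 3}
   and on every rank that is not a member of PCMPIComms[i], that entry is MPI_COMM_NULL.
*/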

PetscErrorCode PCMPICommsDestroy(void)
{
  MPI_Comm    comm = PC_MPI_COMM_WORLD;
  PetscMPIInt size, rank, i;

  PetscFunctionBegin;
  if (!PCMPICommSet) PetscFunctionReturn(PETSC_SUCCESS);
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  for (i = 0; i < size; i++) {
    if (PCMPIComms[i] != MPI_COMM_NULL) PetscCallMPI(MPI_Comm_free(&PCMPIComms[i]));
  }
  PCMPICommSet = PETSC_FALSE;
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode PCMPICreate(PC pc)
{
  PC_MPI     *km   = pc ? (PC_MPI *)pc->data : NULL;
  MPI_Comm    comm = PC_MPI_COMM_WORLD;
  KSP         ksp;
  PetscInt    N[2], mincntperrank = 0;
  PetscMPIInt size;
  Mat         sA;
  char       *cprefix = NULL;
  PetscMPIInt len     = 0;

  PetscFunctionBegin;
  if (!PCMPICommSet) PetscCall(PCMPICommsCreate());
  PetscCallMPI(MPI_Comm_size(comm, &size));
  if (pc) {
    if (size == 1) PetscCall(PetscPrintf(PETSC_COMM_SELF, "Warning: Running KSP of type MPI on a one-rank MPI run; this will be less efficient than not using this type\n"));
    PetscCall(PCGetOperators(pc, &sA, &sA));
    PetscCall(MatGetSize(sA, &N[0], &N[1]));
  }
  PetscCallMPI(MPI_Bcast(N, 2, MPIU_INT, 0, comm));

  /* choose a suitably sized MPI_Comm for the problem to be solved on */
  if (km) mincntperrank = km->mincntperrank;
  PetscCallMPI(MPI_Bcast(&mincntperrank, 1, MPI_INT, 0, comm));
  comm = PCMPIComms[PetscMin(size, PetscMax(1, N[0] / mincntperrank)) - 1];
  if (comm == MPI_COMM_NULL) {
    ksp = NULL;
    PetscFunctionReturn(PETSC_SUCCESS);
  }
  PetscCall(PetscLogStagePush(PCMPIStage));
  PetscCall(KSPCreate(comm, &ksp));
  PetscCall(KSPSetNestLevel(ksp, 1));
  PetscCall(PetscObjectSetTabLevel((PetscObject)ksp, 1));
  PetscCall(PetscLogStagePop());
  PetscCallMPI(MPI_Gather(&ksp, 1, MPI_AINT, pc ? km->ksps : NULL, 1, MPI_AINT, 0, comm));
  if (pc) {
    size_t      slen;
    const char *prefix = NULL;
    char       *found  = NULL;

    PetscCallMPI(MPI_Comm_size(comm, &size));
    PCMPIKSPCounts[size - 1]++;
    /* Created KSP gets prefix of PC minus the mpi_linear_solver_server_ portion */
    PetscCall(PCGetOptionsPrefix(pc, &prefix));
    PetscCheck(prefix, PETSC_COMM_SELF, PETSC_ERR_PLIB, "PCMPI missing required prefix");
    PetscCall(PetscStrallocpy(prefix, &cprefix));
    PetscCall(PetscStrstr(cprefix, "mpi_linear_solver_server_", &found));
    PetscCheck(found, PETSC_COMM_SELF, PETSC_ERR_PLIB, "PCMPI missing mpi_linear_solver_server_ portion of prefix");
    *found = 0;
    PetscCall(PetscStrlen(cprefix, &slen));
    len = (PetscMPIInt)slen;
  }
  PetscCallMPI(MPI_Bcast(&len, 1, MPI_INT, 0, comm));
  if (len) {
    if (!pc) PetscCall(PetscMalloc1(len + 1, &cprefix));
    PetscCallMPI(MPI_Bcast(cprefix, len + 1, MPI_CHAR, 0, comm));
    PetscCall(KSPSetOptionsPrefix(ksp, cprefix));
  }
  PetscCall(PetscFree(cprefix));
  PetscFunctionReturn(PETSC_SUCCESS);
}
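
/*
   Worked example of the communicator selection in PCMPICreate(): with the default mincntperrank of
   10000 (set in PCCreate_MPI() below), a system with N[0] = 25000 rows and 16 available ranks selects
       PetscMin(16, PetscMax(1, 25000 / 10000)) - 1 = 1,
   that is PCMPIComms[1], a two-rank communicator; the other 14 ranks see MPI_COMM_NULL and return
   immediately with ksp = NULL.
*/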

static PetscErrorCode PCMPISetMat(PC pc)
{
  PC_MPI            *km = pc ? (PC_MPI *)pc->data : NULL;
  Mat                A;
  PetscInt           m, n, *ia, *ja, j, bs;
  Mat                sA;
  MPI_Comm           comm = PC_MPI_COMM_WORLD;
  KSP                ksp;
  PetscLayout        layout;
  const PetscInt    *IA = NULL, *JA = NULL;
  const PetscInt    *range;
  PetscMPIInt       *NZ = NULL, sendcounti[PC_MPI_MAX_RANKS], displi[PC_MPI_MAX_RANKS], *NZdispl = NULL, nz, size, i;
  PetscScalar       *a;
  const PetscScalar *sa               = NULL;
  PetscInt           matproperties[8] = {0, 0, 0, 0, 0, 0, 0, 0};
  char              *cprefix;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Scatter(pc ? km->ksps : NULL, 1, MPI_AINT, &ksp, 1, MPI_AINT, 0, comm));
  if (!ksp) PetscFunctionReturn(PETSC_SUCCESS);
  PetscCall(PetscObjectGetComm((PetscObject)ksp, &comm));
  if (pc) {
    PetscBool   isset, issymmetric, ishermitian, isspd, isstructurallysymmetric;
    const char *prefix;
    size_t      clen;

    PetscCallMPI(MPI_Comm_size(comm, &size));
    PCMPIMatCounts[size - 1]++;
    PetscCall(PCGetOperators(pc, &sA, &sA));
    PetscCall(MatGetSize(sA, &matproperties[0], &matproperties[1]));
    PetscCall(MatGetBlockSize(sA, &bs));
    matproperties[2] = bs;
    PetscCall(MatIsSymmetricKnown(sA, &isset, &issymmetric));
    matproperties[3] = !isset ? 0 : (issymmetric ? 1 : 2);
    PetscCall(MatIsHermitianKnown(sA, &isset, &ishermitian));
    matproperties[4] = !isset ? 0 : (ishermitian ? 1 : 2);
    PetscCall(MatIsSPDKnown(sA, &isset, &isspd));
    matproperties[5] = !isset ? 0 : (isspd ? 1 : 2);
    PetscCall(MatIsStructurallySymmetricKnown(sA, &isset, &isstructurallysymmetric));
    matproperties[6] = !isset ? 0 : (isstructurallysymmetric ? 1 : 2);
    /* Created Mat gets prefix of input Mat PLUS the mpi_linear_solver_server_ portion */
    PetscCall(MatGetOptionsPrefix(sA, &prefix));
    PetscCall(PetscStrallocpy(prefix, &cprefix));
    PetscCall(PetscStrlen(cprefix, &clen));
    matproperties[7] = (PetscInt)clen;
  }
  PetscCallMPI(MPI_Bcast(matproperties, PETSC_STATIC_ARRAY_LENGTH(matproperties), MPIU_INT, 0, comm));

  /* determine ownership ranges of matrix columns */
  PetscCall(PetscLayoutCreate(comm, &layout));
  PetscCall(PetscLayoutSetBlockSize(layout, matproperties[2]));
  PetscCall(PetscLayoutSetSize(layout, matproperties[1]));
  PetscCall(PetscLayoutSetUp(layout));
  PetscCall(PetscLayoutGetLocalSize(layout, &n));
  PetscCall(PetscLayoutDestroy(&layout));

  /* determine ownership ranges of matrix rows */
  PetscCall(PetscLayoutCreate(comm, &layout));
  PetscCall(PetscLayoutSetBlockSize(layout, matproperties[2]));
  PetscCall(PetscLayoutSetSize(layout, matproperties[0]));
  PetscCall(PetscLayoutSetUp(layout));
  PetscCall(PetscLayoutGetLocalSize(layout, &m));

  /* copy over the matrix nonzero structure and values */
  if (pc) {
    NZ      = km->NZ;
    NZdispl = km->NZdispl;
    PetscCall(PetscLayoutGetRanges(layout, &range));
    PetscCall(MatGetRowIJ(sA, 0, PETSC_FALSE, PETSC_FALSE, NULL, &IA, &JA, NULL));
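    /* compute per-rank send counts: the row-pointer sections sent to consecutive ranks overlap by one entry, since a rank owning r rows needs r + 1 offsets from IA */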
    for (i = 0; i < size; i++) {
      sendcounti[i] = (PetscMPIInt)(1 + range[i + 1] - range[i]);
      NZ[i]         = (PetscMPIInt)(IA[range[i + 1]] - IA[range[i]]);
    }
    displi[0]  = 0;
    NZdispl[0] = 0;
    for (j = 1; j < size; j++) {
      displi[j]  = displi[j - 1] + sendcounti[j - 1] - 1;
      NZdispl[j] = NZdispl[j - 1] + NZ[j - 1];
    }
    PetscCall(MatSeqAIJGetArrayRead(sA, &sa));
  }
  PetscCall(PetscLayoutDestroy(&layout));
  PetscCallMPI(MPI_Scatter(NZ, 1, MPI_INT, &nz, 1, MPI_INT, 0, comm));

  PetscCall(PetscMalloc3(n + 1, &ia, nz, &ja, nz, &a));
  PetscCallMPI(MPI_Scatterv(IA, sendcounti, displi, MPIU_INT, ia, n + 1, MPIU_INT, 0, comm));
  PetscCallMPI(MPI_Scatterv(JA, NZ, NZdispl, MPIU_INT, ja, nz, MPIU_INT, 0, comm));
  PetscCallMPI(MPI_Scatterv(sa, NZ, NZdispl, MPIU_SCALAR, a, nz, MPIU_SCALAR, 0, comm));

  if (pc) {
    PetscCall(MatSeqAIJRestoreArrayRead(sA, &sa));
    PetscCall(MatRestoreRowIJ(sA, 0, PETSC_FALSE, PETSC_FALSE, NULL, &IA, &JA, NULL));
  }

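  /* the scattered row offsets in ia[] are still the root's global offsets; rebase them so this rank's local CSR starts at zero */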
  for (j = 1; j < n + 1; j++) ia[j] -= ia[0];
  ia[0] = 0;
  PetscCall(PetscLogStagePush(PCMPIStage));
  PetscCall(MatCreate(comm, &A));
  if (matproperties[7] > 0) {
    if (!pc) PetscCall(PetscMalloc1(matproperties[7] + 1, &cprefix));
    PetscCallMPI(MPI_Bcast(cprefix, matproperties[7] + 1, MPI_CHAR, 0, comm));
    PetscCall(MatSetOptionsPrefix(A, cprefix));
    PetscCall(PetscFree(cprefix));
  }
  PetscCall(MatAppendOptionsPrefix(A, "mpi_linear_solver_server_"));
  PetscCall(MatSetSizes(A, m, n, matproperties[0], matproperties[1]));
  PetscCall(MatSetType(A, MATMPIAIJ));
  PetscCall(MatMPIAIJSetPreallocationCSR(A, ia, ja, a));
  PetscCall(MatSetBlockSize(A, matproperties[2]));

  if (matproperties[3]) PetscCall(MatSetOption(A, MAT_SYMMETRIC, matproperties[3] == 1 ? PETSC_TRUE : PETSC_FALSE));
  if (matproperties[4]) PetscCall(MatSetOption(A, MAT_HERMITIAN, matproperties[4] == 1 ? PETSC_TRUE : PETSC_FALSE));
  if (matproperties[5]) PetscCall(MatSetOption(A, MAT_SPD, matproperties[5] == 1 ? PETSC_TRUE : PETSC_FALSE));
  if (matproperties[6]) PetscCall(MatSetOption(A, MAT_STRUCTURALLY_SYMMETRIC, matproperties[6] == 1 ? PETSC_TRUE : PETSC_FALSE));

  PetscCall(PetscFree3(ia, ja, a));
  PetscCall(KSPSetOperators(ksp, A, A));
  if (!ksp->vec_sol) PetscCall(MatCreateVecs(A, &ksp->vec_sol, &ksp->vec_rhs));
  PetscCall(PetscLogStagePop());
  if (pc) { /* needed for scatterv/gatherv of rhs and solution */
    const PetscInt *range;

    PetscCall(VecGetOwnershipRanges(ksp->vec_sol, &range));
    for (i = 0; i < size; i++) {
      km->sendcount[i] = (PetscMPIInt)(range[i + 1] - range[i]);
      km->displ[i]     = (PetscMPIInt)range[i];
    }
  }
  PetscCall(MatDestroy(&A));
  PetscCall(KSPSetFromOptions(ksp));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode PCMPIUpdateMatValues(PC pc)
{
  PC_MPI            *km = pc ? (PC_MPI *)pc->data : NULL;
  KSP                ksp;
  Mat                sA, A;
  MPI_Comm           comm = PC_MPI_COMM_WORLD;
  PetscScalar       *a;
  PetscCount         nz;
  const PetscScalar *sa = NULL;
  PetscMPIInt        size;
  PetscInt           matproperties[4] = {0, 0, 0, 0};

  PetscFunctionBegin;
  if (pc) {
    PetscCall(PCGetOperators(pc, &sA, &sA));
    PetscCall(MatSeqAIJGetArrayRead(sA, &sa));
  }
  PetscCallMPI(MPI_Scatter(pc ? km->ksps : NULL, 1, MPI_AINT, &ksp, 1, MPI_AINT, 0, comm));
  if (!ksp) PetscFunctionReturn(PETSC_SUCCESS);
  PetscCall(PetscObjectGetComm((PetscObject)ksp, &comm));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PCMPIMatCounts[size - 1]++;
  PetscCall(KSPGetOperators(ksp, NULL, &A));
  PetscCall(MatMPIAIJGetNumberNonzeros(A, &nz));
  PetscCall(PetscMalloc1(nz, &a));
  PetscCallMPI(MPI_Scatterv(sa, pc ? km->NZ : NULL, pc ? km->NZdispl : NULL, MPIU_SCALAR, a, nz, MPIU_SCALAR, 0, comm));
  if (pc) {
    PetscBool isset, issymmetric, ishermitian, isspd, isstructurallysymmetric;

    PetscCall(MatSeqAIJRestoreArrayRead(sA, &sa));

    PetscCall(MatIsSymmetricKnown(sA, &isset, &issymmetric));
    matproperties[0] = !isset ? 0 : (issymmetric ? 1 : 2);
    PetscCall(MatIsHermitianKnown(sA, &isset, &ishermitian));
    matproperties[1] = !isset ? 0 : (ishermitian ? 1 : 2);
    PetscCall(MatIsSPDKnown(sA, &isset, &isspd));
    matproperties[2] = !isset ? 0 : (isspd ? 1 : 2);
    PetscCall(MatIsStructurallySymmetricKnown(sA, &isset, &isstructurallysymmetric));
    matproperties[3] = !isset ? 0 : (isstructurallysymmetric ? 1 : 2);
  }
  PetscCall(MatUpdateMPIAIJWithArray(A, a));
  PetscCall(PetscFree(a));
  PetscCallMPI(MPI_Bcast(matproperties, 4, MPIU_INT, 0, comm));
  /* if any of these properties was previously set and is now not set, this will result in incorrect properties in A, since there is no way to unset a property */
  if (matproperties[0]) PetscCall(MatSetOption(A, MAT_SYMMETRIC, matproperties[0] == 1 ? PETSC_TRUE : PETSC_FALSE));
  if (matproperties[1]) PetscCall(MatSetOption(A, MAT_HERMITIAN, matproperties[1] == 1 ? PETSC_TRUE : PETSC_FALSE));
  if (matproperties[2]) PetscCall(MatSetOption(A, MAT_SPD, matproperties[2] == 1 ? PETSC_TRUE : PETSC_FALSE));
  if (matproperties[3]) PetscCall(MatSetOption(A, MAT_STRUCTURALLY_SYMMETRIC, matproperties[3] == 1 ? PETSC_TRUE : PETSC_FALSE));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode PCMPISolve(PC pc, Vec B, Vec X)
{
  PC_MPI            *km = pc ? (PC_MPI *)pc->data : NULL;
  KSP                ksp;
  MPI_Comm           comm = PC_MPI_COMM_WORLD;
  const PetscScalar *sb   = NULL, *x;
  PetscScalar       *b, *sx = NULL;
  PetscInt           its, n;
  PetscMPIInt        size;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Scatter(pc ? km->ksps : &ksp, 1, MPI_AINT, &ksp, 1, MPI_AINT, 0, comm));
  if (!ksp) PetscFunctionReturn(PETSC_SUCCESS);
  PetscCall(PetscObjectGetComm((PetscObject)ksp, &comm));

  /* TODO: optimize code to not require building counts/displ every time */

  /* scatterv rhs */
  PetscCallMPI(MPI_Comm_size(comm, &size));
  if (pc) {
    PetscInt N;

    PCMPISolveCounts[size - 1]++;
    PetscCall(MatGetSize(pc->pmat, &N, NULL));
    PCMPISizes[size - 1] += N;
    PetscCall(VecGetArrayRead(B, &sb));
  }
  PetscCall(VecGetLocalSize(ksp->vec_rhs, &n));
  PetscCall(VecGetArray(ksp->vec_rhs, &b));
  PetscCallMPI(MPI_Scatterv(sb, pc ? km->sendcount : NULL, pc ? km->displ : NULL, MPIU_SCALAR, b, n, MPIU_SCALAR, 0, comm));
  PetscCall(VecRestoreArray(ksp->vec_rhs, &b));
  if (pc) PetscCall(VecRestoreArrayRead(B, &sb));

  PetscCall(PetscLogStagePush(PCMPIStage));
  PetscCall(KSPSolve(ksp, NULL, NULL));
  PetscCall(PetscLogStagePop());
  PetscCall(KSPGetIterationNumber(ksp, &its));
  PCMPIIterations[size - 1] += its;

  /* gather solution */
  PetscCall(VecGetArrayRead(ksp->vec_sol, &x));
  if (pc) PetscCall(VecGetArray(X, &sx));
  PetscCallMPI(MPI_Gatherv(x, n, MPIU_SCALAR, sx, pc ? km->sendcount : NULL, pc ? km->displ : NULL, MPIU_SCALAR, 0, comm));
  if (pc) PetscCall(VecRestoreArray(X, &sx));
  PetscCall(VecRestoreArrayRead(ksp->vec_sol, &x));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode PCMPIDestroy(PC pc)
{
  PC_MPI  *km = pc ? (PC_MPI *)pc->data : NULL;
  KSP      ksp;
  MPI_Comm comm = PC_MPI_COMM_WORLD;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Scatter(pc ? km->ksps : NULL, 1, MPI_AINT, &ksp, 1, MPI_AINT, 0, comm));
  if (!ksp) PetscFunctionReturn(PETSC_SUCCESS);
  PetscCall(PetscLogStagePush(PCMPIStage));
  PetscCall(KSPDestroy(&ksp));
  PetscCall(PetscLogStagePop());
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscBool PCMPIServerActive = PETSC_FALSE;

/*@C
  PCMPIServerBegin - starts a server that runs on the `rank != 0` MPI processes waiting to process requests for
  parallel `KSP` solves and management of parallel `KSP` objects.

  Logically Collective on all MPI processes except rank 0

  Options Database Keys:
+ -mpi_linear_solver_server      - causes the PETSc program to start in MPI linear solver server mode where only the first MPI rank runs user code
- -mpi_linear_solver_server_view - displays information about all the linear systems solved by the MPI linear solver server at the conclusion of the program

  Level: developer

  Note:
  This is normally started automatically in `PetscInitialize()` when the `-mpi_linear_solver_server` option is provided

  See `PCMPI` for information on using the solver with a `KSP` object

  Developer Notes:
  When called on MPI rank 0 this sets `PETSC_COMM_WORLD` to `PETSC_COMM_SELF` to allow a main program
  written with `PETSC_COMM_WORLD` to run correctly on that single rank while all the other ranks
  (that would normally share `PETSC_COMM_WORLD`) run the solver server.

  Can this be integrated into the `PetscDevice` abstraction that is currently being developed?

  Conceivably `PCREDISTRIBUTE` could be organized in a similar manner to simplify its usage

  This could be implemented directly at the `KSP` level instead of using the `PCMPI` wrapper object

  The code could be extended to allow an MPI + OpenMP application to use the linear solver server concept across all shared-memory
  nodes with a single MPI process per node for the user application but multiple MPI processes per node for the linear solver.

  The concept could also be extended for user callbacks for `SNES`, `TS`, and `Tao` where, for example, `SNESSolve()` runs on
  all MPI processes but the user callback only runs on one MPI process per node.

  PETSc could also be extended with an MPI-less API that provides access to PETSc's solvers without any reference to MPI, essentially removing
  the `MPI_Comm` argument from PETSc calls.

.seealso: [](sec_pcmpi), `PCMPIServerEnd()`, `PCMPI`, `KSPCheckPCMPI()`
@*/
PetscErrorCode PCMPIServerBegin(void)
{
  PetscMPIInt rank;

  PetscFunctionBegin;
  PetscCall(PetscInfo(NULL, "Starting MPI Linear Solver Server\n"));
  if (PetscDefined(USE_SINGLE_LIBRARY)) {
    PetscCall(VecInitializePackage());
    PetscCall(MatInitializePackage());
    PetscCall(DMInitializePackage());
    PetscCall(PCInitializePackage());
    PetscCall(KSPInitializePackage());
    PetscCall(SNESInitializePackage());
    PetscCall(TSInitializePackage());
    PetscCall(TaoInitializePackage());
  }
  PetscCall(PetscLogStageRegister("PCMPI", &PCMPIStage));

  PetscCallMPI(MPI_Comm_rank(PC_MPI_COMM_WORLD, &rank));
  if (rank == 0) {
    PETSC_COMM_WORLD  = PETSC_COMM_SELF;
    PCMPIServerActive = PETSC_TRUE;
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  while (PETSC_TRUE) {
    PCMPICommand request = PCMPI_CREATE;
    PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, PC_MPI_COMM_WORLD));
    switch (request) {
    case PCMPI_CREATE:
      PetscCall(PCMPICreate(NULL));
      break;
    case PCMPI_SET_MAT:
      PetscCall(PCMPISetMat(NULL));
      break;
    case PCMPI_UPDATE_MAT_VALUES:
      PetscCall(PCMPIUpdateMatValues(NULL));
      break;
    case PCMPI_VIEW:
      // PetscCall(PCMPIView(NULL));
      break;
    case PCMPI_SOLVE:
      PetscCall(PCMPISolve(NULL, NULL, NULL));
      break;
    case PCMPI_DESTROY:
      PetscCall(PCMPIDestroy(NULL));
      break;
    case PCMPI_EXIT:
      PetscCall(PetscFinalize());
      exit(0); /* not sure if this is a good idea, but cannot return because it would run the user's main program */
      break;
    default:
      break;
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  PCMPIServerEnd - ends a server that runs on the `rank != 0` MPI processes waiting to process requests for
  parallel `KSP` solves and management of parallel `KSP` objects.

  Logically Collective on all MPI ranks except 0

  Level: developer

  Note:
  This is normally ended automatically in `PetscFinalize()` when the `-mpi_linear_solver_server` option is provided

.seealso: [](sec_pcmpi), `PCMPIServerBegin()`, `PCMPI`, `KSPCheckPCMPI()`
@*/
PetscErrorCode PCMPIServerEnd(void)
{
  PCMPICommand request = PCMPI_EXIT;

  PetscFunctionBegin;
  if (PetscGlobalRank == 0) {
    PetscViewer       viewer = NULL;
    PetscViewerFormat format;

    PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, PC_MPI_COMM_WORLD));
    PETSC_COMM_WORLD = MPI_COMM_WORLD; /* could use PC_MPI_COMM_WORLD */
    PetscOptionsBegin(PETSC_COMM_SELF, NULL, "MPI linear solver server options", NULL);
    PetscCall(PetscOptionsViewer("-mpi_linear_solver_server_view", "View information about the systems solved with the server", "PCMPI", &viewer, &format, NULL));
    PetscOptionsEnd();
    if (viewer) {
      PetscBool isascii;

      PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii));
      if (isascii) {
        PetscMPIInt size;
        PetscMPIInt i;

        PetscCallMPI(MPI_Comm_size(PETSC_COMM_WORLD, &size));
        PetscCall(PetscViewerASCIIPrintf(viewer, "MPI linear solver server statistics:\n"));
        PetscCall(PetscViewerASCIIPrintf(viewer, "    Ranks        KSPSolve()s     Mats        KSPs       Avg. Size      Avg. Its\n"));
        if (PCMPIKSPCountsSeq) {
          PetscCall(PetscViewerASCIIPrintf(viewer, "  Sequential         %" PetscInt_FMT "                         %" PetscInt_FMT "            %" PetscInt_FMT "           %" PetscInt_FMT "\n", PCMPISolveCountsSeq, PCMPIKSPCountsSeq, PCMPISizesSeq / PCMPISolveCountsSeq, PCMPIIterationsSeq / PCMPISolveCountsSeq));
        }
        for (i = 0; i < size; i++) {
          if (PCMPIKSPCounts[i]) {
            PetscCall(PetscViewerASCIIPrintf(viewer, "     %d               %" PetscInt_FMT "            %" PetscInt_FMT "           %" PetscInt_FMT "            %" PetscInt_FMT "            %" PetscInt_FMT "\n", i + 1, PCMPISolveCounts[i], PCMPIMatCounts[i], PCMPIKSPCounts[i], PCMPISizes[i] / PCMPISolveCounts[i], PCMPIIterations[i] / PCMPISolveCounts[i]));
          }
        }
      }
      PetscCall(PetscOptionsRestoreViewer(&viewer));
    }
  }
  PetscCall(PCMPICommsDestroy());
  PCMPIServerActive = PETSC_FALSE;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
    This version is used in the trivial case when the MPI parallel solver server is running on just the original MPI rank 0,
    for example because the problem is small. It is more efficient because it does not require copying any data.
*/
static PetscErrorCode PCSetUp_Seq(PC pc)
{
  PC_MPI     *km = (PC_MPI *)pc->data;
  Mat         sA;
  const char *prefix;
  char       *found = NULL, *cprefix;

  PetscFunctionBegin;
  PetscCall(PCGetOperators(pc, NULL, &sA));
  PetscCall(KSPCreate(PETSC_COMM_SELF, &km->ksps[0]));
  PetscCall(KSPSetNestLevel(km->ksps[0], 1));
  PetscCall(PetscObjectSetTabLevel((PetscObject)km->ksps[0], 1));

  /* Created KSP gets prefix of PC minus the mpi_linear_solver_server_ portion */
  PetscCall(PCGetOptionsPrefix(pc, &prefix));
  PetscCheck(prefix, PETSC_COMM_SELF, PETSC_ERR_PLIB, "PCMPI missing required prefix");
  PetscCall(PetscStrallocpy(prefix, &cprefix));
  PetscCall(PetscStrstr(cprefix, "mpi_linear_solver_server_", &found));
  PetscCheck(found, PETSC_COMM_SELF, PETSC_ERR_PLIB, "PCMPI missing mpi_linear_solver_server_ portion of prefix");
  *found = 0;
  PetscCall(KSPSetOptionsPrefix(km->ksps[0], cprefix));
  PetscCall(PetscFree(cprefix));

  PetscCall(KSPSetOperators(km->ksps[0], sA, sA));
  PetscCall(KSPSetFromOptions(km->ksps[0]));
  PetscCall(KSPSetUp(km->ksps[0]));
  PetscCall(PetscInfo((PetscObject)pc, "MPI parallel linear solver system is being solved directly on rank 0 due to its small size\n"));
  PCMPIKSPCountsSeq++;
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode PCApply_Seq(PC pc, Vec b, Vec x)
{
  PC_MPI  *km = (PC_MPI *)pc->data;
  PetscInt its, n;
  Mat      A;

  PetscFunctionBegin;
  PetscCall(KSPSolve(km->ksps[0], b, x));
  PetscCall(KSPGetIterationNumber(km->ksps[0], &its));
  PCMPISolveCountsSeq++;
  PCMPIIterationsSeq += its;
  PetscCall(KSPGetOperators(km->ksps[0], NULL, &A));
  PetscCall(MatGetSize(A, &n, NULL));
  PCMPISizesSeq += n;
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode PCView_Seq(PC pc, PetscViewer viewer)
{
  PC_MPI *km = (PC_MPI *)pc->data;

  PetscFunctionBegin;
  PetscCall(PetscViewerASCIIPrintf(viewer, "Running MPI linear solver server directly on rank 0 due to its small size\n"));
  PetscCall(PetscViewerASCIIPrintf(viewer, "Desired minimum number of nonzeros per rank for MPI parallel solve %d\n", (int)km->mincntperrank));
  PetscCall(PetscViewerASCIIPrintf(viewer, "*** Use -mpi_linear_solver_server_view to see statistics on all the solves ***\n"));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode PCDestroy_Seq(PC pc)
{
  PC_MPI *km = (PC_MPI *)pc->data;

  PetscFunctionBegin;
  PetscCall(KSPDestroy(&km->ksps[0]));
  PetscCall(PetscFree(pc->data));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
     PCSetUp_MPI - Trigger the creation of the MPI parallel PC and copy parts of the matrix and
     right-hand side to the parallel PC
*/
static PetscErrorCode PCSetUp_MPI(PC pc)
{
  PC_MPI      *km = (PC_MPI *)pc->data;
  PetscMPIInt  rank, size;
  PCMPICommand request;
  PetscBool    newmatrix = PETSC_FALSE;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_rank(MPI_COMM_WORLD, &rank));
  PetscCheck(rank == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "PCMPI can only be used from 0th rank of MPI_COMM_WORLD. Perhaps a missing -mpi_linear_solver_server?");
  PetscCallMPI(MPI_Comm_size(MPI_COMM_WORLD, &size));

  if (!pc->setupcalled) {
    if (!km->alwaysuseserver) {
      PetscInt n;
      Mat      sA;
      /* short circuit for small systems */
      PetscCall(PCGetOperators(pc, &sA, &sA));
      PetscCall(MatGetSize(sA, &n, NULL));
      if (n < 2 * km->mincntperrank - 1 || size == 1) {
        pc->ops->setup   = NULL;
        pc->ops->apply   = PCApply_Seq;
        pc->ops->destroy = PCDestroy_Seq;
        pc->ops->view    = PCView_Seq;
        PetscCall(PCSetUp_Seq(pc));
        PetscFunctionReturn(PETSC_SUCCESS);
      }
    }

    request = PCMPI_CREATE;
    PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, MPI_COMM_WORLD));
    PetscCall(PCMPICreate(pc));
    newmatrix = PETSC_TRUE;
  }
  if (pc->flag == DIFFERENT_NONZERO_PATTERN) newmatrix = PETSC_TRUE;

  if (newmatrix) {
    PetscCall(PetscInfo((PetscObject)pc, "New matrix or matrix has changed nonzero structure\n"));
    request = PCMPI_SET_MAT;
    PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, MPI_COMM_WORLD));
    PetscCall(PCMPISetMat(pc));
  } else {
    PetscCall(PetscInfo((PetscObject)pc, "Matrix has only changed nonzero values\n"));
    request = PCMPI_UPDATE_MAT_VALUES;
    PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, MPI_COMM_WORLD));
    PetscCall(PCMPIUpdateMatValues(pc));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
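
/*
   With the default mincntperrank of 10000, systems with fewer than 2 * 10000 - 1 = 19999 rows (or any
   run on a single MPI rank) take the sequential short-circuit path above and never contact the server.
*/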

static PetscErrorCode PCApply_MPI(PC pc, Vec b, Vec x)
{
  PCMPICommand request = PCMPI_SOLVE;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, MPI_COMM_WORLD));
  PetscCall(PCMPISolve(pc, b, x));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode PCDestroy_MPI(PC pc)
{
  PCMPICommand request = PCMPI_DESTROY;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, MPI_COMM_WORLD));
  PetscCall(PCMPIDestroy(pc));
  PetscCall(PetscFree(pc->data));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
     PCView_MPI - Cannot call view on the MPI parallel KSP because the other ranks do not have access to the viewer
*/
static PetscErrorCode PCView_MPI(PC pc, PetscViewer viewer)
{
  PC_MPI     *km = (PC_MPI *)pc->data;
  MPI_Comm    comm;
  PetscMPIInt size;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)km->ksps[0], &comm));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCall(PetscViewerASCIIPrintf(viewer, "Size of MPI communicator used for MPI parallel KSP solve %d\n", size));
  PetscCall(PetscViewerASCIIPrintf(viewer, "Desired minimum number of nonzeros per rank for MPI parallel solve %d\n", (int)km->mincntperrank));
  PetscCall(PetscViewerASCIIPrintf(viewer, "*** Use -mpi_linear_solver_server_view to see statistics on all the solves ***\n"));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode PCSetFromOptions_MPI(PC pc, PetscOptionItems *PetscOptionsObject)
{
  PC_MPI *km = (PC_MPI *)pc->data;

  PetscFunctionBegin;
  PetscOptionsHeadBegin(PetscOptionsObject, "MPI linear solver server options");
  PetscCall(PetscOptionsInt("-minimum_count_per_rank", "Desired minimum number of nonzeros per rank", "None", km->mincntperrank, &km->mincntperrank, NULL));
  PetscCall(PetscOptionsBool("-always_use_server", "Use the server even if only one rank is used for the solve (for debugging)", "None", km->alwaysuseserver, &km->alwaysuseserver, NULL));
  PetscOptionsHeadEnd();
  PetscFunctionReturn(PETSC_SUCCESS);
}
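
/*
   These options are parsed under the PC's full prefix, so on the command line they appear as, e.g.,
       -mpi_linear_solver_server_minimum_count_per_rank 5000
       -mpi_linear_solver_server_always_use_server
   (preceded by any prefix the user gave the outer KSP).
*/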

/*MC
     PCMPI - Calls an MPI parallel `KSP` to solve a linear system from user code running on one process

   Options Database Keys for the Server:
+  -mpi_linear_solver_server - causes the PETSc program to start in MPI linear solver server mode where only the first MPI rank runs user code
-  -mpi_linear_solver_server_view - displays information about all the linear systems solved by the MPI linear solver server

   Options Database Keys for a specific `KSP` object:
+  -[any_ksp_prefix]_mpi_linear_solver_server_minimum_count_per_rank - sets the minimum size of the linear system per MPI rank that the solver will strive for
-  -[any_ksp_prefix]_mpi_linear_solver_server_always_use_server - use the server solver code even if the particular system is only solved on the process (for debugging and testing purposes)

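   Example Usage (a sketch; the program name is illustrative):
.vb
   mpiexec -n 4 ./myapp -mpi_linear_solver_server -mpi_linear_solver_server_view
.ve
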
   Level: developer

   Notes:
   The options database prefix for the actual solver is any prefix provided before use to the original `KSP` with `KSPSetOptionsPrefix()`; most commonly no prefix is used.

   It can be particularly useful for user OpenMP code or potentially user GPU code.

   When the program is running with a single MPI process then it directly uses the provided matrix and right-hand side
   and does not need to distribute the matrix and vector to the various MPI processes; thus it incurs no extra overhead over just using the `KSP` directly.

   The solver options for the actual solving `KSP` and `PC` must be controlled via the options database; calls to set options directly on the user-level `KSP` and `PC` have no effect
   because they are not the actual solver objects.

   When `-log_view` is used with this solver the events within the parallel solve are logged in their own stage. Some of the logging in the other
   stages will be confusing since the event times are only recorded on the 0th MPI rank; thus the percentages of time reported for the events will be misleading.

   Developer Note:
   This `PCType` is never directly selected by the user; it is set when the option `-mpi_linear_solver_server` is used and the `PC` is at the outermost nesting of
   a `KSP`. The outermost `KSP` object is automatically set to `KSPPREONLY` and thus is not directly visible to the user.

.seealso: [](sec_pcmpi), `KSPCreate()`, `KSPSetType()`, `KSPType`, `KSP`, `PC`, `PCMPIServerBegin()`, `PCMPIServerEnd()`, `KSPCheckPCMPI()`
M*/
PETSC_EXTERN PetscErrorCode PCCreate_MPI(PC pc)
{
  PC_MPI *km;
  char   *found = NULL;

  PetscFunctionBegin;
  PetscCall(PetscStrstr(((PetscObject)pc)->prefix, "mpi_linear_solver_server_", &found));
  PetscCheck(found, PETSC_COMM_SELF, PETSC_ERR_PLIB, "PCMPI object prefix does not have mpi_linear_solver_server_");

  /* material from PCSetType() */
  PetscTryTypeMethod(pc, destroy);
  pc->ops->destroy = NULL;
  pc->data         = NULL;

  PetscCall(PetscFunctionListDestroy(&((PetscObject)pc)->qlist));
  PetscCall(PetscMemzero(pc->ops, sizeof(struct _PCOps)));
  pc->modifysubmatrices  = NULL;
  pc->modifysubmatricesP = NULL;
  pc->setupcalled        = 0;

  PetscCall(PetscNew(&km));
  pc->data = (void *)km;

  km->mincntperrank = 10000;

  pc->ops->setup          = PCSetUp_MPI;
  pc->ops->apply          = PCApply_MPI;
  pc->ops->destroy        = PCDestroy_MPI;
  pc->ops->view           = PCView_MPI;
  pc->ops->setfromoptions = PCSetFromOptions_MPI;
  PetscCall(PetscObjectChangeTypeName((PetscObject)pc, PCMPI));
  PetscFunctionReturn(PETSC_SUCCESS);
}