xref: /petsc/src/ksp/pc/impls/mpi/pcmpi.c (revision fdf6c4e30aafdbc795e4f76379caa977fd5cdf5a) !
1 /*
2     This file creates an MPI parallel KSP from a sequential PC that lives on MPI rank 0.
3     It is intended to allow using PETSc MPI parallel linear solvers from non-MPI codes.
4 
5     That program may use OpenMP to compute the right hand side and matrix for the linear system
6 
7     The code uses MPI_COMM_WORLD below but maybe it should be PETSC_COMM_WORLD
8 
9     The resulting KSP and PC can only be controlled via the options database, though some common commands
10     could be passed through the server.
11 
12 */
13 #include <petsc/private/pcimpl.h>
14 #include <petsc/private/kspimpl.h>
15 
16 #define PC_MPI_MAX_RANKS 256
17 #define PC_MPI_COMM_WORLD MPI_COMM_WORLD
18 
/* Private context for PCMPI: lives on every rank, but only rank 0 (the controlling rank) has a full PC around it */
typedef struct {
  KSP              ksps[PC_MPI_MAX_RANKS];                               /* The addresses of the MPI parallel KSP on each rank, NULL when not on a rank. */
  PetscMPIInt      sendcount[PC_MPI_MAX_RANKS],displ[PC_MPI_MAX_RANKS];  /* For scatter/gather of rhs/solution */
  PetscMPIInt      NZ[PC_MPI_MAX_RANKS],NZdispl[PC_MPI_MAX_RANKS];       /* For scatter of nonzero values in matrix (and nonzero column indices initially) */
  PetscInt         mincntperrank;                                        /* minimum number of desired nonzeros per active rank in MPI parallel KSP solve */
  PetscBool        alwaysuseserver;                                      /* for debugging use the server infrastructure even if only one MPI rank is used for the solve */
} PC_MPI;
26 
/* Commands broadcast from rank 0 to the server ranks blocked in PCMPIServerBegin() */
typedef enum { PCMPI_EXIT,                /* exit the PC server loop, means the controlling sequential program is done */
               PCMPI_CREATE,              /* create a new MPI parallel KSP on an appropriately sized subcommunicator */
               PCMPI_SET_MAT,             /* set original matrix (or one with different nonzero pattern) */
               PCMPI_UPDATE_MAT_VALUES,   /* update current matrix with new nonzero values */
               PCMPI_SOLVE,               /* scatter the rhs, solve in parallel, gather the solution back to rank 0 */
               PCMPI_VIEW,                /* (currently unimplemented in the server loop) */
               PCMPI_DESTROY              /* destroy a KSP that is no longer needed */
             } PCMPICommand;
35 
/* PCMPIComms[i] contains ranks 0..i of PC_MPI_COMM_WORLD (MPI_COMM_NULL on the remaining ranks); created lazily by PCMPICommsCreate() */
static MPI_Comm  PCMPIComms[PC_MPI_MAX_RANKS];
static PetscBool PCMPICommSet = PETSC_FALSE;  /* have the subcommunicators above been created yet? */
/* statistics reported by -mpi_linear_solver_server_view; the array counters are indexed by the size of the subcommunicator used for the solve */
static PetscInt  PCMPISolveCounts[PC_MPI_MAX_RANKS],PCMPIKSPCounts[PC_MPI_MAX_RANKS],PCMPIMatCounts[PC_MPI_MAX_RANKS],PCMPISolveCountsSeq = 0, PCMPIKSPCountsSeq = 0;
39 
40 static PetscErrorCode PCMPICommsCreate(void)
41 {
42   MPI_Comm    comm = PC_MPI_COMM_WORLD;
43   PetscMPIInt size,rank,i;
44 
45   PetscFunctionBegin;
46   PetscCallMPI(MPI_Comm_size(comm,&size));
47   PetscCheck(size <= PC_MPI_MAX_RANKS,PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for using more than PC_MPI_MAX_RANKS MPI ranks in an MPI linear solver server solve");
48   PetscCallMPI(MPI_Comm_rank(comm,&rank));
49   /* comm for size 1 is useful only for debugging */
50   for (i=0; i<size; i++) {
51     PetscMPIInt color = rank < i+1 ? 0 : MPI_UNDEFINED;
52     PetscCallMPI(MPI_Comm_split(comm,color,0,&PCMPIComms[i]));
53     PCMPISolveCounts[i] = 0;
54     PCMPIKSPCounts[i] = 0;
55   }
56   PCMPICommSet = PETSC_TRUE;
57   PetscFunctionReturn(0);
58 }
59 
60 PetscErrorCode PCMPICommsDestroy(void)
61 {
62   MPI_Comm    comm = PC_MPI_COMM_WORLD;
63   PetscMPIInt size,rank,i;
64 
65   PetscFunctionBegin;
66   if (!PCMPICommSet) PetscFunctionReturn(0);
67   PetscCallMPI(MPI_Comm_size(comm,&size));
68   PetscCallMPI(MPI_Comm_rank(comm,&rank));
69   for (i=0; i<size; i++) {
70     if (PCMPIComms[i] != MPI_COMM_NULL) PetscCallMPI(MPI_Comm_free(&PCMPIComms[i]));
71   }
72   PCMPICommSet = PETSC_FALSE;
73   PetscFunctionReturn(0);
74 }
75 
76 static PetscErrorCode PCMPICreate(PC pc)
77 {
78   PC_MPI      *km = pc ? (PC_MPI*)pc->data : NULL;
79   MPI_Comm    comm = PC_MPI_COMM_WORLD;
80   KSP         ksp;
81   PetscInt    N[2],mincntperrank = 0;
82   PetscMPIInt size;
83   Mat         sA;
84   char        *prefix;
85   PetscMPIInt len = 0;
86 
87   PetscFunctionBegin;
88   if (!PCMPICommSet) PetscCall(PCMPICommsCreate());
89   PetscCallMPI(MPI_Comm_size(comm,&size));
90   if (pc) {
91     if (size == 1) PetscCall(PetscPrintf(PETSC_COMM_SELF,"Warning: Running KSP type of MPI on a one rank MPI run, this will be less efficient then not using this type\n"));
92     PetscCall(PCGetOperators(pc,&sA,&sA));
93     PetscCall(MatGetSize(sA,&N[0],&N[1]));
94   }
95   PetscCallMPI(MPI_Bcast(N, 2, MPIU_INT, 0, comm));
96 
97   /* choose a suitable sized MPI_Comm for the problem to be solved on */
98   if (km) mincntperrank = km->mincntperrank;
99   PetscCallMPI(MPI_Bcast(&mincntperrank, 1, MPI_INT, 0, comm));
100   comm = PCMPIComms[PetscMin(size,PetscMax(1,N[0]/mincntperrank)) - 1];
101   if (comm == MPI_COMM_NULL) {
102     ksp = NULL;
103     PetscFunctionReturn(0);
104   }
105   PetscCall(KSPCreate(comm,&ksp));
106   PetscCallMPI(MPI_Gather(&ksp, 1, MPI_AINT, pc ? km->ksps : NULL, 1, MPI_AINT, 0, comm));
107   if (pc) {
108     size_t slen;
109 
110     PetscCallMPI(MPI_Comm_size(comm,&size));
111     PCMPIKSPCounts[size]++;
112     PetscCall(PCGetOptionsPrefix(pc,(const char**)&prefix));
113     PetscCall(PetscStrlen(prefix,&slen));
114     len = (PetscMPIInt)slen;
115   }
116   PetscCallMPI(MPI_Bcast(&len, 1, MPI_INT, 0, comm));
117   if (len) {
118     if (!pc) {
119       PetscCall(PetscMalloc1(len+1,&prefix));
120     }
121     PetscCallMPI(MPI_Bcast(prefix, len+1, MPI_CHAR, 0, comm));
122     PetscCall(KSPSetOptionsPrefix(ksp,prefix));
123   }
124   PetscCall(KSPAppendOptionsPrefix(ksp,"mpi_"));
125   PetscFunctionReturn(0);
126 }
127 
/*
    PCMPISetMat - distributes the sequential matrix on rank 0 (assumed SeqAIJ, given the MatSeqAIJ calls below)
    to the MPI parallel KSP, building a new MPIAIJ matrix from scattered CSR pieces.

    Collective on the subcommunicator of the parallel KSP; pc is NULL on the server ranks.
*/
static PetscErrorCode PCMPISetMat(PC pc)
{
  PC_MPI           *km = pc ? (PC_MPI*)pc->data : NULL;
  Mat               A;
  PetscInt          N[2],n,*ia,*ja,j;
  Mat               sA;
  MPI_Comm          comm = PC_MPI_COMM_WORLD;
  KSP               ksp;
  PetscLayout       layout;
  const PetscInt    *IA = NULL,*JA = NULL;
  const PetscInt    *range;
  PetscMPIInt       *NZ = NULL,sendcounti[PC_MPI_MAX_RANKS],displi[PC_MPI_MAX_RANKS],*NZdispl = NULL,nz,size,i;
  PetscScalar       *a;
  const PetscScalar *sa = NULL;

  PetscFunctionBegin;
  /* rank 0 scatters the KSP addresses recorded in PCMPICreate(); NULL means this rank is idle for this system */
  PetscCallMPI(MPI_Scatter(pc ? km->ksps : NULL, 1, MPI_AINT, &ksp, 1, MPI_AINT, 0, comm));
  if (!ksp) PetscFunctionReturn(0);
  /* switch from the world communicator to the subcommunicator the KSP lives on */
  PetscCall(PetscObjectGetComm((PetscObject)ksp,&comm));
  if (pc) {
    PetscCallMPI(MPI_Comm_size(comm,&size));  /* size is only set (and only used) on rank 0 */
    PCMPIMatCounts[size]++;
    PetscCall(PCGetOperators(pc,&sA,&sA));
    PetscCall(MatGetSize(sA,&N[0],&N[1]));
  }
  PetscCallMPI(MPI_Bcast(N, 2, MPIU_INT, 0, comm));

  /* determine ownership ranges of matrix */
  PetscCall(PetscLayoutCreate(comm,&layout));
  PetscCall(PetscLayoutSetBlockSize(layout,1));
  PetscCall(PetscLayoutSetSize(layout,N[0]));
  PetscCall(PetscLayoutSetUp(layout));
  PetscCall(PetscLayoutGetLocalSize(layout,&n));

  /* copy over the matrix nonzero structure and values */
  if (pc) {
    NZ = km->NZ;
    NZdispl = km->NZdispl;
    PetscCall(PetscLayoutGetRanges(layout,&range));
    PetscCall(MatGetRowIJ(sA,0,PETSC_FALSE,PETSC_FALSE,NULL,&IA,&JA,NULL));
    for (i=0; i<size; i++) {
      /* +1 because adjacent ranks share the row-pointer entry at their boundary (each rank receives n+1 row pointers) */
      sendcounti[i] = (PetscMPIInt) (1 + range[i+1] - range[i]);
      NZ[i]         = (PetscMPIInt) (IA[range[i+1]] - IA[range[i]]);
    }
    displi[0] = 0;
    NZdispl[0] = 0;
    for (j=1; j<size; j++) {
      /* -1 accounts for the shared boundary entry counted in both sendcounti[j-1] and sendcounti[j] */
      displi[j] = displi[j-1] + sendcounti[j-1] - 1;
      NZdispl[j] = NZdispl[j-1] + NZ[j-1];
    }
    PetscCall(MatSeqAIJGetArrayRead(sA,&sa));
  }
  PetscCall(PetscLayoutDestroy(&layout));
  PetscCallMPI(MPI_Scatter(NZ, 1, MPI_INT, &nz, 1, MPI_INT, 0, comm));

  /* each rank receives its slice of the CSR row pointers, column indices and values */
  PetscCall(PetscMalloc3(n+1,&ia,nz,&ja,nz,&a));
  PetscCallMPI(MPI_Scatterv(IA,sendcounti, displi, MPIU_INT, ia, n+1, MPIU_INT, 0,comm));
  PetscCallMPI(MPI_Scatterv(JA,NZ, NZdispl, MPIU_INT, ja, nz, MPIU_INT, 0,comm));
  PetscCallMPI(MPI_Scatterv(sa,NZ, NZdispl, MPIU_SCALAR, a, nz, MPIU_SCALAR, 0,comm));

  if (pc) {
    PetscCall(MatSeqAIJRestoreArrayRead(sA,&sa));
    PetscCall(MatRestoreRowIJ(sA,0,PETSC_FALSE,PETSC_FALSE,NULL,&IA,&JA,NULL));
  }

  /* rebase the received row pointers so each rank's slice starts at zero, as MatCreateMPIAIJWithArrays() expects */
  for (j=1; j<n+1; j++) {
    ia[j] -= ia[0];
  }
  ia[0] = 0;
  PetscCall(MatCreateMPIAIJWithArrays(comm,n,n,N[0],N[0],ia,ja,a,&A));
  PetscCall(MatSetOptionsPrefix(A,"mpi_"));

  /* MatCreateMPIAIJWithArrays() copied the data, so the scratch CSR arrays can go */
  PetscCall(PetscFree3(ia,ja,a));
  PetscCall(KSPSetOperators(ksp,A,A));
  if (!ksp->vec_sol) PetscCall(MatCreateVecs(A,&ksp->vec_sol,&ksp->vec_rhs));
  if (pc) { /* needed for scatterv/gatherv of rhs and solution */
    const PetscInt *range;

    PetscCall(VecGetOwnershipRanges(ksp->vec_sol,&range));
    for (i=0; i<size; i++) {
      km->sendcount[i] = (PetscMPIInt) (range[i+1] - range[i]);
      km->displ[i]     = (PetscMPIInt) range[i];
    }
  }
  /* the KSP holds its own reference to A */
  PetscCall(MatDestroy(&A));
  PetscCall(KSPSetFromOptions(ksp));
  PetscFunctionReturn(0);
}
216 
/*
    PCMPIUpdateMatValues - scatters new numerical values from the rank-0 sequential matrix into the existing
    MPI parallel matrix, reusing the nonzero counts/displacements computed in PCMPISetMat() (same nonzero pattern).

    Collective; pc is NULL on the server ranks.
*/
static PetscErrorCode PCMPIUpdateMatValues(PC pc)
{
  PC_MPI           *km = pc ? (PC_MPI*)pc->data : NULL;
  KSP               ksp;
  Mat               sA,A;
  MPI_Comm          comm = PC_MPI_COMM_WORLD;
  PetscScalar       *a;
  PetscCount        nz;
  const PetscScalar *sa = NULL;

  PetscFunctionBegin;
  if (pc) {
    PetscMPIInt size;

    PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)pc),&size));
    PCMPIMatCounts[size]++;
    PetscCall(PCGetOperators(pc,&sA,&sA));
    PetscCall(MatSeqAIJGetArrayRead(sA,&sa));
  }
  PetscCallMPI(MPI_Scatter(pc ? km->ksps : NULL, 1, MPI_AINT, &ksp, 1, MPI_AINT, 0, comm));
  /* idle rank for this system; note rank 0 obtained sa above and always participates, so no restore is missed here */
  if (!ksp) PetscFunctionReturn(0);
  PetscCall(PetscObjectGetComm((PetscObject)ksp,&comm));
  PetscCall(KSPGetOperators(ksp,NULL,&A));
  PetscCall(MatMPIAIJGetNumberNonzeros(A,&nz));
  PetscCall(PetscMalloc1(nz,&a));
  /* NOTE(review): nz is a PetscCount used as the MPI receive count; assumes it fits the MPI count type — confirm for very large local sizes */
  PetscCallMPI(MPI_Scatterv(sa,pc ? km->NZ : NULL, pc ? km->NZdispl : NULL, MPIU_SCALAR, a, nz, MPIU_SCALAR, 0,comm));
  if (pc) PetscCall(MatSeqAIJRestoreArrayRead(sA,&sa));
  PetscCall(MatUpdateMPIAIJWithArray(A,a));
  PetscCall(PetscFree(a));
  PetscFunctionReturn(0);
}
248 
/*
    PCMPISolve - scatters the rank-0 right-hand side B, solves with the MPI parallel KSP, and gathers
    the solution back into X on rank 0.

    Collective; pc, B and X are NULL on the server ranks.
*/
static PetscErrorCode PCMPISolve(PC pc,Vec B,Vec X)
{
  PC_MPI           *km = pc ? (PC_MPI*)pc->data : NULL;
  KSP               ksp;
  MPI_Comm          comm = PC_MPI_COMM_WORLD;
  const PetscScalar *sb = NULL,*x;
  PetscScalar       *b,*sx = NULL;
  PetscInt          n;

  PetscFunctionBegin;
  /* send buffer is only significant on the root; &ksp on the server ranks is a harmless placeholder */
  PetscCallMPI(MPI_Scatter(pc ? km->ksps : &ksp, 1, MPI_AINT, &ksp, 1, MPI_AINT, 0, comm));
  if (!ksp) PetscFunctionReturn(0);
  PetscCall(PetscObjectGetComm((PetscObject)ksp,&comm));

  /* TODO: optimize code to not require building counts/displ everytime */

  /* scatterv rhs */
  if (pc) {
    PetscMPIInt size;

    PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)pc),&size));
    PCMPISolveCounts[size]++;
    PetscCall(VecGetArrayRead(B,&sb));
  }
  /* copy each rank's slice of the rhs directly into the parallel KSP's work vector */
  PetscCall(VecGetLocalSize(ksp->vec_rhs,&n));
  PetscCall(VecGetArray(ksp->vec_rhs,&b));
  PetscCallMPI(MPI_Scatterv(sb,pc ? km->sendcount : NULL, pc ? km->displ : NULL, MPIU_SCALAR, b, n, MPIU_SCALAR, 0,comm));
  PetscCall(VecRestoreArray(ksp->vec_rhs,&b));
  if (pc) PetscCall(VecRestoreArrayRead(B,&sb));

  /* NULL,NULL: solve with the rhs/solution vectors already attached to the KSP */
  PetscCall(KSPSolve(ksp,NULL,NULL));

  /* gather solution */
  PetscCall(VecGetArrayRead(ksp->vec_sol,&x));
  if (pc) PetscCall(VecGetArray(X,&sx));
  PetscCallMPI(MPI_Gatherv(x,n,MPIU_SCALAR, sx, pc ? km->sendcount : NULL, pc ? km->displ : NULL, MPIU_SCALAR, 0,comm));
  if (pc) PetscCall(VecRestoreArray(X,&sx));
  PetscCall(VecRestoreArrayRead(ksp->vec_sol,&x));
  PetscFunctionReturn(0);
}
289 
290 static PetscErrorCode PCMPIDestroy(PC pc)
291 {
292   PC_MPI  *km = pc ? (PC_MPI*)pc->data : NULL;
293   KSP      ksp;
294   MPI_Comm comm = PC_MPI_COMM_WORLD;
295 
296   PetscFunctionBegin;
297   PetscCallMPI(MPI_Scatter(pc ? km->ksps : NULL, 1, MPI_AINT, &ksp, 1, MPI_AINT, 0, comm));
298   if (!ksp) PetscFunctionReturn(0);
299   PetscCall(KSPDestroy(&ksp));
300   PetscFunctionReturn(0);
301 }
302 
303 /*@C
304      PCMPIServerBegin - starts a server that runs on the rank != 0 MPI processes waiting to process requests for
305      parallel KSP solves and management of parallel KSP objects.
306 
307      Logically collective on all MPI ranks except 0
308 
309    Options Database:
310 +   -mpi_linear_solver_server - causes the PETSc program to start in MPI linear solver server mode where only the first MPI rank runs user code
311 -   -mpi_linear_solver_server_view - displays information about the linear systems solved by the MPI linear solver server
312 
313      Notes:
314       This is normally started automatically in `PetscInitialize()` when the option is provided
315 
316      Developer Notes:
317        When called on rank zero this sets PETSC_COMM_WORLD to PETSC_COMM_SELF to allow a main program
318        written with PETSC_COMM_WORLD to run correctly on the single rank while all the ranks
319        (that would normally be sharing PETSC_COMM_WORLD) to run the solver server.
320 
321        Can this be integrated into the PetscDevice abstraction that is currently being developed?
322 
323      Level: developer
324 
325 .seealso: PCMPIServerEnd(), PCMPI
326 @*/
PetscErrorCode PCMPIServerBegin(void)
{
  PetscMPIInt rank;

  PetscFunctionBegin;
  PetscCall(PetscInfo(NULL,"Starting MPI Linear Solver Server"));
  PetscCallMPI(MPI_Comm_rank(PC_MPI_COMM_WORLD,&rank));
  if (rank == 0) {
    /* rank 0 keeps running the user's sequential program, but with PETSC_COMM_WORLD shrunk to itself */
    PETSC_COMM_WORLD = PETSC_COMM_SELF;
    PetscFunctionReturn(0);
  }

  /* all other ranks block here forever, servicing commands broadcast from rank 0 */
  while (PETSC_TRUE) {
    PCMPICommand request = PCMPI_CREATE;
    PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, PC_MPI_COMM_WORLD));
    switch (request) {
    case PCMPI_CREATE:
      PetscCall(PCMPICreate(NULL));
      break;
    case PCMPI_SET_MAT:
      PetscCall(PCMPISetMat(NULL));
      break;
    case PCMPI_UPDATE_MAT_VALUES:
      PetscCall(PCMPIUpdateMatValues(NULL));
      break;
    case PCMPI_VIEW:
      /* viewing on the server ranks is not implemented; see the note above PCView_MPI() */
      // PetscCall(PCMPIView(NULL));
      break;
    case PCMPI_SOLVE:
      PetscCall(PCMPISolve(NULL,NULL,NULL));
      break;
    case PCMPI_DESTROY:
      PetscCall(PCMPIDestroy(NULL));
      break;
    case PCMPI_EXIT:
      /* broadcast by PCMPIServerEnd() when the controlling program finalizes */
      PetscCall(PetscFinalize());
      exit(0); /* not sure if this is a good idea, but cannot return because it will run users main program */
      break;
    default:
      break;
    }
  }
  PetscFunctionReturn(0);
}
371 
372 /*@C
373      PCMPIServerEnd - ends a server that runs on the rank != 0 MPI processes waiting to process requests for
374      parallel KSP solves and management of parallel KSP objects.
375 
376      Logically collective on all MPI ranks except 0
377 
378      Notes:
379       This is normally ended automatically in `PetscFinalize()` when the option is provided
380 
381      Level: developer
382 
383 .seealso: PCMPIServerBegin(), PCMPI
384 @*/
385 PetscErrorCode PCMPIServerEnd(void)
386 {
387   PCMPICommand request = PCMPI_EXIT;
388 
389   PetscFunctionBegin;
390   if (PetscGlobalRank == 0) {
391     PetscViewer       viewer = NULL;
392     PetscViewerFormat format;
393 
394     PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, PC_MPI_COMM_WORLD));
395     PETSC_COMM_WORLD = MPI_COMM_WORLD; /* could use PC_MPI_COMM_WORLD */
396     PetscOptionsBegin(PETSC_COMM_SELF,NULL,"MPI linear solver server options",NULL);
397     PetscCall(PetscOptionsViewer("-mpi_linear_solver_server_view","View information about system solved with the server","PCMPI",&viewer,&format,NULL));
398     PetscOptionsEnd();
399     if (viewer) {
400       PetscBool isascii;
401 
402       PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&isascii));
403       if (isascii) {
404         PetscMPIInt size;
405         PetscInt    i, ksp = 0, mat = 0, solve = 0;
406 
407         PetscCallMPI(MPI_Comm_size(PETSC_COMM_WORLD,&size));
408         for (i=1; i<size; i++) {
409           ksp += PCMPIKSPCounts[i];
410           mat += PCMPIMatCounts[i];
411           solve += PCMPISolveCounts[i];
412         }
413         PetscCall(PetscViewerASCIIPrintf(viewer,"MPI linear solver server:\n"));
414         PetscCall(PetscViewerASCIIPrintf(viewer,"  Parallel KSPs  %" PetscInt_FMT " Mats  %" PetscInt_FMT " Solves %" PetscInt_FMT "\n",ksp,mat,solve));
415         PetscCall(PetscViewerASCIIPrintf(viewer,"  Sequential KSPs  %" PetscInt_FMT " Solves %" PetscInt_FMT "\n",PCMPIKSPCountsSeq,PCMPISolveCountsSeq));
416       }
417       PetscCall(PetscViewerDestroy(&viewer));
418     }
419   }
420   PetscCall(PCMPICommsDestroy());
421   PetscFunctionReturn(0);
422 }
423 
424 /*
425     This version is used in the trivial case when the MPI parallel solver server is running on just the original MPI rank 0
426     because, for example, the problem is small. This version is more efficient because it does not require copying any data
427 */
428 static PetscErrorCode PCSetUp_Seq(PC pc)
429 {
430   PC_MPI      *km = (PC_MPI*)pc->data;
431   Mat         sA;
432   const char* prefix;
433 
434   PetscFunctionBegin;
435   PetscCall(PCGetOperators(pc,NULL,&sA));
436   PetscCall(PCGetOptionsPrefix(pc,&prefix));
437   PetscCall(KSPCreate(PETSC_COMM_SELF,&km->ksps[0]));
438   PetscCall(KSPSetOptionsPrefix(km->ksps[0],prefix));
439   PetscCall(KSPAppendOptionsPrefix(km->ksps[0],"mpi_"));
440   PetscCall(KSPSetOperators(km->ksps[0],sA,sA));
441   PetscCall(KSPSetFromOptions(km->ksps[0]));
442   PetscCall(KSPSetUp(km->ksps[0]));
443   PetscInfo((PetscObject)pc,"MPI parallel linear solver system is being solved directly on rank 0 due to its small size\n");
444   PCMPIKSPCountsSeq++;
445   PetscFunctionReturn(0);
446 }
447 
448 static PetscErrorCode PCApply_Seq(PC pc,Vec b,Vec x)
449 {
450   PC_MPI *km = (PC_MPI*)pc->data;
451 
452   PetscFunctionBegin;
453   PetscCall(KSPSolve(km->ksps[0],b,x));
454   PCMPISolveCountsSeq++;
455   PetscFunctionReturn(0);
456 }
457 
458 static PetscErrorCode PCView_Seq(PC pc,PetscViewer viewer)
459 {
460   PC_MPI     *km = (PC_MPI*)pc->data;
461   const char *prefix;
462 
463   PetscFunctionBegin;
464   PetscCall(PetscViewerASCIIPrintf(viewer,"Running MPI linear solver server directly on rank 0 due to its small size\n"));
465   PetscCall(PetscViewerASCIIPrintf(viewer,"Desired minimum number of nonzeros per rank for MPI parallel solve %d\n",(int)km->mincntperrank));
466   PetscCall(PCGetOptionsPrefix(pc,&prefix));
467   if (prefix) PetscCall(PetscViewerASCIIPrintf(viewer,"*** Use -%smpi_ksp_view to see the MPI KSP parameters***\n",prefix));
468   else PetscCall(PetscViewerASCIIPrintf(viewer,"*** Use -mpi_ksp_view to see the MPI KSP parameters***\n"));
469   PetscFunctionReturn(0);
470 }
471 
472 static PetscErrorCode PCDestroy_Seq(PC pc)
473 {
474   PC_MPI     *km = (PC_MPI*)pc->data;
475 
476   PetscFunctionBegin;
477   PetscCall(KSPDestroy(&km->ksps[0]));
478   PetscCall(PetscFree(pc->data));
479   PetscFunctionReturn(0);
480 }
481 
482 /*
483      PCSetUp_MPI - Trigger the creation of the MPI parallel PC and copy parts of the matrix and
484      right hand side to the parallel PC
485 */
486 static PetscErrorCode PCSetUp_MPI(PC pc)
487 {
488   PC_MPI         *km = (PC_MPI*)pc->data;
489   PetscMPIInt    rank,size;
490   PCMPICommand   request;
491   PetscBool      newmatrix = PETSC_FALSE;
492 
493   PetscFunctionBegin;
494   PetscCallMPI(MPI_Comm_rank(MPI_COMM_WORLD,&rank));
495   PetscCheck(rank == 0,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"PCMPI can only be used from 0th rank of MPI_COMM_WORLD. Perhaps a missing -mpi_linear_solver_server?");
496   PetscCallMPI(MPI_Comm_size(MPI_COMM_WORLD,&size));
497 
498   if (!pc->setupcalled) {
499     if (!km->alwaysuseserver) {
500       PetscInt n;
501       Mat      sA;
502       /* short circuit for small systems */
503       PetscCall(PCGetOperators(pc,&sA,&sA));
504       PetscCall(MatGetSize(sA,&n,NULL));
505       if (n < 2*km->mincntperrank-1 || size == 1) {
506         pc->ops->setup          = NULL;
507         pc->ops->apply          = PCApply_Seq;
508         pc->ops->destroy        = PCDestroy_Seq;
509         pc->ops->view           = PCView_Seq;
510         PetscCall(PCSetUp_Seq(pc));
511         PetscFunctionReturn(0);
512       }
513     }
514 
515     request = PCMPI_CREATE;
516     PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, MPI_COMM_WORLD));
517     PetscCall(PCMPICreate(pc));
518     newmatrix = PETSC_TRUE;
519   } if (pc->flag == DIFFERENT_NONZERO_PATTERN) newmatrix = PETSC_TRUE;
520 
521   if (newmatrix) {
522     PetscInfo((PetscObject)pc,"New matrix or matrix has changed nonzero structure\n");
523     request = PCMPI_SET_MAT;
524     PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, MPI_COMM_WORLD));
525     PetscCall(PCMPISetMat(pc));
526   } else {
527     PetscInfo((PetscObject)pc,"Matrix has only changed nozero values\n");
528     request = PCMPI_UPDATE_MAT_VALUES;
529     PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, MPI_COMM_WORLD));
530     PetscCall(PCMPIUpdateMatValues(pc));
531   }
532   PetscFunctionReturn(0);
533 }
534 
535 static PetscErrorCode PCApply_MPI(PC pc,Vec b,Vec x)
536 {
537   PCMPICommand request = PCMPI_SOLVE;
538 
539   PetscFunctionBegin;
540   PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, MPI_COMM_WORLD));
541   PetscCall(PCMPISolve(pc,b,x));
542   PetscFunctionReturn(0);
543 }
544 
545 PetscErrorCode PCDestroy_MPI(PC pc)
546 {
547   PCMPICommand request = PCMPI_DESTROY;
548 
549   PetscFunctionBegin;
550   PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, MPI_COMM_WORLD));
551   PetscCall(PCMPIDestroy(pc));
552   PetscCall(PetscFree(pc->data));
553   PetscFunctionReturn(0);
554 }
555 
556 /*
557      PCView_MPI - Cannot call view on the MPI parallel KSP because other ranks do not have access to the viewer
558 */
559 PetscErrorCode PCView_MPI(PC pc,PetscViewer viewer)
560 {
561   PC_MPI      *km = (PC_MPI*)pc->data;
562   MPI_Comm    comm;
563   PetscMPIInt size;
564   const char  *prefix;
565 
566   PetscFunctionBegin;
567   PetscCall(PetscObjectGetComm((PetscObject)km->ksps[0],&comm));
568   PetscCallMPI(MPI_Comm_size(comm,&size));
569   PetscCall(PetscViewerASCIIPrintf(viewer,"Size of MPI communicator used for MPI parallel KSP solve %d\n",size));
570   PetscCall(PetscViewerASCIIPrintf(viewer,"Desired minimum number of nonzeros per rank for MPI parallel solve %d\n",(int)km->mincntperrank));
571   PetscCall(PCGetOptionsPrefix(pc,&prefix));
572   if (prefix) PetscCall(PetscViewerASCIIPrintf(viewer,"*** Use -%smpi_ksp_view to see the MPI KSP parameters***\n",prefix));
573   else PetscCall(PetscViewerASCIIPrintf(viewer,"*** Use -mpi_ksp_view to see the MPI KSP parameters***\n"));
574   PetscFunctionReturn(0);
575 }
576 
577 PetscErrorCode PCSetFromOptions_MPI(PetscOptionItems *PetscOptionsObject,PC pc)
578 {
579   PC_MPI *km = (PC_MPI*)pc->data;
580 
581   PetscFunctionBegin;
582   PetscOptionsHeadBegin(PetscOptionsObject,"MPI linear solver server options");
583   PetscCall(PetscOptionsInt("-pc_mpi_minimum_count_per_rank","Desired minimum number of nonzeros per rank","None",km->mincntperrank,&km->mincntperrank,NULL));
584   PetscCall(PetscOptionsBool("-pc_mpi_always_use_server","Use the server even if only one rank is used for the solve (for debugging)","None",km->alwaysuseserver,&km->alwaysuseserver,NULL));
585   PetscOptionsHeadEnd();
586   PetscFunctionReturn(0);
587 }
588 
589 /*MC
590      PCMPI - Calls an MPI parallel KSP to solve a linear system from user code running on one process
591 
592    Level: beginner
593 
594    Options Database:
595 +  -mpi_linear_solver_server - causes the PETSc program to start in MPI linear solver server mode where only the first MPI rank runs user code
596 .  -mpi_linear_solver_server_view - displays information about the linear systems solved by the MPI linear solver server
597 .  -pc_mpi_minimum_count_per_rank - sets the minimum size of the linear system per MPI rank that the solver will strive for
598 -  -pc_mpi_always_use_server - use the server solver code even if the particular system is only solved on the process, this option is only for debugging and testing purposes
599 
600    Notes:
601    The options database prefix for the MPI parallel KSP and PC is -mpi_
602 
603    The MPI linear solver server will not support scaling user code to utilize extremely large numbers of MPI ranks but should give reasonable speedup for
604    potentially 4 to 8 MPI ranks depending on the linear system being solved, solver algorithm, and the hardware.
605 
606    It can be particularly useful for user OpenMP code or potentially user GPU code.
607 
608    When the program is running with a single MPI rank then this directly uses the provided matrix and right hand side (still in a KSP with the options prefix of -mpi)
609    and does not need to distribute the matrix and vector to the various MPI ranks; thus it incurs no extra overhead over just using the KSP directly.
610 
611 .seealso: `KSPCreate()`, `KSPSetType()`, `KSPType`, `KSP`, `PC`, `PCMPIServerBegin()`, `PCMPIServerEnd()`
612 
613 M*/
614 PETSC_EXTERN PetscErrorCode PCCreate_MPI(PC pc)
615 {
616   PC_MPI *km;
617 
618   PetscFunctionBegin;
619   PetscCall(PetscNewLog(pc,&km));
620   pc->data = (void*)km;
621 
622   km->mincntperrank = 10000;
623 
624   pc->ops->setup          = PCSetUp_MPI;
625   pc->ops->apply          = PCApply_MPI;
626   pc->ops->destroy        = PCDestroy_MPI;
627   pc->ops->view           = PCView_MPI;
628   pc->ops->setfromoptions = PCSetFromOptions_MPI;
629   PetscFunctionReturn(0);
630 }
631