1f1f2ae84SBarry Smith /*
2f1f2ae84SBarry Smith This file creates an MPI parallel KSP from a sequential PC that lives on MPI rank 0.
3f1f2ae84SBarry Smith It is intended to allow using PETSc MPI parallel linear solvers from non-MPI codes.
4f1f2ae84SBarry Smith
The non-MPI (sequential) program may use OpenMP to compute the right-hand side and matrix for the linear system
6f1f2ae84SBarry Smith
7f1f2ae84SBarry Smith The code uses MPI_COMM_WORLD below but maybe it should be PETSC_COMM_WORLD
8f1f2ae84SBarry Smith
9f1f2ae84SBarry Smith The resulting KSP and PC can only be controlled via the options database, though some common commands
10f1f2ae84SBarry Smith could be passed through the server.
11f1f2ae84SBarry Smith
12f1f2ae84SBarry Smith */
139f0612e4SBarry Smith #include <petsc/private/pcimpl.h> /*I "petscksp.h" I*/
14f1f2ae84SBarry Smith #include <petsc/private/kspimpl.h>
15789afff4SPierre Jolivet #include <petscts.h>
16789afff4SPierre Jolivet #include <petsctao.h>
179f0612e4SBarry Smith #if defined(PETSC_HAVE_PTHREAD_MUTEX)
189f0612e4SBarry Smith #include <pthread.h>
199f0612e4SBarry Smith #endif
20f1f2ae84SBarry Smith
21f1f2ae84SBarry Smith #define PC_MPI_MAX_RANKS 256
22f1f2ae84SBarry Smith #define PC_MPI_COMM_WORLD MPI_COMM_WORLD
23f1f2ae84SBarry Smith
/* Rank-0 context for the MPI linear solver server PC; records where the server-side KSPs live
   and the counts/displacements needed for the MPI scatter/gather of matrix and vector data */
typedef struct {
  KSP       ksps[PC_MPI_MAX_RANKS];                               /* The addresses of the MPI parallel KSP on each process, NULL when not on a process. */
  PetscInt  sendcount[PC_MPI_MAX_RANKS], displ[PC_MPI_MAX_RANKS]; /* For scatter/gather of rhs/solution */
  PetscInt  NZ[PC_MPI_MAX_RANKS], NZdispl[PC_MPI_MAX_RANKS];      /* For scatter of nonzero values in matrix (and nonzero column indices initially) */
  PetscInt  mincntperrank;                                        /* minimum number of desired matrix rows per active rank in MPI parallel KSP solve */
  PetscBool alwaysuseserver;                                      /* for debugging use the server infrastructure even if only one MPI process is used for the solve */
} PC_MPI;
31f1f2ae84SBarry Smith
/* Commands broadcast from MPI rank 0 to the waiting server ranks, one per server operation */
typedef enum {
  PCMPI_EXIT,              /* exit the PC server loop, means the controlling sequential program is done */
  PCMPI_CREATE,            /* create an MPI parallel KSP mirroring the sequential PC on rank 0 (see PCMPICreate()) */
  PCMPI_SET_MAT,           /* set original matrix (or one with different nonzero pattern) */
  PCMPI_UPDATE_MAT_VALUES, /* update current matrix with new nonzero values */
  PCMPI_SOLVE,             /* perform a parallel KSPSolve() with the current matrix (see PCMPISolve()) */
  PCMPI_VIEW,              /* view the server solver; handler not shown in this file chunk */
  PCMPI_DESTROY            /* destroy a PC that is no longer needed */
} PCMPICommand;
41f1f2ae84SBarry Smith
/* PCMPIComms[i] contains the first i+1 ranks of PC_MPI_COMM_WORLD (MPI_COMM_NULL on the other ranks); built lazily by PCMPICommsCreate() */
static MPI_Comm  PCMPIComms[PC_MPI_MAX_RANKS];
static PetscBool PCMPICommSet = PETSC_FALSE;
/* usage statistics indexed by (number of ranks used in the solve) - 1; the *Seq counters track solves that bypassed the server */
static PetscInt PCMPISolveCounts[PC_MPI_MAX_RANKS], PCMPIKSPCounts[PC_MPI_MAX_RANKS], PCMPIMatCounts[PC_MPI_MAX_RANKS], PCMPISolveCountsSeq = 0, PCMPIKSPCountsSeq = 0;
static PetscInt PCMPIIterations[PC_MPI_MAX_RANKS], PCMPISizes[PC_MPI_MAX_RANKS], PCMPIIterationsSeq = 0, PCMPISizesSeq = 0;
/* log events covering the distribution of matrix/vector data to and from the server ranks */
static PetscLogEvent EventServerDist, EventServerDistMPI;
/* per-rank locks manipulated by rank 0 in PCMPIServerBroadcastRequest(); presumably the server ranks
   wait on these between commands when shared memory is in use -- the waiting side is not visible here */
#if defined(PETSC_HAVE_PTHREAD_MUTEX)
static pthread_mutex_t *PCMPIServerLocks;
#else
static void *PCMPIServerLocks;
#endif
52f1f2ae84SBarry Smith
PCMPICommsCreate(void)53d71ae5a4SJacob Faibussowitsch static PetscErrorCode PCMPICommsCreate(void)
54d71ae5a4SJacob Faibussowitsch {
55f1f2ae84SBarry Smith MPI_Comm comm = PC_MPI_COMM_WORLD;
56f1f2ae84SBarry Smith PetscMPIInt size, rank, i;
57f1f2ae84SBarry Smith
58f1f2ae84SBarry Smith PetscFunctionBegin;
59f1f2ae84SBarry Smith PetscCallMPI(MPI_Comm_size(comm, &size));
60f1f2ae84SBarry Smith PetscCheck(size <= PC_MPI_MAX_RANKS, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for using more than PC_MPI_MAX_RANKS MPI ranks in an MPI linear solver server solve");
61f1f2ae84SBarry Smith PetscCallMPI(MPI_Comm_rank(comm, &rank));
62f1f2ae84SBarry Smith /* comm for size 1 is useful only for debugging */
63f1f2ae84SBarry Smith for (i = 0; i < size; i++) {
64f1f2ae84SBarry Smith PetscMPIInt color = rank < i + 1 ? 0 : MPI_UNDEFINED;
65f1f2ae84SBarry Smith PetscCallMPI(MPI_Comm_split(comm, color, 0, &PCMPIComms[i]));
66f1f2ae84SBarry Smith PCMPISolveCounts[i] = 0;
67f1f2ae84SBarry Smith PCMPIKSPCounts[i] = 0;
685316cbedSBarry Smith PCMPIIterations[i] = 0;
695316cbedSBarry Smith PCMPISizes[i] = 0;
70f1f2ae84SBarry Smith }
71f1f2ae84SBarry Smith PCMPICommSet = PETSC_TRUE;
723ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
73f1f2ae84SBarry Smith }
74f1f2ae84SBarry Smith
PCMPICommsDestroy(void)759f0612e4SBarry Smith static PetscErrorCode PCMPICommsDestroy(void)
76d71ae5a4SJacob Faibussowitsch {
77f1f2ae84SBarry Smith MPI_Comm comm = PC_MPI_COMM_WORLD;
78f1f2ae84SBarry Smith PetscMPIInt size, rank, i;
79f1f2ae84SBarry Smith
80f1f2ae84SBarry Smith PetscFunctionBegin;
813ba16761SJacob Faibussowitsch if (!PCMPICommSet) PetscFunctionReturn(PETSC_SUCCESS);
82f1f2ae84SBarry Smith PetscCallMPI(MPI_Comm_size(comm, &size));
83f1f2ae84SBarry Smith PetscCallMPI(MPI_Comm_rank(comm, &rank));
84f1f2ae84SBarry Smith for (i = 0; i < size; i++) {
85f1f2ae84SBarry Smith if (PCMPIComms[i] != MPI_COMM_NULL) PetscCallMPI(MPI_Comm_free(&PCMPIComms[i]));
86f1f2ae84SBarry Smith }
87f1f2ae84SBarry Smith PCMPICommSet = PETSC_FALSE;
883ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
89f1f2ae84SBarry Smith }
90f1f2ae84SBarry Smith
PCMPICreate(PC pc)91d71ae5a4SJacob Faibussowitsch static PetscErrorCode PCMPICreate(PC pc)
92d71ae5a4SJacob Faibussowitsch {
93f1f2ae84SBarry Smith PC_MPI *km = pc ? (PC_MPI *)pc->data : NULL;
94f1f2ae84SBarry Smith MPI_Comm comm = PC_MPI_COMM_WORLD;
95f1f2ae84SBarry Smith KSP ksp;
96f1f2ae84SBarry Smith PetscInt N[2], mincntperrank = 0;
97f1f2ae84SBarry Smith PetscMPIInt size;
98f1f2ae84SBarry Smith Mat sA;
993821be0aSBarry Smith char *cprefix = NULL;
100f1f2ae84SBarry Smith PetscMPIInt len = 0;
101f1f2ae84SBarry Smith
102f1f2ae84SBarry Smith PetscFunctionBegin;
1039f0612e4SBarry Smith PCMPIServerInSolve = PETSC_TRUE;
104f1f2ae84SBarry Smith if (!PCMPICommSet) PetscCall(PCMPICommsCreate());
105f1f2ae84SBarry Smith PetscCallMPI(MPI_Comm_size(comm, &size));
106f1f2ae84SBarry Smith if (pc) {
107f1f2ae84SBarry Smith if (size == 1) PetscCall(PetscPrintf(PETSC_COMM_SELF, "Warning: Running KSP type of MPI on a one rank MPI run, this will be less efficient then not using this type\n"));
108f1f2ae84SBarry Smith PetscCall(PCGetOperators(pc, &sA, &sA));
109f1f2ae84SBarry Smith PetscCall(MatGetSize(sA, &N[0], &N[1]));
110f1f2ae84SBarry Smith }
111f1f2ae84SBarry Smith PetscCallMPI(MPI_Bcast(N, 2, MPIU_INT, 0, comm));
112f1f2ae84SBarry Smith
113f1f2ae84SBarry Smith /* choose a suitable sized MPI_Comm for the problem to be solved on */
114f1f2ae84SBarry Smith if (km) mincntperrank = km->mincntperrank;
115f1f2ae84SBarry Smith PetscCallMPI(MPI_Bcast(&mincntperrank, 1, MPI_INT, 0, comm));
116f1f2ae84SBarry Smith comm = PCMPIComms[PetscMin(size, PetscMax(1, N[0] / mincntperrank)) - 1];
117f1f2ae84SBarry Smith if (comm == MPI_COMM_NULL) {
118f1f2ae84SBarry Smith ksp = NULL;
1199f0612e4SBarry Smith PCMPIServerInSolve = PETSC_FALSE;
1203ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
121f1f2ae84SBarry Smith }
1220316ec64SBarry Smith PetscCall(PetscLogStagePush(PCMPIStage));
123f1f2ae84SBarry Smith PetscCall(KSPCreate(comm, &ksp));
1243821be0aSBarry Smith PetscCall(KSPSetNestLevel(ksp, 1));
1253821be0aSBarry Smith PetscCall(PetscObjectSetTabLevel((PetscObject)ksp, 1));
1260316ec64SBarry Smith PetscCall(PetscLogStagePop());
127f1f2ae84SBarry Smith PetscCallMPI(MPI_Gather(&ksp, 1, MPI_AINT, pc ? km->ksps : NULL, 1, MPI_AINT, 0, comm));
128f1f2ae84SBarry Smith if (pc) {
129f1f2ae84SBarry Smith size_t slen;
1303821be0aSBarry Smith const char *prefix = NULL;
131f9818f3cSJose E. Roman char *found = NULL;
132f1f2ae84SBarry Smith
133f1f2ae84SBarry Smith PetscCallMPI(MPI_Comm_size(comm, &size));
134dad3da8eSBarry Smith PCMPIKSPCounts[size - 1]++;
1353821be0aSBarry Smith /* Created KSP gets prefix of PC minus the mpi_linear_solver_server_ portion */
1363821be0aSBarry Smith PetscCall(PCGetOptionsPrefix(pc, &prefix));
1373821be0aSBarry Smith PetscCheck(prefix, PETSC_COMM_SELF, PETSC_ERR_PLIB, "PCMPI missing required prefix");
1383821be0aSBarry Smith PetscCall(PetscStrallocpy(prefix, &cprefix));
1393821be0aSBarry Smith PetscCall(PetscStrstr(cprefix, "mpi_linear_solver_server_", &found));
1403821be0aSBarry Smith PetscCheck(found, PETSC_COMM_SELF, PETSC_ERR_PLIB, "PCMPI missing mpi_linear_solver_server_ portion of prefix");
1413821be0aSBarry Smith *found = 0;
1423821be0aSBarry Smith PetscCall(PetscStrlen(cprefix, &slen));
143835f2295SStefano Zampini PetscCall(PetscMPIIntCast(slen, &len));
144f1f2ae84SBarry Smith }
145f1f2ae84SBarry Smith PetscCallMPI(MPI_Bcast(&len, 1, MPI_INT, 0, comm));
146f1f2ae84SBarry Smith if (len) {
1473821be0aSBarry Smith if (!pc) PetscCall(PetscMalloc1(len + 1, &cprefix));
1483821be0aSBarry Smith PetscCallMPI(MPI_Bcast(cprefix, len + 1, MPI_CHAR, 0, comm));
1493821be0aSBarry Smith PetscCall(KSPSetOptionsPrefix(ksp, cprefix));
150f1f2ae84SBarry Smith }
1513821be0aSBarry Smith PetscCall(PetscFree(cprefix));
1529f0612e4SBarry Smith PCMPIServerInSolve = PETSC_FALSE;
1533ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
154f1f2ae84SBarry Smith }
155f1f2ae84SBarry Smith
/*
  PCMPISetMat - distributes the sequential AIJ matrix on rank 0 to the server ranks as a MATMPIAIJ

  Collective on PC_MPI_COMM_WORLD. On rank 0 `pc` holds the sequential matrix; on all other
  ranks `pc` is NULL. Ranks that received no KSP in PCMPICreate() return immediately.

  Two transport paths exist: MPI scatter of the CSR arrays (default), or mapping of rank 0's
  shared-memory segments directly into each server process when PCMPIServerUseShmget is set.
  Matrix sizes, block size, symmetry/Hermitian/SPD flags and the options-prefix length all
  travel in the single matproperties[] broadcast.
*/
static PetscErrorCode PCMPISetMat(PC pc)
{
  PC_MPI            *km = pc ? (PC_MPI *)pc->data : NULL;
  Mat                A;
  PetscInt           m, n, j, bs;
  Mat                sA;
  MPI_Comm           comm = PC_MPI_COMM_WORLD;
  KSP                ksp;
  PetscLayout        layout;
  const PetscInt    *IA = NULL, *JA = NULL, *ia, *ja;
  const PetscInt    *range;
  PetscInt          *NZ = NULL, sendcounti[PC_MPI_MAX_RANKS], displi[PC_MPI_MAX_RANKS], *NZdispl = NULL, nz;
  PetscMPIInt        size, i;
  const PetscScalar *a = NULL, *sa;
  PetscInt           matproperties[8] = {0}, rstart, rend;
  char              *cprefix;

  PetscFunctionBegin;
  /* rank 0 routes each server rank its KSP address; NULL means this rank is idle */
  PetscCallMPI(MPI_Scatter(pc ? km->ksps : NULL, 1, MPI_AINT, &ksp, 1, MPI_AINT, 0, comm));
  if (!ksp) PetscFunctionReturn(PETSC_SUCCESS);
  PCMPIServerInSolve = PETSC_TRUE;
  PetscCall(PetscLogEventBegin(EventServerDist, NULL, NULL, NULL, NULL));
  PetscCall(PetscObjectGetComm((PetscObject)ksp, &comm));
  if (pc) {
    PetscBool   isset, issymmetric, ishermitian, isspd, isstructurallysymmetric;
    const char *prefix;
    size_t      clen;

    PetscCallMPI(MPI_Comm_size(comm, &size));
    PCMPIMatCounts[size - 1]++;
    PetscCall(PCGetOperators(pc, &sA, &sA));
    /* pack global sizes, block size, known matrix properties (0 = unknown, 1 = true, 2 = false)
       and the prefix length into one buffer so a single broadcast suffices */
    PetscCall(MatGetSize(sA, &matproperties[0], &matproperties[1]));
    PetscCall(MatGetBlockSize(sA, &bs));
    matproperties[2] = bs;
    PetscCall(MatIsSymmetricKnown(sA, &isset, &issymmetric));
    matproperties[3] = !isset ? 0 : (issymmetric ? 1 : 2);
    PetscCall(MatIsHermitianKnown(sA, &isset, &ishermitian));
    matproperties[4] = !isset ? 0 : (ishermitian ? 1 : 2);
    PetscCall(MatIsSPDKnown(sA, &isset, &isspd));
    matproperties[5] = !isset ? 0 : (isspd ? 1 : 2);
    PetscCall(MatIsStructurallySymmetricKnown(sA, &isset, &isstructurallysymmetric));
    matproperties[6] = !isset ? 0 : (isstructurallysymmetric ? 1 : 2);
    /* Created Mat gets prefix of input Mat PLUS the mpi_linear_solver_server_ portion */
    PetscCall(MatGetOptionsPrefix(sA, &prefix));
    PetscCall(PetscStrallocpy(prefix, &cprefix));
    PetscCall(PetscStrlen(cprefix, &clen));
    matproperties[7] = (PetscInt)clen;
  }
  PetscCallMPI(MPI_Bcast(matproperties, PETSC_STATIC_ARRAY_LENGTH(matproperties), MPIU_INT, 0, comm));

  /* determine ownership ranges of matrix columns */
  PetscCall(PetscLayoutCreate(comm, &layout));
  PetscCall(PetscLayoutSetBlockSize(layout, matproperties[2]));
  PetscCall(PetscLayoutSetSize(layout, matproperties[1]));
  PetscCall(PetscLayoutSetUp(layout));
  PetscCall(PetscLayoutGetLocalSize(layout, &n));
  PetscCall(PetscLayoutDestroy(&layout));

  /* determine ownership ranges of matrix rows */
  PetscCall(PetscLayoutCreate(comm, &layout));
  PetscCall(PetscLayoutSetBlockSize(layout, matproperties[2]));
  PetscCall(PetscLayoutSetSize(layout, matproperties[0]));
  PetscCall(PetscLayoutSetUp(layout));
  PetscCall(PetscLayoutGetLocalSize(layout, &m));
  PetscCall(PetscLayoutGetRange(layout, &rstart, &rend));

  PetscCall(PetscLogEventBegin(EventServerDistMPI, NULL, NULL, NULL, NULL));
  /* copy over the matrix nonzero structure and values */
  if (pc) {
    PetscCall(MatGetRowIJ(sA, 0, PETSC_FALSE, PETSC_FALSE, NULL, &IA, &JA, NULL));
    if (!PCMPIServerUseShmget) {
      /* precompute scatterv counts/displacements: sendcounti covers the row-pointer slices
         (overlapping by one entry so each rank gets n+1 row pointers), NZ the nonzero counts */
      NZ      = km->NZ;
      NZdispl = km->NZdispl;
      PetscCall(PetscLayoutGetRanges(layout, &range));
      for (i = 0; i < size; i++) {
        sendcounti[i] = 1 + range[i + 1] - range[i];
        NZ[i]         = IA[range[i + 1]] - IA[range[i]];
      }
      displi[0]  = 0;
      NZdispl[0] = 0;
      for (j = 1; j < size; j++) {
        displi[j]  = displi[j - 1] + sendcounti[j - 1] - 1;
        NZdispl[j] = NZdispl[j - 1] + NZ[j - 1];
      }
    }
    PetscCall(MatSeqAIJGetArrayRead(sA, &sa));
  }
  PetscCall(PetscLayoutDestroy(&layout));

  PetscCall(MatCreate(comm, &A));
  if (matproperties[7] > 0) {
    /* broadcast the options prefix of the original matrix to all server ranks */
    PetscMPIInt ni;

    PetscCall(PetscMPIIntCast(matproperties[7] + 1, &ni));
    if (!pc) PetscCall(PetscMalloc1(matproperties[7] + 1, &cprefix));
    PetscCallMPI(MPI_Bcast(cprefix, ni, MPI_CHAR, 0, comm));
    PetscCall(MatSetOptionsPrefix(A, cprefix));
    PetscCall(PetscFree(cprefix));
  }
  PetscCall(MatAppendOptionsPrefix(A, "mpi_linear_solver_server_"));
  PetscCall(MatSetSizes(A, m, n, matproperties[0], matproperties[1]));
  PetscCall(MatSetType(A, MATMPIAIJ));

  if (!PCMPIServerUseShmget) {
    /* ship each rank its slice of the CSR arrays via MPI */
    PetscCallMPI(MPI_Scatter(NZ, 1, MPIU_INT, &nz, 1, MPIU_INT, 0, comm));
    PetscCall(PetscMalloc3(n + 1, &ia, nz, &ja, nz, &a));
    PetscCallMPI(MPIU_Scatterv(IA, sendcounti, displi, MPIU_INT, (void *)ia, n + 1, MPIU_INT, 0, comm));
    PetscCallMPI(MPIU_Scatterv(JA, NZ, NZdispl, MPIU_INT, (void *)ja, nz, MPIU_INT, 0, comm));
    PetscCallMPI(MPIU_Scatterv(sa, NZ, NZdispl, MPIU_SCALAR, (void *)a, nz, MPIU_SCALAR, 0, comm));
  } else {
    /* map rank 0's CSR arrays into this process's address space; ia[0] is this rank's first
       nonzero offset into the global ja/a arrays, rstart its first row */
    const void           *addr[3] = {(const void **)IA, (const void **)JA, (const void **)sa};
    PCMPIServerAddresses *addresses;

    PetscCall(PetscNew(&addresses));
    addresses->n = 3;
    PetscCall(PetscShmgetMapAddresses(comm, addresses->n, addr, addresses->addr));
    ia = rstart + (PetscInt *)addresses->addr[0];
    ja = ia[0] + (PetscInt *)addresses->addr[1];
    a  = ia[0] + (PetscScalar *)addresses->addr[2];
    /* keep the mapped addresses attached to A so PCMPIUpdateMatValues() can reuse them */
    PetscCall(PetscObjectContainerCompose((PetscObject)A, "PCMPIServerAddresses", (void *)addresses, PCMPIServerAddressesDestroy));
  }

  if (pc) {
    PetscCall(MatSeqAIJRestoreArrayRead(sA, &sa));
    PetscCall(MatRestoreRowIJ(sA, 0, PETSC_FALSE, PETSC_FALSE, NULL, &IA, &JA, NULL));
  }
  PetscCall(PetscLogEventEnd(EventServerDistMPI, NULL, NULL, NULL, NULL));

  PetscCall(PetscLogStagePush(PCMPIStage));
  PetscCall(MatMPIAIJSetPreallocationCSR(A, ia, ja, a));
  PetscCall(MatSetBlockSize(A, matproperties[2]));

  if (matproperties[3]) PetscCall(MatSetOption(A, MAT_SYMMETRIC, matproperties[3] == 1 ? PETSC_TRUE : PETSC_FALSE));
  if (matproperties[4]) PetscCall(MatSetOption(A, MAT_HERMITIAN, matproperties[4] == 1 ? PETSC_TRUE : PETSC_FALSE));
  if (matproperties[5]) PetscCall(MatSetOption(A, MAT_SPD, matproperties[5] == 1 ? PETSC_TRUE : PETSC_FALSE));
  if (matproperties[6]) PetscCall(MatSetOption(A, MAT_STRUCTURALLY_SYMMETRIC, matproperties[6] == 1 ? PETSC_TRUE : PETSC_FALSE));

  if (!PCMPIServerUseShmget) PetscCall(PetscFree3(ia, ja, a));
  PetscCall(KSPSetOperators(ksp, A, A));
  if (!ksp->vec_sol) PetscCall(MatCreateVecs(A, &ksp->vec_sol, &ksp->vec_rhs));
  PetscCall(PetscLogStagePop());
  if (pc && !PCMPIServerUseShmget) { /* needed for scatterv/gatherv of rhs and solution */
    const PetscInt *range;

    PetscCall(VecGetOwnershipRanges(ksp->vec_sol, &range));
    for (i = 0; i < size; i++) {
      km->sendcount[i] = range[i + 1] - range[i];
      km->displ[i]     = range[i];
    }
  }
  PetscCall(MatDestroy(&A)); /* the KSP retains its own reference to A */
  PetscCall(PetscLogEventEnd(EventServerDist, NULL, NULL, NULL, NULL));
  PetscCall(KSPSetFromOptions(ksp));
  PCMPIServerInSolve = PETSC_FALSE;
  PetscFunctionReturn(PETSC_SUCCESS);
}
312f1f2ae84SBarry Smith
/*
  PCMPIUpdateMatValues - refreshes the numerical values of the server-side MATMPIAIJ from the
  sequential matrix on rank 0; the nonzero pattern is assumed unchanged since PCMPISetMat()

  Collective on PC_MPI_COMM_WORLD; ranks holding no KSP return immediately. Known symmetry
  flags are re-broadcast since a new factorization will follow.
*/
static PetscErrorCode PCMPIUpdateMatValues(PC pc)
{
  PC_MPI            *km = pc ? (PC_MPI *)pc->data : NULL;
  KSP                ksp;
  Mat                sA, A;
  MPI_Comm           comm = PC_MPI_COMM_WORLD;
  const PetscInt    *ia, *IA;
  const PetscScalar *a;
  PetscCount         nz;
  const PetscScalar *sa = NULL;
  PetscMPIInt        size;
  PetscInt           rstart, matproperties[4] = {0, 0, 0, 0};

  PetscFunctionBegin;
  if (pc) {
    PetscCall(PCGetOperators(pc, &sA, &sA));
    PetscCall(MatSeqAIJGetArrayRead(sA, &sa));
    PetscCall(MatGetRowIJ(sA, 0, PETSC_FALSE, PETSC_FALSE, NULL, &IA, NULL, NULL));
  }
  /* rank 0 routes each server rank its KSP address; NULL means this rank is idle */
  PetscCallMPI(MPI_Scatter(pc ? km->ksps : NULL, 1, MPI_AINT, &ksp, 1, MPI_AINT, 0, comm));
  if (!ksp) PetscFunctionReturn(PETSC_SUCCESS);
  PCMPIServerInSolve = PETSC_TRUE;
  PetscCall(PetscLogEventBegin(EventServerDist, NULL, NULL, NULL, NULL));
  PetscCall(PetscObjectGetComm((PetscObject)ksp, &comm));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PCMPIMatCounts[size - 1]++;
  PetscCall(KSPGetOperators(ksp, NULL, &A));
  PetscCall(PetscLogEventBegin(EventServerDistMPI, NULL, NULL, NULL, NULL));
  if (!PCMPIServerUseShmget) {
    /* scatter only the numerical values, reusing the counts computed in PCMPISetMat() */
    PetscInt petsc_nz;

    PetscCall(MatMPIAIJGetNumberNonzeros(A, &nz));
    PetscCall(PetscIntCast(nz, &petsc_nz));
    PetscCall(PetscMalloc1(nz, &a));
    PetscCallMPI(MPIU_Scatterv(sa, pc ? km->NZ : NULL, pc ? km->NZdispl : NULL, MPIU_SCALAR, (void *)a, petsc_nz, MPIU_SCALAR, 0, comm));
  } else {
    /* reuse the shared-memory addresses attached to A by PCMPISetMat(); ia[0] is this rank's
       first nonzero offset into the global values array */
    PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
    PCMPIServerAddresses *addresses;
    PetscCall(PetscObjectContainerQuery((PetscObject)A, "PCMPIServerAddresses", &addresses));
    ia = rstart + (PetscInt *)addresses->addr[0];
    a  = ia[0] + (PetscScalar *)addresses->addr[2];
  }
  PetscCall(PetscLogEventEnd(EventServerDistMPI, NULL, NULL, NULL, NULL));
  if (pc) {
    PetscBool isset, issymmetric, ishermitian, isspd, isstructurallysymmetric;

    PetscCall(MatSeqAIJRestoreArrayRead(sA, &sa));
    PetscCall(MatRestoreRowIJ(sA, 0, PETSC_FALSE, PETSC_FALSE, NULL, &IA, NULL, NULL));

    /* re-collect the property flags (0 = unknown, 1 = true, 2 = false); new values may have changed them */
    PetscCall(MatIsSymmetricKnown(sA, &isset, &issymmetric));
    matproperties[0] = !isset ? 0 : (issymmetric ? 1 : 2);
    PetscCall(MatIsHermitianKnown(sA, &isset, &ishermitian));
    matproperties[1] = !isset ? 0 : (ishermitian ? 1 : 2);
    PetscCall(MatIsSPDKnown(sA, &isset, &isspd));
    matproperties[2] = !isset ? 0 : (isspd ? 1 : 2);
    PetscCall(MatIsStructurallySymmetricKnown(sA, &isset, &isstructurallysymmetric));
    matproperties[3] = !isset ? 0 : (isstructurallysymmetric ? 1 : 2);
  }
  PetscCall(MatUpdateMPIAIJWithArray(A, a));
  if (!PCMPIServerUseShmget) PetscCall(PetscFree(a));
  PetscCallMPI(MPI_Bcast(matproperties, 4, MPIU_INT, 0, comm));
  /* if any of these properties was previously set and is now not set this will result in incorrect properties in A since there is no way to unset a property */
  if (matproperties[0]) PetscCall(MatSetOption(A, MAT_SYMMETRIC, matproperties[0] == 1 ? PETSC_TRUE : PETSC_FALSE));
  if (matproperties[1]) PetscCall(MatSetOption(A, MAT_HERMITIAN, matproperties[1] == 1 ? PETSC_TRUE : PETSC_FALSE));
  if (matproperties[2]) PetscCall(MatSetOption(A, MAT_SPD, matproperties[2] == 1 ? PETSC_TRUE : PETSC_FALSE));
  if (matproperties[3]) PetscCall(MatSetOption(A, MAT_STRUCTURALLY_SYMMETRIC, matproperties[3] == 1 ? PETSC_TRUE : PETSC_FALSE));
  PetscCall(PetscLogEventEnd(EventServerDist, NULL, NULL, NULL, NULL));
  PCMPIServerInSolve = PETSC_FALSE;
  PetscFunctionReturn(PETSC_SUCCESS);
}
383f1f2ae84SBarry Smith
/*
  PCMPISolve - runs a parallel KSPSolve() on the server ranks for the right-hand side B
  supplied on rank 0, gathering the solution back into X on rank 0

  Collective on PC_MPI_COMM_WORLD; B and X are meaningful only on rank 0 (pc != NULL).
  With MPI transport the rhs is scattered into ksp->vec_rhs and the solution gathered from
  ksp->vec_sol; with shmget transport rank 0's B/X arrays are mapped directly and placed
  into the KSP vectors, so no data copy is needed.
*/
static PetscErrorCode PCMPISolve(PC pc, Vec B, Vec X)
{
  PC_MPI            *km = pc ? (PC_MPI *)pc->data : NULL;
  KSP                ksp;
  MPI_Comm           comm = PC_MPI_COMM_WORLD;
  const PetscScalar *sb = NULL, *x;
  PetscScalar       *b, *sx = NULL;
  PetscInt           its, n;
  PetscMPIInt        size;
  void              *addr[2];

  PetscFunctionBegin;
  /* rank 0 routes each server rank its KSP address; NULL means this rank is idle */
  PetscCallMPI(MPI_Scatter(pc ? km->ksps : &ksp, 1, MPI_AINT, &ksp, 1, MPI_AINT, 0, comm));
  if (!ksp) PetscFunctionReturn(PETSC_SUCCESS);
  PCMPIServerInSolve = PETSC_TRUE;
  PetscCall(PetscLogEventBegin(EventServerDist, NULL, NULL, NULL, NULL));
  PetscCall(PetscObjectGetComm((PetscObject)ksp, &comm));

  /* scatterv rhs */
  PetscCallMPI(MPI_Comm_size(comm, &size));
  if (pc) {
    PetscInt N;

    PCMPISolveCounts[size - 1]++;
    PCMPISizes[size - 1] += N;
    PetscCall(MatGetSize(pc->pmat, &N, NULL));
  }
  PetscCall(VecGetLocalSize(ksp->vec_rhs, &n));
  PetscCall(PetscLogEventBegin(EventServerDistMPI, NULL, NULL, NULL, NULL));
  if (!PCMPIServerUseShmget) {
    PetscCall(VecGetArray(ksp->vec_rhs, &b));
    if (pc) PetscCall(VecGetArrayRead(B, &sb));
    PetscCallMPI(MPIU_Scatterv(sb, pc ? km->sendcount : NULL, pc ? km->displ : NULL, MPIU_SCALAR, b, n, MPIU_SCALAR, 0, comm));
    if (pc) PetscCall(VecRestoreArrayRead(B, &sb));
    PetscCall(VecRestoreArray(ksp->vec_rhs, &b));
    // TODO: scatter initial guess if needed
  } else {
    PetscInt rstart;

    /* get/restore is only used to obtain the raw addresses of B and X on rank 0 */
    if (pc) PetscCall(VecGetArrayRead(B, &sb));
    if (pc) PetscCall(VecGetArray(X, &sx));
    const void *inaddr[2] = {(const void **)sb, (const void **)sx};
    if (pc) PetscCall(VecRestoreArray(X, &sx));
    if (pc) PetscCall(VecRestoreArrayRead(B, &sb));

    /* map rank 0's rhs and solution storage into this process and use it in place */
    PetscCall(PetscShmgetMapAddresses(comm, 2, inaddr, addr));
    PetscCall(VecGetOwnershipRange(ksp->vec_rhs, &rstart, NULL));
    PetscCall(VecPlaceArray(ksp->vec_rhs, rstart + (PetscScalar *)addr[0]));
    PetscCall(VecPlaceArray(ksp->vec_sol, rstart + (PetscScalar *)addr[1]));
  }
  PetscCall(PetscLogEventEnd(EventServerDistMPI, NULL, NULL, NULL, NULL));

  PetscCall(PetscLogEventEnd(EventServerDist, NULL, NULL, NULL, NULL));
  PetscCall(PetscLogStagePush(PCMPIStage));
  PetscCall(KSPSolve(ksp, NULL, NULL)); /* NULL, NULL: solve with the vectors already held by the KSP */
  PetscCall(PetscLogStagePop());
  PetscCall(PetscLogEventBegin(EventServerDist, NULL, NULL, NULL, NULL));
  PetscCall(KSPGetIterationNumber(ksp, &its));
  PCMPIIterations[size - 1] += its;
  // TODO: send iterations up to outer KSP

  if (PCMPIServerUseShmget) PetscCall(PetscShmgetUnmapAddresses(2, addr));

  /* gather solution */
  PetscCall(PetscLogEventBegin(EventServerDistMPI, NULL, NULL, NULL, NULL));
  if (!PCMPIServerUseShmget) {
    PetscCall(VecGetArrayRead(ksp->vec_sol, &x));
    if (pc) PetscCall(VecGetArray(X, &sx));
    PetscCallMPI(MPIU_Gatherv(x, n, MPIU_SCALAR, sx, pc ? km->sendcount : NULL, pc ? km->displ : NULL, MPIU_SCALAR, 0, comm));
    if (pc) PetscCall(VecRestoreArray(X, &sx));
    PetscCall(VecRestoreArrayRead(ksp->vec_sol, &x));
  } else {
    /* barrier ensures all ranks are done writing the shared solution before rank 0 reads it */
    PetscCallMPI(MPI_Barrier(comm));
    PetscCall(VecResetArray(ksp->vec_rhs));
    PetscCall(VecResetArray(ksp->vec_sol));
  }
  PetscCall(PetscLogEventEnd(EventServerDistMPI, NULL, NULL, NULL, NULL));
  PetscCall(PetscLogEventEnd(EventServerDist, NULL, NULL, NULL, NULL));
  PCMPIServerInSolve = PETSC_FALSE;
  PetscFunctionReturn(PETSC_SUCCESS);
}
465f1f2ae84SBarry Smith
PCMPIDestroy(PC pc)466d71ae5a4SJacob Faibussowitsch static PetscErrorCode PCMPIDestroy(PC pc)
467d71ae5a4SJacob Faibussowitsch {
468f1f2ae84SBarry Smith PC_MPI *km = pc ? (PC_MPI *)pc->data : NULL;
469f1f2ae84SBarry Smith KSP ksp;
470f1f2ae84SBarry Smith MPI_Comm comm = PC_MPI_COMM_WORLD;
471f1f2ae84SBarry Smith
472f1f2ae84SBarry Smith PetscFunctionBegin;
473f1f2ae84SBarry Smith PetscCallMPI(MPI_Scatter(pc ? km->ksps : NULL, 1, MPI_AINT, &ksp, 1, MPI_AINT, 0, comm));
4743ba16761SJacob Faibussowitsch if (!ksp) PetscFunctionReturn(PETSC_SUCCESS);
475c7d372c4SBarry Smith PetscCall(PetscLogStagePush(PCMPIStage));
4769f0612e4SBarry Smith PCMPIServerInSolve = PETSC_TRUE;
477f1f2ae84SBarry Smith PetscCall(KSPDestroy(&ksp));
478c7d372c4SBarry Smith PetscCall(PetscLogStagePop());
4799f0612e4SBarry Smith PCMPIServerInSolve = PETSC_FALSE;
4803ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
481f1f2ae84SBarry Smith }
482f1f2ae84SBarry Smith
/*
  PCMPIServerBroadcastRequest - rank 0 broadcasts the next PCMPICommand to the server ranks

  When shared memory is in use, rank 0 first releases the per-rank pthread mutexes
  (presumably the server ranks wait on these between commands -- the receiving side is not
  visible in this file chunk), performs the broadcast, then waits for a reduction from all
  servers confirming receipt before reacquiring the locks.
*/
static PetscErrorCode PCMPIServerBroadcastRequest(PCMPICommand request)
{
#if defined(PETSC_HAVE_PTHREAD_MUTEX)
  PetscMPIInt dummy1 = 1, dummy2;
#endif

  PetscFunctionBegin;
#if defined(PETSC_HAVE_PTHREAD_MUTEX)
  if (PCMPIServerUseShmget) {
    /* let the waiting server ranks proceed to receive the broadcast */
    for (PetscMPIInt i = 1; i < PetscGlobalSize; i++) pthread_mutex_unlock(&PCMPIServerLocks[i]);
  }
#endif
  PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, MPI_COMM_WORLD));
  /* next line ensures the sender has already taken the lock */
#if defined(PETSC_HAVE_PTHREAD_MUTEX)
  if (PCMPIServerUseShmget) {
    /* the reduction completes only after every server rank has obtained the request */
    PetscCallMPI(MPI_Reduce(&dummy1, &dummy2, 1, MPI_INT, MPI_SUM, 0, PC_MPI_COMM_WORLD));
    for (PetscMPIInt i = 1; i < PetscGlobalSize; i++) pthread_mutex_lock(&PCMPIServerLocks[i]);
  }
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}
5053821be0aSBarry Smith
506f1f2ae84SBarry Smith /*@C
5077a99bfcaSBarry Smith PCMPIServerBegin - starts a server that runs on the `rank != 0` MPI processes waiting to process requests for
508f1580f4eSBarry Smith parallel `KSP` solves and management of parallel `KSP` objects.
509f1f2ae84SBarry Smith
5103821be0aSBarry Smith Logically Collective on all MPI processes except rank 0
511f1f2ae84SBarry Smith
512f1580f4eSBarry Smith Options Database Keys:
513f1f2ae84SBarry Smith + -mpi_linear_solver_server - causes the PETSc program to start in MPI linear solver server mode where only the first MPI rank runs user code
5149f0612e4SBarry Smith . -mpi_linear_solver_server_view - displays information about all the linear systems solved by the MPI linear solver server at the conclusion of the program
5159f0612e4SBarry Smith - -mpi_linear_solver_server_use_shared_memory - use shared memory when communicating matrices and vectors to server processes (default where supported)
516f1f2ae84SBarry Smith
51720f4b53cSBarry Smith Level: developer
51820f4b53cSBarry Smith
519f1580f4eSBarry Smith Note:
520f1f2ae84SBarry Smith This is normally started automatically in `PetscInitialize()` when the option is provided
521f1f2ae84SBarry Smith
5223821be0aSBarry Smith See `PCMPI` for information on using the solver with a `KSP` object
5233821be0aSBarry Smith
5247255af2bSBarry Smith See `PetscShmgetAllocateArray()` for instructions on how to ensure the shared memory is available on your machine.
5257255af2bSBarry Smith
526f1f2ae84SBarry Smith Developer Notes:
5273821be0aSBarry Smith When called on MPI rank 0 this sets `PETSC_COMM_WORLD` to `PETSC_COMM_SELF` to allow a main program
528f1580f4eSBarry Smith written with `PETSC_COMM_WORLD` to run correctly on the single rank while all the ranks
529f1580f4eSBarry Smith (that would normally be sharing `PETSC_COMM_WORLD`) to run the solver server.
530f1f2ae84SBarry Smith
531f1580f4eSBarry Smith Can this be integrated into the `PetscDevice` abstraction that is currently being developed?
532f1f2ae84SBarry Smith
5333821be0aSBarry Smith Conceivably `PCREDISTRIBUTE` could be organized in a similar manner to simplify its usage
5343821be0aSBarry Smith
5353821be0aSBarry Smith This could be implemented directly at the `KSP` level instead of using the `PCMPI` wrapper object
5363821be0aSBarry Smith
537baca6076SPierre Jolivet The code could be extended to allow an MPI + OpenMP application to use the linear solver server concept across all shared-memory
5383821be0aSBarry Smith nodes with a single MPI process per node for the user application but multiple MPI processes per node for the linear solver.
5393821be0aSBarry Smith
  The concept could also be extended for users' callbacks for `SNES`, `TS`, and `Tao` where, for example, the `SNESSolve()` runs on
  all MPI processes but the user callback only runs on one MPI process per node.
5423821be0aSBarry Smith
  PETSc could also be extended with an MPI-less API that provides access to PETSc's solvers without any reference to MPI, essentially removing
  the `MPI_Comm` argument from PETSc calls.
5453821be0aSBarry Smith
5463821be0aSBarry Smith .seealso: [](sec_pcmpi), `PCMPIServerEnd()`, `PCMPI`, `KSPCheckPCMPI()`
547f1f2ae84SBarry Smith @*/
PetscErrorCode PCMPIServerBegin(void)
{
  PetscMPIInt rank;

  PetscFunctionBegin;
  PetscCall(PetscInfo(NULL, "Starting MPI Linear Solver Server\n"));
  /* with a single combined PETSc library the per-package registration is not triggered automatically, so do it here */
  if (PetscDefined(USE_SINGLE_LIBRARY)) {
    PetscCall(VecInitializePackage());
    PetscCall(MatInitializePackage());
    PetscCall(DMInitializePackage());
    PetscCall(PCInitializePackage());
    PetscCall(KSPInitializePackage());
    PetscCall(SNESInitializePackage());
    PetscCall(TSInitializePackage());
    PetscCall(TaoInitializePackage());
  }
  PetscCall(PetscLogStageRegister("PCMPI", &PCMPIStage));
  PetscCall(PetscLogEventRegister("ServerDist", PC_CLASSID, &EventServerDist));
  PetscCall(PetscLogEventRegister("ServerDistMPI", PC_CLASSID, &EventServerDistMPI));

  /* shared-memory communication requires shmget(); otherwise fall back to MPI messaging */
  if (!PetscDefined(HAVE_SHMGET)) PCMPIServerUseShmget = PETSC_FALSE;
  PetscCall(PetscOptionsGetBool(NULL, NULL, "-mpi_linear_solver_server_use_shared_memory", &PCMPIServerUseShmget, NULL));

  PetscCallMPI(MPI_Comm_rank(PC_MPI_COMM_WORLD, &rank));
  if (PCMPIServerUseShmget) {
#if defined(PETSC_HAVE_PTHREAD_MUTEX)
    PetscMPIInt size;

    PetscCallMPI(MPI_Comm_size(PETSC_COMM_WORLD, &size));
    if (size > 1) {
      pthread_mutex_t *locks;

      if (rank == 0) {
        PCMPIServerActive = PETSC_TRUE;
        /* one process-shared mutex per rank, allocated in shared memory by rank 0 only */
        PetscCall(PetscShmgetAllocateArray(size, sizeof(pthread_mutex_t), (void **)&locks));
      }
      /* map rank 0's shared lock array into every process's address space as PCMPIServerLocks */
      PetscCall(PetscShmgetMapAddresses(PETSC_COMM_WORLD, 1, (const void **)&locks, (void **)&PCMPIServerLocks));
      if (rank == 0) {
        pthread_mutexattr_t attr;

        pthread_mutexattr_init(&attr);
        pthread_mutexattr_setpshared(&attr, PTHREAD_PROCESS_SHARED); /* mutexes are shared between processes, not just threads */

        /* each server rank's lock starts out held by rank 0; PCMPIServerBroadcastRequest() releases it to dispatch a command */
        for (int i = 1; i < size; i++) {
          pthread_mutex_init(&PCMPIServerLocks[i], &attr);
          pthread_mutex_lock(&PCMPIServerLocks[i]);
        }
      }
      PetscCallMPI(MPI_Barrier(PETSC_COMM_WORLD));
    }
#endif
  }
  if (rank == 0) {
    /* the client (user) code runs only on rank 0; give it a uniprocessor PETSC_COMM_WORLD and return to it */
    PETSC_COMM_WORLD  = PETSC_COMM_SELF;
    PCMPIServerActive = PETSC_TRUE;
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  /* every other rank stays in this loop for the rest of the run, servicing commands broadcast from rank 0 */
  while (PETSC_TRUE) {
    PCMPICommand request = PCMPI_CREATE;
#if defined(PETSC_HAVE_PTHREAD_MUTEX)
    PetscMPIInt dummy1 = 1, dummy2;
#endif

    // TODO: can we broadcast the number of active ranks here so only the correct subset of processes waits on the later scatters?
#if defined(PETSC_HAVE_PTHREAD_MUTEX)
    /* sleep on our lock (instead of busy-waiting inside MPI_Bcast()) until rank 0 dispatches a command */
    if (PCMPIServerUseShmget) pthread_mutex_lock(&PCMPIServerLocks[PetscGlobalRank]);
#endif
    PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, PC_MPI_COMM_WORLD));
#if defined(PETSC_HAVE_PTHREAD_MUTEX)
    if (PCMPIServerUseShmget) {
      /* next line ensures PetscGlobalRank has locked before rank 0 can take the lock back */
      PetscCallMPI(MPI_Reduce(&dummy1, &dummy2, 1, MPI_INT, MPI_SUM, 0, PC_MPI_COMM_WORLD));
      pthread_mutex_unlock(&PCMPIServerLocks[PetscGlobalRank]);
    }
#endif
    /* server-side handlers are called with a NULL PC; they receive what they need from rank 0 */
    switch (request) {
    case PCMPI_CREATE:
      PetscCall(PCMPICreate(NULL));
      break;
    case PCMPI_SET_MAT:
      PetscCall(PCMPISetMat(NULL));
      break;
    case PCMPI_UPDATE_MAT_VALUES:
      PetscCall(PCMPIUpdateMatValues(NULL));
      break;
    case PCMPI_VIEW:
      // PetscCall(PCMPIView(NULL));
      break;
    case PCMPI_SOLVE:
      PetscCall(PCMPISolve(NULL, NULL, NULL));
      break;
    case PCMPI_DESTROY:
      PetscCall(PCMPIDestroy(NULL));
      break;
    case PCMPI_EXIT:
      if (PCMPIServerUseShmget) PetscCall(PetscShmgetUnmapAddresses(1, (void **)&PCMPIServerLocks));
      PetscCall(PetscFinalize());
      exit(0); /* not sure if this is a good idea, but cannot return because it will run users main program */
      break;
    default:
      break;
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
654f1f2ae84SBarry Smith
655f1f2ae84SBarry Smith /*@C
656f1f2ae84SBarry Smith PCMPIServerEnd - ends a server that runs on the rank != 0 MPI processes waiting to process requests for
657f1580f4eSBarry Smith parallel KSP solves and management of parallel `KSP` objects.
658f1f2ae84SBarry Smith
65920f4b53cSBarry Smith Logically Collective on all MPI ranks except 0
66020f4b53cSBarry Smith
66120f4b53cSBarry Smith Level: developer
662f1f2ae84SBarry Smith
663f1580f4eSBarry Smith Note:
6649f0612e4SBarry Smith This is normally called automatically in `PetscFinalize()`
665f1f2ae84SBarry Smith
6663821be0aSBarry Smith .seealso: [](sec_pcmpi), `PCMPIServerBegin()`, `PCMPI`, `KSPCheckPCMPI()`
667f1f2ae84SBarry Smith @*/
PetscErrorCode PCMPIServerEnd(void)
{
  PetscFunctionBegin;
  if (PetscGlobalRank == 0) {
    PetscViewer       viewer = NULL;
    PetscViewerFormat format;

    /* release shared-memory mappings, then tell the server ranks to finalize PETSc and exit */
    PetscCall(PetscShmgetAddressesFinalize());
    PetscCall(PCMPIServerBroadcastRequest(PCMPI_EXIT));
    if (PCMPIServerUseShmget) PetscCall(PetscShmgetUnmapAddresses(1, (void **)&PCMPIServerLocks));
    /* restore the world communicator that PCMPIServerBegin() replaced with PETSC_COMM_SELF */
    PETSC_COMM_WORLD = MPI_COMM_WORLD; /* could use PC_MPI_COMM_WORLD */
    PetscOptionsBegin(PETSC_COMM_SELF, NULL, "MPI linear solver server options", NULL);
    PetscCall(PetscOptionsViewer("-mpi_linear_solver_server_view", "View information about system solved with the server", "PCMPI", &viewer, &format, NULL));
    PetscOptionsEnd();
    if (viewer) {
      PetscBool isascii;

      /* statistics output is only implemented for ASCII viewers */
      PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii));
      if (isascii) {
        PetscMPIInt size;
        PetscMPIInt i;

        PetscCallMPI(MPI_Comm_size(PETSC_COMM_WORLD, &size));
        PetscCall(PetscViewerASCIIPrintf(viewer, "MPI linear solver server statistics:\n"));
        PetscCall(PetscViewerASCIIPrintf(viewer, "    Ranks        KSPSolve()s     Mats        KSPs       Avg. Size      Avg. Its\n"));
        /* systems small enough to be solved directly on rank 0 are tallied separately */
        if (PCMPIKSPCountsSeq) {
          PetscCall(PetscViewerASCIIPrintf(viewer, "  Sequential         %" PetscInt_FMT "                         %" PetscInt_FMT "            %" PetscInt_FMT "           %" PetscInt_FMT "\n", PCMPISolveCountsSeq, PCMPIKSPCountsSeq, PCMPISizesSeq / PCMPISolveCountsSeq, PCMPIIterationsSeq / PCMPISolveCountsSeq));
        }
        /* one line per number of ranks (i + 1) that participated in parallel solves */
        for (i = 0; i < size; i++) {
          if (PCMPIKSPCounts[i]) {
            PetscCall(PetscViewerASCIIPrintf(viewer, "     %d               %" PetscInt_FMT "            %" PetscInt_FMT "           %" PetscInt_FMT "            %" PetscInt_FMT "           %" PetscInt_FMT "\n", i + 1, PCMPISolveCounts[i], PCMPIMatCounts[i], PCMPIKSPCounts[i], PCMPISizes[i] / PCMPISolveCounts[i], PCMPIIterations[i] / PCMPISolveCounts[i]));
          }
        }
        PetscCall(PetscViewerASCIIPrintf(viewer, "MPI linear solver server %susing shared memory\n", PCMPIServerUseShmget ? "" : "not "));
      }
      PetscCall(PetscViewerDestroy(&viewer));
    }
  }
  PetscCall(PCMPICommsDestroy());
  PCMPIServerActive = PETSC_FALSE;
  PetscFunctionReturn(PETSC_SUCCESS);
}
710f1f2ae84SBarry Smith
711f1f2ae84SBarry Smith /*
712f1f2ae84SBarry Smith This version is used in the trivial case when the MPI parallel solver server is running on just the original MPI rank 0
713f1f2ae84SBarry Smith because, for example, the problem is small. This version is more efficient because it does not require copying any data
714f1f2ae84SBarry Smith */
PCSetUp_Seq(PC pc)715d71ae5a4SJacob Faibussowitsch static PetscErrorCode PCSetUp_Seq(PC pc)
716d71ae5a4SJacob Faibussowitsch {
717f1f2ae84SBarry Smith PC_MPI *km = (PC_MPI *)pc->data;
718f1f2ae84SBarry Smith Mat sA;
719f1f2ae84SBarry Smith const char *prefix;
720f9818f3cSJose E. Roman char *found = NULL, *cprefix;
721f1f2ae84SBarry Smith
722f1f2ae84SBarry Smith PetscFunctionBegin;
7239f0612e4SBarry Smith PCMPIServerInSolve = PETSC_TRUE;
724f1f2ae84SBarry Smith PetscCall(PCGetOperators(pc, NULL, &sA));
725f1f2ae84SBarry Smith PetscCall(PCGetOptionsPrefix(pc, &prefix));
726f1f2ae84SBarry Smith PetscCall(KSPCreate(PETSC_COMM_SELF, &km->ksps[0]));
7273821be0aSBarry Smith PetscCall(KSPSetNestLevel(km->ksps[0], 1));
7283821be0aSBarry Smith PetscCall(PetscObjectSetTabLevel((PetscObject)km->ksps[0], 1));
7293821be0aSBarry Smith
7303821be0aSBarry Smith /* Created KSP gets prefix of PC minus the mpi_linear_solver_server_ portion */
7313821be0aSBarry Smith PetscCall(PCGetOptionsPrefix(pc, &prefix));
7323821be0aSBarry Smith PetscCheck(prefix, PETSC_COMM_SELF, PETSC_ERR_PLIB, "PCMPI missing required prefix");
7333821be0aSBarry Smith PetscCall(PetscStrallocpy(prefix, &cprefix));
7343821be0aSBarry Smith PetscCall(PetscStrstr(cprefix, "mpi_linear_solver_server_", &found));
7353821be0aSBarry Smith PetscCheck(found, PETSC_COMM_SELF, PETSC_ERR_PLIB, "PCMPI missing mpi_linear_solver_server_ portion of prefix");
7363821be0aSBarry Smith *found = 0;
7373821be0aSBarry Smith PetscCall(KSPSetOptionsPrefix(km->ksps[0], cprefix));
7383821be0aSBarry Smith PetscCall(PetscFree(cprefix));
7393821be0aSBarry Smith
740f1f2ae84SBarry Smith PetscCall(KSPSetOperators(km->ksps[0], sA, sA));
741f1f2ae84SBarry Smith PetscCall(KSPSetFromOptions(km->ksps[0]));
742f1f2ae84SBarry Smith PetscCall(KSPSetUp(km->ksps[0]));
743835f2295SStefano Zampini PetscCall(PetscInfo(pc, "MPI parallel linear solver system is being solved directly on rank 0 due to its small size\n"));
744f1f2ae84SBarry Smith PCMPIKSPCountsSeq++;
7459f0612e4SBarry Smith PCMPIServerInSolve = PETSC_FALSE;
7463ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
747f1f2ae84SBarry Smith }
748f1f2ae84SBarry Smith
PCApply_Seq(PC pc,Vec b,Vec x)749d71ae5a4SJacob Faibussowitsch static PetscErrorCode PCApply_Seq(PC pc, Vec b, Vec x)
750d71ae5a4SJacob Faibussowitsch {
751f1f2ae84SBarry Smith PC_MPI *km = (PC_MPI *)pc->data;
7525316cbedSBarry Smith PetscInt its, n;
7535316cbedSBarry Smith Mat A;
754f1f2ae84SBarry Smith
755f1f2ae84SBarry Smith PetscFunctionBegin;
7569f0612e4SBarry Smith PCMPIServerInSolve = PETSC_TRUE;
757f1f2ae84SBarry Smith PetscCall(KSPSolve(km->ksps[0], b, x));
7585316cbedSBarry Smith PetscCall(KSPGetIterationNumber(km->ksps[0], &its));
759f1f2ae84SBarry Smith PCMPISolveCountsSeq++;
7605316cbedSBarry Smith PCMPIIterationsSeq += its;
7615316cbedSBarry Smith PetscCall(KSPGetOperators(km->ksps[0], NULL, &A));
7625316cbedSBarry Smith PetscCall(MatGetSize(A, &n, NULL));
7635316cbedSBarry Smith PCMPISizesSeq += n;
7649f0612e4SBarry Smith PCMPIServerInSolve = PETSC_FALSE;
7659f0612e4SBarry Smith /*
7669f0612e4SBarry Smith do not keep reference to previous rhs and solution since destroying them in the next KSPSolve()
7679f0612e4SBarry Smith my use PetscFree() instead of PCMPIArrayDeallocate()
7689f0612e4SBarry Smith */
7699f0612e4SBarry Smith PetscCall(VecDestroy(&km->ksps[0]->vec_rhs));
7709f0612e4SBarry Smith PetscCall(VecDestroy(&km->ksps[0]->vec_sol));
7713ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
772f1f2ae84SBarry Smith }
773f1f2ae84SBarry Smith
PCView_Seq(PC pc,PetscViewer viewer)774d71ae5a4SJacob Faibussowitsch static PetscErrorCode PCView_Seq(PC pc, PetscViewer viewer)
775d71ae5a4SJacob Faibussowitsch {
776f1f2ae84SBarry Smith PC_MPI *km = (PC_MPI *)pc->data;
777f1f2ae84SBarry Smith
778f1f2ae84SBarry Smith PetscFunctionBegin;
779f1f2ae84SBarry Smith PetscCall(PetscViewerASCIIPrintf(viewer, "Running MPI linear solver server directly on rank 0 due to its small size\n"));
780835f2295SStefano Zampini PetscCall(PetscViewerASCIIPrintf(viewer, "Desired minimum number of nonzeros per rank for MPI parallel solve %" PetscInt_FMT "\n", km->mincntperrank));
7815316cbedSBarry Smith PetscCall(PetscViewerASCIIPrintf(viewer, "*** Use -mpi_linear_solver_server_view to statistics on all the solves ***\n"));
7823ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
783f1f2ae84SBarry Smith }
784f1f2ae84SBarry Smith
/*
  PCDestroy_Seq - destroys the PC data for the trivial (rank 0 only) server case

  The inner KSP shares its matrices and vectors with the outer user-level KSP, so extra references are
  taken before KSPDestroy() and the objects are destroyed only after PCMPIServerInSolve is reset; the
  flag controls which deallocation path is used, and these objects must not be freed with PetscFree().
*/
static PetscErrorCode PCDestroy_Seq(PC pc)
{
  PC_MPI *km = (PC_MPI *)pc->data;
  Mat     A, B;
  Vec     x, b;

  PetscFunctionBegin;
  PCMPIServerInSolve = PETSC_TRUE;
  /* since matrices and vectors are shared with outer KSP we need to ensure they are not destroyed with PetscFree() */
  PetscCall(KSPGetOperators(km->ksps[0], &A, &B));
  PetscCall(PetscObjectReference((PetscObject)A));
  PetscCall(PetscObjectReference((PetscObject)B));
  PetscCall(KSPGetSolution(km->ksps[0], &x));
  PetscCall(PetscObjectReference((PetscObject)x));
  PetscCall(KSPGetRhs(km->ksps[0], &b));
  PetscCall(PetscObjectReference((PetscObject)b));
  PetscCall(KSPDestroy(&km->ksps[0]));
  PetscCall(PetscFree(pc->data));
  PCMPIServerInSolve = PETSC_FALSE;
  /* now drop the extra references; the final destruction happens outside "in solve" mode */
  PetscCall(MatDestroy(&A));
  PetscCall(MatDestroy(&B));
  PetscCall(VecDestroy(&x));
  PetscCall(VecDestroy(&b));
  PetscFunctionReturn(PETSC_SUCCESS);
}
810f1f2ae84SBarry Smith
811f1f2ae84SBarry Smith /*
812f1f2ae84SBarry Smith PCSetUp_MPI - Trigger the creation of the MPI parallel PC and copy parts of the matrix and
813dd8e379bSPierre Jolivet right-hand side to the parallel PC
814f1f2ae84SBarry Smith */
/*
   PCSetUp_MPI - Trigger the creation of the MPI parallel PC and copy parts of the matrix and
   right-hand side to the parallel PC
*/
static PetscErrorCode PCSetUp_MPI(PC pc)
{
  PC_MPI     *km = (PC_MPI *)pc->data;
  PetscMPIInt rank, size;
  PetscBool   newmatrix = PETSC_FALSE;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_rank(MPI_COMM_WORLD, &rank));
  PetscCheck(rank == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "PCMPI can only be used from 0th rank of MPI_COMM_WORLD. Perhaps a missing -mpi_linear_solver_server?");
  PetscCallMPI(MPI_Comm_size(MPI_COMM_WORLD, &size));

  if (!pc->setupcalled) {
    if (!km->alwaysuseserver) {
      PetscInt n;
      Mat      sA;
      /* short circuit for small systems: solve sequentially on rank 0 by swapping in the _Seq function table */
      PetscCall(PCGetOperators(pc, &sA, &sA));
      PetscCall(MatGetSize(sA, &n, NULL));
      if (n < 2 * km->mincntperrank - 1 || size == 1) {
        pc->ops->setup   = NULL;
        pc->ops->apply   = PCApply_Seq;
        pc->ops->destroy = PCDestroy_Seq;
        pc->ops->view    = PCView_Seq;
        PetscCall(PCSetUp_Seq(pc));
        PetscFunctionReturn(PETSC_SUCCESS);
      }
    }

    /* first setup: have the server ranks create their parallel KSP objects */
    PetscCall(PCMPIServerBroadcastRequest(PCMPI_CREATE));
    PetscCall(PCMPICreate(pc));
    newmatrix = PETSC_TRUE;
  }
  if (pc->flag == DIFFERENT_NONZERO_PATTERN) newmatrix = PETSC_TRUE;

  /* a new nonzero structure requires redistributing the full matrix; otherwise only the values are sent */
  if (newmatrix) {
    PetscCall(PetscInfo(pc, "New matrix or matrix has changed nonzero structure\n"));
    PetscCall(PCMPIServerBroadcastRequest(PCMPI_SET_MAT));
    PetscCall(PCMPISetMat(pc));
  } else {
    PetscCall(PetscInfo(pc, "Matrix has only changed nonzero values\n"));
    PetscCall(PCMPIServerBroadcastRequest(PCMPI_UPDATE_MAT_VALUES));
    PetscCall(PCMPIUpdateMatValues(pc));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
860f1f2ae84SBarry Smith
/* PCApply_MPI - dispatches the SOLVE command to the server ranks, then participates in the parallel solve from rank 0 */
static PetscErrorCode PCApply_MPI(PC pc, Vec b, Vec x)
{
  PetscFunctionBegin;
  PetscCall(PCMPIServerBroadcastRequest(PCMPI_SOLVE));
  PetscCall(PCMPISolve(pc, b, x));
  PetscFunctionReturn(PETSC_SUCCESS);
}
868f1f2ae84SBarry Smith
/* PCDestroy_MPI - dispatches the DESTROY command to the server ranks, then destroys the rank 0 side of the parallel objects */
static PetscErrorCode PCDestroy_MPI(PC pc)
{
  PetscFunctionBegin;
  PetscCall(PCMPIServerBroadcastRequest(PCMPI_DESTROY));
  PetscCall(PCMPIDestroy(pc));
  PetscCall(PetscFree(pc->data));
  PetscFunctionReturn(PETSC_SUCCESS);
}
877f1f2ae84SBarry Smith
878f1f2ae84SBarry Smith /*
8799f0612e4SBarry Smith PCView_MPI - Cannot call view on the MPI parallel KSP because other ranks do not have access to the viewer, use options database
880f1f2ae84SBarry Smith */
PCView_MPI(PC pc,PetscViewer viewer)88166976f2fSJacob Faibussowitsch static PetscErrorCode PCView_MPI(PC pc, PetscViewer viewer)
882d71ae5a4SJacob Faibussowitsch {
883f1f2ae84SBarry Smith PC_MPI *km = (PC_MPI *)pc->data;
884f1f2ae84SBarry Smith MPI_Comm comm;
885f1f2ae84SBarry Smith PetscMPIInt size;
886f1f2ae84SBarry Smith
887f1f2ae84SBarry Smith PetscFunctionBegin;
888f1f2ae84SBarry Smith PetscCall(PetscObjectGetComm((PetscObject)km->ksps[0], &comm));
889f1f2ae84SBarry Smith PetscCallMPI(MPI_Comm_size(comm, &size));
890f1f2ae84SBarry Smith PetscCall(PetscViewerASCIIPrintf(viewer, "Size of MPI communicator used for MPI parallel KSP solve %d\n", size));
891835f2295SStefano Zampini PetscCall(PetscViewerASCIIPrintf(viewer, "Desired minimum number of matrix rows on each MPI process for MPI parallel solve %" PetscInt_FMT "\n", km->mincntperrank));
8929f0612e4SBarry Smith PetscCall(PetscViewerASCIIPrintf(viewer, "*** Use -mpi_linear_solver_server_view to view statistics on all the solves ***\n"));
8933ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
894f1f2ae84SBarry Smith }
895f1f2ae84SBarry Smith
PCSetFromOptions_MPI(PC pc,PetscOptionItems PetscOptionsObject)896ce78bad3SBarry Smith static PetscErrorCode PCSetFromOptions_MPI(PC pc, PetscOptionItems PetscOptionsObject)
897d71ae5a4SJacob Faibussowitsch {
898f1f2ae84SBarry Smith PC_MPI *km = (PC_MPI *)pc->data;
899f1f2ae84SBarry Smith
900f1f2ae84SBarry Smith PetscFunctionBegin;
901f1f2ae84SBarry Smith PetscOptionsHeadBegin(PetscOptionsObject, "MPI linear solver server options");
9023821be0aSBarry Smith PetscCall(PetscOptionsInt("-minimum_count_per_rank", "Desired minimum number of nonzeros per rank", "None", km->mincntperrank, &km->mincntperrank, NULL));
9033821be0aSBarry Smith PetscCall(PetscOptionsBool("-always_use_server", "Use the server even if only one rank is used for the solve (for debugging)", "None", km->alwaysuseserver, &km->alwaysuseserver, NULL));
904f1f2ae84SBarry Smith PetscOptionsHeadEnd();
9053ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
906f1f2ae84SBarry Smith }
907f1f2ae84SBarry Smith
908f1f2ae84SBarry Smith /*MC
909f1580f4eSBarry Smith PCMPI - Calls an MPI parallel `KSP` to solve a linear system from user code running on one process
910f1f2ae84SBarry Smith
9113821be0aSBarry Smith Options Database Keys for the Server:
912f1f2ae84SBarry Smith + -mpi_linear_solver_server - causes the PETSc program to start in MPI linear solver server mode where only the first MPI rank runs user code
9139f0612e4SBarry Smith . -mpi_linear_solver_server_view - displays information about all the linear systems solved by the MPI linear solver server
9149f0612e4SBarry Smith - -mpi_linear_solver_server_use_shared_memory <true, false> - use shared memory to distribute matrix and right hand side, defaults to true
915f1f2ae84SBarry Smith
9163821be0aSBarry Smith Options Database Keys for a specific `KSP` object
9173821be0aSBarry Smith + -[any_ksp_prefix]_mpi_linear_solver_server_minimum_count_per_rank - sets the minimum size of the linear system per MPI rank that the solver will strive for
9183821be0aSBarry Smith - -[any_ksp_prefix]_mpi_linear_solver_server_always_use_server - use the server solver code even if the particular system is only solved on the process (for debugging and testing purposes)
9193821be0aSBarry Smith
9203821be0aSBarry Smith Level: developer
92120f4b53cSBarry Smith
922f1f2ae84SBarry Smith Notes:
9239f0612e4SBarry Smith This cannot be used with vectors or matrices that are created using arrays provided by the user, such as `VecCreateWithArray()` or
9249f0612e4SBarry Smith `MatCreateSeqAIJWithArrays()`
9259f0612e4SBarry Smith
  The options database prefix for the actual solver is any prefix provided before use to the original `KSP` with `KSPSetOptionsPrefix()`; most commonly no prefix is used.
927f1f2ae84SBarry Smith
928f1f2ae84SBarry Smith It can be particularly useful for user OpenMP code or potentially user GPU code.
929f1f2ae84SBarry Smith
930dd8e379bSPierre Jolivet When the program is running with a single MPI process then it directly uses the provided matrix and right-hand side
9313821be0aSBarry Smith and does not need to distribute the matrix and vector to the various MPI processes; thus it incurs no extra overhead over just using the `KSP` directly.
932f1f2ae84SBarry Smith
9333821be0aSBarry Smith The solver options for actual solving `KSP` and `PC` must be controlled via the options database, calls to set options directly on the user level `KSP` and `PC` have no effect
9340316ec64SBarry Smith because they are not the actual solver objects.
9350316ec64SBarry Smith
  When `-log_view` is used with this solver the events within the parallel solve are logged in their own stage. Some of the logging in the other
  stages will be confusing since the event times are only recorded on the 0th MPI rank, thus the percent of time in the events will be misleading.
9380316ec64SBarry Smith
9393821be0aSBarry Smith Developer Note:
9403821be0aSBarry Smith This `PCType` is never directly selected by the user, it is set when the option `-mpi_linear_solver_server` is used and the `PC` is at the outer most nesting of
9413821be0aSBarry Smith a `KSP`. The outer most `KSP` object is automatically set to `KSPPREONLY` and thus is not directly visible to the user.
9423821be0aSBarry Smith
9433821be0aSBarry Smith .seealso: [](sec_pcmpi), `KSPCreate()`, `KSPSetType()`, `KSPType`, `KSP`, `PC`, `PCMPIServerBegin()`, `PCMPIServerEnd()`, `KSPCheckPCMPI()`
944f1f2ae84SBarry Smith M*/
/*
  PCCreate_MPI - creates the PCMPI wrapper preconditioner

  This type is only installed automatically (see the PCMPI manual page), so the PC is required to carry
  a prefix containing "mpi_linear_solver_server_"; PCSetUp_Seq() later strips that portion off when
  forwarding options to the inner solver.
*/
PETSC_EXTERN PetscErrorCode PCCreate_MPI(PC pc)
{
  PC_MPI *km;
  char   *found = NULL;

  PetscFunctionBegin;
  PetscCall(PetscStrstr(((PetscObject)pc)->prefix, "mpi_linear_solver_server_", &found));
  PetscCheck(found, PETSC_COMM_SELF, PETSC_ERR_PLIB, "PCMPI object prefix does not have mpi_linear_solver_server_");

  /* material from PCSetType(): tear down whatever type the PC had before installing this one */
  PetscTryTypeMethod(pc, destroy);
  pc->ops->destroy = NULL;
  pc->data         = NULL;

  PetscCall(PetscFunctionListDestroy(&((PetscObject)pc)->qlist));
  PetscCall(PetscMemzero(pc->ops, sizeof(struct _PCOps)));
  pc->modifysubmatrices  = NULL;
  pc->modifysubmatricesP = NULL;
  pc->setupcalled        = PETSC_FALSE;

  PetscCall(PetscNew(&km));
  pc->data = (void *)km;

  /* default minimum number of matrix rows per rank before the parallel server is used */
  km->mincntperrank = 10000;

  pc->ops->setup          = PCSetUp_MPI;
  pc->ops->apply          = PCApply_MPI;
  pc->ops->destroy        = PCDestroy_MPI;
  pc->ops->view           = PCView_MPI;
  pc->ops->setfromoptions = PCSetFromOptions_MPI;
  PetscCall(PetscObjectChangeTypeName((PetscObject)pc, PCMPI));
  PetscFunctionReturn(PETSC_SUCCESS);
}
9789f0612e4SBarry Smith
9799f0612e4SBarry Smith /*@
9809f0612e4SBarry Smith PCMPIGetKSP - Gets the `KSP` created by the `PCMPI`
9819f0612e4SBarry Smith
9829f0612e4SBarry Smith Not Collective
9839f0612e4SBarry Smith
9849f0612e4SBarry Smith Input Parameter:
9859f0612e4SBarry Smith . pc - the preconditioner context
9869f0612e4SBarry Smith
9879f0612e4SBarry Smith Output Parameter:
9889f0612e4SBarry Smith . innerksp - the inner `KSP`
9899f0612e4SBarry Smith
9909f0612e4SBarry Smith Level: advanced
9919f0612e4SBarry Smith
9929f0612e4SBarry Smith .seealso: [](ch_ksp), `KSP`, `PCMPI`, `PCREDISTRIBUTE`
9939f0612e4SBarry Smith @*/
PCMPIGetKSP(PC pc,KSP * innerksp)9949f0612e4SBarry Smith PetscErrorCode PCMPIGetKSP(PC pc, KSP *innerksp)
9959f0612e4SBarry Smith {
9969f0612e4SBarry Smith PC_MPI *red = (PC_MPI *)pc->data;
9979f0612e4SBarry Smith
9989f0612e4SBarry Smith PetscFunctionBegin;
9999f0612e4SBarry Smith PetscValidHeaderSpecific(pc, PC_CLASSID, 1);
10009f0612e4SBarry Smith PetscAssertPointer(innerksp, 2);
10019f0612e4SBarry Smith *innerksp = red->ksps[0];
10029f0612e4SBarry Smith PetscFunctionReturn(PETSC_SUCCESS);
10039f0612e4SBarry Smith }
1004