xref: /petsc/src/ksp/ksp/tutorials/ex76.c (revision af03210a4ba8f2d32b2c281b09bef706920de71b)
1 #include <petscksp.h>
2 #include <petsc/private/petscimpl.h>
3 
4 static char help[] = "Solves a linear system using PCHPDDM.\n\n";
5 
main(int argc,char ** args)6 int main(int argc, char **args)
7 {
8   Vec             b;            /* computed solution and RHS */
9   Mat             A, aux, X, B; /* linear system matrix */
10   KSP             ksp;          /* linear solver context */
11   PC              pc;
12   IS              is, sizes;
13   const PetscInt *idx;
14   PetscMPIInt     rank, size;
15   PetscInt        m, N = 1;
16   PetscLayout     map;
17   PetscViewer     viewer;
18   char            dir[PETSC_MAX_PATH_LEN], name[PETSC_MAX_PATH_LEN], type[256];
19   PetscBool3      share = PETSC_BOOL3_UNKNOWN;
20   PetscBool       flg, set, transpose = PETSC_FALSE, explicit = PETSC_FALSE;
21 
22   PetscFunctionBeginUser;
23   PetscCall(PetscInitialize(&argc, &args, NULL, help));
24   PetscCall(PetscLogDefaultBegin());
25   PetscCallMPI(MPI_Comm_size(PETSC_COMM_WORLD, &size));
26   PetscCheck(size == 4, PETSC_COMM_WORLD, PETSC_ERR_WRONG_MPI_SIZE, "This example requires 4 processes");
27   PetscCall(PetscOptionsGetInt(NULL, NULL, "-rhs", &N, NULL));
28   PetscCallMPI(MPI_Comm_rank(PETSC_COMM_WORLD, &rank));
29   PetscCall(MatCreate(PETSC_COMM_WORLD, &A));
30   PetscCall(PetscStrncpy(dir, ".", sizeof(dir)));
31   PetscCall(PetscOptionsGetString(NULL, NULL, "-load_dir", dir, sizeof(dir), NULL));
32   /* loading matrices */
33   PetscCall(PetscSNPrintf(name, sizeof(name), "%s/sizes_%d.dat", dir, size));
34   PetscCall(PetscViewerBinaryOpen(PETSC_COMM_WORLD, name, FILE_MODE_READ, &viewer));
35   PetscCall(ISCreate(PETSC_COMM_WORLD, &sizes));
36   PetscCall(ISLoad(sizes, viewer));
37   PetscCall(ISGetIndices(sizes, &idx));
38   PetscCall(MatSetSizes(A, idx[0], idx[1], idx[2], idx[3]));
39   PetscCall(MatCreate(PETSC_COMM_WORLD, &X));
40   PetscCall(MatSetSizes(X, idx[4], idx[4], PETSC_DETERMINE, PETSC_DETERMINE));
41   PetscCall(MatSetUp(X));
42   PetscCall(ISRestoreIndices(sizes, &idx));
43   PetscCall(ISDestroy(&sizes));
44   PetscCall(PetscViewerDestroy(&viewer));
45   PetscCall(PetscSNPrintf(name, sizeof(name), "%s/A.dat", dir));
46   PetscCall(PetscViewerBinaryOpen(PETSC_COMM_WORLD, name, FILE_MODE_READ, &viewer));
47   PetscCall(MatLoad(A, viewer));
48   PetscCall(PetscViewerDestroy(&viewer));
49   PetscCall(PetscSNPrintf(name, sizeof(name), "%s/is_%d.dat", dir, size));
50   PetscCall(PetscViewerBinaryOpen(PETSC_COMM_WORLD, name, FILE_MODE_READ, &viewer));
51   PetscCall(ISCreate(PETSC_COMM_WORLD, &sizes));
52   PetscCall(MatGetLayouts(X, &map, NULL));
53   PetscCall(ISSetLayout(sizes, map));
54   PetscCall(ISLoad(sizes, viewer));
55   PetscCall(ISGetLocalSize(sizes, &m));
56   PetscCall(ISGetIndices(sizes, &idx));
57   PetscCall(ISCreateGeneral(PETSC_COMM_SELF, m, idx, PETSC_COPY_VALUES, &is));
58   PetscCall(ISRestoreIndices(sizes, &idx));
59   PetscCall(ISDestroy(&sizes));
60   PetscCall(MatGetBlockSize(A, &m));
61   PetscCall(ISSetBlockSize(is, m));
62   PetscCall(PetscViewerDestroy(&viewer));
63   PetscCall(PetscSNPrintf(name, sizeof(name), "%s/Neumann_%d.dat", dir, size));
64   PetscCall(PetscViewerBinaryOpen(PETSC_COMM_WORLD, name, FILE_MODE_READ, &viewer));
65   PetscCall(MatLoad(X, viewer));
66   PetscCall(PetscViewerDestroy(&viewer));
67   PetscCall(MatGetDiagonalBlock(X, &B));
68   PetscCall(MatDuplicate(B, MAT_COPY_VALUES, &aux));
69   PetscCall(MatDestroy(&X));
70   flg = PETSC_FALSE;
71   PetscCall(PetscOptionsGetBool(NULL, NULL, "-pc_hpddm_levels_1_st_share_sub_ksp", &flg, &set));
72   if (flg) { /* PETSc LU/Cholesky is struggling numerically for bs > 1          */
73              /* only set the proper bs for the geneo_share_* tests, 1 otherwise */
74     PetscCall(MatSetBlockSizesFromMats(aux, A, A));
75     share = PETSC_BOOL3_TRUE;
76   } else if (set) share = PETSC_BOOL3_FALSE;
77   PetscCall(MatSetOption(A, MAT_SYMMETRIC, PETSC_TRUE));
78   PetscCall(MatSetOption(aux, MAT_SYMMETRIC, PETSC_TRUE));
79   /* ready for testing */
80   PetscOptionsBegin(PETSC_COMM_WORLD, "", "", "");
81   PetscCall(PetscStrncpy(type, MATAIJ, sizeof(type)));
82   PetscCall(PetscOptionsFList("-mat_type", "Matrix type", "MatSetType", MatList, type, type, 256, &flg));
83   PetscOptionsEnd();
84   PetscCall(MatConvert(A, type, MAT_INPLACE_MATRIX, &A));
85   PetscCall(MatConvert(aux, type, MAT_INPLACE_MATRIX, &aux));
86   PetscCall(KSPCreate(PETSC_COMM_WORLD, &ksp));
87   PetscCall(KSPSetOperators(ksp, A, A));
88   PetscCall(KSPGetPC(ksp, &pc));
89   PetscCall(PCSetType(pc, PCHPDDM));
90 #if defined(PETSC_HAVE_HPDDM) && defined(PETSC_HAVE_DYNAMIC_LIBRARIES) && defined(PETSC_USE_SHARED_LIBRARIES)
91   flg = PETSC_FALSE;
92   PetscCall(PetscOptionsGetBool(NULL, NULL, "-reset", &flg, NULL));
93   if (flg) {
94     PetscCall(PetscOptionsSetValue(NULL, "-pc_hpddm_block_splitting", "true"));
95     PetscCall(PCSetFromOptions(pc));
96     PetscCall(PCSetUp(pc));
97     PetscCall(PetscOptionsClearValue(NULL, "-pc_hpddm_block_splitting"));
98   }
99   PetscCall(PCHPDDMSetAuxiliaryMat(pc, is, aux, NULL, NULL));
100   PetscCall(PCHPDDMHasNeumannMat(pc, PETSC_FALSE)); /* PETSC_TRUE is fine as well, just testing */
101   if (share == PETSC_BOOL3_UNKNOWN) PetscCall(PCHPDDMSetSTShareSubKSP(pc, PetscBool3ToBool(share)));
102   flg = PETSC_FALSE;
103   PetscCall(PetscOptionsGetBool(NULL, NULL, "-set_rhs", &flg, NULL));
104   if (flg) {          /* user-provided RHS for concurrent generalized eigenvalue problems                          */
105     Mat      a, c, P; /* usually assembled automatically in PCHPDDM, this is solely for testing PCHPDDMSetRHSMat() */
106     PetscInt rstart, rend, location;
107 
108     PetscCall(MatDuplicate(aux, MAT_DO_NOT_COPY_VALUES, &B)); /* duplicate so that MatStructure is SAME_NONZERO_PATTERN */
109     PetscCall(MatGetDiagonalBlock(A, &a));
110     PetscCall(MatGetOwnershipRange(A, &rstart, &rend));
111     PetscCall(ISGetLocalSize(is, &m));
112     PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, rend - rstart, m, 1, NULL, &P));
113     for (m = rstart; m < rend; ++m) {
114       PetscCall(ISLocate(is, m, &location));
115       PetscCheck(location >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "IS of the auxiliary Mat does not include all local rows of A");
116       PetscCall(MatSetValue(P, m - rstart, location, 1.0, INSERT_VALUES));
117     }
118     PetscCall(MatAssemblyBegin(P, MAT_FINAL_ASSEMBLY));
119     PetscCall(MatAssemblyEnd(P, MAT_FINAL_ASSEMBLY));
120     PetscCall(PetscObjectTypeCompare((PetscObject)a, MATSEQAIJ, &flg));
121     if (flg) PetscCall(MatPtAP(a, P, MAT_INITIAL_MATRIX, 1.0, &X)); // MatPtAP() is used to extend diagonal blocks with zeros on the overlap
122     else {                                                          // workaround for MatPtAP() limitations with some types
123       PetscCall(MatConvert(a, MATSEQAIJ, MAT_INITIAL_MATRIX, &c));
124       PetscCall(MatPtAP(c, P, MAT_INITIAL_MATRIX, 1.0, &X));
125       PetscCall(MatDestroy(&c));
126     }
127     PetscCall(MatDestroy(&P));
128     PetscCall(MatAXPY(B, 1.0, X, SUBSET_NONZERO_PATTERN));
129     PetscCall(MatDestroy(&X));
130     PetscCall(MatSetOption(B, MAT_SYMMETRIC, PETSC_TRUE));
131     PetscCall(PCHPDDMSetRHSMat(pc, B));
132     PetscCall(MatDestroy(&B));
133   }
134 #else
135   (void)share;
136 #endif
137   PetscCall(MatDestroy(&aux));
138   PetscCall(KSPSetFromOptions(ksp));
139   PetscCall(PetscObjectTypeCompare((PetscObject)pc, PCASM, &flg));
140   if (flg) {
141     flg = PETSC_FALSE;
142     PetscCall(PetscOptionsGetBool(NULL, NULL, "-pc_hpddm_define_subdomains", &flg, NULL));
143     if (flg) {
144       IS rows;
145 
146       PetscCall(MatGetOwnershipIS(A, &rows, NULL));
147       PetscCall(PCASMSetLocalSubdomains(pc, 1, &is, &rows));
148       PetscCall(ISDestroy(&rows));
149     }
150   }
151   PetscCall(ISDestroy(&is));
152   PetscCall(MatCreateVecs(A, NULL, &b));
153   PetscCall(VecSet(b, 1.0));
154   PetscCall(PetscOptionsGetBool(NULL, NULL, "-transpose", &transpose, NULL));
155   if (!transpose) PetscCall(KSPSolve(ksp, b, b));
156   else {
157     PetscCall(KSPSolveTranspose(ksp, b, b));
158     PetscCall(PetscOptionsGetBool(NULL, NULL, "-ksp_use_explicittranspose", &explicit, NULL));
159     if (explicit) PetscCall(KSPSetOperators(ksp, A, A)); /* -ksp_use_explicittranspose does not cache the initial Mat and will transpose the explicit transpose again if not set back to the original Mat */
160   }
161   PetscCall(VecGetLocalSize(b, &m));
162   PetscCall(VecDestroy(&b));
163   if (N > 1) {
164     KSPType type;
165     VecType vt;
166 
167     PetscCall(PetscOptionsClearValue(NULL, "-ksp_converged_reason"));
168     PetscCall(KSPSetFromOptions(ksp));
169     PetscCall(MatGetVecType(A, &vt));
170     PetscCall(MatCreateDenseFromVecType(PETSC_COMM_WORLD, vt, m, PETSC_DECIDE, PETSC_DECIDE, N, PETSC_DECIDE, NULL, &B));
171     PetscCall(MatCreateDenseFromVecType(PETSC_COMM_WORLD, vt, m, PETSC_DECIDE, PETSC_DECIDE, N, PETSC_DECIDE, NULL, &X));
172     PetscCall(MatSetRandom(B, NULL));
173     /* this is algorithmically optimal in the sense that blocks of vectors are coarsened or interpolated using matrix--matrix operations */
174     /* PCHPDDM however heavily relies on MPI[S]BAIJ format for which there is no efficient MatProduct implementation */
175     if (!transpose) PetscCall(KSPMatSolve(ksp, B, X));
176     else {
177       PetscCall(KSPMatSolveTranspose(ksp, B, X));
178       if (explicit) PetscCall(KSPSetOperators(ksp, A, A)); /* same as in the prior KSPSolveTranspose() */
179     }
180     PetscCall(KSPGetType(ksp, &type));
181     PetscCall(PetscStrcmp(type, KSPHPDDM, &flg));
182 #if defined(PETSC_HAVE_HPDDM)
183     if (flg) {
184       PetscReal    norm;
185       KSPHPDDMType type;
186 
187       PetscCall(KSPHPDDMGetType(ksp, &type));
188       if (type == KSP_HPDDM_TYPE_PREONLY || type == KSP_HPDDM_TYPE_CG || type == KSP_HPDDM_TYPE_GMRES || type == KSP_HPDDM_TYPE_GCRODR) {
189         Mat C;
190 
191         PetscCall(MatDuplicate(X, MAT_DO_NOT_COPY_VALUES, &C));
192         PetscCall(KSPSetMatSolveBatchSize(ksp, 1));
193         if (!transpose) PetscCall(KSPMatSolve(ksp, B, C));
194         else {
195           PetscCall(KSPMatSolveTranspose(ksp, B, C));
196           if (explicit) PetscCall(KSPSetOperators(ksp, A, A)); /* same as in the prior KSPMatSolveTranspose() */
197         }
198         PetscCall(MatAYPX(C, -1.0, X, SAME_NONZERO_PATTERN));
199         PetscCall(MatNorm(C, NORM_INFINITY, &norm));
200         PetscCall(MatDestroy(&C));
201         PetscCheck(norm <= 100 * PETSC_MACHINE_EPSILON, PetscObjectComm((PetscObject)pc), PETSC_ERR_PLIB, "KSPMatSolve%s() and KSPSolve%s() difference has nonzero norm %g with pseudo-block KSPHPDDMType %s", (transpose ? "Transpose" : ""), (transpose ? "Transpose" : ""), (double)norm, KSPHPDDMTypes[type]);
202       }
203     }
204 #endif
205     PetscCall(MatDestroy(&X));
206     PetscCall(MatDestroy(&B));
207   }
208   PetscCall(PetscObjectTypeCompare((PetscObject)pc, PCHPDDM, &flg));
209 #if defined(PETSC_HAVE_HPDDM) && defined(PETSC_HAVE_DYNAMIC_LIBRARIES) && defined(PETSC_USE_SHARED_LIBRARIES)
210   if (flg) PetscCall(PCHPDDMGetSTShareSubKSP(pc, &flg));
211 #endif
212   if (flg && PetscDefined(USE_LOG)) {
213     PetscCall(PetscOptionsHasName(NULL, NULL, "-pc_hpddm_harmonic_overlap", &flg));
214     if (!flg) {
215       PetscLogEvent      event;
216       PetscEventPerfInfo info1, info2;
217 
218       PetscCall(PetscLogEventRegister("MatLUFactorSym", PC_CLASSID, &event));
219       PetscCall(PetscLogEventGetPerfInfo(PETSC_DETERMINE, event, &info1));
220       PetscCall(PetscLogEventRegister("MatLUFactorNum", PC_CLASSID, &event));
221       PetscCall(PetscLogEventGetPerfInfo(PETSC_DETERMINE, event, &info2));
222       if (!info1.count && !info2.count) {
223         PetscCall(PetscLogEventRegister("MatCholFctrSym", PC_CLASSID, &event));
224         PetscCall(PetscLogEventGetPerfInfo(PETSC_DETERMINE, event, &info1));
225         PetscCall(PetscLogEventRegister("MatCholFctrNum", PC_CLASSID, &event));
226         PetscCall(PetscLogEventGetPerfInfo(PETSC_DETERMINE, event, &info2));
227         PetscCheck(info2.count > info1.count, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cholesky numerical factorization (%d) not called more times than Cholesky symbolic factorization (%d), broken -pc_hpddm_levels_1_st_share_sub_ksp", info2.count, info1.count);
228       } else PetscCheck(info2.count > info1.count, PETSC_COMM_SELF, PETSC_ERR_PLIB, "LU numerical factorization (%d) not called more times than LU symbolic factorization (%d), broken -pc_hpddm_levels_1_st_share_sub_ksp", info2.count, info1.count);
229     }
230   }
231 #if defined(PETSC_HAVE_HPDDM) && defined(PETSC_HAVE_DYNAMIC_LIBRARIES) && defined(PETSC_USE_SHARED_LIBRARIES)
232   if (N == 1) {
233     flg = PETSC_FALSE;
234     PetscCall(PetscOptionsGetBool(NULL, NULL, "-successive_solves", &flg, NULL));
235     if (flg) {
236       KSPConvergedReason reason[2];
237       PetscInt           iterations[3];
238 
239       PetscCall(KSPGetConvergedReason(ksp, reason));
240       PetscCall(KSPGetTotalIterations(ksp, iterations));
241       PetscCall(PetscOptionsClearValue(NULL, "-ksp_converged_reason"));
242       PetscCall(KSPSetFromOptions(ksp));
243       flg = PETSC_FALSE;
244       PetscCall(PetscOptionsGetBool(NULL, NULL, "-pc_hpddm_block_splitting", &flg, NULL));
245       if (!flg) {
246         PetscCall(PetscSNPrintf(name, sizeof(name), "%s/sizes_%d.dat", dir, size));
247         PetscCall(PetscViewerBinaryOpen(PETSC_COMM_WORLD, name, FILE_MODE_READ, &viewer));
248         PetscCall(ISCreate(PETSC_COMM_WORLD, &sizes));
249         PetscCall(ISLoad(sizes, viewer));
250         PetscCall(ISGetIndices(sizes, &idx));
251         PetscCall(MatCreate(PETSC_COMM_WORLD, &X));
252         PetscCall(MatSetSizes(X, idx[4], idx[4], PETSC_DETERMINE, PETSC_DETERMINE));
253         PetscCall(MatSetUp(X));
254         PetscCall(ISRestoreIndices(sizes, &idx));
255         PetscCall(ISDestroy(&sizes));
256         PetscCall(PetscViewerDestroy(&viewer));
257         PetscCall(PetscSNPrintf(name, sizeof(name), "%s/is_%d.dat", dir, size));
258         PetscCall(PetscViewerBinaryOpen(PETSC_COMM_WORLD, name, FILE_MODE_READ, &viewer));
259         PetscCall(ISCreate(PETSC_COMM_WORLD, &sizes));
260         PetscCall(MatGetLayouts(X, &map, NULL));
261         PetscCall(ISSetLayout(sizes, map));
262         PetscCall(ISLoad(sizes, viewer));
263         PetscCall(ISGetLocalSize(sizes, &m));
264         PetscCall(ISGetIndices(sizes, &idx));
265         PetscCall(ISCreateGeneral(PETSC_COMM_SELF, m, idx, PETSC_COPY_VALUES, &is));
266         PetscCall(ISRestoreIndices(sizes, &idx));
267         PetscCall(ISDestroy(&sizes));
268         PetscCall(MatGetBlockSize(A, &m));
269         PetscCall(ISSetBlockSize(is, m));
270         PetscCall(PetscViewerDestroy(&viewer));
271         PetscCall(PetscSNPrintf(name, sizeof(name), "%s/Neumann_%d.dat", dir, size));
272         PetscCall(PetscViewerBinaryOpen(PETSC_COMM_WORLD, name, FILE_MODE_READ, &viewer));
273         PetscCall(MatLoad(X, viewer));
274         PetscCall(PetscViewerDestroy(&viewer));
275         PetscCall(MatGetDiagonalBlock(X, &B));
276         PetscCall(MatDuplicate(B, MAT_COPY_VALUES, &aux));
277         PetscCall(MatDestroy(&X));
278         PetscCall(MatSetBlockSizesFromMats(aux, A, A));
279         PetscCall(MatSetOption(aux, MAT_SYMMETRIC, PETSC_TRUE));
280         PetscCall(MatConvert(aux, type, MAT_INPLACE_MATRIX, &aux));
281       }
282       PetscCall(MatCreateVecs(A, NULL, &b));
283       PetscCall(PetscObjectStateIncrease((PetscObject)A));
284       if (!flg) PetscCall(PCHPDDMSetAuxiliaryMat(pc, NULL, aux, NULL, NULL));
285       PetscCall(VecSet(b, 1.0));
286       if (!transpose) PetscCall(KSPSolve(ksp, b, b));
287       else PetscCall(KSPSolveTranspose(ksp, b, b));
288       PetscCall(KSPGetConvergedReason(ksp, reason + 1));
289       PetscCall(KSPGetTotalIterations(ksp, iterations + 1));
290       iterations[1] -= iterations[0];
291       PetscCheck(reason[0] == reason[1] && PetscAbs(iterations[0] - iterations[1]) <= 3, PetscObjectComm((PetscObject)ksp), PETSC_ERR_PLIB, "Successive calls to KSPSolve%s() did not converge for the same reason (%s v. %s) or with the same number of iterations (+/- 3, %" PetscInt_FMT " v. %" PetscInt_FMT ")", (transpose ? "Transpose" : ""), KSPConvergedReasons[reason[0]], KSPConvergedReasons[reason[1]], iterations[0], iterations[1]);
292       PetscCall(PetscObjectStateIncrease((PetscObject)A));
293       if (!flg) PetscCall(PCHPDDMSetAuxiliaryMat(pc, is, aux, NULL, NULL));
294       PetscCall(PCSetFromOptions(pc));
295       PetscCall(VecSet(b, 1.0));
296       if (!transpose) PetscCall(KSPSolve(ksp, b, b));
297       else PetscCall(KSPSolveTranspose(ksp, b, b));
298       PetscCall(KSPGetConvergedReason(ksp, reason + 1));
299       PetscCall(KSPGetTotalIterations(ksp, iterations + 2));
300       iterations[2] -= iterations[0] + iterations[1];
301       PetscCheck(reason[0] == reason[1] && PetscAbs(iterations[0] - iterations[2]) <= 3, PetscObjectComm((PetscObject)ksp), PETSC_ERR_PLIB, "Successive calls to KSPSolve%s() did not converge for the same reason (%s v. %s) or with the same number of iterations (+/- 3, %" PetscInt_FMT " v. %" PetscInt_FMT ")", (transpose ? "Transpose" : ""), KSPConvergedReasons[reason[0]], KSPConvergedReasons[reason[1]], iterations[0], iterations[2]);
302       PetscCall(VecDestroy(&b));
303       PetscCall(ISDestroy(&is));
304       PetscCall(MatDestroy(&aux));
305     }
306   }
307   PetscCall(PetscOptionsGetBool(NULL, NULL, "-viewer", &flg, NULL));
308   if (flg) {
309     PetscCall(PetscObjectTypeCompare((PetscObject)pc, PCHPDDM, &flg));
310     if (flg) {
311       PetscCall(PetscStrncpy(dir, "XXXXXX", sizeof(dir)));
312       if (rank == 0) PetscCall(PetscMkdtemp(dir));
313       PetscCallMPI(MPI_Bcast(dir, 6, MPI_CHAR, 0, PETSC_COMM_WORLD));
314       for (PetscInt i = 0; i < 2; ++i) {
315         PetscCall(PetscSNPrintf(name, sizeof(name), "%s/%s", dir, i == 0 ? "A" : "A.dat"));
316         PetscCall(PetscViewerASCIIOpen(PETSC_COMM_WORLD, name, &viewer));
317         PetscCall(PetscViewerPushFormat(viewer, PETSC_VIEWER_ASCII_INFO_DETAIL));
318         PetscCall(PCView(pc, viewer));
319         PetscCall(PetscViewerPopFormat(viewer));
320         PetscCall(PetscViewerDestroy(&viewer));
321       }
322       PetscCallMPI(MPI_Barrier(PETSC_COMM_WORLD));
323       if (rank == 0) PetscCall(PetscRMTree(dir));
324     }
325   }
326 #endif
327   PetscCall(KSPDestroy(&ksp));
328   PetscCall(MatDestroy(&A));
329   PetscCall(PetscFinalize());
330   return 0;
331 }
332 
333 /*TEST
334 
335    test:
336       requires: hpddm slepc datafilespath double !complex !defined(PETSC_USE_64BIT_INDICES) defined(PETSC_HAVE_DYNAMIC_LIBRARIES) defined(PETSC_USE_SHARED_LIBRARIES)
337       nsize: 4
338       args: -ksp_rtol 1e-3 -ksp_converged_reason -pc_type {{bjacobi hpddm}shared output} -pc_hpddm_coarse_sub_pc_type lu -sub_pc_type lu -options_left no -load_dir ${DATAFILESPATH}/matrices/hpddm/GENEO
339 
340    testset:
341       requires: hpddm slepc datafilespath double !complex !defined(PETSC_USE_64BIT_INDICES) defined(PETSC_HAVE_DYNAMIC_LIBRARIES) defined(PETSC_USE_SHARED_LIBRARIES)
342       suffix: define_subdomains
343       nsize: 4
344       args: -ksp_rtol 1e-3 -ksp_converged_reason -pc_hpddm_define_subdomains -options_left no -load_dir ${DATAFILESPATH}/matrices/hpddm/GENEO
345       test:
346         args: -pc_type {{asm hpddm}shared output} -pc_hpddm_coarse_sub_pc_type lu -sub_pc_type lu -viewer
347       test:
348         args: -pc_type hpddm -pc_hpddm_levels_1_st_pc_type cholesky -pc_hpddm_levels_1_eps_nev 5 -pc_hpddm_coarse_sub_pc_type lu -pc_hpddm_levels_1_sub_pc_type lu -pc_hpddm_coarse_correction none
349 
350    testset:
351       requires: hpddm slepc datafilespath double !complex !defined(PETSC_USE_64BIT_INDICES) defined(PETSC_HAVE_DYNAMIC_LIBRARIES) defined(PETSC_USE_SHARED_LIBRARIES)
352       nsize: 4
353       args: -ksp_converged_reason -pc_type hpddm -pc_hpddm_levels_1_sub_pc_type cholesky -pc_hpddm_coarse_pc_type redundant -load_dir ${DATAFILESPATH}/matrices/hpddm/GENEO
354       test:
355         suffix: geneo
356         args: -pc_hpddm_coarse_p {{1 2}shared output} -pc_hpddm_levels_1_st_pc_type cholesky -pc_hpddm_levels_1_eps_nev {{5 15}separate output} -mat_type {{aij baij sbaij}shared output}
357       test:
358         suffix: geneo_block_splitting
359         output_file: output/ex76_geneo_pc_hpddm_levels_1_eps_nev-15.out
360         filter: sed -e "s/Linear solve converged due to CONVERGED_RTOL iterations 1[6-9]/Linear solve converged due to CONVERGED_RTOL iterations 11/g"
361         args: -pc_hpddm_coarse_p 2 -pc_hpddm_levels_1_eps_nev 15 -pc_hpddm_block_splitting -pc_hpddm_levels_1_st_pc_type lu -pc_hpddm_levels_1_eps_gen_non_hermitian -mat_type {{aij baij}shared output} -successive_solves
362       test:
363         suffix: geneo_share
364         output_file: output/ex76_geneo_pc_hpddm_levels_1_eps_nev-5.out
365         args: -pc_hpddm_levels_1_st_pc_type cholesky -pc_hpddm_levels_1_eps_nev 5 -pc_hpddm_levels_1_st_share_sub_ksp -reset {{false true}shared output}
366       test:
367         suffix: harmonic_overlap_1_define_false
368         output_file: output/ex76_geneo_share.out
369         filter: sed -e "s/Linear solve converged due to CONVERGED_RTOL iterations 1[0-3]/Linear solve converged due to CONVERGED_RTOL iterations 15/g"
370         args: -pc_hpddm_harmonic_overlap 1 -pc_hpddm_levels_1_eps_nev 30 -pc_hpddm_levels_1_eps_threshold_relative 1e+1 -pc_hpddm_levels_1_st_pc_type lu -pc_hpddm_levels_1_eps_pc_type lu -pc_hpddm_define_subdomains false -pc_hpddm_levels_1_pc_type asm -pc_hpddm_levels_1_pc_asm_overlap 2 -mat_type baij
371       test:
372         suffix: harmonic_overlap_1
373         output_file: output/ex76_geneo_share.out
374         filter: sed -e "s/Linear solve converged due to CONVERGED_RTOL iterations 1[0-3]/Linear solve converged due to CONVERGED_RTOL iterations 15/g"
375         args: -pc_hpddm_harmonic_overlap 1 -pc_hpddm_levels_1_eps_nev 30 -pc_hpddm_levels_1_eps_threshold_relative 1e+1 -pc_hpddm_levels_1_st_pc_type lu -pc_hpddm_levels_1_eps_pc_type lu -mat_type baij
376       test:
377         suffix: harmonic_overlap_1_share_petsc
378         output_file: output/ex76_geneo_share.out
379         filter: sed -e "s/Linear solve converged due to CONVERGED_RTOL iterations 1[0-3]/Linear solve converged due to CONVERGED_RTOL iterations 15/g"
380         args: -pc_hpddm_harmonic_overlap 1 -pc_hpddm_levels_1_eps_nev 30 -pc_hpddm_levels_1_eps_threshold_relative 1e+1 -pc_hpddm_levels_1_st_pc_type lu -pc_hpddm_levels_1_st_share_sub_ksp -pc_hpddm_levels_1_sub_pc_factor_mat_solver_type petsc -pc_hpddm_levels_1_eps_pc_type lu -mat_type baij
381       test:
382         requires: mumps
383         suffix: harmonic_overlap_1_share_mumps
384         output_file: output/ex76_geneo_share.out
385         filter: sed -e "s/Linear solve converged due to CONVERGED_RTOL iterations 1[0-3]/Linear solve converged due to CONVERGED_RTOL iterations 15/g"
386         args: -pc_hpddm_harmonic_overlap 1 -pc_hpddm_levels_1_eps_nev 30 -pc_hpddm_levels_1_eps_threshold_relative 1e+1 -pc_hpddm_levels_1_st_share_sub_ksp -pc_hpddm_levels_1_sub_pc_factor_mat_solver_type mumps -pc_hpddm_coarse_pc_type cholesky -pc_hpddm_coarse_pc_factor_mat_solver_type mumps -pc_hpddm_coarse_mat_mumps_icntl_15 1
387       test:
388         requires: mumps
389         suffix: harmonic_overlap_1_share_mumps_not_set_explicitly
390         output_file: output/ex76_geneo_share.out
391         filter: sed -e "s/Linear solve converged due to CONVERGED_RTOL iterations 1[0-3]/Linear solve converged due to CONVERGED_RTOL iterations 15/g"
392         args: -pc_hpddm_harmonic_overlap 1 -pc_hpddm_levels_1_eps_nev 30 -pc_hpddm_levels_1_eps_threshold_relative 1e+1 -pc_hpddm_levels_1_st_share_sub_ksp -pc_hpddm_levels_1_eps_mat_type baij
393       test:
394         requires: mkl_pardiso
395         suffix: harmonic_overlap_1_share_mkl_pardiso
396         output_file: output/ex76_geneo_share.out
397         filter: sed -e "s/Linear solve converged due to CONVERGED_RTOL iterations [12][0-3]/Linear solve converged due to CONVERGED_RTOL iterations 15/g"
398         args: -pc_hpddm_harmonic_overlap 1 -pc_hpddm_levels_1_eps_nev 30 -pc_hpddm_levels_1_eps_threshold_relative 1e+1 -pc_hpddm_levels_1_st_share_sub_ksp -pc_hpddm_levels_1_eps_mat_type shell -pc_hpddm_levels_1_sub_pc_factor_mat_solver_type mkl_pardiso
399       test:
400         requires: mkl_pardiso !mumps
401         suffix: harmonic_overlap_1_share_mkl_pardiso_no_set_explicitly
402         output_file: output/ex76_geneo_share.out
403         filter: sed -e "s/Linear solve converged due to CONVERGED_RTOL iterations [12][0-3]/Linear solve converged due to CONVERGED_RTOL iterations 15/g"
404         args: -pc_hpddm_harmonic_overlap 1 -pc_hpddm_levels_1_eps_nev 30 -pc_hpddm_levels_1_eps_threshold_relative 1e+1 -pc_hpddm_levels_1_st_share_sub_ksp -pc_hpddm_levels_1_eps_mat_type shell
405       test:
406         suffix: harmonic_overlap_2_threshold_relative
407         output_file: output/ex76_geneo_share.out
408         filter: sed -e "s/Linear solve converged due to CONVERGED_RTOL iterations 9/Linear solve converged due to CONVERGED_RTOL iterations 15/g"
409         args: -pc_hpddm_harmonic_overlap 2 -pc_hpddm_levels_1_svd_nsv 15 -pc_hpddm_levels_1_svd_threshold_relative 1e-1 -pc_hpddm_levels_1_st_share_sub_ksp -mat_type sbaij
410       test:
411         suffix: harmonic_overlap_2
412         output_file: output/ex76_geneo_share.out
413         filter: sed -e "s/Linear solve converged due to CONVERGED_RTOL iterations 9/Linear solve converged due to CONVERGED_RTOL iterations 15/g"
414         args: -pc_hpddm_harmonic_overlap 2 -pc_hpddm_levels_1_svd_nsv 12 -pc_hpddm_levels_1_svd_type {{trlanczos randomized}shared output} -pc_hpddm_levels_1_st_share_sub_ksp -mat_type sbaij
415 
416    testset:
417       requires: hpddm slepc datafilespath double !complex !defined(PETSC_USE_64BIT_INDICES) defined(PETSC_HAVE_DYNAMIC_LIBRARIES) defined(PETSC_USE_SHARED_LIBRARIES)
418       nsize: 4
419       args: -ksp_converged_reason -ksp_max_it 150 -pc_type hpddm -pc_hpddm_levels_1_eps_nev 5 -pc_hpddm_coarse_p 1 -load_dir ${DATAFILESPATH}/matrices/hpddm/GENEO -pc_hpddm_define_subdomains
420       test:
421         suffix: geneo_share_cholesky
422         output_file: output/ex76_geneo_share.out
423         # extra -pc_hpddm_levels_1_eps_gen_non_hermitian needed to avoid failures with PETSc Cholesky
424         args: -pc_hpddm_levels_1_sub_pc_type cholesky -pc_hpddm_levels_1_st_pc_type cholesky -mat_type {{aij sbaij}shared output} -pc_hpddm_levels_1_eps_gen_non_hermitian -pc_hpddm_has_neumann -pc_hpddm_levels_1_st_share_sub_ksp {{false true}shared output} -successive_solves
425       test:
426         suffix: geneo_share_cholesky_matstructure
427         output_file: output/ex76_geneo_share.out
428         # extra -pc_hpddm_levels_1_eps_gen_non_hermitian needed to avoid failures with PETSc Cholesky
429         filter: sed -e "s/Linear solve converged due to CONVERGED_RTOL iterations 14/Linear solve converged due to CONVERGED_RTOL iterations 15/g"
430         args: -pc_hpddm_levels_1_sub_pc_type cholesky -mat_type {{baij sbaij}shared output} -pc_hpddm_levels_1_eps_gen_non_hermitian -pc_hpddm_levels_1_st_share_sub_ksp -pc_hpddm_levels_1_st_matstructure same -set_rhs {{false true} shared output}
431       test:
432         suffix: geneo_transpose
433         output_file: output/ex76_geneo_share.out
434         filter: sed -e "s/Linear solve converged due to CONVERGED_RTOL iterations 1[234]/Linear solve converged due to CONVERGED_RTOL iterations 15/g" -e "s/Linear solve converged due to CONVERGED_RTOL iterations 26/Linear solve converged due to CONVERGED_RTOL iterations 15/g"
435         args: -pc_hpddm_levels_1_sub_pc_type cholesky -pc_hpddm_levels_1_st_pc_type cholesky -pc_hpddm_levels_1_eps_gen_non_hermitian -pc_hpddm_has_neumann -pc_hpddm_levels_1_st_share_sub_ksp -successive_solves -transpose -pc_hpddm_coarse_correction {{additive deflated balanced}shared output}
436       test:
437         suffix: geneo_explicittranspose
438         output_file: output/ex76_geneo_share.out
439         filter: sed -e "s/Linear solve converged due to CONVERGED_RTOL iterations 1[234]/Linear solve converged due to CONVERGED_RTOL iterations 15/g" -e "s/Linear solve converged due to CONVERGED_RTOL iterations 26/Linear solve converged due to CONVERGED_RTOL iterations 15/g"
440         args: -pc_hpddm_levels_1_sub_pc_type cholesky -pc_hpddm_levels_1_st_pc_type cholesky -pc_hpddm_levels_1_eps_gen_non_hermitian -pc_hpddm_has_neumann -pc_hpddm_levels_1_st_share_sub_ksp -transpose -ksp_use_explicittranspose -rhs 2
441       test:
442         requires: mumps
443         suffix: geneo_share_lu
444         output_file: output/ex76_geneo_share.out
445         # extra -pc_factor_mat_solver_type mumps needed to avoid failures with PETSc LU
446         args: -pc_hpddm_levels_1_sub_pc_type lu -pc_hpddm_levels_1_st_pc_type lu -mat_type baij -pc_hpddm_levels_1_st_pc_factor_mat_solver_type mumps -pc_hpddm_levels_1_sub_pc_factor_mat_solver_type mumps -pc_hpddm_has_neumann -pc_hpddm_levels_1_st_share_sub_ksp {{false true}shared output} -pc_hpddm_coarse_pc_factor_mat_solver_type mumps -pc_hpddm_coarse_mat_mumps_icntl_15 1
447       test:
448         requires: mumps
449         suffix: geneo_share_lu_matstructure
450         output_file: output/ex76_geneo_share.out
451         # extra -pc_factor_mat_solver_type mumps needed to avoid failures with PETSc LU
452         args: -pc_hpddm_levels_1_sub_pc_type lu -mat_type aij -pc_hpddm_levels_1_sub_pc_factor_mat_solver_type mumps -pc_hpddm_levels_1_st_share_sub_ksp -pc_hpddm_levels_1_st_matstructure {{same different}shared output} -pc_hpddm_levels_1_st_pc_type lu -pc_hpddm_levels_1_st_pc_factor_mat_solver_type mumps -successive_solves -pc_hpddm_levels_1_eps_target 1e-5
453       test:
454         suffix: geneo_share_not_asm
455         output_file: output/ex76_geneo_pc_hpddm_levels_1_eps_nev-5.out
456         # extra -pc_hpddm_levels_1_eps_gen_non_hermitian needed to avoid failures with PETSc Cholesky
457         args: -pc_hpddm_levels_1_sub_pc_type cholesky -pc_hpddm_levels_1_st_pc_type cholesky -pc_hpddm_levels_1_eps_gen_non_hermitian -pc_hpddm_has_neumann -pc_hpddm_levels_1_st_share_sub_ksp true -pc_hpddm_levels_1_pc_type gasm -successive_solves
458 
459    test:
460       requires: hpddm slepc datafilespath double !complex !defined(PETSC_USE_64BIT_INDICES) defined(PETSC_HAVE_DYNAMIC_LIBRARIES) defined(PETSC_USE_SHARED_LIBRARIES)
461       suffix: fgmres_geneo_20_p_2
462       nsize: 4
463       args: -ksp_converged_reason -pc_type hpddm -pc_hpddm_levels_1_sub_pc_type lu -pc_hpddm_levels_1_eps_nev 20 -pc_hpddm_coarse_p 2 -pc_hpddm_coarse_pc_type redundant -ksp_type fgmres -pc_hpddm_coarse_mat_type {{baij sbaij}shared output} -pc_hpddm_log_separate {{false true}shared output} -load_dir ${DATAFILESPATH}/matrices/hpddm/GENEO
464 
465    testset:
466       requires: hpddm slepc datafilespath double !complex !defined(PETSC_USE_64BIT_INDICES) defined(PETSC_HAVE_DYNAMIC_LIBRARIES) defined(PETSC_USE_SHARED_LIBRARIES)
467       output_file: output/ex76_fgmres_geneo_20_p_2.out
468       nsize: 4
469       args: -ksp_converged_reason -pc_type hpddm -pc_hpddm_levels_1_sub_pc_type cholesky -pc_hpddm_levels_1_eps_nev 20 -pc_hpddm_levels_2_p 2 -pc_hpddm_levels_2_mat_type {{baij sbaij}shared output} -pc_hpddm_levels_2_eps_nev {{5 20}shared output} -pc_hpddm_levels_2_sub_pc_type cholesky -pc_hpddm_levels_2_ksp_type gmres -ksp_type fgmres -pc_hpddm_coarse_mat_type {{baij sbaij}shared output} -load_dir ${DATAFILESPATH}/matrices/hpddm/GENEO
470       test:
471         suffix: fgmres_geneo_20_p_2_geneo
472         args: -mat_type {{aij sbaij}shared output}
473       test:
474         suffix: fgmres_geneo_20_p_2_geneo_algebraic
475         args: -pc_hpddm_levels_2_st_pc_type mat
476    # PCHPDDM + KSPHPDDM test to exercise multilevel + multiple RHS in one go
477    testset:
478       requires: hpddm slepc datafilespath double !complex !defined(PETSC_USE_64BIT_INDICES) defined(PETSC_HAVE_DYNAMIC_LIBRARIES) defined(PETSC_USE_SHARED_LIBRARIES)
479       output_file: output/ex76_fgmres_geneo_20_p_2.out
480       # for -pc_hpddm_coarse_correction additive
481       filter: sed -e "s/Linear solve converged due to CONVERGED_RTOL iterations 37/Linear solve converged due to CONVERGED_RTOL iterations 25/g"
482       nsize: 4
483       args: -ksp_converged_reason -pc_type hpddm -pc_hpddm_levels_1_sub_pc_type cholesky -pc_hpddm_levels_1_eps_nev 20 -ksp_type hpddm -ksp_hpddm_variant flexible -pc_hpddm_coarse_mat_type baij -load_dir ${DATAFILESPATH}/matrices/hpddm/GENEO -rhs 4
484       test:
485         suffix: fgmres_geneo_20_p_2_geneo_rhs
486         args: -pc_hpddm_levels_2_p 2 -pc_hpddm_levels_2_mat_type baij -pc_hpddm_levels_2_eps_nev 5 -pc_hpddm_levels_2_sub_pc_type cholesky -pc_hpddm_levels_2_ksp_max_it 10 -pc_hpddm_levels_2_ksp_type hpddm -pc_hpddm_levels_2_ksp_hpddm_type gmres -mat_type aij -pc_hpddm_coarse_correction {{additive deflated balanced}shared output}
487       test:
488         requires: cuda
489         suffix: fgmres_geneo_20_rhs_cuda
490         args: -mat_type aijcusparse -pc_hpddm_coarse_correction {{deflated balanced}shared output}
491 
492    testset:
493       requires: hpddm slepc datafilespath double !complex !defined(PETSC_USE_64BIT_INDICES) defined(PETSC_HAVE_DYNAMIC_LIBRARIES) defined(PETSC_USE_SHARED_LIBRARIES) mumps defined(PETSC_HAVE_OPENMP_SUPPORT)
494       filter: grep -E -e "Linear solve" -e "      executing" | sed -e "s/MPI =      1/MPI =      2/g" -e "s/OMP =      1/OMP =      2/g"
495       nsize: 4
496       args: -ksp_converged_reason -pc_type hpddm -pc_hpddm_levels_1_sub_pc_type cholesky -pc_hpddm_levels_1_eps_nev 15 -pc_hpddm_levels_1_st_pc_type cholesky -pc_hpddm_coarse_p {{1 2}shared output} -load_dir ${DATAFILESPATH}/matrices/hpddm/GENEO -pc_hpddm_coarse_pc_factor_mat_solver_type mumps -pc_hpddm_coarse_mat_mumps_icntl_4 2 -pc_hpddm_coarse_mat_mumps_use_omp_threads {{1 2}shared output}
497       test:
498         suffix: geneo_mumps_use_omp_threads_1
499         output_file: output/ex76_geneo_mumps_use_omp_threads.out
500         args: -pc_hpddm_coarse_mat_type {{baij sbaij}shared output}
501       test:
502         suffix: geneo_mumps_use_omp_threads_2
503         output_file: output/ex76_geneo_mumps_use_omp_threads.out
504         args: -pc_hpddm_coarse_mat_type aij -pc_hpddm_levels_1_eps_threshold_absolute 0.4 -pc_hpddm_coarse_pc_type cholesky -pc_hpddm_coarse_mat_filter 1e-12
505 
   testset: # converges really poorly because of a tiny -pc_hpddm_levels_1_eps_threshold_absolute, but needed for proper code coverage where some subdomains don't call EPSSolve()
507       requires: hpddm slepc datafilespath double !complex !defined(PETSC_USE_64BIT_INDICES) defined(PETSC_HAVE_DYNAMIC_LIBRARIES) defined(PETSC_USE_SHARED_LIBRARIES)
508       nsize: 4
509       args: -ksp_converged_reason -pc_type hpddm -pc_hpddm_levels_1_sub_pc_type cholesky -pc_hpddm_levels_1_eps_threshold_absolute 0.005 -pc_hpddm_levels_1_eps_use_inertia -load_dir ${DATAFILESPATH}/matrices/hpddm/GENEO -pc_hpddm_levels_1_st_share_sub_ksp -pc_hpddm_define_subdomains -pc_hpddm_has_neumann -ksp_rtol 0.9
510       filter: sed -e "s/Linear solve converged due to CONVERGED_RTOL iterations 1/Linear solve converged due to CONVERGED_RTOL iterations 141/g"
511       test:
512         suffix: inertia_petsc
513         output_file: output/ex76_1.out
514         args: -pc_hpddm_levels_1_sub_pc_factor_mat_solver_type petsc
515       test:
516         suffix: inertia_mumps
517         output_file: output/ex76_1.out
518         requires: mumps
519 
520    test:
521       requires: hpddm slepc datafilespath double !complex !defined(PETSC_USE_64BIT_INDICES) defined(PETSC_HAVE_DYNAMIC_LIBRARIES) defined(PETSC_USE_SHARED_LIBRARIES)
522       suffix: reuse_symbolic
523       output_file: output/empty.out
524       nsize: 4
      args: -pc_type hpddm -pc_hpddm_levels_1_sub_pc_type cholesky -pc_hpddm_levels_1_eps_nev 20 -rhs 4 -pc_hpddm_coarse_correction {{additive deflated balanced}shared output} -ksp_pc_side {{left right}shared output} -ksp_max_it 20 -ksp_type hpddm -load_dir ${DATAFILESPATH}/matrices/hpddm/GENEO -pc_hpddm_define_subdomains -ksp_error_if_not_converged -transpose {{true false}shared output}
526 
527 TEST*/
528