1 #include <petsc/private/pcbddcimpl.h>
2 #include <petsc/private/pcbddcprivateimpl.h>
3 #include <../src/mat/impls/dense/seq/dense.h>
4 #include <petscblaslapack.h>
5
6 static inline PetscErrorCode PCBDDCAdjGetNextLayer_Private(PetscInt *, PetscInt, PetscBT, PetscInt *, PetscInt *, PetscInt *);
7 static PetscErrorCode PCBDDCComputeExplicitSchur(Mat, PetscBool, MatReuse, Mat *);
8 static PetscErrorCode PCBDDCReuseSolvers_Interior(PC, Vec, Vec);
9 static PetscErrorCode PCBDDCReuseSolvers_Correction(PC, Vec, Vec);
10
11 /* if v2 is not present, correction is done in-place */
PCBDDCReuseSolversBenignAdapt(PCBDDCReuseSolvers ctx,Vec v,Vec v2,PetscBool sol,PetscBool full)12 PetscErrorCode PCBDDCReuseSolversBenignAdapt(PCBDDCReuseSolvers ctx, Vec v, Vec v2, PetscBool sol, PetscBool full)
13 {
14 PetscScalar *array;
15 PetscScalar *array2;
16
17 PetscFunctionBegin;
18 if (!ctx->benign_n) PetscFunctionReturn(PETSC_SUCCESS);
19 if (sol && full) {
20 PetscInt n_I, size_schur;
21
22 /* get sizes */
23 PetscCall(MatGetSize(ctx->benign_csAIB, &size_schur, NULL));
24 PetscCall(VecGetSize(v, &n_I));
25 n_I = n_I - size_schur;
26 /* get schur sol from array */
27 PetscCall(VecGetArray(v, &array));
28 PetscCall(VecPlaceArray(ctx->benign_dummy_schur_vec, array + n_I));
29 PetscCall(VecRestoreArray(v, &array));
30 /* apply interior sol correction */
31 PetscCall(MatMultTranspose(ctx->benign_csAIB, ctx->benign_dummy_schur_vec, ctx->benign_corr_work));
32 PetscCall(VecResetArray(ctx->benign_dummy_schur_vec));
33 PetscCall(MatMultAdd(ctx->benign_AIIm1ones, ctx->benign_corr_work, v, v));
34 }
35 if (v2) {
36 PetscInt nl;
37
38 PetscCall(VecGetArrayRead(v, (const PetscScalar **)&array));
39 PetscCall(VecGetLocalSize(v2, &nl));
40 PetscCall(VecGetArray(v2, &array2));
41 PetscCall(PetscArraycpy(array2, array, nl));
42 } else {
43 PetscCall(VecGetArray(v, &array));
44 array2 = array;
45 }
46 if (!sol) { /* change rhs */
47 PetscInt n;
48 for (n = 0; n < ctx->benign_n; n++) {
49 PetscScalar sum = 0.;
50 const PetscInt *cols;
51 PetscInt nz, i;
52
53 PetscCall(ISGetLocalSize(ctx->benign_zerodiag_subs[n], &nz));
54 PetscCall(ISGetIndices(ctx->benign_zerodiag_subs[n], &cols));
55 for (i = 0; i < nz - 1; i++) sum += array[cols[i]];
56 #if defined(PETSC_USE_COMPLEX)
57 sum = -(PetscRealPart(sum) / nz + PETSC_i * (PetscImaginaryPart(sum) / nz));
58 #else
59 sum = -sum / nz;
60 #endif
61 for (i = 0; i < nz - 1; i++) array2[cols[i]] += sum;
62 ctx->benign_save_vals[n] = array2[cols[nz - 1]];
63 array2[cols[nz - 1]] = sum;
64 PetscCall(ISRestoreIndices(ctx->benign_zerodiag_subs[n], &cols));
65 }
66 } else {
67 PetscInt n;
68 for (n = 0; n < ctx->benign_n; n++) {
69 PetscScalar sum = 0.;
70 const PetscInt *cols;
71 PetscInt nz, i;
72 PetscCall(ISGetLocalSize(ctx->benign_zerodiag_subs[n], &nz));
73 PetscCall(ISGetIndices(ctx->benign_zerodiag_subs[n], &cols));
74 for (i = 0; i < nz - 1; i++) sum += array[cols[i]];
75 #if defined(PETSC_USE_COMPLEX)
76 sum = -(PetscRealPart(sum) / nz + PETSC_i * (PetscImaginaryPart(sum) / nz));
77 #else
78 sum = -sum / nz;
79 #endif
80 for (i = 0; i < nz - 1; i++) array2[cols[i]] += sum;
81 array2[cols[nz - 1]] = ctx->benign_save_vals[n];
82 PetscCall(ISRestoreIndices(ctx->benign_zerodiag_subs[n], &cols));
83 }
84 }
85 if (v2) {
86 PetscCall(VecRestoreArrayRead(v, (const PetscScalar **)&array));
87 PetscCall(VecRestoreArray(v2, &array2));
88 } else {
89 PetscCall(VecRestoreArray(v, &array));
90 }
91 if (!sol && full) {
92 Vec usedv;
93 PetscInt n_I, size_schur;
94
95 /* get sizes */
96 PetscCall(MatGetSize(ctx->benign_csAIB, &size_schur, NULL));
97 PetscCall(VecGetSize(v, &n_I));
98 n_I = n_I - size_schur;
99 /* compute schur rhs correction */
100 if (v2) {
101 usedv = v2;
102 } else {
103 usedv = v;
104 }
105 /* apply schur rhs correction */
106 PetscCall(MatMultTranspose(ctx->benign_AIIm1ones, usedv, ctx->benign_corr_work));
107 PetscCall(VecGetArrayRead(usedv, (const PetscScalar **)&array));
108 PetscCall(VecPlaceArray(ctx->benign_dummy_schur_vec, array + n_I));
109 PetscCall(VecRestoreArrayRead(usedv, (const PetscScalar **)&array));
110 PetscCall(MatMultAdd(ctx->benign_csAIB, ctx->benign_corr_work, ctx->benign_dummy_schur_vec, ctx->benign_dummy_schur_vec));
111 PetscCall(VecResetArray(ctx->benign_dummy_schur_vec));
112 }
113 PetscFunctionReturn(PETSC_SUCCESS);
114 }
115
PCBDDCReuseSolvers_Solve_Private(PC pc,Vec rhs,Vec sol,PetscBool transpose,PetscBool full)116 static PetscErrorCode PCBDDCReuseSolvers_Solve_Private(PC pc, Vec rhs, Vec sol, PetscBool transpose, PetscBool full)
117 {
118 PCBDDCReuseSolvers ctx;
119 PetscBool copy = PETSC_FALSE;
120
121 PetscFunctionBegin;
122 PetscCall(PCShellGetContext(pc, &ctx));
123 if (full) {
124 PetscCall(MatMumpsSetIcntl(ctx->F, 26, -1));
125 #if defined(PETSC_HAVE_MKL_PARDISO)
126 PetscCall(MatMkl_PardisoSetCntl(ctx->F, 70, 0));
127 #endif
128 copy = ctx->has_vertices;
129 } else { /* interior solver */
130 PetscCall(MatMumpsSetIcntl(ctx->F, 26, 0));
131 #if defined(PETSC_HAVE_MKL_PARDISO)
132 PetscCall(MatMkl_PardisoSetCntl(ctx->F, 70, 1));
133 #endif
134 copy = PETSC_TRUE;
135 }
136 /* copy rhs into factored matrix workspace */
137 if (copy) {
138 PetscInt n;
139 PetscScalar *array, *array_solver;
140
141 PetscCall(VecGetLocalSize(rhs, &n));
142 PetscCall(VecGetArrayRead(rhs, (const PetscScalar **)&array));
143 PetscCall(VecGetArray(ctx->rhs, &array_solver));
144 PetscCall(PetscArraycpy(array_solver, array, n));
145 PetscCall(VecRestoreArray(ctx->rhs, &array_solver));
146 PetscCall(VecRestoreArrayRead(rhs, (const PetscScalar **)&array));
147
148 PetscCall(PCBDDCReuseSolversBenignAdapt(ctx, ctx->rhs, NULL, PETSC_FALSE, full));
149 if (transpose) {
150 PetscCall(MatSolveTranspose(ctx->F, ctx->rhs, ctx->sol));
151 } else {
152 PetscCall(MatSolve(ctx->F, ctx->rhs, ctx->sol));
153 }
154 PetscCall(PCBDDCReuseSolversBenignAdapt(ctx, ctx->sol, NULL, PETSC_TRUE, full));
155
156 /* get back data to caller worskpace */
157 PetscCall(VecGetArrayRead(ctx->sol, (const PetscScalar **)&array_solver));
158 PetscCall(VecGetArray(sol, &array));
159 PetscCall(PetscArraycpy(array, array_solver, n));
160 PetscCall(VecRestoreArray(sol, &array));
161 PetscCall(VecRestoreArrayRead(ctx->sol, (const PetscScalar **)&array_solver));
162 } else {
163 if (ctx->benign_n) {
164 PetscCall(PCBDDCReuseSolversBenignAdapt(ctx, rhs, ctx->rhs, PETSC_FALSE, full));
165 if (transpose) {
166 PetscCall(MatSolveTranspose(ctx->F, ctx->rhs, sol));
167 } else {
168 PetscCall(MatSolve(ctx->F, ctx->rhs, sol));
169 }
170 PetscCall(PCBDDCReuseSolversBenignAdapt(ctx, sol, NULL, PETSC_TRUE, full));
171 } else {
172 if (transpose) {
173 PetscCall(MatSolveTranspose(ctx->F, rhs, sol));
174 } else {
175 PetscCall(MatSolve(ctx->F, rhs, sol));
176 }
177 }
178 }
179 /* restore defaults */
180 PetscCall(MatMumpsSetIcntl(ctx->F, 26, -1));
181 #if defined(PETSC_HAVE_MKL_PARDISO)
182 PetscCall(MatMkl_PardisoSetCntl(ctx->F, 70, 0));
183 #endif
184 PetscFunctionReturn(PETSC_SUCCESS);
185 }
186
PCBDDCReuseSolvers_Correction(PC pc,Vec rhs,Vec sol)187 static PetscErrorCode PCBDDCReuseSolvers_Correction(PC pc, Vec rhs, Vec sol)
188 {
189 PetscFunctionBegin;
190 PetscCall(PCBDDCReuseSolvers_Solve_Private(pc, rhs, sol, PETSC_FALSE, PETSC_TRUE));
191 PetscFunctionReturn(PETSC_SUCCESS);
192 }
193
PCBDDCReuseSolvers_CorrectionTranspose(PC pc,Vec rhs,Vec sol)194 static PetscErrorCode PCBDDCReuseSolvers_CorrectionTranspose(PC pc, Vec rhs, Vec sol)
195 {
196 PetscFunctionBegin;
197 PetscCall(PCBDDCReuseSolvers_Solve_Private(pc, rhs, sol, PETSC_TRUE, PETSC_TRUE));
198 PetscFunctionReturn(PETSC_SUCCESS);
199 }
200
PCBDDCReuseSolvers_Interior(PC pc,Vec rhs,Vec sol)201 static PetscErrorCode PCBDDCReuseSolvers_Interior(PC pc, Vec rhs, Vec sol)
202 {
203 PetscFunctionBegin;
204 PetscCall(PCBDDCReuseSolvers_Solve_Private(pc, rhs, sol, PETSC_FALSE, PETSC_FALSE));
205 PetscFunctionReturn(PETSC_SUCCESS);
206 }
207
PCBDDCReuseSolvers_InteriorTranspose(PC pc,Vec rhs,Vec sol)208 static PetscErrorCode PCBDDCReuseSolvers_InteriorTranspose(PC pc, Vec rhs, Vec sol)
209 {
210 PetscFunctionBegin;
211 PetscCall(PCBDDCReuseSolvers_Solve_Private(pc, rhs, sol, PETSC_TRUE, PETSC_FALSE));
212 PetscFunctionReturn(PETSC_SUCCESS);
213 }
214
PCBDDCReuseSolvers_View(PC pc,PetscViewer viewer)215 static PetscErrorCode PCBDDCReuseSolvers_View(PC pc, PetscViewer viewer)
216 {
217 PCBDDCReuseSolvers ctx;
218 PetscBool isascii;
219
220 PetscFunctionBegin;
221 PetscCall(PCShellGetContext(pc, &ctx));
222 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii));
223 if (isascii) PetscCall(PetscViewerPushFormat(viewer, PETSC_VIEWER_ASCII_INFO));
224 PetscCall(MatView(ctx->F, viewer));
225 if (isascii) PetscCall(PetscViewerPopFormat(viewer));
226 PetscFunctionReturn(PETSC_SUCCESS);
227 }
228
PCBDDCReuseSolversReset(PCBDDCReuseSolvers reuse)229 static PetscErrorCode PCBDDCReuseSolversReset(PCBDDCReuseSolvers reuse)
230 {
231 PetscInt i;
232
233 PetscFunctionBegin;
234 PetscCall(MatDestroy(&reuse->F));
235 PetscCall(VecDestroy(&reuse->sol));
236 PetscCall(VecDestroy(&reuse->rhs));
237 PetscCall(PCDestroy(&reuse->interior_solver));
238 PetscCall(PCDestroy(&reuse->correction_solver));
239 PetscCall(ISDestroy(&reuse->is_R));
240 PetscCall(ISDestroy(&reuse->is_B));
241 PetscCall(VecScatterDestroy(&reuse->correction_scatter_B));
242 PetscCall(VecDestroy(&reuse->sol_B));
243 PetscCall(VecDestroy(&reuse->rhs_B));
244 for (i = 0; i < reuse->benign_n; i++) PetscCall(ISDestroy(&reuse->benign_zerodiag_subs[i]));
245 PetscCall(PetscFree(reuse->benign_zerodiag_subs));
246 PetscCall(PetscFree(reuse->benign_save_vals));
247 PetscCall(MatDestroy(&reuse->benign_csAIB));
248 PetscCall(MatDestroy(&reuse->benign_AIIm1ones));
249 PetscCall(VecDestroy(&reuse->benign_corr_work));
250 PetscCall(VecDestroy(&reuse->benign_dummy_schur_vec));
251 PetscFunctionReturn(PETSC_SUCCESS);
252 }
253
PCBDDCReuseSolvers_Destroy(PC pc)254 static PetscErrorCode PCBDDCReuseSolvers_Destroy(PC pc)
255 {
256 PCBDDCReuseSolvers ctx;
257
258 PetscFunctionBegin;
259 PetscCall(PCShellGetContext(pc, &ctx));
260 PetscCall(PCBDDCReuseSolversReset(ctx));
261 PetscCall(PetscFree(ctx));
262 PetscCall(PCShellSetContext(pc, NULL));
263 PetscFunctionReturn(PETSC_SUCCESS);
264 }
265
PCBDDCComputeExplicitSchur(Mat M,PetscBool issym,MatReuse reuse,Mat * S)266 static PetscErrorCode PCBDDCComputeExplicitSchur(Mat M, PetscBool issym, MatReuse reuse, Mat *S)
267 {
268 Mat B, C, D, Bd, Cd, AinvBd;
269 KSP ksp;
270 PC pc;
271 PetscBool isLU, isILU, isCHOL, Bdense, Cdense;
272 PetscReal fill = 2.0;
273 PetscInt n_I;
274 PetscMPIInt size;
275
276 PetscFunctionBegin;
277 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)M), &size));
278 PetscCheck(size == 1, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not for parallel matrices");
279 if (reuse == MAT_REUSE_MATRIX) {
280 PetscBool Sdense;
281
282 PetscCall(PetscObjectTypeCompare((PetscObject)*S, MATSEQDENSE, &Sdense));
283 PetscCheck(Sdense, PetscObjectComm((PetscObject)M), PETSC_ERR_SUP, "S should dense");
284 }
285 PetscCall(MatSchurComplementGetSubMatrices(M, NULL, NULL, &B, &C, &D));
286 PetscCall(MatSchurComplementGetKSP(M, &ksp));
287 PetscCall(KSPGetPC(ksp, &pc));
288 PetscCall(PetscObjectTypeCompare((PetscObject)pc, PCLU, &isLU));
289 PetscCall(PetscObjectTypeCompare((PetscObject)pc, PCILU, &isILU));
290 PetscCall(PetscObjectTypeCompare((PetscObject)pc, PCCHOLESKY, &isCHOL));
291 PetscCall(PetscObjectTypeCompare((PetscObject)B, MATSEQDENSE, &Bdense));
292 PetscCall(PetscObjectTypeCompare((PetscObject)C, MATSEQDENSE, &Cdense));
293 PetscCall(MatGetSize(B, &n_I, NULL));
294 if (n_I) {
295 if (!Bdense) {
296 PetscCall(MatConvert(B, MATSEQDENSE, MAT_INITIAL_MATRIX, &Bd));
297 } else {
298 Bd = B;
299 }
300
301 if (isLU || isILU || isCHOL) {
302 Mat fact;
303 PetscCall(KSPSetUp(ksp));
304 PetscCall(PCFactorGetMatrix(pc, &fact));
305 PetscCall(MatDuplicate(Bd, MAT_DO_NOT_COPY_VALUES, &AinvBd));
306 PetscCall(MatMatSolve(fact, Bd, AinvBd));
307 } else {
308 PetscBool ex = PETSC_TRUE;
309
310 if (ex) {
311 Mat Ainvd;
312
313 PetscCall(PCComputeOperator(pc, MATDENSE, &Ainvd));
314 PetscCall(MatMatMult(Ainvd, Bd, MAT_INITIAL_MATRIX, fill, &AinvBd));
315 PetscCall(MatDestroy(&Ainvd));
316 } else {
317 Vec sol, rhs;
318 PetscScalar *arrayrhs, *arraysol;
319 PetscInt i, nrhs, n;
320
321 PetscCall(MatDuplicate(Bd, MAT_DO_NOT_COPY_VALUES, &AinvBd));
322 PetscCall(MatGetSize(Bd, &n, &nrhs));
323 PetscCall(MatDenseGetArray(Bd, &arrayrhs));
324 PetscCall(MatDenseGetArray(AinvBd, &arraysol));
325 PetscCall(KSPGetSolution(ksp, &sol));
326 PetscCall(KSPGetRhs(ksp, &rhs));
327 for (i = 0; i < nrhs; i++) {
328 PetscCall(VecPlaceArray(rhs, arrayrhs + i * n));
329 PetscCall(VecPlaceArray(sol, arraysol + i * n));
330 PetscCall(KSPSolve(ksp, rhs, sol));
331 PetscCall(VecResetArray(rhs));
332 PetscCall(VecResetArray(sol));
333 }
334 PetscCall(MatDenseRestoreArray(Bd, &arrayrhs));
335 PetscCall(MatDenseRestoreArray(AinvBd, &arrayrhs));
336 }
337 }
338 if (!Bdense & !issym) PetscCall(MatDestroy(&Bd));
339
340 if (!issym) {
341 if (!Cdense) {
342 PetscCall(MatConvert(C, MATSEQDENSE, MAT_INITIAL_MATRIX, &Cd));
343 } else {
344 Cd = C;
345 }
346 PetscCall(MatMatMult(Cd, AinvBd, reuse, fill, S));
347 if (!Cdense) PetscCall(MatDestroy(&Cd));
348 } else {
349 PetscCall(MatTransposeMatMult(Bd, AinvBd, reuse, fill, S));
350 if (!Bdense) PetscCall(MatDestroy(&Bd));
351 }
352 PetscCall(MatDestroy(&AinvBd));
353 }
354
355 if (D) {
356 Mat Dd;
357 PetscBool Ddense;
358
359 PetscCall(PetscObjectTypeCompare((PetscObject)D, MATSEQDENSE, &Ddense));
360 if (!Ddense) {
361 PetscCall(MatConvert(D, MATSEQDENSE, MAT_INITIAL_MATRIX, &Dd));
362 } else {
363 Dd = D;
364 }
365 if (n_I) {
366 PetscCall(MatAYPX(*S, -1.0, Dd, SAME_NONZERO_PATTERN));
367 } else {
368 if (reuse == MAT_INITIAL_MATRIX) {
369 PetscCall(MatDuplicate(Dd, MAT_COPY_VALUES, S));
370 } else {
371 PetscCall(MatCopy(Dd, *S, SAME_NONZERO_PATTERN));
372 }
373 }
374 if (!Ddense) PetscCall(MatDestroy(&Dd));
375 } else {
376 PetscCall(MatScale(*S, -1.0));
377 }
378 PetscFunctionReturn(PETSC_SUCCESS);
379 }
380
PCBDDCSubSchursSetUp(PCBDDCSubSchurs sub_schurs,Mat Ain,Mat Sin,PetscBool exact_schur,PetscInt xadj[],PetscInt adjncy[],PetscInt nlayers,Vec scaling,PetscBool compute_Stilda,PetscBool reuse_solvers,PetscBool benign_trick,PetscInt benign_n,PetscInt benign_p0_lidx[],IS benign_zerodiag_subs[],Mat change,IS change_primal)381 PetscErrorCode PCBDDCSubSchursSetUp(PCBDDCSubSchurs sub_schurs, Mat Ain, Mat Sin, PetscBool exact_schur, PetscInt xadj[], PetscInt adjncy[], PetscInt nlayers, Vec scaling, PetscBool compute_Stilda, PetscBool reuse_solvers, PetscBool benign_trick, PetscInt benign_n, PetscInt benign_p0_lidx[], IS benign_zerodiag_subs[], Mat change, IS change_primal)
382 {
383 Mat F, A_II, A_IB, A_BI, A_BB, AE_II;
384 Mat S_all;
385 Vec gstash, lstash;
386 VecScatter sstash;
387 IS is_I, is_I_layer;
388 IS all_subsets, all_subsets_mult, all_subsets_n;
389 PetscScalar *stasharray, *Bwork;
390 PetscInt *all_local_idx_N, *all_local_subid_N = NULL;
391 PetscInt *auxnum1, *auxnum2;
392 PetscInt *local_subs = sub_schurs->graph->local_subs;
393 PetscInt i, subset_size, max_subset_size, n_local_subs = sub_schurs->graph->n_local_subs;
394 PetscInt n_B, extra, local_size, global_size;
395 PetscInt local_stash_size;
396 PetscBLASInt B_N, B_ierr, B_lwork, *pivots;
397 MPI_Comm comm_n;
398 PetscBool deluxe = PETSC_TRUE;
399 PetscBool use_potr = PETSC_FALSE, use_sytr = PETSC_FALSE;
400 PetscViewer matl_dbg_viewer = NULL;
401 PetscBool flg, multi_element = sub_schurs->graph->multi_element;
402
403 PetscFunctionBegin;
404 PetscCall(MatDestroy(&sub_schurs->A));
405 PetscCall(MatDestroy(&sub_schurs->S));
406 if (Ain) {
407 PetscCall(PetscObjectReference((PetscObject)Ain));
408 sub_schurs->A = Ain;
409 }
410
411 PetscCall(PetscObjectReference((PetscObject)Sin));
412 sub_schurs->S = Sin;
413 if (sub_schurs->schur_explicit) sub_schurs->schur_explicit = (PetscBool)(!!sub_schurs->A);
414
415 /* preliminary checks */
416 PetscCheck(sub_schurs->schur_explicit || !compute_Stilda, PetscObjectComm((PetscObject)sub_schurs->l2gmap), PETSC_ERR_SUP, "Adaptive selection of constraints requires MUMPS and/or MKL_PARDISO");
417
418 if (benign_trick) sub_schurs->is_posdef = PETSC_FALSE;
419
420 /* debug (MATLAB) */
421 if (sub_schurs->debug) {
422 PetscMPIInt size, rank;
423 PetscInt nr, *print_schurs_ranks, print_schurs = PETSC_FALSE;
424
425 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)sub_schurs->l2gmap), &size));
426 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sub_schurs->l2gmap), &rank));
427 nr = size;
428 PetscCall(PetscMalloc1(nr, &print_schurs_ranks));
429 PetscOptionsBegin(PetscObjectComm((PetscObject)sub_schurs->l2gmap), sub_schurs->prefix, "BDDC sub_schurs options", "PC");
430 PetscCall(PetscOptionsIntArray("-sub_schurs_debug_ranks", "Ranks to debug (all if the option is not used)", NULL, print_schurs_ranks, &nr, &flg));
431 if (!flg) print_schurs = PETSC_TRUE;
432 else {
433 print_schurs = PETSC_FALSE;
434 for (i = 0; i < nr; i++)
435 if (print_schurs_ranks[i] == rank) {
436 print_schurs = PETSC_TRUE;
437 break;
438 }
439 }
440 PetscOptionsEnd();
441 PetscCall(PetscFree(print_schurs_ranks));
442 if (print_schurs) {
443 char filename[256];
444
445 PetscCall(PetscSNPrintf(filename, sizeof(filename), "sub_schurs_Schur_r%d.m", PetscGlobalRank));
446 PetscCall(PetscViewerASCIIOpen(PETSC_COMM_SELF, filename, &matl_dbg_viewer));
447 PetscCall(PetscViewerPushFormat(matl_dbg_viewer, PETSC_VIEWER_ASCII_MATLAB));
448 }
449 }
450
451 /* DEBUG: turn on/off multi-element code path */
452 PetscCall(PetscOptionsGetBool(NULL, sub_schurs->prefix, "-sub_schurs_multielement_code", &multi_element, NULL));
453 if (n_local_subs == 0) multi_element = PETSC_FALSE;
454
455 /* restrict work on active processes */
456 if (sub_schurs->restrict_comm) {
457 PetscSubcomm subcomm;
458 PetscMPIInt color, rank;
459
460 color = 0;
461 if (!sub_schurs->n_subs) color = 1; /* this can happen if we are in a multilevel case or if the subdomain is disconnected */
462 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sub_schurs->l2gmap), &rank));
463 PetscCall(PetscSubcommCreate(PetscObjectComm((PetscObject)sub_schurs->l2gmap), &subcomm));
464 PetscCall(PetscSubcommSetNumber(subcomm, 2));
465 PetscCall(PetscSubcommSetTypeGeneral(subcomm, color, rank));
466 PetscCall(PetscCommDuplicate(PetscSubcommChild(subcomm), &comm_n, NULL));
467 PetscCall(PetscSubcommDestroy(&subcomm));
468 if (!sub_schurs->n_subs) {
469 PetscCall(PetscCommDestroy(&comm_n));
470 PetscFunctionReturn(PETSC_SUCCESS);
471 }
472 } else {
473 PetscCall(PetscCommDuplicate(PetscObjectComm((PetscObject)sub_schurs->l2gmap), &comm_n, NULL));
474 }
475
476 /* get Schur complement matrices */
477 if (!sub_schurs->schur_explicit) {
478 Mat tA_IB, tA_BI, tA_BB;
479 PetscBool isseqsbaij;
480 PetscCall(MatSchurComplementGetSubMatrices(sub_schurs->S, &A_II, NULL, &tA_IB, &tA_BI, &tA_BB));
481 PetscCall(PetscObjectTypeCompare((PetscObject)tA_BB, MATSEQSBAIJ, &isseqsbaij));
482 if (isseqsbaij) {
483 PetscCall(MatConvert(tA_BB, MATSEQAIJ, MAT_INITIAL_MATRIX, &A_BB));
484 PetscCall(MatConvert(tA_IB, MATSEQAIJ, MAT_INITIAL_MATRIX, &A_IB));
485 PetscCall(MatConvert(tA_BI, MATSEQAIJ, MAT_INITIAL_MATRIX, &A_BI));
486 } else {
487 PetscCall(PetscObjectReference((PetscObject)tA_BB));
488 A_BB = tA_BB;
489 PetscCall(PetscObjectReference((PetscObject)tA_IB));
490 A_IB = tA_IB;
491 PetscCall(PetscObjectReference((PetscObject)tA_BI));
492 A_BI = tA_BI;
493 }
494 } else {
495 A_II = NULL;
496 A_IB = NULL;
497 A_BI = NULL;
498 A_BB = NULL;
499 }
500 S_all = NULL;
501
502 /* determine interior problems */
503 PetscCall(ISGetLocalSize(sub_schurs->is_I, &i));
504 if (nlayers >= 0 && i) { /* Interior problems can be different from the original one */
505 PetscBT touched;
506 const PetscInt *idx_B;
507 PetscInt n_I, n_B, n_local_dofs, n_prev_added, j, layer, *local_numbering;
508
509 PetscCheck(xadj, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot request layering without adjacency");
510 /* get sizes */
511 PetscCall(ISGetLocalSize(sub_schurs->is_I, &n_I));
512 PetscCall(ISGetLocalSize(sub_schurs->is_B, &n_B));
513
514 PetscCall(PetscMalloc1(n_I + n_B, &local_numbering));
515 PetscCall(PetscBTCreate(n_I + n_B, &touched));
516 PetscCall(PetscBTMemzero(n_I + n_B, touched));
517
518 /* all boundary dofs must be skipped when adding layers */
519 PetscCall(ISGetIndices(sub_schurs->is_B, &idx_B));
520 for (j = 0; j < n_B; j++) PetscCall(PetscBTSet(touched, idx_B[j]));
521 PetscCall(PetscArraycpy(local_numbering, idx_B, n_B));
522 PetscCall(ISRestoreIndices(sub_schurs->is_B, &idx_B));
523
524 /* add prescribed number of layers of dofs */
525 n_local_dofs = n_B;
526 n_prev_added = n_B;
527 for (layer = 0; layer < nlayers; layer++) {
528 PetscInt n_added = 0;
529 if (n_local_dofs == n_I + n_B) break;
530 PetscCheck(n_local_dofs <= n_I + n_B, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Error querying layer %" PetscInt_FMT ". Out of bound access (%" PetscInt_FMT " > %" PetscInt_FMT ")", layer, n_local_dofs, n_I + n_B);
531 PetscCall(PCBDDCAdjGetNextLayer_Private(local_numbering + n_local_dofs, n_prev_added, touched, xadj, adjncy, &n_added));
532 n_prev_added = n_added;
533 n_local_dofs += n_added;
534 if (!n_added) break;
535 }
536 PetscCall(PetscBTDestroy(&touched));
537
538 /* IS for I layer dofs in original numbering */
539 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)sub_schurs->is_I), n_local_dofs - n_B, local_numbering + n_B, PETSC_COPY_VALUES, &is_I_layer));
540 PetscCall(PetscFree(local_numbering));
541 PetscCall(ISSort(is_I_layer));
542 /* IS for I layer dofs in I numbering */
543 if (!sub_schurs->schur_explicit) {
544 ISLocalToGlobalMapping ItoNmap;
545 PetscCall(ISLocalToGlobalMappingCreateIS(sub_schurs->is_I, &ItoNmap));
546 PetscCall(ISGlobalToLocalMappingApplyIS(ItoNmap, IS_GTOLM_DROP, is_I_layer, &is_I));
547 PetscCall(ISLocalToGlobalMappingDestroy(&ItoNmap));
548
549 /* II block */
550 PetscCall(MatCreateSubMatrix(A_II, is_I, is_I, MAT_INITIAL_MATRIX, &AE_II));
551 }
552 } else {
553 PetscInt n_I;
554
555 /* IS for I dofs in original numbering */
556 PetscCall(PetscObjectReference((PetscObject)sub_schurs->is_I));
557 is_I_layer = sub_schurs->is_I;
558
559 /* IS for I dofs in I numbering (strided 1) */
560 if (!sub_schurs->schur_explicit) {
561 PetscCall(ISGetSize(sub_schurs->is_I, &n_I));
562 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)sub_schurs->is_I), n_I, 0, 1, &is_I));
563
564 /* II block is the same */
565 PetscCall(PetscObjectReference((PetscObject)A_II));
566 AE_II = A_II;
567 }
568 }
569
570 /* Get info on subset sizes and sum of all subsets sizes */
571 max_subset_size = 0;
572 local_size = 0;
573 for (i = 0; i < sub_schurs->n_subs; i++) {
574 PetscCall(ISGetLocalSize(sub_schurs->is_subs[i], &subset_size));
575 max_subset_size = PetscMax(subset_size, max_subset_size);
576 local_size += subset_size;
577 }
578
579 /* Work arrays for local indices */
580 extra = 0;
581 PetscCall(ISGetLocalSize(sub_schurs->is_B, &n_B));
582 if (sub_schurs->schur_explicit && is_I_layer) PetscCall(ISGetLocalSize(is_I_layer, &extra));
583 PetscCall(PetscMalloc1(n_B + extra, &all_local_idx_N));
584 if (multi_element) PetscCall(PetscMalloc1(n_B + extra, &all_local_subid_N));
585 if (extra) {
586 const PetscInt *idxs;
587 PetscCall(ISGetIndices(is_I_layer, &idxs));
588 PetscCall(PetscArraycpy(all_local_idx_N, idxs, extra));
589 if (multi_element)
590 for (PetscInt j = 0; j < extra; j++) all_local_subid_N[j] = local_subs[idxs[j]];
591 PetscCall(ISRestoreIndices(is_I_layer, &idxs));
592 }
593 PetscCall(PetscMalloc1(sub_schurs->n_subs, &auxnum1));
594 PetscCall(PetscMalloc1(sub_schurs->n_subs, &auxnum2));
595
596 /* Get local indices in local numbering */
597 local_size = 0;
598 local_stash_size = 0;
599 for (i = 0; i < sub_schurs->n_subs; i++) {
600 const PetscInt *idxs;
601
602 PetscCall(ISGetLocalSize(sub_schurs->is_subs[i], &subset_size));
603 PetscCall(ISGetIndices(sub_schurs->is_subs[i], &idxs));
604 /* start (smallest in global ordering) and multiplicity */
605 auxnum1[i] = idxs[0];
606 auxnum2[i] = subset_size * subset_size;
607 /* subset indices in local numbering */
608 PetscCall(PetscArraycpy(all_local_idx_N + local_size + extra, idxs, subset_size));
609 if (multi_element)
610 for (PetscInt j = 0; j < subset_size; j++) all_local_subid_N[j + local_size + extra] = local_subs[idxs[j]];
611 PetscCall(ISRestoreIndices(sub_schurs->is_subs[i], &idxs));
612 local_size += subset_size;
613 local_stash_size += subset_size * subset_size;
614 }
615
616 /* allocate extra workspace needed only for GETRI or SYTRF when inverting the blocks or the entire Schur complement */
617 use_potr = use_sytr = PETSC_FALSE;
618 if (benign_trick || (sub_schurs->is_hermitian && sub_schurs->is_posdef)) {
619 use_potr = PETSC_TRUE;
620 } else if (sub_schurs->is_symmetric) {
621 use_sytr = PETSC_TRUE;
622 }
623 if (local_size && !use_potr && compute_Stilda) {
624 PetscScalar lwork, dummyscalar = 0.;
625 PetscBLASInt dummyint = 0;
626
627 B_lwork = -1;
628 PetscCall(PetscBLASIntCast(local_size, &B_N));
629 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
630 if (use_sytr) {
631 PetscCallBLAS("LAPACKsytrf", LAPACKsytrf_("L", &B_N, &dummyscalar, &B_N, &dummyint, &lwork, &B_lwork, &B_ierr));
632 PetscCheck(!B_ierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in query to SYTRF Lapack routine %" PetscBLASInt_FMT, B_ierr);
633 } else {
634 PetscCallBLAS("LAPACKgetri", LAPACKgetri_(&B_N, &dummyscalar, &B_N, &dummyint, &lwork, &B_lwork, &B_ierr));
635 PetscCheck(!B_ierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in query to GETRI Lapack routine %" PetscBLASInt_FMT, B_ierr);
636 }
637 PetscCall(PetscFPTrapPop());
638 PetscCall(PetscBLASIntCast((PetscInt)PetscRealPart(lwork), &B_lwork));
639 PetscCall(PetscMalloc2(B_lwork, &Bwork, B_N, &pivots));
640 } else {
641 Bwork = NULL;
642 pivots = NULL;
643 }
644
645 /* prepare data for summing up properly schurs on subsets */
646 PetscCall(ISCreateGeneral(comm_n, sub_schurs->n_subs, auxnum1, PETSC_OWN_POINTER, &all_subsets_n));
647 PetscCall(ISLocalToGlobalMappingApplyIS(sub_schurs->l2gmap, all_subsets_n, &all_subsets));
648 PetscCall(ISDestroy(&all_subsets_n));
649 PetscCall(ISCreateGeneral(comm_n, sub_schurs->n_subs, auxnum2, PETSC_OWN_POINTER, &all_subsets_mult));
650 PetscCall(ISRenumber(all_subsets, all_subsets_mult, &global_size, &all_subsets_n));
651 PetscCall(ISDestroy(&all_subsets));
652 PetscCall(ISDestroy(&all_subsets_mult));
653 PetscCall(ISGetLocalSize(all_subsets_n, &i));
654 PetscCheck(i == local_stash_size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Invalid size of new subset! %" PetscInt_FMT " != %" PetscInt_FMT, i, local_stash_size);
655 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, local_stash_size, NULL, &lstash));
656 PetscCall(VecCreateMPI(comm_n, PETSC_DECIDE, global_size, &gstash));
657 PetscCall(VecScatterCreate(lstash, NULL, gstash, all_subsets_n, &sstash));
658 PetscCall(ISDestroy(&all_subsets_n));
659
660 /* subset indices in local boundary numbering */
661 if (!sub_schurs->is_Ej_all) {
662 PetscInt *all_local_idx_B;
663
664 PetscCall(PetscMalloc1(local_size, &all_local_idx_B));
665 PetscCall(ISGlobalToLocalMappingApply(sub_schurs->BtoNmap, IS_GTOLM_DROP, local_size, all_local_idx_N + extra, &subset_size, all_local_idx_B));
666 PetscCheck(subset_size == local_size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Error in sub_schurs serial (BtoNmap)! %" PetscInt_FMT " != %" PetscInt_FMT, subset_size, local_size);
667 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, local_size, all_local_idx_B, PETSC_OWN_POINTER, &sub_schurs->is_Ej_all));
668 }
669
670 if (change) {
671 ISLocalToGlobalMapping BtoS;
672 IS change_primal_B;
673 IS change_primal_all;
674
675 PetscCheck(!sub_schurs->change_primal_sub, PETSC_COMM_SELF, PETSC_ERR_PLIB, "This should not happen");
676 PetscCheck(!sub_schurs->change, PETSC_COMM_SELF, PETSC_ERR_PLIB, "This should not happen");
677 PetscCall(PetscMalloc1(sub_schurs->n_subs, &sub_schurs->change_primal_sub));
678 for (i = 0; i < sub_schurs->n_subs; i++) {
679 ISLocalToGlobalMapping NtoS;
680 PetscCall(ISLocalToGlobalMappingCreateIS(sub_schurs->is_subs[i], &NtoS));
681 PetscCall(ISGlobalToLocalMappingApplyIS(NtoS, IS_GTOLM_DROP, change_primal, &sub_schurs->change_primal_sub[i]));
682 PetscCall(ISLocalToGlobalMappingDestroy(&NtoS));
683 }
684 PetscCall(ISGlobalToLocalMappingApplyIS(sub_schurs->BtoNmap, IS_GTOLM_DROP, change_primal, &change_primal_B));
685 PetscCall(ISLocalToGlobalMappingCreateIS(sub_schurs->is_Ej_all, &BtoS));
686 PetscCall(ISGlobalToLocalMappingApplyIS(BtoS, IS_GTOLM_DROP, change_primal_B, &change_primal_all));
687 PetscCall(ISLocalToGlobalMappingDestroy(&BtoS));
688 PetscCall(ISDestroy(&change_primal_B));
689 PetscCall(PetscMalloc1(sub_schurs->n_subs, &sub_schurs->change));
690 for (i = 0; i < sub_schurs->n_subs; i++) {
691 Mat change_sub;
692
693 PetscCall(ISGetLocalSize(sub_schurs->is_subs[i], &subset_size));
694 PetscCall(KSPCreate(PETSC_COMM_SELF, &sub_schurs->change[i]));
695 PetscCall(KSPSetNestLevel(sub_schurs->change[i], 1)); /* do not seem to have direct access to a PC from which to get the level of nests */
696 PetscCall(KSPSetType(sub_schurs->change[i], KSPPREONLY));
697 if (!sub_schurs->change_with_qr) {
698 PetscCall(MatCreateSubMatrix(change, sub_schurs->is_subs[i], sub_schurs->is_subs[i], MAT_INITIAL_MATRIX, &change_sub));
699 } else {
700 Mat change_subt;
701 PetscCall(MatCreateSubMatrix(change, sub_schurs->is_subs[i], sub_schurs->is_subs[i], MAT_INITIAL_MATRIX, &change_subt));
702 PetscCall(MatConvert(change_subt, MATSEQDENSE, MAT_INITIAL_MATRIX, &change_sub));
703 PetscCall(MatDestroy(&change_subt));
704 }
705 PetscCall(KSPSetOperators(sub_schurs->change[i], change_sub, change_sub));
706 PetscCall(MatDestroy(&change_sub));
707 PetscCall(KSPSetOptionsPrefix(sub_schurs->change[i], sub_schurs->prefix));
708 PetscCall(KSPAppendOptionsPrefix(sub_schurs->change[i], "sub_schurs_change_"));
709 }
710 PetscCall(ISDestroy(&change_primal_all));
711 }
712
713 /* Local matrix of all local Schur on subsets (transposed) */
714 if (!sub_schurs->S_Ej_all) {
715 Mat T;
716 PetscScalar *v;
717 PetscInt *ii, *jj;
718 PetscInt cum, i, j, k;
719
720 /* MatSeqAIJSetPreallocation + MatSetValues is slow for these kind of matrices (may have large blocks)
721 Allocate properly a representative matrix and duplicate */
722 PetscCall(PetscMalloc3(local_size + 1, &ii, local_stash_size, &jj, local_stash_size, &v));
723 ii[0] = 0;
724 cum = 0;
725 for (i = 0; i < sub_schurs->n_subs; i++) {
726 PetscCall(ISGetLocalSize(sub_schurs->is_subs[i], &subset_size));
727 for (j = 0; j < subset_size; j++) {
728 const PetscInt row = cum + j;
729 PetscInt col = cum;
730
731 ii[row + 1] = ii[row] + subset_size;
732 for (k = ii[row]; k < ii[row + 1]; k++) {
733 jj[k] = col;
734 col++;
735 }
736 }
737 cum += subset_size;
738 }
739 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, local_size, local_size, ii, jj, v, &T));
740 PetscCall(MatDuplicate(T, MAT_DO_NOT_COPY_VALUES, &sub_schurs->S_Ej_all));
741 PetscCall(MatDestroy(&T));
742 PetscCall(PetscFree3(ii, jj, v));
743 }
744 /* matrices for deluxe scaling and adaptive selection */
745 if (compute_Stilda) {
746 if (!sub_schurs->sum_S_Ej_tilda_all) PetscCall(MatDuplicate(sub_schurs->S_Ej_all, MAT_DO_NOT_COPY_VALUES, &sub_schurs->sum_S_Ej_tilda_all));
747 if (!sub_schurs->sum_S_Ej_inv_all && deluxe) PetscCall(MatDuplicate(sub_schurs->S_Ej_all, MAT_DO_NOT_COPY_VALUES, &sub_schurs->sum_S_Ej_inv_all));
748 }
749
750 /* Compute Schur complements explicitly */
751 F = NULL;
752 if (!sub_schurs->schur_explicit) {
753 /* this code branch is used when MatFactor with Schur complement support is not present or when explicitly requested;
754 it is not efficient, unless the economic version of the scaling is used */
755 Mat S_Ej_expl;
756 PetscScalar *work;
757 PetscInt j, *dummy_idx;
758 PetscBool Sdense;
759
760 PetscCall(PetscMalloc2(max_subset_size, &dummy_idx, max_subset_size * max_subset_size, &work));
761 local_size = 0;
762 for (i = 0; i < sub_schurs->n_subs; i++) {
763 IS is_subset_B;
764 Mat AE_EE, AE_IE, AE_EI, S_Ej;
765
766 /* subsets in original and boundary numbering */
767 PetscCall(ISGlobalToLocalMappingApplyIS(sub_schurs->BtoNmap, IS_GTOLM_DROP, sub_schurs->is_subs[i], &is_subset_B));
768 /* EE block */
769 PetscCall(MatCreateSubMatrix(A_BB, is_subset_B, is_subset_B, MAT_INITIAL_MATRIX, &AE_EE));
770 /* IE block */
771 PetscCall(MatCreateSubMatrix(A_IB, is_I, is_subset_B, MAT_INITIAL_MATRIX, &AE_IE));
772 /* EI block */
773 if (sub_schurs->is_symmetric) {
774 PetscCall(MatCreateTranspose(AE_IE, &AE_EI));
775 } else if (sub_schurs->is_hermitian) {
776 PetscCall(MatCreateHermitianTranspose(AE_IE, &AE_EI));
777 } else {
778 PetscCall(MatCreateSubMatrix(A_BI, is_subset_B, is_I, MAT_INITIAL_MATRIX, &AE_EI));
779 }
780 PetscCall(ISDestroy(&is_subset_B));
781 PetscCall(MatCreateSchurComplement(AE_II, AE_II, AE_IE, AE_EI, AE_EE, &S_Ej));
782 PetscCall(MatDestroy(&AE_EE));
783 PetscCall(MatDestroy(&AE_IE));
784 PetscCall(MatDestroy(&AE_EI));
785 if (AE_II == A_II) { /* we can reuse the same ksp */
786 KSP ksp;
787 PetscCall(MatSchurComplementGetKSP(sub_schurs->S, &ksp));
788 PetscCall(MatSchurComplementSetKSP(S_Ej, ksp));
789 } else { /* build new ksp object which inherits ksp and pc types from the original one */
790 KSP origksp, schurksp;
791 PC origpc, schurpc;
792 KSPType ksp_type;
793 PetscInt n_internal;
794 PetscBool ispcnone;
795
796 PetscCall(MatSchurComplementGetKSP(sub_schurs->S, &origksp));
797 PetscCall(MatSchurComplementGetKSP(S_Ej, &schurksp));
798 PetscCall(KSPGetType(origksp, &ksp_type));
799 PetscCall(KSPSetType(schurksp, ksp_type));
800 PetscCall(KSPGetPC(schurksp, &schurpc));
801 PetscCall(KSPGetPC(origksp, &origpc));
802 PetscCall(PetscObjectTypeCompare((PetscObject)origpc, PCNONE, &ispcnone));
803 if (!ispcnone) {
804 PCType pc_type;
805 PetscCall(PCGetType(origpc, &pc_type));
806 PetscCall(PCSetType(schurpc, pc_type));
807 } else {
808 PetscCall(PCSetType(schurpc, PCLU));
809 }
810 PetscCall(ISGetSize(is_I, &n_internal));
811 if (!n_internal) { /* UMFPACK gives error with 0 sized problems */
812 MatSolverType solver = NULL;
813 PetscCall(PCFactorGetMatSolverType(origpc, &solver));
814 if (solver) PetscCall(PCFactorSetMatSolverType(schurpc, solver));
815 }
816 PetscCall(KSPSetUp(schurksp));
817 }
818 PetscCall(ISGetLocalSize(sub_schurs->is_subs[i], &subset_size));
819 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, subset_size, subset_size, work, &S_Ej_expl));
820 PetscCall(PCBDDCComputeExplicitSchur(S_Ej, sub_schurs->is_symmetric, MAT_REUSE_MATRIX, &S_Ej_expl));
821 PetscCall(PetscObjectTypeCompare((PetscObject)S_Ej_expl, MATSEQDENSE, &Sdense));
822 PetscCheck(Sdense, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not yet implemented for sparse matrices");
823 for (j = 0; j < subset_size; j++) dummy_idx[j] = local_size + j;
824 PetscCall(MatSetValues(sub_schurs->S_Ej_all, subset_size, dummy_idx, subset_size, dummy_idx, work, INSERT_VALUES));
825 PetscCall(MatDestroy(&S_Ej));
826 PetscCall(MatDestroy(&S_Ej_expl));
827 local_size += subset_size;
828 }
829 PetscCall(PetscFree2(dummy_idx, work));
830 /* free */
831 PetscCall(ISDestroy(&is_I));
832 PetscCall(MatDestroy(&AE_II));
833 PetscCall(PetscFree(all_local_idx_N));
834 } else {
835 Mat A, cs_AIB_mat = NULL, benign_AIIm1_ones_mat = NULL;
836 Mat *gdswA;
837 Vec Dall = NULL;
838 IS is_A_all, *is_p_r = NULL, is_schur;
839 MatType Stype;
840 PetscScalar *work, *S_data, *schur_factor, infty = PETSC_MAX_REAL;
841 PetscScalar *SEj_arr = NULL, *SEjinv_arr = NULL;
842 const PetscScalar *rS_data;
843 PetscInt n, n_I, size_schur, size_active_schur, cum, cum2;
844 PetscBool economic, solver_S, S_lower_triangular = PETSC_FALSE;
845 PetscBool schur_has_vertices, factor_workaround;
846 PetscBool use_cholesky;
847 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
848 PetscBool oldpin;
849 #endif
850 /* multi-element */
851 IS *is_sub_all = NULL, *is_sub_schur_all = NULL, *is_sub_schur = NULL;
852
853 /* get sizes */
854 n_I = 0;
855 if (is_I_layer) PetscCall(ISGetLocalSize(is_I_layer, &n_I));
856 economic = PETSC_FALSE;
857 PetscCall(ISGetLocalSize(sub_schurs->is_I, &cum));
858 if (cum != n_I) economic = PETSC_TRUE;
859 PetscCall(MatGetLocalSize(sub_schurs->A, &n, NULL));
860 size_active_schur = local_size;
861
862 /* import scaling vector (wrong formulation if we have 3D edges) */
863 if (scaling && compute_Stilda) {
864 const PetscScalar *array;
865 PetscScalar *array2;
866 const PetscInt *idxs;
867 PetscInt i;
868
869 PetscCall(ISGetIndices(sub_schurs->is_Ej_all, &idxs));
870 PetscCall(VecCreateSeq(PETSC_COMM_SELF, size_active_schur, &Dall));
871 PetscCall(VecGetArrayRead(scaling, &array));
872 PetscCall(VecGetArray(Dall, &array2));
873 for (i = 0; i < size_active_schur; i++) array2[i] = array[idxs[i]];
874 PetscCall(VecRestoreArray(Dall, &array2));
875 PetscCall(VecRestoreArrayRead(scaling, &array));
876 PetscCall(ISRestoreIndices(sub_schurs->is_Ej_all, &idxs));
877 deluxe = PETSC_FALSE;
878 }
879
880 /* size active schurs does not count any dirichlet or vertex dof on the interface */
881 factor_workaround = PETSC_FALSE;
882 schur_has_vertices = PETSC_FALSE;
883 cum = n_I + size_active_schur;
884 if (sub_schurs->is_dir) {
885 const PetscInt *idxs;
886 PetscInt n_dir;
887
888 PetscCall(ISGetLocalSize(sub_schurs->is_dir, &n_dir));
889 PetscCall(ISGetIndices(sub_schurs->is_dir, &idxs));
890 PetscCall(PetscArraycpy(all_local_idx_N + cum, idxs, n_dir));
891 if (multi_element)
892 for (PetscInt j = 0; j < n_dir; j++) all_local_subid_N[j + cum] = local_subs[idxs[j]];
893 PetscCall(ISRestoreIndices(sub_schurs->is_dir, &idxs));
894 cum += n_dir;
895 if (!sub_schurs->gdsw) factor_workaround = PETSC_TRUE;
896 }
897 /* include the primal vertices in the Schur complement */
898 if (exact_schur && sub_schurs->is_vertices && (compute_Stilda || benign_n)) {
899 PetscInt n_v;
900
901 PetscCall(ISGetLocalSize(sub_schurs->is_vertices, &n_v));
902 if (n_v) {
903 const PetscInt *idxs;
904
905 PetscCall(ISGetIndices(sub_schurs->is_vertices, &idxs));
906 PetscCall(PetscArraycpy(all_local_idx_N + cum, idxs, n_v));
907 if (multi_element)
908 for (PetscInt j = 0; j < n_v; j++) all_local_subid_N[j + cum] = local_subs[idxs[j]];
909 PetscCall(ISRestoreIndices(sub_schurs->is_vertices, &idxs));
910 cum += n_v;
911 if (!sub_schurs->gdsw) factor_workaround = PETSC_TRUE;
912 schur_has_vertices = PETSC_TRUE;
913 }
914 }
915 size_schur = cum - n_I;
916 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, cum, all_local_idx_N, PETSC_OWN_POINTER, &is_A_all));
917 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
918 oldpin = sub_schurs->A->boundtocpu;
919 PetscCall(MatBindToCPU(sub_schurs->A, PETSC_TRUE));
920 #endif
921 if (cum == n) {
922 PetscCall(ISSetPermutation(is_A_all));
923 PetscCall(MatPermute(sub_schurs->A, is_A_all, is_A_all, &A));
924 } else {
925 PetscCall(MatCreateSubMatrix(sub_schurs->A, is_A_all, is_A_all, MAT_INITIAL_MATRIX, &A));
926 }
927 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
928 PetscCall(MatBindToCPU(sub_schurs->A, oldpin));
929 #endif
930 PetscCall(MatSetOptionsPrefixFactor(A, sub_schurs->prefix));
931 PetscCall(MatAppendOptionsPrefixFactor(A, "sub_schurs_"));
932 /* subsets ordered last */
933 PetscCall(ISCreateStride(PETSC_COMM_SELF, size_schur, n_I, 1, &is_schur));
934
935 if (multi_element) {
936 PetscInt *idx_sub;
937
938 PetscCall(PetscMalloc3(n_local_subs, &is_sub_all, n_local_subs, &is_sub_schur_all, n_local_subs, &is_sub_schur));
939 PetscCall(PetscMalloc1(n + size_schur, &idx_sub));
940 for (PetscInt sub = 0; sub < n_local_subs; sub++) {
941 PetscInt size_sub = 0, size_schur_sub = 0, size_I_sub;
942
943 for (PetscInt j = 0; j < n_I; j++)
944 if (all_local_subid_N[j] == sub) idx_sub[size_sub++] = j;
945 size_I_sub = size_sub;
946 for (PetscInt j = n_I; j < n_I + size_schur; j++)
947 if (all_local_subid_N[j] == sub) {
948 idx_sub[size_sub++] = j;
949 idx_sub[n + size_schur_sub++] = j - n_I;
950 }
951
952 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, size_sub, idx_sub, PETSC_COPY_VALUES, &is_sub_all[sub]));
953 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, size_schur_sub, idx_sub + n, PETSC_COPY_VALUES, &is_sub_schur[sub]));
954 PetscCall(ISCreateStride(PETSC_COMM_SELF, size_schur_sub, size_I_sub, 1, &is_sub_schur_all[sub]));
955 }
956 PetscCall(PetscFree(idx_sub));
957 }
958
959 /* if we actually change the basis for the pressures, LDL^T factors will use a lot of memory
960 this is a workaround */
961 if (benign_n) {
962 Vec v, benign_AIIm1_ones;
963 ISLocalToGlobalMapping N_to_reor;
964 IS is_p0, is_p0_p;
965 PetscScalar *cs_AIB, *AIIm1_data;
966 PetscInt sizeA;
967
968 PetscCall(ISLocalToGlobalMappingCreateIS(is_A_all, &N_to_reor));
969 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, benign_n, benign_p0_lidx, PETSC_COPY_VALUES, &is_p0));
970 PetscCall(ISGlobalToLocalMappingApplyIS(N_to_reor, IS_GTOLM_DROP, is_p0, &is_p0_p));
971 PetscCall(ISDestroy(&is_p0));
972 PetscCall(MatCreateVecs(A, &v, &benign_AIIm1_ones));
973 PetscCall(VecGetSize(v, &sizeA));
974 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, sizeA, benign_n, NULL, &benign_AIIm1_ones_mat));
975 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, size_schur, benign_n, NULL, &cs_AIB_mat));
976 PetscCall(MatDenseGetArray(cs_AIB_mat, &cs_AIB));
977 PetscCall(MatDenseGetArray(benign_AIIm1_ones_mat, &AIIm1_data));
978 PetscCall(PetscMalloc1(benign_n, &is_p_r));
979 /* compute colsum of A_IB restricted to pressures */
980 for (i = 0; i < benign_n; i++) {
981 const PetscScalar *array;
982 const PetscInt *idxs;
983 PetscInt j, nz;
984
985 PetscCall(ISGlobalToLocalMappingApplyIS(N_to_reor, IS_GTOLM_DROP, benign_zerodiag_subs[i], &is_p_r[i]));
986 PetscCall(ISGetLocalSize(is_p_r[i], &nz));
987 PetscCall(ISGetIndices(is_p_r[i], &idxs));
988 for (j = 0; j < nz; j++) AIIm1_data[idxs[j] + sizeA * i] = 1.;
989 PetscCall(ISRestoreIndices(is_p_r[i], &idxs));
990 PetscCall(VecPlaceArray(benign_AIIm1_ones, AIIm1_data + sizeA * i));
991 PetscCall(MatMult(A, benign_AIIm1_ones, v));
992 PetscCall(VecResetArray(benign_AIIm1_ones));
993 PetscCall(VecGetArrayRead(v, &array));
994 for (j = 0; j < size_schur; j++) {
995 #if defined(PETSC_USE_COMPLEX)
996 cs_AIB[i * size_schur + j] = (PetscRealPart(array[j + n_I]) / nz + PETSC_i * (PetscImaginaryPart(array[j + n_I]) / nz));
997 #else
998 cs_AIB[i * size_schur + j] = array[j + n_I] / nz;
999 #endif
1000 }
1001 PetscCall(VecRestoreArrayRead(v, &array));
1002 }
1003 PetscCall(MatDenseRestoreArray(cs_AIB_mat, &cs_AIB));
1004 PetscCall(MatDenseRestoreArray(benign_AIIm1_ones_mat, &AIIm1_data));
1005 PetscCall(VecDestroy(&v));
1006 PetscCall(VecDestroy(&benign_AIIm1_ones));
1007 PetscCall(MatSetOption(A, MAT_KEEP_NONZERO_PATTERN, PETSC_FALSE));
1008 PetscCall(MatSetOption(A, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_FALSE));
1009 PetscCall(MatSetOption(A, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE));
1010 PetscCall(MatZeroRowsColumnsIS(A, is_p0_p, 1.0, NULL, NULL));
1011 PetscCall(ISDestroy(&is_p0_p));
1012 PetscCall(ISLocalToGlobalMappingDestroy(&N_to_reor));
1013 }
1014 PetscCall(MatSetOption(A, MAT_SYMMETRIC, sub_schurs->is_symmetric));
1015 PetscCall(MatSetOption(A, MAT_HERMITIAN, sub_schurs->is_hermitian));
1016 PetscCall(MatSetOption(A, MAT_SPD, sub_schurs->is_posdef));
1017
1018 /* for complexes, symmetric and hermitian at the same time implies null imaginary part */
1019 use_cholesky = (PetscBool)((use_potr || use_sytr) && sub_schurs->is_hermitian && sub_schurs->is_symmetric);
1020
1021 /* when using the benign subspace trick, the local Schur complements are SPD */
1022 /* MKL_PARDISO does not handle well the computation of a Schur complement from a symmetric indefinite factorization
1023 Use LU and adapt pivoting perturbation (still, solution is not as accurate as with using MUMPS) */
1024 if (benign_trick) {
1025 sub_schurs->is_posdef = PETSC_TRUE;
1026 PetscCall(PetscStrcmp(sub_schurs->mat_solver_type, MATSOLVERMKL_PARDISO, &flg));
1027 if (flg) use_cholesky = PETSC_FALSE;
1028 }
1029 if (sub_schurs->mat_factor_type == MAT_FACTOR_NONE) sub_schurs->mat_factor_type = use_cholesky ? MAT_FACTOR_CHOLESKY : MAT_FACTOR_LU;
1030
1031 if (n_I && !multi_element) {
1032 char stype[64];
1033 PetscBool gpu = PETSC_FALSE;
1034
1035 PetscCall(MatGetFactor(A, sub_schurs->mat_solver_type, sub_schurs->mat_factor_type, &F));
1036 PetscCheck(F, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MatGetFactor not supported by matrix instance of type %s. Rerun with \"-info :mat | grep MatGetFactor_\" for additional information", ((PetscObject)A)->type_name);
1037 PetscCall(MatSetErrorIfFailure(A, PETSC_TRUE));
1038 #if defined(PETSC_HAVE_MKL_PARDISO)
1039 if (benign_trick) PetscCall(MatMkl_PardisoSetCntl(F, 10, 10));
1040 #endif
1041 PetscCall(MatFactorSetSchurIS(F, is_schur));
1042
1043 /* factorization step */
1044 switch (sub_schurs->mat_factor_type) {
1045 case MAT_FACTOR_CHOLESKY:
1046 PetscCall(MatCholeskyFactorSymbolic(F, A, NULL, NULL));
1047 /* be sure that icntl 19 is not set by command line */
1048 PetscCall(MatMumpsSetIcntl(F, 19, 2));
1049 PetscCall(MatCholeskyFactorNumeric(F, A, NULL));
1050 S_lower_triangular = PETSC_TRUE;
1051 break;
1052 case MAT_FACTOR_LU:
1053 PetscCall(MatLUFactorSymbolic(F, A, NULL, NULL, NULL));
1054 /* be sure that icntl 19 is not set by command line */
1055 PetscCall(MatMumpsSetIcntl(F, 19, 3));
1056 PetscCall(MatLUFactorNumeric(F, A, NULL));
1057 break;
1058 default:
1059 SETERRQ(PetscObjectComm((PetscObject)F), PETSC_ERR_SUP, "Unsupported factor type %s", MatFactorTypes[sub_schurs->mat_factor_type]);
1060 }
1061 PetscCall(MatViewFromOptions(F, (PetscObject)A, "-mat_factor_view"));
1062
1063 if (matl_dbg_viewer) {
1064 Mat S;
1065 IS is;
1066
1067 PetscCall(PetscObjectSetName((PetscObject)A, "A"));
1068 PetscCall(MatView(A, matl_dbg_viewer));
1069 PetscCall(MatFactorCreateSchurComplement(F, &S, NULL));
1070 PetscCall(PetscObjectSetName((PetscObject)S, "S"));
1071 PetscCall(MatView(S, matl_dbg_viewer));
1072 PetscCall(MatDestroy(&S));
1073 PetscCall(ISCreateStride(PETSC_COMM_SELF, n_I, 0, 1, &is));
1074 PetscCall(PetscObjectSetName((PetscObject)is, "I"));
1075 PetscCall(ISView(is, matl_dbg_viewer));
1076 PetscCall(ISDestroy(&is));
1077 PetscCall(ISCreateStride(PETSC_COMM_SELF, size_schur, n_I, 1, &is));
1078 PetscCall(PetscObjectSetName((PetscObject)is, "B"));
1079 PetscCall(ISView(is, matl_dbg_viewer));
1080 PetscCall(ISDestroy(&is));
1081 PetscCall(PetscObjectSetName((PetscObject)is_A_all, "IA"));
1082 PetscCall(ISView(is_A_all, matl_dbg_viewer));
1083 for (i = 0, cum = 0; i < sub_schurs->n_subs; i++) {
1084 IS is;
1085 char name[16];
1086
1087 PetscCall(PetscSNPrintf(name, sizeof(name), "IE%" PetscInt_FMT, i));
1088 PetscCall(ISGetLocalSize(sub_schurs->is_subs[i], &subset_size));
1089 PetscCall(ISCreateStride(PETSC_COMM_SELF, subset_size, cum, 1, &is));
1090 PetscCall(PetscObjectSetName((PetscObject)is, name));
1091 PetscCall(ISView(is, matl_dbg_viewer));
1092 PetscCall(ISDestroy(&is));
1093 if (sub_schurs->change) {
1094 Mat T;
1095
1096 PetscCall(PetscSNPrintf(name, sizeof(name), "TE%" PetscInt_FMT, i));
1097 PetscCall(KSPGetOperators(sub_schurs->change[i], &T, NULL));
1098 PetscCall(PetscObjectSetName((PetscObject)T, name));
1099 PetscCall(MatView(T, matl_dbg_viewer));
1100 PetscCall(PetscSNPrintf(name, sizeof(name), "ITE%" PetscInt_FMT, i));
1101 PetscCall(PetscObjectSetName((PetscObject)sub_schurs->change_primal_sub[i], name));
1102 PetscCall(ISView(sub_schurs->change_primal_sub[i], matl_dbg_viewer));
1103 }
1104 cum += subset_size;
1105 }
1106 PetscCall(PetscViewerFlush(matl_dbg_viewer));
1107 }
1108
1109 /* get explicit Schur Complement computed during numeric factorization */
1110 PetscCall(MatFactorGetSchurComplement(F, &S_all, NULL));
1111 PetscCall(PetscStrncpy(stype, MATSEQDENSE, sizeof(stype)));
1112 #if defined(PETSC_HAVE_CUDA)
1113 PetscCall(PetscObjectTypeCompareAny((PetscObject)A, &gpu, MATSEQAIJVIENNACL, MATSEQAIJCUSPARSE, ""));
1114 #endif
1115 if (gpu) PetscCall(PetscStrncpy(stype, MATSEQDENSECUDA, sizeof(stype)));
1116 PetscCall(PetscOptionsGetString(NULL, sub_schurs->prefix, "-sub_schurs_schur_mat_type", stype, sizeof(stype), NULL));
1117 PetscCall(MatConvert(S_all, stype, MAT_INPLACE_MATRIX, &S_all));
1118 PetscCall(MatSetOption(S_all, MAT_SPD, sub_schurs->is_posdef));
1119 PetscCall(MatSetOption(S_all, MAT_HERMITIAN, sub_schurs->is_hermitian));
1120 PetscCall(MatGetType(S_all, &Stype));
1121
1122 /* we can reuse the solvers if we are not using the economic version */
1123 reuse_solvers = (PetscBool)(reuse_solvers && !economic && !sub_schurs->graph->multi_element);
1124 if (!sub_schurs->gdsw) {
1125 factor_workaround = (PetscBool)(reuse_solvers && factor_workaround);
1126 if (!sub_schurs->is_posdef && factor_workaround && compute_Stilda && size_active_schur) reuse_solvers = factor_workaround = PETSC_FALSE;
1127 }
1128 solver_S = PETSC_TRUE;
1129
1130 /* update the Schur complement with the change of basis on the pressures */
1131 if (benign_n) {
1132 const PetscScalar *cs_AIB;
1133 PetscScalar *S_data, *AIIm1_data;
1134 Mat S2 = NULL, S3 = NULL; /* dbg */
1135 PetscScalar *S2_data, *S3_data; /* dbg */
1136 Vec v, benign_AIIm1_ones;
1137 PetscInt sizeA;
1138
1139 PetscCall(MatDenseGetArray(S_all, &S_data));
1140 PetscCall(MatCreateVecs(A, &v, &benign_AIIm1_ones));
1141 PetscCall(VecGetSize(v, &sizeA));
1142 PetscCall(MatMumpsSetIcntl(F, 26, 0));
1143 #if defined(PETSC_HAVE_MKL_PARDISO)
1144 PetscCall(MatMkl_PardisoSetCntl(F, 70, 1));
1145 #endif
1146 PetscCall(MatDenseGetArrayRead(cs_AIB_mat, &cs_AIB));
1147 PetscCall(MatDenseGetArray(benign_AIIm1_ones_mat, &AIIm1_data));
1148 if (matl_dbg_viewer) {
1149 PetscCall(MatDuplicate(S_all, MAT_DO_NOT_COPY_VALUES, &S2));
1150 PetscCall(MatDuplicate(S_all, MAT_DO_NOT_COPY_VALUES, &S3));
1151 PetscCall(MatDenseGetArray(S2, &S2_data));
1152 PetscCall(MatDenseGetArray(S3, &S3_data));
1153 }
1154 for (i = 0; i < benign_n; i++) {
1155 PetscScalar *array, sum = 0., one = 1., *sums;
1156 const PetscInt *idxs;
1157 PetscInt k, j, nz;
1158 PetscBLASInt B_k, B_n;
1159
1160 PetscCall(PetscCalloc1(benign_n, &sums));
1161 PetscCall(VecPlaceArray(benign_AIIm1_ones, AIIm1_data + sizeA * i));
1162 PetscCall(VecCopy(benign_AIIm1_ones, v));
1163 PetscCall(MatSolve(F, v, benign_AIIm1_ones));
1164 PetscCall(MatMult(A, benign_AIIm1_ones, v));
1165 PetscCall(VecResetArray(benign_AIIm1_ones));
1166 /* p0 dofs (eliminated) are excluded from the sums */
1167 for (k = 0; k < benign_n; k++) {
1168 PetscCall(ISGetLocalSize(is_p_r[k], &nz));
1169 PetscCall(ISGetIndices(is_p_r[k], &idxs));
1170 for (j = 0; j < nz - 1; j++) sums[k] -= AIIm1_data[idxs[j] + sizeA * i];
1171 PetscCall(ISRestoreIndices(is_p_r[k], &idxs));
1172 }
1173 PetscCall(VecGetArrayRead(v, (const PetscScalar **)&array));
1174 if (matl_dbg_viewer) {
1175 Vec vv;
1176 char name[16];
1177
1178 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, size_schur, array + n_I, &vv));
1179 PetscCall(PetscSNPrintf(name, sizeof(name), "Pvs%" PetscInt_FMT, i));
1180 PetscCall(PetscObjectSetName((PetscObject)vv, name));
1181 PetscCall(VecView(vv, matl_dbg_viewer));
1182 }
1183 /* perform sparse rank updates on symmetric Schur (TODO: move outside of the loop?) */
1184 /* cs_AIB already scaled by 1./nz */
1185 B_k = 1;
1186 PetscCall(PetscBLASIntCast(size_schur, &B_n));
1187 for (k = 0; k < benign_n; k++) {
1188 sum = sums[k];
1189
1190 if (PetscAbsScalar(sum) == 0.0) continue;
1191 if (k == i) {
1192 if (B_n) PetscCallBLAS("BLASsyrk", BLASsyrk_("L", "N", &B_n, &B_k, &sum, cs_AIB + i * size_schur, &B_n, &one, S_data, &B_n));
1193 if (matl_dbg_viewer && B_n) PetscCallBLAS("BLASsyrk", BLASsyrk_("L", "N", &B_n, &B_k, &sum, cs_AIB + i * size_schur, &B_n, &one, S3_data, &B_n));
1194 } else { /* XXX Is it correct to use symmetric rank-2 update with half of the sum? */
1195 sum /= 2.0;
1196 if (B_n) PetscCallBLAS("BLASsyr2k", BLASsyr2k_("L", "N", &B_n, &B_k, &sum, cs_AIB + k * size_schur, &B_n, cs_AIB + i * size_schur, &B_n, &one, S_data, &B_n));
1197 if (matl_dbg_viewer && B_n) PetscCallBLAS("BLASsyr2k", BLASsyr2k_("L", "N", &B_n, &B_k, &sum, cs_AIB + k * size_schur, &B_n, cs_AIB + i * size_schur, &B_n, &one, S3_data, &B_n));
1198 }
1199 }
1200 sum = 1.;
1201 if (B_n) PetscCallBLAS("BLASsyr2k", BLASsyr2k_("L", "N", &B_n, &B_k, &sum, array + n_I, &B_n, cs_AIB + i * size_schur, &B_n, &one, S_data, &B_n));
1202 if (matl_dbg_viewer && B_n) PetscCallBLAS("BLASsyr2k", BLASsyr2k_("L", "N", &B_n, &B_k, &sum, array + n_I, &B_n, cs_AIB + i * size_schur, &B_n, &one, S2_data, &B_n));
1203 PetscCall(VecRestoreArrayRead(v, (const PetscScalar **)&array));
1204 /* set p0 entry of AIIm1_ones to zero */
1205 PetscCall(ISGetLocalSize(is_p_r[i], &nz));
1206 PetscCall(ISGetIndices(is_p_r[i], &idxs));
1207 for (j = 0; j < benign_n; j++) AIIm1_data[idxs[nz - 1] + sizeA * j] = 0.;
1208 PetscCall(ISRestoreIndices(is_p_r[i], &idxs));
1209 PetscCall(PetscFree(sums));
1210 }
1211 PetscCall(VecDestroy(&benign_AIIm1_ones));
1212 if (matl_dbg_viewer) {
1213 PetscCall(MatDenseRestoreArray(S2, &S2_data));
1214 PetscCall(MatDenseRestoreArray(S3, &S3_data));
1215 }
1216 if (!S_lower_triangular) { /* I need to expand the upper triangular data (column-oriented) */
1217 PetscInt k, j;
1218 for (k = 0; k < size_schur; k++) {
1219 for (j = k; j < size_schur; j++) S_data[j * size_schur + k] = PetscConj(S_data[k * size_schur + j]);
1220 }
1221 }
1222
1223 /* restore defaults */
1224 PetscCall(MatMumpsSetIcntl(F, 26, -1));
1225 #if defined(PETSC_HAVE_MKL_PARDISO)
1226 PetscCall(MatMkl_PardisoSetCntl(F, 70, 0));
1227 #endif
1228 PetscCall(MatDenseRestoreArrayRead(cs_AIB_mat, &cs_AIB));
1229 PetscCall(MatDenseRestoreArray(benign_AIIm1_ones_mat, &AIIm1_data));
1230 PetscCall(VecDestroy(&v));
1231 PetscCall(MatDenseRestoreArray(S_all, &S_data));
1232 if (matl_dbg_viewer) {
1233 Mat S;
1234
1235 PetscCall(MatFactorRestoreSchurComplement(F, &S_all, MAT_FACTOR_SCHUR_UNFACTORED));
1236 PetscCall(MatFactorCreateSchurComplement(F, &S, NULL));
1237 PetscCall(PetscObjectSetName((PetscObject)S, "Sb"));
1238 PetscCall(MatView(S, matl_dbg_viewer));
1239 PetscCall(MatDestroy(&S));
1240 PetscCall(PetscObjectSetName((PetscObject)S2, "S2P"));
1241 PetscCall(MatView(S2, matl_dbg_viewer));
1242 PetscCall(PetscObjectSetName((PetscObject)S3, "S3P"));
1243 PetscCall(MatView(S3, matl_dbg_viewer));
1244 PetscCall(PetscObjectSetName((PetscObject)cs_AIB_mat, "cs"));
1245 PetscCall(MatView(cs_AIB_mat, matl_dbg_viewer));
1246 PetscCall(MatFactorGetSchurComplement(F, &S_all, NULL));
1247 }
1248 PetscCall(MatDestroy(&S2));
1249 PetscCall(MatDestroy(&S3));
1250 }
1251 if (!reuse_solvers) {
1252 for (i = 0; i < benign_n; i++) PetscCall(ISDestroy(&is_p_r[i]));
1253 PetscCall(PetscFree(is_p_r));
1254 PetscCall(MatDestroy(&cs_AIB_mat));
1255 PetscCall(MatDestroy(&benign_AIIm1_ones_mat));
1256 }
1257 } else if (multi_element) { /* MUMPS does not support sparse Schur complements. Loop over local subs */
1258 PetscInt *nnz;
1259 const PetscInt *idxs;
1260 PetscInt size_schur_sub;
1261
1262 PetscCall(PetscCalloc1(size_schur, &nnz));
1263 for (PetscInt sub = 0; sub < n_local_subs; sub++) {
1264 PetscCall(ISGetLocalSize(is_sub_schur[sub], &size_schur_sub));
1265 PetscCall(ISGetIndices(is_sub_schur[sub], &idxs));
1266 for (PetscInt j = 0; j < size_schur_sub; j++) nnz[idxs[j]] = size_schur_sub;
1267 PetscCall(ISRestoreIndices(is_sub_schur[sub], &idxs));
1268 }
1269 PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, size_schur, size_schur, 0, nnz, &S_all));
1270 PetscCall(MatSetOption(S_all, MAT_ROW_ORIENTED, sub_schurs->is_hermitian));
1271 PetscCall(PetscFree(nnz));
1272
1273 for (PetscInt sub = 0; sub < n_local_subs; sub++) {
1274 Mat Asub, Ssub;
1275 const PetscScalar *vals;
1276 PetscInt size_all_sub;
1277
1278 F = NULL;
1279 PetscCall(ISGetLocalSize(is_sub_schur[sub], &size_schur_sub));
1280 PetscCall(ISGetLocalSize(is_sub_all[sub], &size_all_sub));
1281 PetscCall(MatCreateSubMatrix(A, is_sub_all[sub], is_sub_all[sub], MAT_INITIAL_MATRIX, &Asub));
1282 if (size_schur_sub == size_all_sub) {
1283 /* we can't use MatFactor when size_schur == size_of_the_problem */
1284 PetscCall(MatConvert(Asub, MATDENSE, MAT_INITIAL_MATRIX, &Ssub));
1285 } else {
1286 PetscCall(MatGetFactor(Asub, sub_schurs->mat_solver_type, sub_schurs->mat_factor_type, &F));
1287 PetscCheck(F, PetscObjectComm((PetscObject)Asub), PETSC_ERR_SUP, "MatGetFactor not supported by matrix instance of type %s. Rerun with \"-info :mat | grep MatGetFactor_\" for additional information", ((PetscObject)Asub)->type_name);
1288 PetscCall(MatSetErrorIfFailure(Asub, PETSC_TRUE));
1289 #if defined(PETSC_HAVE_MKL_PARDISO)
1290 if (benign_trick) PetscCall(MatMkl_PardisoSetCntl(F, 10, 10));
1291 #endif
1292 /* subsets ordered last */
1293 PetscCall(MatFactorSetSchurIS(F, is_sub_schur_all[sub]));
1294
1295 /* factorization step */
1296 switch (sub_schurs->mat_factor_type) {
1297 case MAT_FACTOR_CHOLESKY:
1298 PetscCall(MatCholeskyFactorSymbolic(F, Asub, NULL, NULL));
1299 /* be sure that icntl 19 is not set by command line */
1300 PetscCall(MatMumpsSetIcntl(F, 19, 2));
1301 PetscCall(MatCholeskyFactorNumeric(F, Asub, NULL));
1302 S_lower_triangular = PETSC_TRUE;
1303 break;
1304 case MAT_FACTOR_LU:
1305 PetscCall(MatLUFactorSymbolic(F, Asub, NULL, NULL, NULL));
1306 /* be sure that icntl 19 is not set by command line */
1307 PetscCall(MatMumpsSetIcntl(F, 19, 3));
1308 PetscCall(MatLUFactorNumeric(F, Asub, NULL));
1309 break;
1310 default:
1311 SETERRQ(PetscObjectComm((PetscObject)F), PETSC_ERR_SUP, "Unsupported factor type %s", MatFactorTypes[sub_schurs->mat_factor_type]);
1312 }
1313 PetscCall(MatFactorCreateSchurComplement(F, &Ssub, NULL));
1314 }
1315 PetscCall(MatDestroy(&Asub));
1316 PetscCall(MatDenseGetArrayRead(Ssub, &vals));
1317 PetscCall(ISGetIndices(is_sub_schur[sub], &idxs));
1318 PetscCall(MatSetValues(S_all, size_schur_sub, idxs, size_schur_sub, idxs, vals, INSERT_VALUES));
1319 PetscCall(ISRestoreIndices(is_sub_schur[sub], &idxs));
1320 PetscCall(MatDenseRestoreArrayRead(Ssub, &vals));
1321 PetscCall(MatDestroy(&Ssub));
1322 PetscCall(MatDestroy(&F));
1323 }
1324 PetscCall(MatAssemblyBegin(S_all, MAT_FINAL_ASSEMBLY));
1325 PetscCall(MatAssemblyEnd(S_all, MAT_FINAL_ASSEMBLY));
1326 PetscCall(MatSetOption(S_all, MAT_SPD, sub_schurs->is_posdef));
1327 PetscCall(MatSetOption(S_all, MAT_HERMITIAN, sub_schurs->is_hermitian));
1328 Stype = MATDENSE;
1329 reuse_solvers = PETSC_FALSE;
1330 factor_workaround = PETSC_FALSE;
1331 solver_S = PETSC_FALSE;
1332 } else { /* we can't use MatFactor when size_schur == size_of_the_problem */
1333 PetscCall(MatConvert(A, MATSEQDENSE, MAT_INITIAL_MATRIX, &S_all));
1334 PetscCall(MatGetType(S_all, &Stype));
1335 reuse_solvers = PETSC_FALSE; /* TODO: why we can't reuse the solvers here? */
1336 factor_workaround = PETSC_FALSE;
1337 solver_S = PETSC_FALSE;
1338 }
1339
1340 if (reuse_solvers) {
1341 Mat A_II, pA_II, Afake;
1342 Vec vec1_B;
1343 PCBDDCReuseSolvers msolv_ctx;
1344 PetscInt n_R;
1345
1346 if (sub_schurs->reuse_solver) {
1347 PetscCall(PCBDDCReuseSolversReset(sub_schurs->reuse_solver));
1348 } else {
1349 PetscCall(PetscNew(&sub_schurs->reuse_solver));
1350 }
1351 msolv_ctx = sub_schurs->reuse_solver;
1352 PetscCall(MatSchurComplementGetSubMatrices(sub_schurs->S, &A_II, &pA_II, NULL, NULL, NULL));
1353 PetscCall(PetscObjectReference((PetscObject)F));
1354 msolv_ctx->F = F;
1355 PetscCall(MatCreateVecs(F, &msolv_ctx->sol, NULL));
1356 /* currently PETSc has no support for MatSolve(F,x,x), so cheat and let rhs and sol share the same memory */
1357 {
1358 PetscScalar *array;
1359 PetscInt n;
1360
1361 PetscCall(VecGetLocalSize(msolv_ctx->sol, &n));
1362 PetscCall(VecGetArray(msolv_ctx->sol, &array));
1363 PetscCall(VecCreateSeqWithArray(PetscObjectComm((PetscObject)msolv_ctx->sol), 1, n, array, &msolv_ctx->rhs));
1364 PetscCall(VecRestoreArray(msolv_ctx->sol, &array));
1365 }
1366 msolv_ctx->has_vertices = schur_has_vertices;
1367
1368 /* interior solver */
1369 PetscCall(PCCreate(PetscObjectComm((PetscObject)A_II), &msolv_ctx->interior_solver));
1370 PetscCall(PCSetOperators(msolv_ctx->interior_solver, A_II, pA_II));
1371 PetscCall(PCSetType(msolv_ctx->interior_solver, PCSHELL));
1372 PetscCall(PCShellSetName(msolv_ctx->interior_solver, "Interior solver (w/o Schur factorization)"));
1373 PetscCall(PCShellSetContext(msolv_ctx->interior_solver, msolv_ctx));
1374 PetscCall(PCShellSetView(msolv_ctx->interior_solver, PCBDDCReuseSolvers_View));
1375 PetscCall(PCShellSetApply(msolv_ctx->interior_solver, PCBDDCReuseSolvers_Interior));
1376 PetscCall(PCShellSetApplyTranspose(msolv_ctx->interior_solver, PCBDDCReuseSolvers_InteriorTranspose));
1377 if (sub_schurs->gdsw) PetscCall(PCShellSetDestroy(msolv_ctx->interior_solver, PCBDDCReuseSolvers_Destroy));
1378
1379 /* correction solver */
1380 if (!sub_schurs->gdsw) {
1381 PetscCall(PCCreate(PetscObjectComm((PetscObject)A_II), &msolv_ctx->correction_solver));
1382 PetscCall(PCSetType(msolv_ctx->correction_solver, PCSHELL));
1383 PetscCall(PCShellSetName(msolv_ctx->correction_solver, "Correction solver (with Schur factorization)"));
1384 PetscCall(PCShellSetContext(msolv_ctx->correction_solver, msolv_ctx));
1385 PetscCall(PCShellSetView(msolv_ctx->interior_solver, PCBDDCReuseSolvers_View));
1386 PetscCall(PCShellSetApply(msolv_ctx->correction_solver, PCBDDCReuseSolvers_Correction));
1387 PetscCall(PCShellSetApplyTranspose(msolv_ctx->correction_solver, PCBDDCReuseSolvers_CorrectionTranspose));
1388
1389 /* scatter and vecs for Schur complement solver */
1390 PetscCall(MatCreateVecs(S_all, &msolv_ctx->sol_B, &msolv_ctx->rhs_B));
1391 PetscCall(MatCreateVecs(sub_schurs->S, &vec1_B, NULL));
1392 if (!schur_has_vertices) {
1393 PetscCall(ISGlobalToLocalMappingApplyIS(sub_schurs->BtoNmap, IS_GTOLM_DROP, is_A_all, &msolv_ctx->is_B));
1394 PetscCall(VecScatterCreate(vec1_B, msolv_ctx->is_B, msolv_ctx->sol_B, NULL, &msolv_ctx->correction_scatter_B));
1395 PetscCall(PetscObjectReference((PetscObject)is_A_all));
1396 msolv_ctx->is_R = is_A_all;
1397 } else {
1398 IS is_B_all;
1399 const PetscInt *idxs;
1400 PetscInt dual, n_v, n;
1401
1402 PetscCall(ISGetLocalSize(sub_schurs->is_vertices, &n_v));
1403 dual = size_schur - n_v;
1404 PetscCall(ISGetLocalSize(is_A_all, &n));
1405 PetscCall(ISGetIndices(is_A_all, &idxs));
1406 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)is_A_all), dual, idxs + n_I, PETSC_COPY_VALUES, &is_B_all));
1407 PetscCall(ISGlobalToLocalMappingApplyIS(sub_schurs->BtoNmap, IS_GTOLM_DROP, is_B_all, &msolv_ctx->is_B));
1408 PetscCall(ISDestroy(&is_B_all));
1409 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)is_A_all), dual, 0, 1, &is_B_all));
1410 PetscCall(VecScatterCreate(vec1_B, msolv_ctx->is_B, msolv_ctx->sol_B, is_B_all, &msolv_ctx->correction_scatter_B));
1411 PetscCall(ISDestroy(&is_B_all));
1412 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)is_A_all), n - n_v, idxs, PETSC_COPY_VALUES, &msolv_ctx->is_R));
1413 PetscCall(ISRestoreIndices(is_A_all, &idxs));
1414 }
1415 PetscCall(ISGetLocalSize(msolv_ctx->is_R, &n_R));
1416 PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, n_R, n_R, 0, NULL, &Afake));
1417 PetscCall(MatAssemblyBegin(Afake, MAT_FINAL_ASSEMBLY));
1418 PetscCall(MatAssemblyEnd(Afake, MAT_FINAL_ASSEMBLY));
1419 PetscCall(PCSetOperators(msolv_ctx->correction_solver, Afake, Afake));
1420 PetscCall(MatDestroy(&Afake));
1421 PetscCall(VecDestroy(&vec1_B));
1422 }
1423 /* communicate benign info to solver context */
1424 if (benign_n) {
1425 PetscScalar *array;
1426
1427 msolv_ctx->benign_n = benign_n;
1428 msolv_ctx->benign_zerodiag_subs = is_p_r;
1429 PetscCall(PetscMalloc1(benign_n, &msolv_ctx->benign_save_vals));
1430 msolv_ctx->benign_csAIB = cs_AIB_mat;
1431 PetscCall(MatCreateVecs(cs_AIB_mat, &msolv_ctx->benign_corr_work, NULL));
1432 PetscCall(VecGetArray(msolv_ctx->benign_corr_work, &array));
1433 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, size_schur, array, &msolv_ctx->benign_dummy_schur_vec));
1434 PetscCall(VecRestoreArray(msolv_ctx->benign_corr_work, &array));
1435 msolv_ctx->benign_AIIm1ones = benign_AIIm1_ones_mat;
1436 }
1437 } else {
1438 if (sub_schurs->reuse_solver) PetscCall(PCBDDCReuseSolversReset(sub_schurs->reuse_solver));
1439 PetscCall(PetscFree(sub_schurs->reuse_solver));
1440 }
1441 PetscCall(MatDestroy(&A));
1442 PetscCall(ISDestroy(&is_A_all));
1443
1444 /* Work arrays */
1445 PetscCall(PetscMalloc1(max_subset_size * max_subset_size, &work));
1446
1447 /* S_Ej_all */
1448 PetscInt *idx_work = NULL;
1449 cum = cum2 = 0;
1450 if (!multi_element) PetscCall(MatDenseGetArrayRead(S_all, &rS_data));
1451 else PetscCall(PetscMalloc1(max_subset_size, &idx_work));
1452 PetscCall(MatSeqAIJGetArray(sub_schurs->S_Ej_all, &SEj_arr));
1453 if (sub_schurs->sum_S_Ej_inv_all) PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all, &SEjinv_arr));
1454 if (sub_schurs->gdsw) PetscCall(MatCreateSubMatrices(sub_schurs->A, sub_schurs->n_subs, sub_schurs->is_subs, sub_schurs->is_subs, MAT_INITIAL_MATRIX, &gdswA));
1455 for (i = 0; i < sub_schurs->n_subs; i++) {
1456 /* get S_E (or K^i_EE for GDSW) */
1457 PetscCall(ISGetLocalSize(sub_schurs->is_subs[i], &subset_size));
1458 if (sub_schurs->gdsw) {
1459 Mat T;
1460
1461 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, subset_size, subset_size, work, &T));
1462 PetscCall(MatConvert(gdswA[i], MATDENSE, MAT_REUSE_MATRIX, &T));
1463 PetscCall(MatDestroy(&T));
1464 } else {
1465 if (multi_element) { /* transpose copy to workspace */
1466 // XXX CSR directly?
1467 for (PetscInt j = 0; j < subset_size; j++) idx_work[j] = cum + j;
1468 PetscCall(MatGetValues(S_all, subset_size, idx_work, subset_size, idx_work, work));
1469 if (!sub_schurs->is_hermitian) {
1470 for (PetscInt k = 0; k < subset_size; k++) {
1471 for (PetscInt j = k; j < subset_size; j++) {
1472 PetscScalar t = work[k * subset_size + j];
1473 work[k * subset_size + j] = work[j * subset_size + k];
1474 work[j * subset_size + k] = t;
1475 }
1476 }
1477 }
1478 } else if (S_lower_triangular) { /* I need to expand the upper triangular data (column-oriented) */
1479 for (PetscInt k = 0; k < subset_size; k++) {
1480 for (PetscInt j = k; j < subset_size; j++) {
1481 work[k * subset_size + j] = rS_data[cum2 + k * size_schur + j];
1482 work[j * subset_size + k] = PetscConj(rS_data[cum2 + k * size_schur + j]);
1483 }
1484 }
1485 } else { /* just copy to workspace */
1486 for (PetscInt k = 0; k < subset_size; k++) {
1487 for (PetscInt j = 0; j < subset_size; j++) work[k * subset_size + j] = rS_data[cum2 + k * size_schur + j];
1488 }
1489 }
1490 }
1491 /* insert S_E values */
1492 if (sub_schurs->change) {
1493 Mat change_sub, SEj, T;
1494
1495 /* change basis */
1496 PetscCall(KSPGetOperators(sub_schurs->change[i], &change_sub, NULL));
1497 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, subset_size, subset_size, work, &SEj));
1498 if (!sub_schurs->change_with_qr) { /* currently there's no support for PtAP with P SeqAIJ */
1499 Mat T2;
1500 PetscCall(MatTransposeMatMult(change_sub, SEj, MAT_INITIAL_MATRIX, 1.0, &T2));
1501 PetscCall(MatMatMult(T2, change_sub, MAT_INITIAL_MATRIX, 1.0, &T));
1502 PetscCall(MatConvert(T, MATSEQDENSE, MAT_INPLACE_MATRIX, &T));
1503 PetscCall(MatDestroy(&T2));
1504 } else {
1505 PetscCall(MatPtAP(SEj, change_sub, MAT_INITIAL_MATRIX, 1.0, &T));
1506 }
1507 PetscCall(MatCopy(T, SEj, SAME_NONZERO_PATTERN));
1508 PetscCall(MatDestroy(&T));
1509 PetscCall(MatZeroRowsColumnsIS(SEj, sub_schurs->change_primal_sub[i], 1.0, NULL, NULL));
1510 PetscCall(MatDestroy(&SEj));
1511 }
1512 PetscCall(PetscArraycpy(SEj_arr, work, subset_size * subset_size));
1513 if (compute_Stilda) {
1514 if (deluxe) { /* if adaptivity is requested, invert S_E blocks */
1515 Mat M;
1516 const PetscScalar *vals;
1517 PetscBool isdense, isdensecuda;
1518
1519 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, subset_size, subset_size, work, &M));
1520 PetscCall(MatSetOption(M, MAT_SPD, sub_schurs->is_posdef));
1521 PetscCall(MatSetOption(M, MAT_HERMITIAN, sub_schurs->is_hermitian));
1522 if (!PetscBTLookup(sub_schurs->is_edge, i)) PetscCall(MatSetType(M, Stype));
1523 PetscCall(PetscObjectTypeCompare((PetscObject)M, MATSEQDENSE, &isdense));
1524 PetscCall(PetscObjectTypeCompare((PetscObject)M, MATSEQDENSECUDA, &isdensecuda));
1525 switch (sub_schurs->mat_factor_type) {
1526 case MAT_FACTOR_CHOLESKY:
1527 PetscCall(MatCholeskyFactor(M, NULL, NULL));
1528 break;
1529 case MAT_FACTOR_LU:
1530 PetscCall(MatLUFactor(M, NULL, NULL, NULL));
1531 break;
1532 default:
1533 SETERRQ(PetscObjectComm((PetscObject)F), PETSC_ERR_SUP, "Unsupported factor type %s", MatFactorTypes[sub_schurs->mat_factor_type]);
1534 }
1535 if (isdense) {
1536 PetscCall(MatSeqDenseInvertFactors_Private(M));
1537 #if defined(PETSC_HAVE_CUDA)
1538 } else if (isdensecuda) {
1539 PetscCall(MatSeqDenseCUDAInvertFactors_Internal(M));
1540 #endif
1541 } else SETERRQ(PetscObjectComm((PetscObject)M), PETSC_ERR_SUP, "Not implemented for type %s", Stype);
1542 PetscCall(MatDenseGetArrayRead(M, &vals));
1543 PetscCall(PetscArraycpy(SEjinv_arr, vals, subset_size * subset_size));
1544 PetscCall(MatDenseRestoreArrayRead(M, &vals));
1545 PetscCall(MatDestroy(&M));
1546 } else if (scaling) { /* not using deluxe */
1547 Mat SEj;
1548 Vec D;
1549 PetscScalar *array;
1550
1551 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, subset_size, subset_size, work, &SEj));
1552 PetscCall(VecGetArray(Dall, &array));
1553 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, subset_size, array + cum, &D));
1554 PetscCall(VecRestoreArray(Dall, &array));
1555 PetscCall(VecShift(D, -1.));
1556 PetscCall(MatDiagonalScale(SEj, D, D));
1557 PetscCall(MatDestroy(&SEj));
1558 PetscCall(VecDestroy(&D));
1559 PetscCall(PetscArraycpy(SEj_arr, work, subset_size * subset_size));
1560 }
1561 }
1562 cum += subset_size;
1563 cum2 += subset_size * (size_schur + 1);
1564 SEj_arr += subset_size * subset_size;
1565 if (SEjinv_arr) SEjinv_arr += subset_size * subset_size;
1566 }
1567 if (sub_schurs->gdsw) PetscCall(MatDestroySubMatrices(sub_schurs->n_subs, &gdswA));
1568 if (!multi_element) PetscCall(MatDenseRestoreArrayRead(S_all, &rS_data));
1569 PetscCall(MatSeqAIJRestoreArray(sub_schurs->S_Ej_all, &SEj_arr));
1570 if (sub_schurs->sum_S_Ej_inv_all) PetscCall(MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all, &SEjinv_arr));
1571 if (solver_S) PetscCall(MatFactorRestoreSchurComplement(F, &S_all, MAT_FACTOR_SCHUR_UNFACTORED));
1572
1573 /* may prevent from unneeded copies, since MUMPS or MKL_Pardiso always use CPU memory
1574 however, preliminary tests indicate using GPUs is still faster in the solve phase */
1575 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
1576 if (reuse_solvers) {
1577 Mat St;
1578 MatFactorSchurStatus st;
1579
1580 flg = PETSC_FALSE;
1581 PetscCall(PetscOptionsGetBool(NULL, sub_schurs->prefix, "-sub_schurs_schur_pin_to_cpu", &flg, NULL));
1582 PetscCall(MatFactorGetSchurComplement(F, &St, &st));
1583 PetscCall(MatBindToCPU(St, flg));
1584 PetscCall(MatFactorRestoreSchurComplement(F, &St, st));
1585 }
1586 #endif
1587
1588 schur_factor = NULL;
1589 if (compute_Stilda && size_active_schur) {
1590 if (sub_schurs->n_subs == 1 && size_schur == size_active_schur && deluxe) { /* we already computed the inverse */
1591 PetscCall(MatSeqAIJGetArrayWrite(sub_schurs->sum_S_Ej_tilda_all, &SEjinv_arr));
1592 PetscCall(PetscArraycpy(SEjinv_arr, work, size_schur * size_schur));
1593 PetscCall(MatSeqAIJRestoreArrayWrite(sub_schurs->sum_S_Ej_tilda_all, &SEjinv_arr));
1594 } else {
1595 Mat S_all_inv = NULL;
1596
1597 if (solver_S && !sub_schurs->gdsw) {
1598 /* for adaptive selection we need S^-1; for solver reusage we need S_\Delta\Delta^-1.
1599 The latter is not the principal subminor for S^-1. However, the factors can be reused since S_\Delta\Delta is the leading principal submatrix of S */
1600 if (factor_workaround) { /* invert without calling MatFactorInvertSchurComplement, since we are hacking */
1601 PetscScalar *data;
1602 PetscInt nd = 0;
1603
1604 PetscCheck(use_potr, PETSC_COMM_SELF, PETSC_ERR_SUP, "Factor update not yet implemented for non SPD matrices");
1605 PetscCall(MatFactorGetSchurComplement(F, &S_all_inv, NULL));
1606 PetscCall(MatDenseGetArray(S_all_inv, &data));
1607 if (sub_schurs->is_dir) { /* dirichlet dofs could have different scalings */
1608 PetscCall(ISGetLocalSize(sub_schurs->is_dir, &nd));
1609 }
1610
1611 /* factor and invert activedofs and vertices (dirichlet dofs does not contribute) */
1612 if (schur_has_vertices) {
1613 Mat M;
1614 PetscScalar *tdata;
1615 PetscInt nv = 0, news;
1616
1617 PetscCall(ISGetLocalSize(sub_schurs->is_vertices, &nv));
1618 news = size_active_schur + nv;
1619 PetscCall(PetscCalloc1(news * news, &tdata));
1620 for (i = 0; i < size_active_schur; i++) {
1621 PetscCall(PetscArraycpy(tdata + i * (news + 1), data + i * (size_schur + 1), size_active_schur - i));
1622 PetscCall(PetscArraycpy(tdata + i * (news + 1) + size_active_schur - i, data + i * size_schur + size_active_schur + nd, nv));
1623 }
1624 for (i = 0; i < nv; i++) {
1625 PetscInt k = i + size_active_schur;
1626 PetscCall(PetscArraycpy(tdata + k * (news + 1), data + (k + nd) * (size_schur + 1), nv - i));
1627 }
1628
1629 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, news, news, tdata, &M));
1630 PetscCall(MatSetOption(M, MAT_SPD, PETSC_TRUE));
1631 PetscCall(MatCholeskyFactor(M, NULL, NULL));
1632 /* save the factors */
1633 cum = 0;
1634 PetscCall(PetscMalloc1((size_active_schur * (size_active_schur + 1)) / 2 + nd, &schur_factor));
1635 for (i = 0; i < size_active_schur; i++) {
1636 PetscCall(PetscArraycpy(schur_factor + cum, tdata + i * (news + 1), size_active_schur - i));
1637 cum += size_active_schur - i;
1638 }
1639 for (i = 0; i < nd; i++) schur_factor[cum + i] = PetscSqrtReal(PetscRealPart(data[(i + size_active_schur) * (size_schur + 1)]));
1640 S_all_inv->ops->solve = M->ops->solve;
1641 S_all_inv->ops->matsolve = M->ops->matsolve;
1642 S_all_inv->ops->solvetranspose = M->ops->solvetranspose;
1643 S_all_inv->ops->matsolvetranspose = M->ops->matsolvetranspose;
1644 S_all_inv->factortype = MAT_FACTOR_CHOLESKY;
1645 PetscCall(MatSeqDenseInvertFactors_Private(M));
1646 /* move back just the active dofs to the Schur complement */
1647 for (i = 0; i < size_active_schur; i++) PetscCall(PetscArraycpy(data + i * size_schur, tdata + i * news, size_active_schur));
1648 PetscCall(PetscFree(tdata));
1649 PetscCall(MatDestroy(&M));
1650 } else { /* we can factorize and invert just the activedofs */
1651 Mat M;
1652 PetscScalar *aux;
1653
1654 PetscCall(PetscMalloc1(nd, &aux));
1655 for (i = 0; i < nd; i++) aux[i] = 1.0 / data[(i + size_active_schur) * (size_schur + 1)];
1656 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, size_active_schur, size_active_schur, data, &M));
1657 PetscCall(MatDenseSetLDA(M, size_schur));
1658 PetscCall(MatSetOption(M, MAT_SPD, PETSC_TRUE));
1659 PetscCall(MatCholeskyFactor(M, NULL, NULL));
1660 PetscCall(MatSeqDenseInvertFactors_Private(M));
1661 PetscCall(MatDestroy(&M));
1662 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, size_schur, nd, data + size_active_schur * size_schur, &M));
1663 PetscCall(MatZeroEntries(M));
1664 PetscCall(MatDestroy(&M));
1665 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, nd, size_schur, data + size_active_schur, &M));
1666 PetscCall(MatDenseSetLDA(M, size_schur));
1667 PetscCall(MatZeroEntries(M));
1668 PetscCall(MatDestroy(&M));
1669 for (i = 0; i < nd; i++) data[(i + size_active_schur) * (size_schur + 1)] = aux[i];
1670 PetscCall(PetscFree(aux));
1671 }
1672 PetscCall(MatDenseRestoreArray(S_all_inv, &data));
1673 } else { /* use MatFactor calls to invert S */
1674 PetscCall(MatFactorInvertSchurComplement(F));
1675 PetscCall(MatFactorGetSchurComplement(F, &S_all_inv, NULL));
1676 }
1677 } else if (!sub_schurs->gdsw) { /* we need to invert explicitly since we are not using MatFactor for S */
1678 if (multi_element) {
1679 PetscCall(MatDuplicate(S_all, MAT_DO_NOT_COPY_VALUES, &S_all_inv));
1680 PetscCall(MatSetOption(S_all_inv, MAT_ROW_ORIENTED, sub_schurs->is_hermitian));
1681 for (PetscInt sub = 0; sub < n_local_subs; sub++) {
1682 const PetscScalar *vals;
1683 const PetscInt *idxs;
1684 PetscInt size_schur_sub;
1685 Mat M;
1686
1687 PetscCall(MatCreateSubMatrix(S_all, is_sub_schur[sub], is_sub_schur[sub], MAT_INITIAL_MATRIX, &M));
1688 PetscCall(MatConvert(M, MATDENSE, MAT_INPLACE_MATRIX, &M));
1689 PetscCall(MatSetOption(M, MAT_SPD, sub_schurs->is_posdef));
1690 PetscCall(MatSetOption(M, MAT_HERMITIAN, sub_schurs->is_hermitian));
1691 switch (sub_schurs->mat_factor_type) {
1692 case MAT_FACTOR_CHOLESKY:
1693 PetscCall(MatCholeskyFactor(M, NULL, NULL));
1694 break;
1695 case MAT_FACTOR_LU:
1696 PetscCall(MatLUFactor(M, NULL, NULL, NULL));
1697 break;
1698 default:
1699 SETERRQ(PetscObjectComm((PetscObject)F), PETSC_ERR_SUP, "Unsupported factor type %s", MatFactorTypes[sub_schurs->mat_factor_type]);
1700 }
1701 PetscCall(MatSeqDenseInvertFactors_Private(M));
1702 PetscCall(MatDenseGetArrayRead(M, &vals));
1703 PetscCall(ISGetLocalSize(is_sub_schur[sub], &size_schur_sub));
1704 PetscCall(ISGetIndices(is_sub_schur[sub], &idxs));
1705 PetscCall(MatSetValues(S_all_inv, size_schur_sub, idxs, size_schur_sub, idxs, vals, INSERT_VALUES));
1706 PetscCall(ISRestoreIndices(is_sub_schur[sub], &idxs));
1707 PetscCall(MatDenseRestoreArrayRead(M, &vals));
1708 PetscCall(MatDestroy(&M));
1709 }
1710 PetscCall(MatAssemblyBegin(S_all_inv, MAT_FINAL_ASSEMBLY));
1711 PetscCall(MatAssemblyEnd(S_all_inv, MAT_FINAL_ASSEMBLY));
1712 } else {
1713 PetscCall(PetscObjectReference((PetscObject)S_all));
1714 S_all_inv = S_all;
1715 PetscCall(MatDenseGetArray(S_all_inv, &S_data));
1716 PetscCall(PetscBLASIntCast(size_schur, &B_N));
1717 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
1718 if (use_potr) {
1719 PetscCallBLAS("LAPACKpotrf", LAPACKpotrf_("L", &B_N, S_data, &B_N, &B_ierr));
1720 PetscCheck(!B_ierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in POTRF Lapack routine %" PetscBLASInt_FMT, B_ierr);
1721 PetscCallBLAS("LAPACKpotri", LAPACKpotri_("L", &B_N, S_data, &B_N, &B_ierr));
1722 PetscCheck(!B_ierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in POTRI Lapack routine %" PetscBLASInt_FMT, B_ierr);
1723 } else if (use_sytr) {
1724 PetscCallBLAS("LAPACKsytrf", LAPACKsytrf_("L", &B_N, S_data, &B_N, pivots, Bwork, &B_lwork, &B_ierr));
1725 PetscCheck(!B_ierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in SYTRF Lapack routine %" PetscBLASInt_FMT, B_ierr);
1726 PetscCallBLAS("LAPACKsytri", LAPACKsytri_("L", &B_N, S_data, &B_N, pivots, Bwork, &B_ierr));
1727 PetscCheck(!B_ierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in SYTRI Lapack routine %" PetscBLASInt_FMT, B_ierr);
1728 } else {
1729 PetscCallBLAS("LAPACKgetrf", LAPACKgetrf_(&B_N, &B_N, S_data, &B_N, pivots, &B_ierr));
1730 PetscCheck(!B_ierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in GETRF Lapack routine %" PetscBLASInt_FMT, B_ierr);
1731 PetscCallBLAS("LAPACKgetri", LAPACKgetri_(&B_N, S_data, &B_N, pivots, Bwork, &B_lwork, &B_ierr));
1732 PetscCheck(!B_ierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in GETRI Lapack routine %" PetscBLASInt_FMT, B_ierr);
1733 }
1734 PetscCall(PetscLogFlops(1.0 * size_schur * size_schur * size_schur));
1735 PetscCall(PetscFPTrapPop());
1736 PetscCall(MatDenseRestoreArray(S_all_inv, &S_data));
1737 }
1738 } else if (sub_schurs->gdsw) {
1739 Mat tS, tX, SEj, S_II, S_IE, S_EE;
1740 KSP pS_II;
1741 PC pS_II_pc;
1742 IS EE, II;
1743 PetscInt nS;
1744
1745 PetscCall(MatFactorCreateSchurComplement(F, &tS, NULL));
1746 PetscCall(MatGetSize(tS, &nS, NULL));
1747 PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all, &SEjinv_arr));
1748 for (i = 0, cum = 0; i < sub_schurs->n_subs; i++) { /* naive implementation */
1749 PetscCall(ISGetLocalSize(sub_schurs->is_subs[i], &subset_size));
1750 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, subset_size, subset_size, SEjinv_arr, &SEj));
1751
1752 PetscCall(ISCreateStride(PETSC_COMM_SELF, subset_size, cum, 1, &EE));
1753 PetscCall(ISComplement(EE, 0, nS, &II));
1754 PetscCall(MatCreateSubMatrix(tS, II, II, MAT_INITIAL_MATRIX, &S_II));
1755 PetscCall(MatCreateSubMatrix(tS, II, EE, MAT_INITIAL_MATRIX, &S_IE));
1756 PetscCall(MatCreateSubMatrix(tS, EE, EE, MAT_INITIAL_MATRIX, &S_EE));
1757 PetscCall(ISDestroy(&II));
1758 PetscCall(ISDestroy(&EE));
1759
1760 PetscCall(KSPCreate(PETSC_COMM_SELF, &pS_II));
1761 PetscCall(KSPSetNestLevel(pS_II, 1)); /* do not have direct access to a PC to provide the level of nesting of the KSP */
1762 PetscCall(KSPSetType(pS_II, KSPPREONLY));
1763 PetscCall(KSPGetPC(pS_II, &pS_II_pc));
1764 PetscCall(PCSetType(pS_II_pc, PCSVD));
1765 PetscCall(KSPSetOptionsPrefix(pS_II, sub_schurs->prefix));
1766 PetscCall(KSPAppendOptionsPrefix(pS_II, "pseudo_"));
1767 PetscCall(KSPSetOperators(pS_II, S_II, S_II));
1768 PetscCall(MatDestroy(&S_II));
1769 PetscCall(KSPSetFromOptions(pS_II));
1770 PetscCall(KSPSetUp(pS_II));
1771 PetscCall(MatDuplicate(S_IE, MAT_DO_NOT_COPY_VALUES, &tX));
1772 PetscCall(KSPMatSolve(pS_II, S_IE, tX));
1773 PetscCall(KSPDestroy(&pS_II));
1774
1775 PetscCall(MatTransposeMatMult(S_IE, tX, MAT_REUSE_MATRIX, PETSC_DETERMINE, &SEj));
1776 PetscCall(MatDestroy(&S_IE));
1777 PetscCall(MatDestroy(&tX));
1778 PetscCall(MatAYPX(SEj, -1, S_EE, SAME_NONZERO_PATTERN));
1779 PetscCall(MatDestroy(&S_EE));
1780
1781 PetscCall(MatDestroy(&SEj));
1782 cum += subset_size;
1783 SEjinv_arr += subset_size * subset_size;
1784 }
1785 PetscCall(MatDestroy(&tS));
1786 PetscCall(MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all, &SEjinv_arr));
1787 }
1788 /* S_Ej_tilda_all */
1789 cum = cum2 = 0;
1790 rS_data = NULL;
1791 if (S_all_inv && !multi_element) PetscCall(MatDenseGetArrayRead(S_all_inv, &rS_data));
1792 PetscCall(MatSeqAIJGetArrayWrite(sub_schurs->sum_S_Ej_tilda_all, &SEjinv_arr));
1793 for (i = 0; i < sub_schurs->n_subs; i++) {
1794 PetscInt j;
1795
1796 PetscCall(ISGetLocalSize(sub_schurs->is_subs[i], &subset_size));
1797 /* get (St^-1)_E */
1798 /* Unless we are changing the variables, I don't need to expand to upper triangular since St^-1
1799 will be properly accessed later during adaptive selection */
1800 if (multi_element) { /* transpose copy to workspace */
1801 // XXX CSR directly?
1802 for (PetscInt j = 0; j < subset_size; j++) idx_work[j] = cum + j;
1803 PetscCall(MatGetValues(S_all_inv, subset_size, idx_work, subset_size, idx_work, work));
1804 if (!sub_schurs->is_hermitian) {
1805 for (PetscInt k = 0; k < subset_size; k++) {
1806 for (PetscInt j = k; j < subset_size; j++) {
1807 PetscScalar t = work[k * subset_size + j];
1808 work[k * subset_size + j] = work[j * subset_size + k];
1809 work[j * subset_size + k] = t;
1810 }
1811 }
1812 }
1813 } else if (rS_data) {
1814 if (S_lower_triangular) {
1815 PetscInt k;
1816 if (sub_schurs->change) {
1817 for (k = 0; k < subset_size; k++) {
1818 for (j = k; j < subset_size; j++) {
1819 work[k * subset_size + j] = rS_data[cum2 + k * size_schur + j];
1820 work[j * subset_size + k] = work[k * subset_size + j];
1821 }
1822 }
1823 } else {
1824 for (k = 0; k < subset_size; k++) {
1825 for (j = k; j < subset_size; j++) work[k * subset_size + j] = rS_data[cum2 + k * size_schur + j];
1826 }
1827 }
1828 } else {
1829 PetscInt k;
1830 for (k = 0; k < subset_size; k++) {
1831 for (j = 0; j < subset_size; j++) work[k * subset_size + j] = rS_data[cum2 + k * size_schur + j];
1832 }
1833 }
1834 }
1835 if (sub_schurs->change) {
1836 Mat change_sub, SEj, T;
1837 PetscScalar val = sub_schurs->gdsw ? PETSC_SMALL : 1. / PETSC_SMALL;
1838
1839 /* change basis */
1840 PetscCall(KSPGetOperators(sub_schurs->change[i], &change_sub, NULL));
1841 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, subset_size, subset_size, (rS_data || multi_element) ? work : SEjinv_arr, &SEj));
1842 if (!sub_schurs->change_with_qr) { /* currently there's no support for PtAP with P SeqAIJ */
1843 Mat T2;
1844 PetscCall(MatTransposeMatMult(change_sub, SEj, MAT_INITIAL_MATRIX, 1.0, &T2));
1845 PetscCall(MatMatMult(T2, change_sub, MAT_INITIAL_MATRIX, 1.0, &T));
1846 PetscCall(MatDestroy(&T2));
1847 PetscCall(MatConvert(T, MATSEQDENSE, MAT_INPLACE_MATRIX, &T));
1848 } else {
1849 PetscCall(MatPtAP(SEj, change_sub, MAT_INITIAL_MATRIX, 1.0, &T));
1850 }
1851 PetscCall(MatCopy(T, SEj, SAME_NONZERO_PATTERN));
1852 PetscCall(MatDestroy(&T));
1853 PetscCall(MatZeroRowsColumnsIS(SEj, sub_schurs->change_primal_sub[i], val, NULL, NULL));
1854 PetscCall(MatDestroy(&SEj));
1855 }
1856 if (rS_data || multi_element) PetscCall(PetscArraycpy(SEjinv_arr, work, subset_size * subset_size));
1857 cum += subset_size;
1858 cum2 += subset_size * (size_schur + 1);
1859 SEjinv_arr += subset_size * subset_size;
1860 }
1861 PetscCall(MatSeqAIJRestoreArrayWrite(sub_schurs->sum_S_Ej_tilda_all, &SEjinv_arr));
1862 if (S_all_inv) {
1863 if (!multi_element) PetscCall(MatDenseRestoreArrayRead(S_all_inv, &rS_data));
1864 if (solver_S) {
1865 if (schur_has_vertices) {
1866 PetscCall(MatFactorRestoreSchurComplement(F, &S_all_inv, MAT_FACTOR_SCHUR_FACTORED));
1867 } else {
1868 PetscCall(MatFactorRestoreSchurComplement(F, &S_all_inv, MAT_FACTOR_SCHUR_INVERTED));
1869 }
1870 }
1871 }
1872 PetscCall(MatDestroy(&S_all_inv));
1873 }
1874
1875 /* move back factors if needed */
1876 if (schur_has_vertices && factor_workaround && !sub_schurs->gdsw) {
1877 Mat S_tmp;
1878 PetscInt nd = 0;
1879 PetscScalar *data;
1880
1881 PetscCheck(use_potr, PETSC_COMM_SELF, PETSC_ERR_SUP, "Factor update not yet implemented for non SPD matrices");
1882 PetscCheck(solver_S, PETSC_COMM_SELF, PETSC_ERR_PLIB, "This should not happen");
1883 PetscCall(MatFactorGetSchurComplement(F, &S_tmp, NULL));
1884 PetscCall(MatDenseGetArray(S_tmp, &data));
1885 PetscCall(PetscArrayzero(data, size_schur * size_schur));
1886
1887 if (S_lower_triangular) {
1888 cum = 0;
1889 for (i = 0; i < size_active_schur; i++) {
1890 PetscCall(PetscArraycpy(data + i * (size_schur + 1), schur_factor + cum, size_active_schur - i));
1891 cum += size_active_schur - i;
1892 }
1893 } else {
1894 PetscCall(PetscArraycpy(data, schur_factor, size_schur * size_schur));
1895 }
1896 if (sub_schurs->is_dir) {
1897 PetscCall(ISGetLocalSize(sub_schurs->is_dir, &nd));
1898 for (i = 0; i < nd; i++) data[(i + size_active_schur) * (size_schur + 1)] = schur_factor[cum + i];
1899 }
1900 /* workaround: since I cannot modify the matrices used inside the solvers for the forward and backward substitutions,
1901 set the diagonal entry of the Schur factor to a very large value */
1902 for (i = size_active_schur + nd; i < size_schur; i++) data[i * (size_schur + 1)] = infty;
1903 PetscCall(MatDenseRestoreArray(S_tmp, &data));
1904 PetscCall(MatFactorRestoreSchurComplement(F, &S_tmp, MAT_FACTOR_SCHUR_FACTORED));
1905 }
1906 } else if (factor_workaround && !sub_schurs->gdsw) { /* we need to eliminate any unneeded coupling */
1907 PetscScalar *data;
1908 PetscInt nd = 0;
1909
1910 if (sub_schurs->is_dir) { /* dirichlet dofs could have different scalings */
1911 PetscCall(ISGetLocalSize(sub_schurs->is_dir, &nd));
1912 }
1913 PetscCall(MatFactorGetSchurComplement(F, &S_all, NULL));
1914 PetscCall(MatDenseGetArray(S_all, &data));
1915 for (i = 0; i < size_active_schur; i++) PetscCall(PetscArrayzero(data + i * size_schur + size_active_schur, size_schur - size_active_schur));
1916 for (i = size_active_schur + nd; i < size_schur; i++) {
1917 PetscCall(PetscArrayzero(data + i * size_schur + size_active_schur, size_schur - size_active_schur));
1918 data[i * (size_schur + 1)] = infty;
1919 }
1920 PetscCall(MatDenseRestoreArray(S_all, &data));
1921 PetscCall(MatFactorRestoreSchurComplement(F, &S_all, MAT_FACTOR_SCHUR_UNFACTORED));
1922 }
1923 PetscCall(PetscFree(idx_work));
1924 PetscCall(PetscFree(work));
1925 PetscCall(PetscFree(schur_factor));
1926 PetscCall(VecDestroy(&Dall));
1927 PetscCall(ISDestroy(&is_schur));
1928 if (multi_element) {
1929 for (PetscInt sub = 0; sub < n_local_subs; sub++) {
1930 PetscCall(ISDestroy(&is_sub_all[sub]));
1931 PetscCall(ISDestroy(&is_sub_schur_all[sub]));
1932 PetscCall(ISDestroy(&is_sub_schur[sub]));
1933 }
1934 PetscCall(PetscFree3(is_sub_all, is_sub_schur_all, is_sub_schur));
1935 }
1936 }
1937 PetscCall(ISDestroy(&is_I_layer));
1938 PetscCall(MatDestroy(&S_all));
1939 PetscCall(MatDestroy(&A_BB));
1940 PetscCall(MatDestroy(&A_IB));
1941 PetscCall(MatDestroy(&A_BI));
1942 PetscCall(MatDestroy(&F));
1943
1944 PetscCall(MatAssemblyBegin(sub_schurs->S_Ej_all, MAT_FINAL_ASSEMBLY));
1945 PetscCall(MatAssemblyEnd(sub_schurs->S_Ej_all, MAT_FINAL_ASSEMBLY));
1946 if (compute_Stilda) {
1947 PetscCall(MatAssemblyBegin(sub_schurs->sum_S_Ej_tilda_all, MAT_FINAL_ASSEMBLY));
1948 PetscCall(MatAssemblyEnd(sub_schurs->sum_S_Ej_tilda_all, MAT_FINAL_ASSEMBLY));
1949 if (deluxe) {
1950 PetscCall(MatAssemblyBegin(sub_schurs->sum_S_Ej_inv_all, MAT_FINAL_ASSEMBLY));
1951 PetscCall(MatAssemblyEnd(sub_schurs->sum_S_Ej_inv_all, MAT_FINAL_ASSEMBLY));
1952 }
1953 }
1954
1955 /* Get local part of (\sum_j S_Ej) */
1956 if (!sub_schurs->sum_S_Ej_all) PetscCall(MatDuplicate(sub_schurs->S_Ej_all, MAT_DO_NOT_COPY_VALUES, &sub_schurs->sum_S_Ej_all));
1957 PetscCall(VecSet(gstash, 0.0));
1958 PetscCall(MatSeqAIJGetArray(sub_schurs->S_Ej_all, &stasharray));
1959 PetscCall(VecPlaceArray(lstash, stasharray));
1960 PetscCall(VecScatterBegin(sstash, lstash, gstash, ADD_VALUES, SCATTER_FORWARD));
1961 PetscCall(VecScatterEnd(sstash, lstash, gstash, ADD_VALUES, SCATTER_FORWARD));
1962 PetscCall(MatSeqAIJRestoreArray(sub_schurs->S_Ej_all, &stasharray));
1963 PetscCall(VecResetArray(lstash));
1964 PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_all, &stasharray));
1965 PetscCall(VecPlaceArray(lstash, stasharray));
1966 PetscCall(VecScatterBegin(sstash, gstash, lstash, INSERT_VALUES, SCATTER_REVERSE));
1967 PetscCall(VecScatterEnd(sstash, gstash, lstash, INSERT_VALUES, SCATTER_REVERSE));
1968 PetscCall(MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_all, &stasharray));
1969 PetscCall(VecResetArray(lstash));
1970
1971 /* Get local part of (\sum_j S^-1_Ej) (\sum_j St^-1_Ej) */
1972 if (compute_Stilda) {
1973 PetscCall(VecSet(gstash, 0.0));
1974 PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all, &stasharray));
1975 PetscCall(VecPlaceArray(lstash, stasharray));
1976 PetscCall(VecScatterBegin(sstash, lstash, gstash, ADD_VALUES, SCATTER_FORWARD));
1977 PetscCall(VecScatterEnd(sstash, lstash, gstash, ADD_VALUES, SCATTER_FORWARD));
1978 PetscCall(VecScatterBegin(sstash, gstash, lstash, INSERT_VALUES, SCATTER_REVERSE));
1979 PetscCall(VecScatterEnd(sstash, gstash, lstash, INSERT_VALUES, SCATTER_REVERSE));
1980 PetscCall(MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all, &stasharray));
1981 PetscCall(VecResetArray(lstash));
1982 if (deluxe) {
1983 PetscCall(VecSet(gstash, 0.0));
1984 PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all, &stasharray));
1985 PetscCall(VecPlaceArray(lstash, stasharray));
1986 PetscCall(VecScatterBegin(sstash, lstash, gstash, ADD_VALUES, SCATTER_FORWARD));
1987 PetscCall(VecScatterEnd(sstash, lstash, gstash, ADD_VALUES, SCATTER_FORWARD));
1988 PetscCall(VecScatterBegin(sstash, gstash, lstash, INSERT_VALUES, SCATTER_REVERSE));
1989 PetscCall(VecScatterEnd(sstash, gstash, lstash, INSERT_VALUES, SCATTER_REVERSE));
1990 PetscCall(MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all, &stasharray));
1991 PetscCall(VecResetArray(lstash));
1992 } else if (!sub_schurs->gdsw) {
1993 PetscScalar *array;
1994 PetscInt cum;
1995
1996 PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all, &array));
1997 cum = 0;
1998 for (i = 0; i < sub_schurs->n_subs; i++) {
1999 PetscCall(ISGetLocalSize(sub_schurs->is_subs[i], &subset_size));
2000 PetscCall(PetscBLASIntCast(subset_size, &B_N));
2001 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
2002 if (use_potr) {
2003 PetscCallBLAS("LAPACKpotrf", LAPACKpotrf_("L", &B_N, array + cum, &B_N, &B_ierr));
2004 PetscCheck(!B_ierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in POTRF Lapack routine %" PetscBLASInt_FMT, B_ierr);
2005 PetscCallBLAS("LAPACKpotri", LAPACKpotri_("L", &B_N, array + cum, &B_N, &B_ierr));
2006 PetscCheck(!B_ierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in POTRI Lapack routine %" PetscBLASInt_FMT, B_ierr);
2007 } else if (use_sytr) {
2008 PetscCallBLAS("LAPACKsytrf", LAPACKsytrf_("L", &B_N, array + cum, &B_N, pivots, Bwork, &B_lwork, &B_ierr));
2009 PetscCheck(!B_ierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in SYTRF Lapack routine %" PetscBLASInt_FMT, B_ierr);
2010 PetscCallBLAS("LAPACKsytri", LAPACKsytri_("L", &B_N, array + cum, &B_N, pivots, Bwork, &B_ierr));
2011 PetscCheck(!B_ierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in SYTRI Lapack routine %" PetscBLASInt_FMT, B_ierr);
2012 } else {
2013 PetscCallBLAS("LAPACKgetrf", LAPACKgetrf_(&B_N, &B_N, array + cum, &B_N, pivots, &B_ierr));
2014 PetscCheck(!B_ierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in GETRF Lapack routine %" PetscBLASInt_FMT, B_ierr);
2015 PetscCallBLAS("LAPACKgetri", LAPACKgetri_(&B_N, array + cum, &B_N, pivots, Bwork, &B_lwork, &B_ierr));
2016 PetscCheck(!B_ierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in GETRI Lapack routine %" PetscBLASInt_FMT, B_ierr);
2017 }
2018 PetscCall(PetscLogFlops(1.0 * subset_size * subset_size * subset_size));
2019 PetscCall(PetscFPTrapPop());
2020 cum += subset_size * subset_size;
2021 }
2022 PetscCall(MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all, &array));
2023 PetscCall(PetscObjectReference((PetscObject)sub_schurs->sum_S_Ej_all));
2024 PetscCall(MatDestroy(&sub_schurs->sum_S_Ej_inv_all));
2025 sub_schurs->sum_S_Ej_inv_all = sub_schurs->sum_S_Ej_all;
2026 }
2027 }
2028 PetscCall(VecDestroy(&lstash));
2029 PetscCall(VecDestroy(&gstash));
2030 PetscCall(VecScatterDestroy(&sstash));
2031
2032 if (matl_dbg_viewer) {
2033 if (sub_schurs->S_Ej_all) {
2034 PetscCall(PetscObjectSetName((PetscObject)sub_schurs->S_Ej_all, "SE"));
2035 PetscCall(MatView(sub_schurs->S_Ej_all, matl_dbg_viewer));
2036 }
2037 if (sub_schurs->sum_S_Ej_all) {
2038 PetscCall(PetscObjectSetName((PetscObject)sub_schurs->sum_S_Ej_all, "SSE"));
2039 PetscCall(MatView(sub_schurs->sum_S_Ej_all, matl_dbg_viewer));
2040 }
2041 if (sub_schurs->sum_S_Ej_inv_all) {
2042 PetscCall(PetscObjectSetName((PetscObject)sub_schurs->sum_S_Ej_inv_all, "SSEm"));
2043 PetscCall(MatView(sub_schurs->sum_S_Ej_inv_all, matl_dbg_viewer));
2044 }
2045 if (sub_schurs->sum_S_Ej_tilda_all) {
2046 PetscCall(PetscObjectSetName((PetscObject)sub_schurs->sum_S_Ej_tilda_all, "SSEt"));
2047 PetscCall(MatView(sub_schurs->sum_S_Ej_tilda_all, matl_dbg_viewer));
2048 }
2049 }
2050
2051 /* when not explicit, we need to set the factor type */
2052 if (sub_schurs->mat_factor_type == MAT_FACTOR_NONE) sub_schurs->mat_factor_type = sub_schurs->is_hermitian ? MAT_FACTOR_CHOLESKY : MAT_FACTOR_LU;
2053
2054 /* free workspace */
2055 if (matl_dbg_viewer) PetscCall(PetscViewerFlush(matl_dbg_viewer));
2056 if (sub_schurs->debug) PetscCallMPI(MPI_Barrier(comm_n));
2057 PetscCall(PetscViewerDestroy(&matl_dbg_viewer));
2058 PetscCall(PetscFree2(Bwork, pivots));
2059 PetscCall(PetscCommDestroy(&comm_n));
2060 PetscCall(PetscFree(all_local_subid_N));
2061 PetscFunctionReturn(PETSC_SUCCESS);
2062 }
2063
PCBDDCSubSchursInit(PCBDDCSubSchurs sub_schurs,const char * prefix,IS is_I,IS is_B,PCBDDCGraph graph,ISLocalToGlobalMapping BtoNmap,PetscBool copycc,PetscBool gdsw)2064 PetscErrorCode PCBDDCSubSchursInit(PCBDDCSubSchurs sub_schurs, const char *prefix, IS is_I, IS is_B, PCBDDCGraph graph, ISLocalToGlobalMapping BtoNmap, PetscBool copycc, PetscBool gdsw)
2065 {
2066 IS *faces, *edges, *all_cc, vertices;
2067 PetscInt s, i, n_faces, n_edges, n_all_cc;
2068 PetscBool is_sorted, ispardiso, ismumps;
2069
2070 PetscFunctionBegin;
2071 PetscCall(ISSorted(is_I, &is_sorted));
2072 PetscCheck(is_sorted, PetscObjectComm((PetscObject)is_I), PETSC_ERR_PLIB, "IS for I dofs should be shorted");
2073 PetscCall(ISSorted(is_B, &is_sorted));
2074 PetscCheck(is_sorted, PetscObjectComm((PetscObject)is_B), PETSC_ERR_PLIB, "IS for B dofs should be shorted");
2075
2076 /* reset any previous data */
2077 PetscCall(PCBDDCSubSchursReset(sub_schurs));
2078
2079 sub_schurs->gdsw = gdsw;
2080 sub_schurs->graph = graph;
2081
2082 /* get index sets for faces and edges (already sorted by global ordering) */
2083 PetscCall(PCBDDCGraphGetCandidatesIS(graph, &n_faces, &faces, &n_edges, &edges, &vertices));
2084 n_all_cc = n_faces + n_edges;
2085 PetscCall(PetscBTCreate(n_all_cc, &sub_schurs->is_edge));
2086 PetscCall(PetscMalloc1(n_all_cc, &all_cc));
2087 n_all_cc = 0;
2088 for (i = 0; i < n_faces; i++) {
2089 PetscCall(ISGetSize(faces[i], &s));
2090 if (!s) continue;
2091 if (copycc) {
2092 PetscCall(ISDuplicate(faces[i], &all_cc[n_all_cc]));
2093 } else {
2094 PetscCall(PetscObjectReference((PetscObject)faces[i]));
2095 all_cc[n_all_cc] = faces[i];
2096 }
2097 n_all_cc++;
2098 }
2099 for (i = 0; i < n_edges; i++) {
2100 PetscCall(ISGetSize(edges[i], &s));
2101 if (!s) continue;
2102 if (copycc) {
2103 PetscCall(ISDuplicate(edges[i], &all_cc[n_all_cc]));
2104 } else {
2105 PetscCall(PetscObjectReference((PetscObject)edges[i]));
2106 all_cc[n_all_cc] = edges[i];
2107 }
2108 PetscCall(PetscBTSet(sub_schurs->is_edge, n_all_cc));
2109 n_all_cc++;
2110 }
2111 PetscCall(PetscObjectReference((PetscObject)vertices));
2112 sub_schurs->is_vertices = vertices;
2113 PetscCall(PCBDDCGraphRestoreCandidatesIS(graph, &n_faces, &faces, &n_edges, &edges, &vertices));
2114 sub_schurs->is_dir = NULL;
2115 PetscCall(PCBDDCGraphGetDirichletDofsB(graph, &sub_schurs->is_dir));
2116
2117 /* Determine if MatFactor can be used */
2118 PetscCall(PetscStrallocpy(prefix, &sub_schurs->prefix));
2119 #if defined(PETSC_HAVE_MUMPS)
2120 PetscCall(PetscStrncpy(sub_schurs->mat_solver_type, MATSOLVERMUMPS, sizeof(sub_schurs->mat_solver_type)));
2121 #elif defined(PETSC_HAVE_MKL_PARDISO)
2122 PetscCall(PetscStrncpy(sub_schurs->mat_solver_type, MATSOLVERMKL_PARDISO, sizeof(sub_schurs->mat_solver_type)));
2123 #else
2124 PetscCall(PetscStrncpy(sub_schurs->mat_solver_type, MATSOLVERPETSC, sizeof(sub_schurs->mat_solver_type)));
2125 #endif
2126 sub_schurs->mat_factor_type = MAT_FACTOR_NONE;
2127 sub_schurs->is_hermitian = PetscDefined(USE_COMPLEX) ? PETSC_FALSE : PETSC_TRUE; /* Hermitian Cholesky is not supported by PETSc and external packages */
2128 sub_schurs->is_posdef = PETSC_TRUE;
2129 sub_schurs->is_symmetric = PETSC_TRUE;
2130 sub_schurs->debug = PETSC_FALSE;
2131 sub_schurs->restrict_comm = PETSC_FALSE;
2132 PetscOptionsBegin(PetscObjectComm((PetscObject)graph->l2gmap), sub_schurs->prefix, "BDDC sub_schurs options", "PC");
2133 PetscCall(PetscOptionsString("-sub_schurs_mat_solver_type", "Specific direct solver to use", NULL, sub_schurs->mat_solver_type, sub_schurs->mat_solver_type, sizeof(sub_schurs->mat_solver_type), NULL));
2134 PetscCall(PetscOptionsEnum("-sub_schurs_mat_factor_type", "Factor type to use. Use MAT_FACTOR_NONE for automatic selection", NULL, MatFactorTypes, (PetscEnum)sub_schurs->mat_factor_type, (PetscEnum *)&sub_schurs->mat_factor_type, NULL));
2135 PetscCall(PetscOptionsBool("-sub_schurs_symmetric", "Symmetric problem", NULL, sub_schurs->is_symmetric, &sub_schurs->is_symmetric, NULL));
2136 PetscCall(PetscOptionsBool("-sub_schurs_hermitian", "Hermitian problem", NULL, sub_schurs->is_hermitian, &sub_schurs->is_hermitian, NULL));
2137 PetscCall(PetscOptionsBool("-sub_schurs_posdef", "Positive definite problem", NULL, sub_schurs->is_posdef, &sub_schurs->is_posdef, NULL));
2138 PetscCall(PetscOptionsBool("-sub_schurs_restrictcomm", "Restrict communicator on active processes only", NULL, sub_schurs->restrict_comm, &sub_schurs->restrict_comm, NULL));
2139 PetscCall(PetscOptionsBool("-sub_schurs_debug", "Debug output", NULL, sub_schurs->debug, &sub_schurs->debug, NULL));
2140 PetscOptionsEnd();
2141 PetscCall(PetscStrcmp(sub_schurs->mat_solver_type, MATSOLVERMUMPS, &ismumps));
2142 PetscCall(PetscStrcmp(sub_schurs->mat_solver_type, MATSOLVERMKL_PARDISO, &ispardiso));
2143 sub_schurs->schur_explicit = (PetscBool)(ispardiso || ismumps);
2144
2145 /* for reals, symmetric and Hermitian are synonyms */
2146 #if !defined(PETSC_USE_COMPLEX)
2147 sub_schurs->is_symmetric = (PetscBool)(sub_schurs->is_symmetric && sub_schurs->is_hermitian);
2148 sub_schurs->is_hermitian = sub_schurs->is_symmetric;
2149 #endif
2150
2151 PetscCall(PetscObjectReference((PetscObject)is_I));
2152 sub_schurs->is_I = is_I;
2153 PetscCall(PetscObjectReference((PetscObject)is_B));
2154 sub_schurs->is_B = is_B;
2155 PetscCall(PetscObjectReference((PetscObject)graph->l2gmap));
2156 sub_schurs->l2gmap = graph->l2gmap;
2157 PetscCall(PetscObjectReference((PetscObject)BtoNmap));
2158 sub_schurs->BtoNmap = BtoNmap;
2159 sub_schurs->n_subs = n_all_cc;
2160 sub_schurs->is_subs = all_cc;
2161 sub_schurs->S_Ej_all = NULL;
2162 sub_schurs->sum_S_Ej_all = NULL;
2163 sub_schurs->sum_S_Ej_inv_all = NULL;
2164 sub_schurs->sum_S_Ej_tilda_all = NULL;
2165 sub_schurs->is_Ej_all = NULL;
2166 PetscFunctionReturn(PETSC_SUCCESS);
2167 }
2168
PCBDDCSubSchursCreate(PCBDDCSubSchurs * sub_schurs)2169 PetscErrorCode PCBDDCSubSchursCreate(PCBDDCSubSchurs *sub_schurs)
2170 {
2171 PCBDDCSubSchurs schurs_ctx;
2172
2173 PetscFunctionBegin;
2174 PetscCall(PetscNew(&schurs_ctx));
2175 schurs_ctx->n_subs = 0;
2176 *sub_schurs = schurs_ctx;
2177 PetscFunctionReturn(PETSC_SUCCESS);
2178 }
2179
PCBDDCSubSchursReset(PCBDDCSubSchurs sub_schurs)2180 PetscErrorCode PCBDDCSubSchursReset(PCBDDCSubSchurs sub_schurs)
2181 {
2182 PetscFunctionBegin;
2183 if (!sub_schurs) PetscFunctionReturn(PETSC_SUCCESS);
2184 sub_schurs->graph = NULL;
2185 PetscCall(PetscFree(sub_schurs->prefix));
2186 PetscCall(MatDestroy(&sub_schurs->A));
2187 PetscCall(MatDestroy(&sub_schurs->S));
2188 PetscCall(ISDestroy(&sub_schurs->is_I));
2189 PetscCall(ISDestroy(&sub_schurs->is_B));
2190 PetscCall(ISLocalToGlobalMappingDestroy(&sub_schurs->l2gmap));
2191 PetscCall(ISLocalToGlobalMappingDestroy(&sub_schurs->BtoNmap));
2192 PetscCall(MatDestroy(&sub_schurs->S_Ej_all));
2193 PetscCall(MatDestroy(&sub_schurs->sum_S_Ej_all));
2194 PetscCall(MatDestroy(&sub_schurs->sum_S_Ej_inv_all));
2195 PetscCall(MatDestroy(&sub_schurs->sum_S_Ej_tilda_all));
2196 PetscCall(ISDestroy(&sub_schurs->is_Ej_all));
2197 PetscCall(ISDestroy(&sub_schurs->is_vertices));
2198 PetscCall(ISDestroy(&sub_schurs->is_dir));
2199 PetscCall(PetscBTDestroy(&sub_schurs->is_edge));
2200 for (PetscInt i = 0; i < sub_schurs->n_subs; i++) PetscCall(ISDestroy(&sub_schurs->is_subs[i]));
2201 if (sub_schurs->n_subs) PetscCall(PetscFree(sub_schurs->is_subs));
2202 if (sub_schurs->reuse_solver) PetscCall(PCBDDCReuseSolversReset(sub_schurs->reuse_solver));
2203 PetscCall(PetscFree(sub_schurs->reuse_solver));
2204 if (sub_schurs->change) {
2205 for (PetscInt i = 0; i < sub_schurs->n_subs; i++) {
2206 PetscCall(KSPDestroy(&sub_schurs->change[i]));
2207 PetscCall(ISDestroy(&sub_schurs->change_primal_sub[i]));
2208 }
2209 }
2210 PetscCall(PetscFree(sub_schurs->change));
2211 PetscCall(PetscFree(sub_schurs->change_primal_sub));
2212 sub_schurs->n_subs = 0;
2213 PetscFunctionReturn(PETSC_SUCCESS);
2214 }
2215
PCBDDCSubSchursDestroy(PCBDDCSubSchurs * sub_schurs)2216 PetscErrorCode PCBDDCSubSchursDestroy(PCBDDCSubSchurs *sub_schurs)
2217 {
2218 PetscFunctionBegin;
2219 PetscCall(PCBDDCSubSchursReset(*sub_schurs));
2220 PetscCall(PetscFree(*sub_schurs));
2221 PetscFunctionReturn(PETSC_SUCCESS);
2222 }
2223
PCBDDCAdjGetNextLayer_Private(PetscInt * queue_tip,PetscInt n_prev,PetscBT touched,PetscInt * xadj,PetscInt * adjncy,PetscInt * n_added)2224 static inline PetscErrorCode PCBDDCAdjGetNextLayer_Private(PetscInt *queue_tip, PetscInt n_prev, PetscBT touched, PetscInt *xadj, PetscInt *adjncy, PetscInt *n_added)
2225 {
2226 PetscInt i, j, n;
2227
2228 PetscFunctionBegin;
2229 n = 0;
2230 for (i = -n_prev; i < 0; i++) {
2231 PetscInt start_dof = queue_tip[i];
2232 for (j = xadj[start_dof]; j < xadj[start_dof + 1]; j++) {
2233 PetscInt dof = adjncy[j];
2234 if (!PetscBTLookup(touched, dof)) {
2235 PetscCall(PetscBTSet(touched, dof));
2236 queue_tip[n] = dof;
2237 n++;
2238 }
2239 }
2240 }
2241 *n_added = n;
2242 PetscFunctionReturn(PETSC_SUCCESS);
2243 }
2244