15e5bbd0aSStefano Zampini #include <petsc/private/pcbddcimpl.h>
25e5bbd0aSStefano Zampini #include <petsc/private/pcbddcprivateimpl.h>
305709791SSatish Balay #include <../src/mat/impls/dense/seq/dense.h>
408122e43SStefano Zampini #include <petscblaslapack.h>
534a97f8cSStefano Zampini
69fbee547SJacob Faibussowitsch static inline PetscErrorCode PCBDDCAdjGetNextLayer_Private(PetscInt *, PetscInt, PetscBT, PetscInt *, PetscInt *, PetscInt *);
75ec10c6aSStefano Zampini static PetscErrorCode PCBDDCComputeExplicitSchur(Mat, PetscBool, MatReuse, Mat *);
8df4d28bfSStefano Zampini static PetscErrorCode PCBDDCReuseSolvers_Interior(PC, Vec, Vec);
9df4d28bfSStefano Zampini static PetscErrorCode PCBDDCReuseSolvers_Correction(PC, Vec, Vec);
10d62866d3SStefano Zampini
11ca92afb2SStefano Zampini /* if v2 is not present, correction is done in-place */
PCBDDCReuseSolversBenignAdapt(PCBDDCReuseSolvers ctx,Vec v,Vec v2,PetscBool sol,PetscBool full)12d71ae5a4SJacob Faibussowitsch PetscErrorCode PCBDDCReuseSolversBenignAdapt(PCBDDCReuseSolvers ctx, Vec v, Vec v2, PetscBool sol, PetscBool full)
13d71ae5a4SJacob Faibussowitsch {
14ca92afb2SStefano Zampini PetscScalar *array;
15ca92afb2SStefano Zampini PetscScalar *array2;
16ca92afb2SStefano Zampini
17ca92afb2SStefano Zampini PetscFunctionBegin;
183ba16761SJacob Faibussowitsch if (!ctx->benign_n) PetscFunctionReturn(PETSC_SUCCESS);
195cbda25cSStefano Zampini if (sol && full) {
205cbda25cSStefano Zampini PetscInt n_I, size_schur;
215cbda25cSStefano Zampini
225cbda25cSStefano Zampini /* get sizes */
239566063dSJacob Faibussowitsch PetscCall(MatGetSize(ctx->benign_csAIB, &size_schur, NULL));
249566063dSJacob Faibussowitsch PetscCall(VecGetSize(v, &n_I));
255cbda25cSStefano Zampini n_I = n_I - size_schur;
265cbda25cSStefano Zampini /* get schur sol from array */
279566063dSJacob Faibussowitsch PetscCall(VecGetArray(v, &array));
289566063dSJacob Faibussowitsch PetscCall(VecPlaceArray(ctx->benign_dummy_schur_vec, array + n_I));
299566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(v, &array));
305cbda25cSStefano Zampini /* apply interior sol correction */
319566063dSJacob Faibussowitsch PetscCall(MatMultTranspose(ctx->benign_csAIB, ctx->benign_dummy_schur_vec, ctx->benign_corr_work));
329566063dSJacob Faibussowitsch PetscCall(VecResetArray(ctx->benign_dummy_schur_vec));
339566063dSJacob Faibussowitsch PetscCall(MatMultAdd(ctx->benign_AIIm1ones, ctx->benign_corr_work, v, v));
345cbda25cSStefano Zampini }
35ca92afb2SStefano Zampini if (v2) {
36ca92afb2SStefano Zampini PetscInt nl;
37ca92afb2SStefano Zampini
389566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(v, (const PetscScalar **)&array));
399566063dSJacob Faibussowitsch PetscCall(VecGetLocalSize(v2, &nl));
409566063dSJacob Faibussowitsch PetscCall(VecGetArray(v2, &array2));
419566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(array2, array, nl));
42ca92afb2SStefano Zampini } else {
439566063dSJacob Faibussowitsch PetscCall(VecGetArray(v, &array));
44ca92afb2SStefano Zampini array2 = array;
45ca92afb2SStefano Zampini }
46ca92afb2SStefano Zampini if (!sol) { /* change rhs */
47ca92afb2SStefano Zampini PetscInt n;
48ca92afb2SStefano Zampini for (n = 0; n < ctx->benign_n; n++) {
49ca92afb2SStefano Zampini PetscScalar sum = 0.;
50ca92afb2SStefano Zampini const PetscInt *cols;
51ca92afb2SStefano Zampini PetscInt nz, i;
52ca92afb2SStefano Zampini
539566063dSJacob Faibussowitsch PetscCall(ISGetLocalSize(ctx->benign_zerodiag_subs[n], &nz));
549566063dSJacob Faibussowitsch PetscCall(ISGetIndices(ctx->benign_zerodiag_subs[n], &cols));
55ca92afb2SStefano Zampini for (i = 0; i < nz - 1; i++) sum += array[cols[i]];
5622db5ddcSStefano Zampini #if defined(PETSC_USE_COMPLEX)
5722db5ddcSStefano Zampini sum = -(PetscRealPart(sum) / nz + PETSC_i * (PetscImaginaryPart(sum) / nz));
5822db5ddcSStefano Zampini #else
59ca92afb2SStefano Zampini sum = -sum / nz;
6022db5ddcSStefano Zampini #endif
61ca92afb2SStefano Zampini for (i = 0; i < nz - 1; i++) array2[cols[i]] += sum;
62ca92afb2SStefano Zampini ctx->benign_save_vals[n] = array2[cols[nz - 1]];
63ca92afb2SStefano Zampini array2[cols[nz - 1]] = sum;
649566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(ctx->benign_zerodiag_subs[n], &cols));
65ca92afb2SStefano Zampini }
66ca92afb2SStefano Zampini } else {
67ca92afb2SStefano Zampini PetscInt n;
68ca92afb2SStefano Zampini for (n = 0; n < ctx->benign_n; n++) {
69ca92afb2SStefano Zampini PetscScalar sum = 0.;
70ca92afb2SStefano Zampini const PetscInt *cols;
71ca92afb2SStefano Zampini PetscInt nz, i;
729566063dSJacob Faibussowitsch PetscCall(ISGetLocalSize(ctx->benign_zerodiag_subs[n], &nz));
739566063dSJacob Faibussowitsch PetscCall(ISGetIndices(ctx->benign_zerodiag_subs[n], &cols));
74ca92afb2SStefano Zampini for (i = 0; i < nz - 1; i++) sum += array[cols[i]];
7522db5ddcSStefano Zampini #if defined(PETSC_USE_COMPLEX)
7622db5ddcSStefano Zampini sum = -(PetscRealPart(sum) / nz + PETSC_i * (PetscImaginaryPart(sum) / nz));
7722db5ddcSStefano Zampini #else
78ca92afb2SStefano Zampini sum = -sum / nz;
7922db5ddcSStefano Zampini #endif
80ca92afb2SStefano Zampini for (i = 0; i < nz - 1; i++) array2[cols[i]] += sum;
81ca92afb2SStefano Zampini array2[cols[nz - 1]] = ctx->benign_save_vals[n];
829566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(ctx->benign_zerodiag_subs[n], &cols));
83ca92afb2SStefano Zampini }
84ca92afb2SStefano Zampini }
85ca92afb2SStefano Zampini if (v2) {
869566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(v, (const PetscScalar **)&array));
879566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(v2, &array2));
88ca92afb2SStefano Zampini } else {
899566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(v, &array));
90ca92afb2SStefano Zampini }
915cbda25cSStefano Zampini if (!sol && full) {
925cbda25cSStefano Zampini Vec usedv;
935cbda25cSStefano Zampini PetscInt n_I, size_schur;
945cbda25cSStefano Zampini
955cbda25cSStefano Zampini /* get sizes */
969566063dSJacob Faibussowitsch PetscCall(MatGetSize(ctx->benign_csAIB, &size_schur, NULL));
979566063dSJacob Faibussowitsch PetscCall(VecGetSize(v, &n_I));
985cbda25cSStefano Zampini n_I = n_I - size_schur;
995cbda25cSStefano Zampini /* compute schur rhs correction */
1005cbda25cSStefano Zampini if (v2) {
1015cbda25cSStefano Zampini usedv = v2;
1025cbda25cSStefano Zampini } else {
1035cbda25cSStefano Zampini usedv = v;
1045cbda25cSStefano Zampini }
1055cbda25cSStefano Zampini /* apply schur rhs correction */
1069566063dSJacob Faibussowitsch PetscCall(MatMultTranspose(ctx->benign_AIIm1ones, usedv, ctx->benign_corr_work));
1079566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(usedv, (const PetscScalar **)&array));
1089566063dSJacob Faibussowitsch PetscCall(VecPlaceArray(ctx->benign_dummy_schur_vec, array + n_I));
1099566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(usedv, (const PetscScalar **)&array));
1109566063dSJacob Faibussowitsch PetscCall(MatMultAdd(ctx->benign_csAIB, ctx->benign_corr_work, ctx->benign_dummy_schur_vec, ctx->benign_dummy_schur_vec));
1119566063dSJacob Faibussowitsch PetscCall(VecResetArray(ctx->benign_dummy_schur_vec));
1125cbda25cSStefano Zampini }
1133ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
114ca92afb2SStefano Zampini }
115ca92afb2SStefano Zampini
PCBDDCReuseSolvers_Solve_Private(PC pc,Vec rhs,Vec sol,PetscBool transpose,PetscBool full)116d71ae5a4SJacob Faibussowitsch static PetscErrorCode PCBDDCReuseSolvers_Solve_Private(PC pc, Vec rhs, Vec sol, PetscBool transpose, PetscBool full)
117d71ae5a4SJacob Faibussowitsch {
118df4d28bfSStefano Zampini PCBDDCReuseSolvers ctx;
119683d3df6SStefano Zampini PetscBool copy = PETSC_FALSE;
120d62866d3SStefano Zampini
121d62866d3SStefano Zampini PetscFunctionBegin;
1229566063dSJacob Faibussowitsch PetscCall(PCShellGetContext(pc, &ctx));
123683d3df6SStefano Zampini if (full) {
1249566063dSJacob Faibussowitsch PetscCall(MatMumpsSetIcntl(ctx->F, 26, -1));
1255cbda25cSStefano Zampini #if defined(PETSC_HAVE_MKL_PARDISO)
1269566063dSJacob Faibussowitsch PetscCall(MatMkl_PardisoSetCntl(ctx->F, 70, 0));
1275cbda25cSStefano Zampini #endif
128683d3df6SStefano Zampini copy = ctx->has_vertices;
129d4933d67SStefano Zampini } else { /* interior solver */
1309566063dSJacob Faibussowitsch PetscCall(MatMumpsSetIcntl(ctx->F, 26, 0));
131d4933d67SStefano Zampini #if defined(PETSC_HAVE_MKL_PARDISO)
1329566063dSJacob Faibussowitsch PetscCall(MatMkl_PardisoSetCntl(ctx->F, 70, 1));
133d4933d67SStefano Zampini #endif
134683d3df6SStefano Zampini copy = PETSC_TRUE;
135683d3df6SStefano Zampini }
136683d3df6SStefano Zampini /* copy rhs into factored matrix workspace */
137683d3df6SStefano Zampini if (copy) {
138ca92afb2SStefano Zampini PetscInt n;
139df4d28bfSStefano Zampini PetscScalar *array, *array_solver;
140ca92afb2SStefano Zampini
1419566063dSJacob Faibussowitsch PetscCall(VecGetLocalSize(rhs, &n));
1429566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(rhs, (const PetscScalar **)&array));
1439566063dSJacob Faibussowitsch PetscCall(VecGetArray(ctx->rhs, &array_solver));
1449566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(array_solver, array, n));
1459566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(ctx->rhs, &array_solver));
1469566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(rhs, (const PetscScalar **)&array));
147683d3df6SStefano Zampini
1489566063dSJacob Faibussowitsch PetscCall(PCBDDCReuseSolversBenignAdapt(ctx, ctx->rhs, NULL, PETSC_FALSE, full));
149683d3df6SStefano Zampini if (transpose) {
1509566063dSJacob Faibussowitsch PetscCall(MatSolveTranspose(ctx->F, ctx->rhs, ctx->sol));
151683d3df6SStefano Zampini } else {
1529566063dSJacob Faibussowitsch PetscCall(MatSolve(ctx->F, ctx->rhs, ctx->sol));
153683d3df6SStefano Zampini }
1549566063dSJacob Faibussowitsch PetscCall(PCBDDCReuseSolversBenignAdapt(ctx, ctx->sol, NULL, PETSC_TRUE, full));
155683d3df6SStefano Zampini
156683d3df6SStefano Zampini /* get back data to caller worskpace */
1579566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(ctx->sol, (const PetscScalar **)&array_solver));
1589566063dSJacob Faibussowitsch PetscCall(VecGetArray(sol, &array));
1599566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(array, array_solver, n));
1609566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(sol, &array));
1619566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(ctx->sol, (const PetscScalar **)&array_solver));
162683d3df6SStefano Zampini } else {
163ca92afb2SStefano Zampini if (ctx->benign_n) {
1649566063dSJacob Faibussowitsch PetscCall(PCBDDCReuseSolversBenignAdapt(ctx, rhs, ctx->rhs, PETSC_FALSE, full));
165ca92afb2SStefano Zampini if (transpose) {
1669566063dSJacob Faibussowitsch PetscCall(MatSolveTranspose(ctx->F, ctx->rhs, sol));
167ca92afb2SStefano Zampini } else {
1689566063dSJacob Faibussowitsch PetscCall(MatSolve(ctx->F, ctx->rhs, sol));
169ca92afb2SStefano Zampini }
1709566063dSJacob Faibussowitsch PetscCall(PCBDDCReuseSolversBenignAdapt(ctx, sol, NULL, PETSC_TRUE, full));
171ca92afb2SStefano Zampini } else {
172e28d306cSStefano Zampini if (transpose) {
1739566063dSJacob Faibussowitsch PetscCall(MatSolveTranspose(ctx->F, rhs, sol));
174e28d306cSStefano Zampini } else {
1759566063dSJacob Faibussowitsch PetscCall(MatSolve(ctx->F, rhs, sol));
176e28d306cSStefano Zampini }
177683d3df6SStefano Zampini }
178ca92afb2SStefano Zampini }
1795cbda25cSStefano Zampini /* restore defaults */
1809566063dSJacob Faibussowitsch PetscCall(MatMumpsSetIcntl(ctx->F, 26, -1));
181d4933d67SStefano Zampini #if defined(PETSC_HAVE_MKL_PARDISO)
1829566063dSJacob Faibussowitsch PetscCall(MatMkl_PardisoSetCntl(ctx->F, 70, 0));
183d4933d67SStefano Zampini #endif
1843ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
185d62866d3SStefano Zampini }
186d62866d3SStefano Zampini
PCBDDCReuseSolvers_Correction(PC pc,Vec rhs,Vec sol)187d71ae5a4SJacob Faibussowitsch static PetscErrorCode PCBDDCReuseSolvers_Correction(PC pc, Vec rhs, Vec sol)
188d71ae5a4SJacob Faibussowitsch {
189e28d306cSStefano Zampini PetscFunctionBegin;
1909566063dSJacob Faibussowitsch PetscCall(PCBDDCReuseSolvers_Solve_Private(pc, rhs, sol, PETSC_FALSE, PETSC_TRUE));
1913ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
192e28d306cSStefano Zampini }
193e28d306cSStefano Zampini
PCBDDCReuseSolvers_CorrectionTranspose(PC pc,Vec rhs,Vec sol)194d71ae5a4SJacob Faibussowitsch static PetscErrorCode PCBDDCReuseSolvers_CorrectionTranspose(PC pc, Vec rhs, Vec sol)
195d71ae5a4SJacob Faibussowitsch {
196e28d306cSStefano Zampini PetscFunctionBegin;
1979566063dSJacob Faibussowitsch PetscCall(PCBDDCReuseSolvers_Solve_Private(pc, rhs, sol, PETSC_TRUE, PETSC_TRUE));
1983ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
199683d3df6SStefano Zampini }
200683d3df6SStefano Zampini
PCBDDCReuseSolvers_Interior(PC pc,Vec rhs,Vec sol)201d71ae5a4SJacob Faibussowitsch static PetscErrorCode PCBDDCReuseSolvers_Interior(PC pc, Vec rhs, Vec sol)
202d71ae5a4SJacob Faibussowitsch {
203683d3df6SStefano Zampini PetscFunctionBegin;
2049566063dSJacob Faibussowitsch PetscCall(PCBDDCReuseSolvers_Solve_Private(pc, rhs, sol, PETSC_FALSE, PETSC_FALSE));
2053ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
206683d3df6SStefano Zampini }
207683d3df6SStefano Zampini
PCBDDCReuseSolvers_InteriorTranspose(PC pc,Vec rhs,Vec sol)208d71ae5a4SJacob Faibussowitsch static PetscErrorCode PCBDDCReuseSolvers_InteriorTranspose(PC pc, Vec rhs, Vec sol)
209d71ae5a4SJacob Faibussowitsch {
210683d3df6SStefano Zampini PetscFunctionBegin;
2119566063dSJacob Faibussowitsch PetscCall(PCBDDCReuseSolvers_Solve_Private(pc, rhs, sol, PETSC_TRUE, PETSC_FALSE));
2123ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
213e28d306cSStefano Zampini }
214e28d306cSStefano Zampini
PCBDDCReuseSolvers_View(PC pc,PetscViewer viewer)215d71ae5a4SJacob Faibussowitsch static PetscErrorCode PCBDDCReuseSolvers_View(PC pc, PetscViewer viewer)
216d71ae5a4SJacob Faibussowitsch {
21715579a77SStefano Zampini PCBDDCReuseSolvers ctx;
2189f196a02SMartin Diehl PetscBool isascii;
21915579a77SStefano Zampini
22015579a77SStefano Zampini PetscFunctionBegin;
2219566063dSJacob Faibussowitsch PetscCall(PCShellGetContext(pc, &ctx));
2229f196a02SMartin Diehl PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii));
2239f196a02SMartin Diehl if (isascii) PetscCall(PetscViewerPushFormat(viewer, PETSC_VIEWER_ASCII_INFO));
2249566063dSJacob Faibussowitsch PetscCall(MatView(ctx->F, viewer));
2259f196a02SMartin Diehl if (isascii) PetscCall(PetscViewerPopFormat(viewer));
2263ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
22715579a77SStefano Zampini }
22815579a77SStefano Zampini
PCBDDCReuseSolversReset(PCBDDCReuseSolvers reuse)229d71ae5a4SJacob Faibussowitsch static PetscErrorCode PCBDDCReuseSolversReset(PCBDDCReuseSolvers reuse)
230d71ae5a4SJacob Faibussowitsch {
231ca92afb2SStefano Zampini PetscInt i;
232d62866d3SStefano Zampini
233d62866d3SStefano Zampini PetscFunctionBegin;
2349566063dSJacob Faibussowitsch PetscCall(MatDestroy(&reuse->F));
2359566063dSJacob Faibussowitsch PetscCall(VecDestroy(&reuse->sol));
2369566063dSJacob Faibussowitsch PetscCall(VecDestroy(&reuse->rhs));
2379566063dSJacob Faibussowitsch PetscCall(PCDestroy(&reuse->interior_solver));
2389566063dSJacob Faibussowitsch PetscCall(PCDestroy(&reuse->correction_solver));
2399566063dSJacob Faibussowitsch PetscCall(ISDestroy(&reuse->is_R));
2409566063dSJacob Faibussowitsch PetscCall(ISDestroy(&reuse->is_B));
2419566063dSJacob Faibussowitsch PetscCall(VecScatterDestroy(&reuse->correction_scatter_B));
2429566063dSJacob Faibussowitsch PetscCall(VecDestroy(&reuse->sol_B));
2439566063dSJacob Faibussowitsch PetscCall(VecDestroy(&reuse->rhs_B));
24448a46eb9SPierre Jolivet for (i = 0; i < reuse->benign_n; i++) PetscCall(ISDestroy(&reuse->benign_zerodiag_subs[i]));
2459566063dSJacob Faibussowitsch PetscCall(PetscFree(reuse->benign_zerodiag_subs));
2469566063dSJacob Faibussowitsch PetscCall(PetscFree(reuse->benign_save_vals));
2479566063dSJacob Faibussowitsch PetscCall(MatDestroy(&reuse->benign_csAIB));
2489566063dSJacob Faibussowitsch PetscCall(MatDestroy(&reuse->benign_AIIm1ones));
2499566063dSJacob Faibussowitsch PetscCall(VecDestroy(&reuse->benign_corr_work));
2509566063dSJacob Faibussowitsch PetscCall(VecDestroy(&reuse->benign_dummy_schur_vec));
2513ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
252d62866d3SStefano Zampini }
253d5574798SStefano Zampini
PCBDDCReuseSolvers_Destroy(PC pc)254d71ae5a4SJacob Faibussowitsch static PetscErrorCode PCBDDCReuseSolvers_Destroy(PC pc)
255d71ae5a4SJacob Faibussowitsch {
25632fe681dSStefano Zampini PCBDDCReuseSolvers ctx;
25732fe681dSStefano Zampini
25832fe681dSStefano Zampini PetscFunctionBegin;
25932fe681dSStefano Zampini PetscCall(PCShellGetContext(pc, &ctx));
26032fe681dSStefano Zampini PetscCall(PCBDDCReuseSolversReset(ctx));
26132fe681dSStefano Zampini PetscCall(PetscFree(ctx));
26232fe681dSStefano Zampini PetscCall(PCShellSetContext(pc, NULL));
2633ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
26432fe681dSStefano Zampini }
26532fe681dSStefano Zampini
PCBDDCComputeExplicitSchur(Mat M,PetscBool issym,MatReuse reuse,Mat * S)266d71ae5a4SJacob Faibussowitsch static PetscErrorCode PCBDDCComputeExplicitSchur(Mat M, PetscBool issym, MatReuse reuse, Mat *S)
267d71ae5a4SJacob Faibussowitsch {
2683202ece2SStefano Zampini Mat B, C, D, Bd, Cd, AinvBd;
2693202ece2SStefano Zampini KSP ksp;
2703202ece2SStefano Zampini PC pc;
2713202ece2SStefano Zampini PetscBool isLU, isILU, isCHOL, Bdense, Cdense;
2723202ece2SStefano Zampini PetscReal fill = 2.0;
273f11841e3SStefano Zampini PetscInt n_I;
2743202ece2SStefano Zampini PetscMPIInt size;
2753202ece2SStefano Zampini
2763202ece2SStefano Zampini PetscFunctionBegin;
2779566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)M), &size));
2787827d75bSBarry Smith PetscCheck(size == 1, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not for parallel matrices");
279f11841e3SStefano Zampini if (reuse == MAT_REUSE_MATRIX) {
280f11841e3SStefano Zampini PetscBool Sdense;
281f11841e3SStefano Zampini
2829566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)*S, MATSEQDENSE, &Sdense));
28328b400f6SJacob Faibussowitsch PetscCheck(Sdense, PetscObjectComm((PetscObject)M), PETSC_ERR_SUP, "S should dense");
284f11841e3SStefano Zampini }
2859566063dSJacob Faibussowitsch PetscCall(MatSchurComplementGetSubMatrices(M, NULL, NULL, &B, &C, &D));
2869566063dSJacob Faibussowitsch PetscCall(MatSchurComplementGetKSP(M, &ksp));
2879566063dSJacob Faibussowitsch PetscCall(KSPGetPC(ksp, &pc));
2889566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)pc, PCLU, &isLU));
2899566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)pc, PCILU, &isILU));
2909566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)pc, PCCHOLESKY, &isCHOL));
2919566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)B, MATSEQDENSE, &Bdense));
2929566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)C, MATSEQDENSE, &Cdense));
2939566063dSJacob Faibussowitsch PetscCall(MatGetSize(B, &n_I, NULL));
294f11841e3SStefano Zampini if (n_I) {
2953202ece2SStefano Zampini if (!Bdense) {
2969566063dSJacob Faibussowitsch PetscCall(MatConvert(B, MATSEQDENSE, MAT_INITIAL_MATRIX, &Bd));
2973202ece2SStefano Zampini } else {
2983202ece2SStefano Zampini Bd = B;
2993202ece2SStefano Zampini }
3003202ece2SStefano Zampini
3013202ece2SStefano Zampini if (isLU || isILU || isCHOL) {
3023202ece2SStefano Zampini Mat fact;
3039566063dSJacob Faibussowitsch PetscCall(KSPSetUp(ksp));
3049566063dSJacob Faibussowitsch PetscCall(PCFactorGetMatrix(pc, &fact));
3059566063dSJacob Faibussowitsch PetscCall(MatDuplicate(Bd, MAT_DO_NOT_COPY_VALUES, &AinvBd));
3069566063dSJacob Faibussowitsch PetscCall(MatMatSolve(fact, Bd, AinvBd));
3073202ece2SStefano Zampini } else {
30807b1e237SStefano Zampini PetscBool ex = PETSC_TRUE;
30907b1e237SStefano Zampini
31007b1e237SStefano Zampini if (ex) {
3113202ece2SStefano Zampini Mat Ainvd;
3123202ece2SStefano Zampini
3139566063dSJacob Faibussowitsch PetscCall(PCComputeOperator(pc, MATDENSE, &Ainvd));
3149566063dSJacob Faibussowitsch PetscCall(MatMatMult(Ainvd, Bd, MAT_INITIAL_MATRIX, fill, &AinvBd));
3159566063dSJacob Faibussowitsch PetscCall(MatDestroy(&Ainvd));
31607b1e237SStefano Zampini } else {
31707b1e237SStefano Zampini Vec sol, rhs;
31807b1e237SStefano Zampini PetscScalar *arrayrhs, *arraysol;
31907b1e237SStefano Zampini PetscInt i, nrhs, n;
32007b1e237SStefano Zampini
3219566063dSJacob Faibussowitsch PetscCall(MatDuplicate(Bd, MAT_DO_NOT_COPY_VALUES, &AinvBd));
3229566063dSJacob Faibussowitsch PetscCall(MatGetSize(Bd, &n, &nrhs));
3239566063dSJacob Faibussowitsch PetscCall(MatDenseGetArray(Bd, &arrayrhs));
3249566063dSJacob Faibussowitsch PetscCall(MatDenseGetArray(AinvBd, &arraysol));
3259566063dSJacob Faibussowitsch PetscCall(KSPGetSolution(ksp, &sol));
3269566063dSJacob Faibussowitsch PetscCall(KSPGetRhs(ksp, &rhs));
32707b1e237SStefano Zampini for (i = 0; i < nrhs; i++) {
3289566063dSJacob Faibussowitsch PetscCall(VecPlaceArray(rhs, arrayrhs + i * n));
3299566063dSJacob Faibussowitsch PetscCall(VecPlaceArray(sol, arraysol + i * n));
3309566063dSJacob Faibussowitsch PetscCall(KSPSolve(ksp, rhs, sol));
3319566063dSJacob Faibussowitsch PetscCall(VecResetArray(rhs));
3329566063dSJacob Faibussowitsch PetscCall(VecResetArray(sol));
33307b1e237SStefano Zampini }
3349566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreArray(Bd, &arrayrhs));
3359566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreArray(AinvBd, &arrayrhs));
33607b1e237SStefano Zampini }
3373202ece2SStefano Zampini }
33848a46eb9SPierre Jolivet if (!Bdense & !issym) PetscCall(MatDestroy(&Bd));
3395ec10c6aSStefano Zampini
3405ec10c6aSStefano Zampini if (!issym) {
3413202ece2SStefano Zampini if (!Cdense) {
3429566063dSJacob Faibussowitsch PetscCall(MatConvert(C, MATSEQDENSE, MAT_INITIAL_MATRIX, &Cd));
3433202ece2SStefano Zampini } else {
3443202ece2SStefano Zampini Cd = C;
3453202ece2SStefano Zampini }
3469566063dSJacob Faibussowitsch PetscCall(MatMatMult(Cd, AinvBd, reuse, fill, S));
34748a46eb9SPierre Jolivet if (!Cdense) PetscCall(MatDestroy(&Cd));
3485ec10c6aSStefano Zampini } else {
3499566063dSJacob Faibussowitsch PetscCall(MatTransposeMatMult(Bd, AinvBd, reuse, fill, S));
35048a46eb9SPierre Jolivet if (!Bdense) PetscCall(MatDestroy(&Bd));
3515ec10c6aSStefano Zampini }
3529566063dSJacob Faibussowitsch PetscCall(MatDestroy(&AinvBd));
353f11841e3SStefano Zampini }
3543202ece2SStefano Zampini
3553202ece2SStefano Zampini if (D) {
3563202ece2SStefano Zampini Mat Dd;
3573202ece2SStefano Zampini PetscBool Ddense;
3583202ece2SStefano Zampini
3599566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)D, MATSEQDENSE, &Ddense));
3603202ece2SStefano Zampini if (!Ddense) {
3619566063dSJacob Faibussowitsch PetscCall(MatConvert(D, MATSEQDENSE, MAT_INITIAL_MATRIX, &Dd));
3623202ece2SStefano Zampini } else {
3633202ece2SStefano Zampini Dd = D;
3643202ece2SStefano Zampini }
365f11841e3SStefano Zampini if (n_I) {
3669566063dSJacob Faibussowitsch PetscCall(MatAYPX(*S, -1.0, Dd, SAME_NONZERO_PATTERN));
367f11841e3SStefano Zampini } else {
368f11841e3SStefano Zampini if (reuse == MAT_INITIAL_MATRIX) {
3699566063dSJacob Faibussowitsch PetscCall(MatDuplicate(Dd, MAT_COPY_VALUES, S));
370f11841e3SStefano Zampini } else {
3719566063dSJacob Faibussowitsch PetscCall(MatCopy(Dd, *S, SAME_NONZERO_PATTERN));
372f11841e3SStefano Zampini }
373f11841e3SStefano Zampini }
37448a46eb9SPierre Jolivet if (!Ddense) PetscCall(MatDestroy(&Dd));
3753202ece2SStefano Zampini } else {
3769566063dSJacob Faibussowitsch PetscCall(MatScale(*S, -1.0));
3773202ece2SStefano Zampini }
3783ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
3793202ece2SStefano Zampini }
38034a97f8cSStefano Zampini
PCBDDCSubSchursSetUp(PCBDDCSubSchurs sub_schurs,Mat Ain,Mat Sin,PetscBool exact_schur,PetscInt xadj[],PetscInt adjncy[],PetscInt nlayers,Vec scaling,PetscBool compute_Stilda,PetscBool reuse_solvers,PetscBool benign_trick,PetscInt benign_n,PetscInt benign_p0_lidx[],IS benign_zerodiag_subs[],Mat change,IS change_primal)381d71ae5a4SJacob Faibussowitsch PetscErrorCode PCBDDCSubSchursSetUp(PCBDDCSubSchurs sub_schurs, Mat Ain, Mat Sin, PetscBool exact_schur, PetscInt xadj[], PetscInt adjncy[], PetscInt nlayers, Vec scaling, PetscBool compute_Stilda, PetscBool reuse_solvers, PetscBool benign_trick, PetscInt benign_n, PetscInt benign_p0_lidx[], IS benign_zerodiag_subs[], Mat change, IS change_primal)
382d71ae5a4SJacob Faibussowitsch {
383be83ff47SStefano Zampini Mat F, A_II, A_IB, A_BI, A_BB, AE_II;
384be83ff47SStefano Zampini Mat S_all;
38557a87bf3SStefano Zampini Vec gstash, lstash;
38657a87bf3SStefano Zampini VecScatter sstash;
387b7ab4a40SStefano Zampini IS is_I, is_I_layer;
388dc456d91SStefano Zampini IS all_subsets, all_subsets_mult, all_subsets_n;
38957a87bf3SStefano Zampini PetscScalar *stasharray, *Bwork;
390791bdc09SStefano Zampini PetscInt *all_local_idx_N, *all_local_subid_N = NULL;
391dc456d91SStefano Zampini PetscInt *auxnum1, *auxnum2;
392791bdc09SStefano Zampini PetscInt *local_subs = sub_schurs->graph->local_subs;
393791bdc09SStefano Zampini PetscInt i, subset_size, max_subset_size, n_local_subs = sub_schurs->graph->n_local_subs;
394683d3df6SStefano Zampini PetscInt n_B, extra, local_size, global_size;
39557a87bf3SStefano Zampini PetscInt local_stash_size;
39608122e43SStefano Zampini PetscBLASInt B_N, B_ierr, B_lwork, *pivots;
3975a95e1ceSStefano Zampini MPI_Comm comm_n;
398f4f7d9d6SStefano Zampini PetscBool deluxe = PETSC_TRUE;
399f4f7d9d6SStefano Zampini PetscBool use_potr = PETSC_FALSE, use_sytr = PETSC_FALSE;
4003b03f7bbSStefano Zampini PetscViewer matl_dbg_viewer = NULL;
401791bdc09SStefano Zampini PetscBool flg, multi_element = sub_schurs->graph->multi_element;
402b1b3d7a2SStefano Zampini
403b1b3d7a2SStefano Zampini PetscFunctionBegin;
4049566063dSJacob Faibussowitsch PetscCall(MatDestroy(&sub_schurs->A));
4059566063dSJacob Faibussowitsch PetscCall(MatDestroy(&sub_schurs->S));
406e62b6521Sstefano_zampini if (Ain) {
4079566063dSJacob Faibussowitsch PetscCall(PetscObjectReference((PetscObject)Ain));
408a64f4aa4SStefano Zampini sub_schurs->A = Ain;
409a64f4aa4SStefano Zampini }
4103301b35fSStefano Zampini
4119566063dSJacob Faibussowitsch PetscCall(PetscObjectReference((PetscObject)Sin));
412a64f4aa4SStefano Zampini sub_schurs->S = Sin;
413ad540459SPierre Jolivet if (sub_schurs->schur_explicit) sub_schurs->schur_explicit = (PetscBool)(!!sub_schurs->A);
414a64f4aa4SStefano Zampini
4155a95e1ceSStefano Zampini /* preliminary checks */
4167827d75bSBarry Smith PetscCheck(sub_schurs->schur_explicit || !compute_Stilda, PetscObjectComm((PetscObject)sub_schurs->l2gmap), PETSC_ERR_SUP, "Adaptive selection of constraints requires MUMPS and/or MKL_PARDISO");
4175a95e1ceSStefano Zampini
41888113c35SStefano Zampini if (benign_trick) sub_schurs->is_posdef = PETSC_FALSE;
41988113c35SStefano Zampini
4203b03f7bbSStefano Zampini /* debug (MATLAB) */
4217f9db97bSStefano Zampini if (sub_schurs->debug) {
4227f9db97bSStefano Zampini PetscMPIInt size, rank;
4237ebab0bbSStefano Zampini PetscInt nr, *print_schurs_ranks, print_schurs = PETSC_FALSE;
4247f9db97bSStefano Zampini
4259566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)sub_schurs->l2gmap), &size));
4269566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sub_schurs->l2gmap), &rank));
4277f9db97bSStefano Zampini nr = size;
4289566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nr, &print_schurs_ranks));
429d0609cedSBarry Smith PetscOptionsBegin(PetscObjectComm((PetscObject)sub_schurs->l2gmap), sub_schurs->prefix, "BDDC sub_schurs options", "PC");
4309566063dSJacob Faibussowitsch PetscCall(PetscOptionsIntArray("-sub_schurs_debug_ranks", "Ranks to debug (all if the option is not used)", NULL, print_schurs_ranks, &nr, &flg));
4317f9db97bSStefano Zampini if (!flg) print_schurs = PETSC_TRUE;
4327f9db97bSStefano Zampini else {
4337ebab0bbSStefano Zampini print_schurs = PETSC_FALSE;
4349371c9d4SSatish Balay for (i = 0; i < nr; i++)
435835f2295SStefano Zampini if (print_schurs_ranks[i] == rank) {
4369371c9d4SSatish Balay print_schurs = PETSC_TRUE;
4379371c9d4SSatish Balay break;
4389371c9d4SSatish Balay }
4397f9db97bSStefano Zampini }
440d0609cedSBarry Smith PetscOptionsEnd();
4419566063dSJacob Faibussowitsch PetscCall(PetscFree(print_schurs_ranks));
4423b03f7bbSStefano Zampini if (print_schurs) {
4433b03f7bbSStefano Zampini char filename[256];
4443b03f7bbSStefano Zampini
4459566063dSJacob Faibussowitsch PetscCall(PetscSNPrintf(filename, sizeof(filename), "sub_schurs_Schur_r%d.m", PetscGlobalRank));
4469566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIOpen(PETSC_COMM_SELF, filename, &matl_dbg_viewer));
4479566063dSJacob Faibussowitsch PetscCall(PetscViewerPushFormat(matl_dbg_viewer, PETSC_VIEWER_ASCII_MATLAB));
4483b03f7bbSStefano Zampini }
4497f9db97bSStefano Zampini }
4507f9db97bSStefano Zampini
451791bdc09SStefano Zampini /* DEBUG: turn on/off multi-element code path */
452791bdc09SStefano Zampini PetscCall(PetscOptionsGetBool(NULL, sub_schurs->prefix, "-sub_schurs_multielement_code", &multi_element, NULL));
453791bdc09SStefano Zampini if (n_local_subs == 0) multi_element = PETSC_FALSE;
454791bdc09SStefano Zampini
4555a95e1ceSStefano Zampini /* restrict work on active processes */
456991c41b4SStefano Zampini if (sub_schurs->restrict_comm) {
457991c41b4SStefano Zampini PetscSubcomm subcomm;
458991c41b4SStefano Zampini PetscMPIInt color, rank;
459991c41b4SStefano Zampini
4605a95e1ceSStefano Zampini color = 0;
4615a95e1ceSStefano Zampini if (!sub_schurs->n_subs) color = 1; /* this can happen if we are in a multilevel case or if the subdomain is disconnected */
4629566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sub_schurs->l2gmap), &rank));
4639566063dSJacob Faibussowitsch PetscCall(PetscSubcommCreate(PetscObjectComm((PetscObject)sub_schurs->l2gmap), &subcomm));
4649566063dSJacob Faibussowitsch PetscCall(PetscSubcommSetNumber(subcomm, 2));
4659566063dSJacob Faibussowitsch PetscCall(PetscSubcommSetTypeGeneral(subcomm, color, rank));
4669566063dSJacob Faibussowitsch PetscCall(PetscCommDuplicate(PetscSubcommChild(subcomm), &comm_n, NULL));
4679566063dSJacob Faibussowitsch PetscCall(PetscSubcommDestroy(&subcomm));
4685a95e1ceSStefano Zampini if (!sub_schurs->n_subs) {
4699566063dSJacob Faibussowitsch PetscCall(PetscCommDestroy(&comm_n));
4703ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
4715a95e1ceSStefano Zampini }
472991c41b4SStefano Zampini } else {
4739566063dSJacob Faibussowitsch PetscCall(PetscCommDuplicate(PetscObjectComm((PetscObject)sub_schurs->l2gmap), &comm_n, NULL));
474991c41b4SStefano Zampini }
4755a95e1ceSStefano Zampini
476b1b3d7a2SStefano Zampini /* get Schur complement matrices */
477df4d28bfSStefano Zampini if (!sub_schurs->schur_explicit) {
478a64f4aa4SStefano Zampini Mat tA_IB, tA_BI, tA_BB;
4793301b35fSStefano Zampini PetscBool isseqsbaij;
4809566063dSJacob Faibussowitsch PetscCall(MatSchurComplementGetSubMatrices(sub_schurs->S, &A_II, NULL, &tA_IB, &tA_BI, &tA_BB));
4819566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)tA_BB, MATSEQSBAIJ, &isseqsbaij));
4823301b35fSStefano Zampini if (isseqsbaij) {
4839566063dSJacob Faibussowitsch PetscCall(MatConvert(tA_BB, MATSEQAIJ, MAT_INITIAL_MATRIX, &A_BB));
4849566063dSJacob Faibussowitsch PetscCall(MatConvert(tA_IB, MATSEQAIJ, MAT_INITIAL_MATRIX, &A_IB));
4859566063dSJacob Faibussowitsch PetscCall(MatConvert(tA_BI, MATSEQAIJ, MAT_INITIAL_MATRIX, &A_BI));
486a64f4aa4SStefano Zampini } else {
4879566063dSJacob Faibussowitsch PetscCall(PetscObjectReference((PetscObject)tA_BB));
488a64f4aa4SStefano Zampini A_BB = tA_BB;
4899566063dSJacob Faibussowitsch PetscCall(PetscObjectReference((PetscObject)tA_IB));
490a64f4aa4SStefano Zampini A_IB = tA_IB;
4919566063dSJacob Faibussowitsch PetscCall(PetscObjectReference((PetscObject)tA_BI));
492a64f4aa4SStefano Zampini A_BI = tA_BI;
493f11841e3SStefano Zampini }
494a58a30b4SStefano Zampini } else {
4955a95e1ceSStefano Zampini A_II = NULL;
4965a95e1ceSStefano Zampini A_IB = NULL;
4975a95e1ceSStefano Zampini A_BI = NULL;
4985a95e1ceSStefano Zampini A_BB = NULL;
499b1b3d7a2SStefano Zampini }
5005a95e1ceSStefano Zampini S_all = NULL;
501b1b3d7a2SStefano Zampini
502b1b3d7a2SStefano Zampini /* determine interior problems */
5039566063dSJacob Faibussowitsch PetscCall(ISGetLocalSize(sub_schurs->is_I, &i));
5043dc780c3SStefano Zampini if (nlayers >= 0 && i) { /* Interior problems can be different from the original one */
505b1b3d7a2SStefano Zampini PetscBT touched;
506b1b3d7a2SStefano Zampini const PetscInt *idx_B;
507b1b3d7a2SStefano Zampini PetscInt n_I, n_B, n_local_dofs, n_prev_added, j, layer, *local_numbering;
508b1b3d7a2SStefano Zampini
50928b400f6SJacob Faibussowitsch PetscCheck(xadj, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot request layering without adjacency");
510b1b3d7a2SStefano Zampini /* get sizes */
5119566063dSJacob Faibussowitsch PetscCall(ISGetLocalSize(sub_schurs->is_I, &n_I));
5129566063dSJacob Faibussowitsch PetscCall(ISGetLocalSize(sub_schurs->is_B, &n_B));
513b1b3d7a2SStefano Zampini
5149566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(n_I + n_B, &local_numbering));
5159566063dSJacob Faibussowitsch PetscCall(PetscBTCreate(n_I + n_B, &touched));
5169566063dSJacob Faibussowitsch PetscCall(PetscBTMemzero(n_I + n_B, touched));
517b1b3d7a2SStefano Zampini
518b1b3d7a2SStefano Zampini /* all boundary dofs must be skipped when adding layers */
5199566063dSJacob Faibussowitsch PetscCall(ISGetIndices(sub_schurs->is_B, &idx_B));
52048a46eb9SPierre Jolivet for (j = 0; j < n_B; j++) PetscCall(PetscBTSet(touched, idx_B[j]));
5219566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(local_numbering, idx_B, n_B));
5229566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(sub_schurs->is_B, &idx_B));
523b1b3d7a2SStefano Zampini
524b1b3d7a2SStefano Zampini /* add prescribed number of layers of dofs */
525b1b3d7a2SStefano Zampini n_local_dofs = n_B;
526b1b3d7a2SStefano Zampini n_prev_added = n_B;
527b1b3d7a2SStefano Zampini for (layer = 0; layer < nlayers; layer++) {
528b6bace71SJacob Faibussowitsch PetscInt n_added = 0;
529b1b3d7a2SStefano Zampini if (n_local_dofs == n_I + n_B) break;
53063a3b9bcSJacob Faibussowitsch PetscCheck(n_local_dofs <= n_I + n_B, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Error querying layer %" PetscInt_FMT ". Out of bound access (%" PetscInt_FMT " > %" PetscInt_FMT ")", layer, n_local_dofs, n_I + n_B);
5319566063dSJacob Faibussowitsch PetscCall(PCBDDCAdjGetNextLayer_Private(local_numbering + n_local_dofs, n_prev_added, touched, xadj, adjncy, &n_added));
532b1b3d7a2SStefano Zampini n_prev_added = n_added;
533b1b3d7a2SStefano Zampini n_local_dofs += n_added;
534b1b3d7a2SStefano Zampini if (!n_added) break;
535b1b3d7a2SStefano Zampini }
5369566063dSJacob Faibussowitsch PetscCall(PetscBTDestroy(&touched));
537b1b3d7a2SStefano Zampini
538883469d8SStefano Zampini /* IS for I layer dofs in original numbering */
5399566063dSJacob Faibussowitsch PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)sub_schurs->is_I), n_local_dofs - n_B, local_numbering + n_B, PETSC_COPY_VALUES, &is_I_layer));
5409566063dSJacob Faibussowitsch PetscCall(PetscFree(local_numbering));
5419566063dSJacob Faibussowitsch PetscCall(ISSort(is_I_layer));
542883469d8SStefano Zampini /* IS for I layer dofs in I numbering */
543df4d28bfSStefano Zampini if (!sub_schurs->schur_explicit) {
544b1b3d7a2SStefano Zampini ISLocalToGlobalMapping ItoNmap;
5459566063dSJacob Faibussowitsch PetscCall(ISLocalToGlobalMappingCreateIS(sub_schurs->is_I, &ItoNmap));
5469566063dSJacob Faibussowitsch PetscCall(ISGlobalToLocalMappingApplyIS(ItoNmap, IS_GTOLM_DROP, is_I_layer, &is_I));
5479566063dSJacob Faibussowitsch PetscCall(ISLocalToGlobalMappingDestroy(&ItoNmap));
548b1b3d7a2SStefano Zampini
549b1b3d7a2SStefano Zampini /* II block */
5509566063dSJacob Faibussowitsch PetscCall(MatCreateSubMatrix(A_II, is_I, is_I, MAT_INITIAL_MATRIX, &AE_II));
551b1b3d7a2SStefano Zampini }
552b1b3d7a2SStefano Zampini } else {
553b1b3d7a2SStefano Zampini PetscInt n_I;
554b1b3d7a2SStefano Zampini
555b1b3d7a2SStefano Zampini /* IS for I dofs in original numbering */
5569566063dSJacob Faibussowitsch PetscCall(PetscObjectReference((PetscObject)sub_schurs->is_I));
557a9b99552SStefano Zampini is_I_layer = sub_schurs->is_I;
558b1b3d7a2SStefano Zampini
559b1b3d7a2SStefano Zampini /* IS for I dofs in I numbering (strided 1) */
560df4d28bfSStefano Zampini if (!sub_schurs->schur_explicit) {
5619566063dSJacob Faibussowitsch PetscCall(ISGetSize(sub_schurs->is_I, &n_I));
5629566063dSJacob Faibussowitsch PetscCall(ISCreateStride(PetscObjectComm((PetscObject)sub_schurs->is_I), n_I, 0, 1, &is_I));
563b1b3d7a2SStefano Zampini
564b1b3d7a2SStefano Zampini /* II block is the same */
5659566063dSJacob Faibussowitsch PetscCall(PetscObjectReference((PetscObject)A_II));
566b1b3d7a2SStefano Zampini AE_II = A_II;
567b1b3d7a2SStefano Zampini }
568b1b3d7a2SStefano Zampini }
5695a95e1ceSStefano Zampini
570883469d8SStefano Zampini /* Get info on subset sizes and sum of all subsets sizes */
5715a95e1ceSStefano Zampini max_subset_size = 0;
572883469d8SStefano Zampini local_size = 0;
5735a95e1ceSStefano Zampini for (i = 0; i < sub_schurs->n_subs; i++) {
5749566063dSJacob Faibussowitsch PetscCall(ISGetLocalSize(sub_schurs->is_subs[i], &subset_size));
5755a95e1ceSStefano Zampini max_subset_size = PetscMax(subset_size, max_subset_size);
576883469d8SStefano Zampini local_size += subset_size;
577883469d8SStefano Zampini }
578883469d8SStefano Zampini
579883469d8SStefano Zampini /* Work arrays for local indices */
580883469d8SStefano Zampini extra = 0;
5819566063dSJacob Faibussowitsch PetscCall(ISGetLocalSize(sub_schurs->is_B, &n_B));
58248a46eb9SPierre Jolivet if (sub_schurs->schur_explicit && is_I_layer) PetscCall(ISGetLocalSize(is_I_layer, &extra));
5839566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(n_B + extra, &all_local_idx_N));
584791bdc09SStefano Zampini if (multi_element) PetscCall(PetscMalloc1(n_B + extra, &all_local_subid_N));
585883469d8SStefano Zampini if (extra) {
586883469d8SStefano Zampini const PetscInt *idxs;
5879566063dSJacob Faibussowitsch PetscCall(ISGetIndices(is_I_layer, &idxs));
5889566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(all_local_idx_N, idxs, extra));
589791bdc09SStefano Zampini if (multi_element)
590791bdc09SStefano Zampini for (PetscInt j = 0; j < extra; j++) all_local_subid_N[j] = local_subs[idxs[j]];
5919566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(is_I_layer, &idxs));
592883469d8SStefano Zampini }
5939566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(sub_schurs->n_subs, &auxnum1));
5949566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(sub_schurs->n_subs, &auxnum2));
595883469d8SStefano Zampini
596883469d8SStefano Zampini /* Get local indices in local numbering */
597883469d8SStefano Zampini local_size = 0;
59857a87bf3SStefano Zampini local_stash_size = 0;
5995a95e1ceSStefano Zampini for (i = 0; i < sub_schurs->n_subs; i++) {
600883469d8SStefano Zampini const PetscInt *idxs;
601883469d8SStefano Zampini
6029566063dSJacob Faibussowitsch PetscCall(ISGetLocalSize(sub_schurs->is_subs[i], &subset_size));
6039566063dSJacob Faibussowitsch PetscCall(ISGetIndices(sub_schurs->is_subs[i], &idxs));
604eb595f79SStefano Zampini /* start (smallest in global ordering) and multiplicity */
605eb595f79SStefano Zampini auxnum1[i] = idxs[0];
60657a87bf3SStefano Zampini auxnum2[i] = subset_size * subset_size;
607883469d8SStefano Zampini /* subset indices in local numbering */
6089566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(all_local_idx_N + local_size + extra, idxs, subset_size));
609791bdc09SStefano Zampini if (multi_element)
610791bdc09SStefano Zampini for (PetscInt j = 0; j < subset_size; j++) all_local_subid_N[j + local_size + extra] = local_subs[idxs[j]];
6119566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(sub_schurs->is_subs[i], &idxs));
612883469d8SStefano Zampini local_size += subset_size;
61357a87bf3SStefano Zampini local_stash_size += subset_size * subset_size;
614883469d8SStefano Zampini }
615883469d8SStefano Zampini
61679329b78SStefano Zampini /* allocate extra workspace needed only for GETRI or SYTRF when inverting the blocks or the entire Schur complement */
61711955456SStefano Zampini use_potr = use_sytr = PETSC_FALSE;
61811955456SStefano Zampini if (benign_trick || (sub_schurs->is_hermitian && sub_schurs->is_posdef)) {
619f4f7d9d6SStefano Zampini use_potr = PETSC_TRUE;
62011955456SStefano Zampini } else if (sub_schurs->is_symmetric) {
62111955456SStefano Zampini use_sytr = PETSC_TRUE;
62211955456SStefano Zampini }
62379329b78SStefano Zampini if (local_size && !use_potr && compute_Stilda) {
62459ac4de7SStefano Zampini PetscScalar lwork, dummyscalar = 0.;
62559ac4de7SStefano Zampini PetscBLASInt dummyint = 0;
626d2627357SStefano Zampini
627d2627357SStefano Zampini B_lwork = -1;
6289566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(local_size, &B_N));
6299566063dSJacob Faibussowitsch PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
630f4f7d9d6SStefano Zampini if (use_sytr) {
631792fecdfSBarry Smith PetscCallBLAS("LAPACKsytrf", LAPACKsytrf_("L", &B_N, &dummyscalar, &B_N, &dummyint, &lwork, &B_lwork, &B_ierr));
632835f2295SStefano Zampini PetscCheck(!B_ierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in query to SYTRF Lapack routine %" PetscBLASInt_FMT, B_ierr);
633f4f7d9d6SStefano Zampini } else {
634792fecdfSBarry Smith PetscCallBLAS("LAPACKgetri", LAPACKgetri_(&B_N, &dummyscalar, &B_N, &dummyint, &lwork, &B_lwork, &B_ierr));
635835f2295SStefano Zampini PetscCheck(!B_ierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in query to GETRI Lapack routine %" PetscBLASInt_FMT, B_ierr);
636f4f7d9d6SStefano Zampini }
6379566063dSJacob Faibussowitsch PetscCall(PetscFPTrapPop());
6389566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast((PetscInt)PetscRealPart(lwork), &B_lwork));
6399566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(B_lwork, &Bwork, B_N, &pivots));
640056290a2SStefano Zampini } else {
641056290a2SStefano Zampini Bwork = NULL;
642056290a2SStefano Zampini pivots = NULL;
643d2627357SStefano Zampini }
644d2627357SStefano Zampini
64557a87bf3SStefano Zampini /* prepare data for summing up properly schurs on subsets */
6469566063dSJacob Faibussowitsch PetscCall(ISCreateGeneral(comm_n, sub_schurs->n_subs, auxnum1, PETSC_OWN_POINTER, &all_subsets_n));
6479566063dSJacob Faibussowitsch PetscCall(ISLocalToGlobalMappingApplyIS(sub_schurs->l2gmap, all_subsets_n, &all_subsets));
6489566063dSJacob Faibussowitsch PetscCall(ISDestroy(&all_subsets_n));
6499566063dSJacob Faibussowitsch PetscCall(ISCreateGeneral(comm_n, sub_schurs->n_subs, auxnum2, PETSC_OWN_POINTER, &all_subsets_mult));
6509566063dSJacob Faibussowitsch PetscCall(ISRenumber(all_subsets, all_subsets_mult, &global_size, &all_subsets_n));
6519566063dSJacob Faibussowitsch PetscCall(ISDestroy(&all_subsets));
6529566063dSJacob Faibussowitsch PetscCall(ISDestroy(&all_subsets_mult));
6539566063dSJacob Faibussowitsch PetscCall(ISGetLocalSize(all_subsets_n, &i));
65463a3b9bcSJacob Faibussowitsch PetscCheck(i == local_stash_size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Invalid size of new subset! %" PetscInt_FMT " != %" PetscInt_FMT, i, local_stash_size);
6559566063dSJacob Faibussowitsch PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, local_stash_size, NULL, &lstash));
6569566063dSJacob Faibussowitsch PetscCall(VecCreateMPI(comm_n, PETSC_DECIDE, global_size, &gstash));
6579566063dSJacob Faibussowitsch PetscCall(VecScatterCreate(lstash, NULL, gstash, all_subsets_n, &sstash));
6589566063dSJacob Faibussowitsch PetscCall(ISDestroy(&all_subsets_n));
6592972d61bSStefano Zampini
6605a95e1ceSStefano Zampini /* subset indices in local boundary numbering */
6615a95e1ceSStefano Zampini if (!sub_schurs->is_Ej_all) {
6625a95e1ceSStefano Zampini PetscInt *all_local_idx_B;
6635a95e1ceSStefano Zampini
6649566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(local_size, &all_local_idx_B));
6659566063dSJacob Faibussowitsch PetscCall(ISGlobalToLocalMappingApply(sub_schurs->BtoNmap, IS_GTOLM_DROP, local_size, all_local_idx_N + extra, &subset_size, all_local_idx_B));
66663a3b9bcSJacob Faibussowitsch PetscCheck(subset_size == local_size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Error in sub_schurs serial (BtoNmap)! %" PetscInt_FMT " != %" PetscInt_FMT, subset_size, local_size);
6679566063dSJacob Faibussowitsch PetscCall(ISCreateGeneral(PETSC_COMM_SELF, local_size, all_local_idx_B, PETSC_OWN_POINTER, &sub_schurs->is_Ej_all));
668b1b3d7a2SStefano Zampini }
669b1b3d7a2SStefano Zampini
67072b8c272SStefano Zampini if (change) {
67172b8c272SStefano Zampini ISLocalToGlobalMapping BtoS;
67272b8c272SStefano Zampini IS change_primal_B;
67372b8c272SStefano Zampini IS change_primal_all;
67472b8c272SStefano Zampini
67528b400f6SJacob Faibussowitsch PetscCheck(!sub_schurs->change_primal_sub, PETSC_COMM_SELF, PETSC_ERR_PLIB, "This should not happen");
67628b400f6SJacob Faibussowitsch PetscCheck(!sub_schurs->change, PETSC_COMM_SELF, PETSC_ERR_PLIB, "This should not happen");
6779566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(sub_schurs->n_subs, &sub_schurs->change_primal_sub));
67872b8c272SStefano Zampini for (i = 0; i < sub_schurs->n_subs; i++) {
67972b8c272SStefano Zampini ISLocalToGlobalMapping NtoS;
6809566063dSJacob Faibussowitsch PetscCall(ISLocalToGlobalMappingCreateIS(sub_schurs->is_subs[i], &NtoS));
6819566063dSJacob Faibussowitsch PetscCall(ISGlobalToLocalMappingApplyIS(NtoS, IS_GTOLM_DROP, change_primal, &sub_schurs->change_primal_sub[i]));
6829566063dSJacob Faibussowitsch PetscCall(ISLocalToGlobalMappingDestroy(&NtoS));
68372b8c272SStefano Zampini }
6849566063dSJacob Faibussowitsch PetscCall(ISGlobalToLocalMappingApplyIS(sub_schurs->BtoNmap, IS_GTOLM_DROP, change_primal, &change_primal_B));
6859566063dSJacob Faibussowitsch PetscCall(ISLocalToGlobalMappingCreateIS(sub_schurs->is_Ej_all, &BtoS));
6869566063dSJacob Faibussowitsch PetscCall(ISGlobalToLocalMappingApplyIS(BtoS, IS_GTOLM_DROP, change_primal_B, &change_primal_all));
6879566063dSJacob Faibussowitsch PetscCall(ISLocalToGlobalMappingDestroy(&BtoS));
6889566063dSJacob Faibussowitsch PetscCall(ISDestroy(&change_primal_B));
6899566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(sub_schurs->n_subs, &sub_schurs->change));
69072b8c272SStefano Zampini for (i = 0; i < sub_schurs->n_subs; i++) {
69172b8c272SStefano Zampini Mat change_sub;
69272b8c272SStefano Zampini
6939566063dSJacob Faibussowitsch PetscCall(ISGetLocalSize(sub_schurs->is_subs[i], &subset_size));
6949566063dSJacob Faibussowitsch PetscCall(KSPCreate(PETSC_COMM_SELF, &sub_schurs->change[i]));
6953821be0aSBarry Smith PetscCall(KSPSetNestLevel(sub_schurs->change[i], 1)); /* do not seem to have direct access to a PC from which to get the level of nests */
6969566063dSJacob Faibussowitsch PetscCall(KSPSetType(sub_schurs->change[i], KSPPREONLY));
69772b8c272SStefano Zampini if (!sub_schurs->change_with_qr) {
6989566063dSJacob Faibussowitsch PetscCall(MatCreateSubMatrix(change, sub_schurs->is_subs[i], sub_schurs->is_subs[i], MAT_INITIAL_MATRIX, &change_sub));
69972b8c272SStefano Zampini } else {
70072b8c272SStefano Zampini Mat change_subt;
7019566063dSJacob Faibussowitsch PetscCall(MatCreateSubMatrix(change, sub_schurs->is_subs[i], sub_schurs->is_subs[i], MAT_INITIAL_MATRIX, &change_subt));
7029566063dSJacob Faibussowitsch PetscCall(MatConvert(change_subt, MATSEQDENSE, MAT_INITIAL_MATRIX, &change_sub));
7039566063dSJacob Faibussowitsch PetscCall(MatDestroy(&change_subt));
70472b8c272SStefano Zampini }
7059566063dSJacob Faibussowitsch PetscCall(KSPSetOperators(sub_schurs->change[i], change_sub, change_sub));
7069566063dSJacob Faibussowitsch PetscCall(MatDestroy(&change_sub));
7079566063dSJacob Faibussowitsch PetscCall(KSPSetOptionsPrefix(sub_schurs->change[i], sub_schurs->prefix));
7089566063dSJacob Faibussowitsch PetscCall(KSPAppendOptionsPrefix(sub_schurs->change[i], "sub_schurs_change_"));
70972b8c272SStefano Zampini }
7109566063dSJacob Faibussowitsch PetscCall(ISDestroy(&change_primal_all));
71172b8c272SStefano Zampini }
71272b8c272SStefano Zampini
7135a95e1ceSStefano Zampini /* Local matrix of all local Schur on subsets (transposed) */
7145a95e1ceSStefano Zampini if (!sub_schurs->S_Ej_all) {
71504c5b2e6SStefano Zampini Mat T;
71604c5b2e6SStefano Zampini PetscScalar *v;
71704c5b2e6SStefano Zampini PetscInt *ii, *jj;
71804c5b2e6SStefano Zampini PetscInt cum, i, j, k;
71904c5b2e6SStefano Zampini
72004c5b2e6SStefano Zampini /* MatSeqAIJSetPreallocation + MatSetValues is slow for these kind of matrices (may have large blocks)
72104c5b2e6SStefano Zampini Allocate properly a representative matrix and duplicate */
7229566063dSJacob Faibussowitsch PetscCall(PetscMalloc3(local_size + 1, &ii, local_stash_size, &jj, local_stash_size, &v));
72304c5b2e6SStefano Zampini ii[0] = 0;
72404c5b2e6SStefano Zampini cum = 0;
72504c5b2e6SStefano Zampini for (i = 0; i < sub_schurs->n_subs; i++) {
7269566063dSJacob Faibussowitsch PetscCall(ISGetLocalSize(sub_schurs->is_subs[i], &subset_size));
72704c5b2e6SStefano Zampini for (j = 0; j < subset_size; j++) {
72804c5b2e6SStefano Zampini const PetscInt row = cum + j;
72904c5b2e6SStefano Zampini PetscInt col = cum;
73004c5b2e6SStefano Zampini
73104c5b2e6SStefano Zampini ii[row + 1] = ii[row] + subset_size;
73204c5b2e6SStefano Zampini for (k = ii[row]; k < ii[row + 1]; k++) {
73304c5b2e6SStefano Zampini jj[k] = col;
73404c5b2e6SStefano Zampini col++;
73504c5b2e6SStefano Zampini }
73604c5b2e6SStefano Zampini }
73704c5b2e6SStefano Zampini cum += subset_size;
73804c5b2e6SStefano Zampini }
7399566063dSJacob Faibussowitsch PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, local_size, local_size, ii, jj, v, &T));
7409566063dSJacob Faibussowitsch PetscCall(MatDuplicate(T, MAT_DO_NOT_COPY_VALUES, &sub_schurs->S_Ej_all));
7419566063dSJacob Faibussowitsch PetscCall(MatDestroy(&T));
7429566063dSJacob Faibussowitsch PetscCall(PetscFree3(ii, jj, v));
74304c5b2e6SStefano Zampini }
74404c5b2e6SStefano Zampini /* matrices for deluxe scaling and adaptive selection */
74504c5b2e6SStefano Zampini if (compute_Stilda) {
74648a46eb9SPierre Jolivet if (!sub_schurs->sum_S_Ej_tilda_all) PetscCall(MatDuplicate(sub_schurs->S_Ej_all, MAT_DO_NOT_COPY_VALUES, &sub_schurs->sum_S_Ej_tilda_all));
74748a46eb9SPierre Jolivet if (!sub_schurs->sum_S_Ej_inv_all && deluxe) PetscCall(MatDuplicate(sub_schurs->S_Ej_all, MAT_DO_NOT_COPY_VALUES, &sub_schurs->sum_S_Ej_inv_all));
748aa83b6aeSStefano Zampini }
749b1b3d7a2SStefano Zampini
7505a95e1ceSStefano Zampini /* Compute Schur complements explicitly */
751be83ff47SStefano Zampini F = NULL;
752d943a642SStefano Zampini if (!sub_schurs->schur_explicit) {
753d943a642SStefano Zampini /* this code branch is used when MatFactor with Schur complement support is not present or when explicitly requested;
754d943a642SStefano Zampini it is not efficient, unless the economic version of the scaling is used */
7555a95e1ceSStefano Zampini Mat S_Ej_expl;
7565a95e1ceSStefano Zampini PetscScalar *work;
7575a95e1ceSStefano Zampini PetscInt j, *dummy_idx;
7585a95e1ceSStefano Zampini PetscBool Sdense;
7595a95e1ceSStefano Zampini
7609566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(max_subset_size, &dummy_idx, max_subset_size * max_subset_size, &work));
7615a95e1ceSStefano Zampini local_size = 0;
762b1b3d7a2SStefano Zampini for (i = 0; i < sub_schurs->n_subs; i++) {
7635a95e1ceSStefano Zampini IS is_subset_B;
7645a95e1ceSStefano Zampini Mat AE_EE, AE_IE, AE_EI, S_Ej;
7655a95e1ceSStefano Zampini
7665a95e1ceSStefano Zampini /* subsets in original and boundary numbering */
7679566063dSJacob Faibussowitsch PetscCall(ISGlobalToLocalMappingApplyIS(sub_schurs->BtoNmap, IS_GTOLM_DROP, sub_schurs->is_subs[i], &is_subset_B));
7685a95e1ceSStefano Zampini /* EE block */
7699566063dSJacob Faibussowitsch PetscCall(MatCreateSubMatrix(A_BB, is_subset_B, is_subset_B, MAT_INITIAL_MATRIX, &AE_EE));
7705a95e1ceSStefano Zampini /* IE block */
7719566063dSJacob Faibussowitsch PetscCall(MatCreateSubMatrix(A_IB, is_I, is_subset_B, MAT_INITIAL_MATRIX, &AE_IE));
7725a95e1ceSStefano Zampini /* EI block */
773d943a642SStefano Zampini if (sub_schurs->is_symmetric) {
7749566063dSJacob Faibussowitsch PetscCall(MatCreateTranspose(AE_IE, &AE_EI));
775d943a642SStefano Zampini } else if (sub_schurs->is_hermitian) {
7769566063dSJacob Faibussowitsch PetscCall(MatCreateHermitianTranspose(AE_IE, &AE_EI));
7775a95e1ceSStefano Zampini } else {
7789566063dSJacob Faibussowitsch PetscCall(MatCreateSubMatrix(A_BI, is_subset_B, is_I, MAT_INITIAL_MATRIX, &AE_EI));
7795a95e1ceSStefano Zampini }
7809566063dSJacob Faibussowitsch PetscCall(ISDestroy(&is_subset_B));
7819566063dSJacob Faibussowitsch PetscCall(MatCreateSchurComplement(AE_II, AE_II, AE_IE, AE_EI, AE_EE, &S_Ej));
7829566063dSJacob Faibussowitsch PetscCall(MatDestroy(&AE_EE));
7839566063dSJacob Faibussowitsch PetscCall(MatDestroy(&AE_IE));
7849566063dSJacob Faibussowitsch PetscCall(MatDestroy(&AE_EI));
785b1b3d7a2SStefano Zampini if (AE_II == A_II) { /* we can reuse the same ksp */
786b1b3d7a2SStefano Zampini KSP ksp;
7879566063dSJacob Faibussowitsch PetscCall(MatSchurComplementGetKSP(sub_schurs->S, &ksp));
7889566063dSJacob Faibussowitsch PetscCall(MatSchurComplementSetKSP(S_Ej, ksp));
789b1b3d7a2SStefano Zampini } else { /* build new ksp object which inherits ksp and pc types from the original one */
790b1b3d7a2SStefano Zampini KSP origksp, schurksp;
791b1b3d7a2SStefano Zampini PC origpc, schurpc;
792b1b3d7a2SStefano Zampini KSPType ksp_type;
793b1b3d7a2SStefano Zampini PetscInt n_internal;
7945a95e1ceSStefano Zampini PetscBool ispcnone;
795b1b3d7a2SStefano Zampini
7969566063dSJacob Faibussowitsch PetscCall(MatSchurComplementGetKSP(sub_schurs->S, &origksp));
7979566063dSJacob Faibussowitsch PetscCall(MatSchurComplementGetKSP(S_Ej, &schurksp));
7989566063dSJacob Faibussowitsch PetscCall(KSPGetType(origksp, &ksp_type));
7999566063dSJacob Faibussowitsch PetscCall(KSPSetType(schurksp, ksp_type));
8009566063dSJacob Faibussowitsch PetscCall(KSPGetPC(schurksp, &schurpc));
8019566063dSJacob Faibussowitsch PetscCall(KSPGetPC(origksp, &origpc));
8029566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)origpc, PCNONE, &ispcnone));
8035a95e1ceSStefano Zampini if (!ispcnone) {
8045a95e1ceSStefano Zampini PCType pc_type;
8059566063dSJacob Faibussowitsch PetscCall(PCGetType(origpc, &pc_type));
8069566063dSJacob Faibussowitsch PetscCall(PCSetType(schurpc, pc_type));
8075a95e1ceSStefano Zampini } else {
8089566063dSJacob Faibussowitsch PetscCall(PCSetType(schurpc, PCLU));
8095a95e1ceSStefano Zampini }
8109566063dSJacob Faibussowitsch PetscCall(ISGetSize(is_I, &n_internal));
811365a3a41SStefano Zampini if (!n_internal) { /* UMFPACK gives error with 0 sized problems */
8123ca39a21SBarry Smith MatSolverType solver = NULL;
813835f2295SStefano Zampini PetscCall(PCFactorGetMatSolverType(origpc, &solver));
8141baa6e33SBarry Smith if (solver) PetscCall(PCFactorSetMatSolverType(schurpc, solver));
815b1b3d7a2SStefano Zampini }
8169566063dSJacob Faibussowitsch PetscCall(KSPSetUp(schurksp));
817b1b3d7a2SStefano Zampini }
8189566063dSJacob Faibussowitsch PetscCall(ISGetLocalSize(sub_schurs->is_subs[i], &subset_size));
8199566063dSJacob Faibussowitsch PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, subset_size, subset_size, work, &S_Ej_expl));
8209566063dSJacob Faibussowitsch PetscCall(PCBDDCComputeExplicitSchur(S_Ej, sub_schurs->is_symmetric, MAT_REUSE_MATRIX, &S_Ej_expl));
8219566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)S_Ej_expl, MATSEQDENSE, &Sdense));
8220fdf79fbSJacob Faibussowitsch PetscCheck(Sdense, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not yet implemented for sparse matrices");
823ad540459SPierre Jolivet for (j = 0; j < subset_size; j++) dummy_idx[j] = local_size + j;
8249566063dSJacob Faibussowitsch PetscCall(MatSetValues(sub_schurs->S_Ej_all, subset_size, dummy_idx, subset_size, dummy_idx, work, INSERT_VALUES));
8259566063dSJacob Faibussowitsch PetscCall(MatDestroy(&S_Ej));
8269566063dSJacob Faibussowitsch PetscCall(MatDestroy(&S_Ej_expl));
8275a95e1ceSStefano Zampini local_size += subset_size;
8285a95e1ceSStefano Zampini }
8299566063dSJacob Faibussowitsch PetscCall(PetscFree2(dummy_idx, work));
830b1b3d7a2SStefano Zampini /* free */
8319566063dSJacob Faibussowitsch PetscCall(ISDestroy(&is_I));
8329566063dSJacob Faibussowitsch PetscCall(MatDestroy(&AE_II));
8339566063dSJacob Faibussowitsch PetscCall(PetscFree(all_local_idx_N));
834883469d8SStefano Zampini } else {
8355cbda25cSStefano Zampini Mat A, cs_AIB_mat = NULL, benign_AIIm1_ones_mat = NULL;
83632fe681dSStefano Zampini Mat *gdswA;
8379d54b7f4SStefano Zampini Vec Dall = NULL;
838791bdc09SStefano Zampini IS is_A_all, *is_p_r = NULL, is_schur;
8397ebab0bbSStefano Zampini MatType Stype;
8405cbda25cSStefano Zampini PetscScalar *work, *S_data, *schur_factor, infty = PETSC_MAX_REAL;
84104c5b2e6SStefano Zampini PetscScalar *SEj_arr = NULL, *SEjinv_arr = NULL;
8421683a169SBarry Smith const PetscScalar *rS_data;
84304c5b2e6SStefano Zampini PetscInt n, n_I, size_schur, size_active_schur, cum, cum2;
8443fc34f97SStefano Zampini PetscBool economic, solver_S, S_lower_triangular = PETSC_FALSE;
8453fc34f97SStefano Zampini PetscBool schur_has_vertices, factor_workaround;
84611955456SStefano Zampini PetscBool use_cholesky;
8477ebab0bbSStefano Zampini #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
8487ebab0bbSStefano Zampini PetscBool oldpin;
8497ebab0bbSStefano Zampini #endif
850791bdc09SStefano Zampini /* multi-element */
851791bdc09SStefano Zampini IS *is_sub_all = NULL, *is_sub_schur_all = NULL, *is_sub_schur = NULL;
852883469d8SStefano Zampini
853683d3df6SStefano Zampini /* get sizes */
85481ea8064SStefano Zampini n_I = 0;
85548a46eb9SPierre Jolivet if (is_I_layer) PetscCall(ISGetLocalSize(is_I_layer, &n_I));
856683d3df6SStefano Zampini economic = PETSC_FALSE;
8579566063dSJacob Faibussowitsch PetscCall(ISGetLocalSize(sub_schurs->is_I, &cum));
858683d3df6SStefano Zampini if (cum != n_I) economic = PETSC_TRUE;
8599566063dSJacob Faibussowitsch PetscCall(MatGetLocalSize(sub_schurs->A, &n, NULL));
8609d54b7f4SStefano Zampini size_active_schur = local_size;
8619d54b7f4SStefano Zampini
862f17d2ae1SStefano Zampini /* import scaling vector (wrong formulation if we have 3D edges) */
8639d54b7f4SStefano Zampini if (scaling && compute_Stilda) {
8649d54b7f4SStefano Zampini const PetscScalar *array;
8659d54b7f4SStefano Zampini PetscScalar *array2;
8669d54b7f4SStefano Zampini const PetscInt *idxs;
8679d54b7f4SStefano Zampini PetscInt i;
8689d54b7f4SStefano Zampini
8699566063dSJacob Faibussowitsch PetscCall(ISGetIndices(sub_schurs->is_Ej_all, &idxs));
8709566063dSJacob Faibussowitsch PetscCall(VecCreateSeq(PETSC_COMM_SELF, size_active_schur, &Dall));
8719566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(scaling, &array));
8729566063dSJacob Faibussowitsch PetscCall(VecGetArray(Dall, &array2));
8739d54b7f4SStefano Zampini for (i = 0; i < size_active_schur; i++) array2[i] = array[idxs[i]];
8749566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(Dall, &array2));
8759566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(scaling, &array));
8769566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(sub_schurs->is_Ej_all, &idxs));
8779d54b7f4SStefano Zampini deluxe = PETSC_FALSE;
8789d54b7f4SStefano Zampini }
879d62866d3SStefano Zampini
880683d3df6SStefano Zampini /* size active schurs does not count any dirichlet or vertex dof on the interface */
8813fc34f97SStefano Zampini factor_workaround = PETSC_FALSE;
8823fc34f97SStefano Zampini schur_has_vertices = PETSC_FALSE;
883683d3df6SStefano Zampini cum = n_I + size_active_schur;
884683d3df6SStefano Zampini if (sub_schurs->is_dir) {
885683d3df6SStefano Zampini const PetscInt *idxs;
886683d3df6SStefano Zampini PetscInt n_dir;
887683d3df6SStefano Zampini
8889566063dSJacob Faibussowitsch PetscCall(ISGetLocalSize(sub_schurs->is_dir, &n_dir));
8899566063dSJacob Faibussowitsch PetscCall(ISGetIndices(sub_schurs->is_dir, &idxs));
8909566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(all_local_idx_N + cum, idxs, n_dir));
891791bdc09SStefano Zampini if (multi_element)
892791bdc09SStefano Zampini for (PetscInt j = 0; j < n_dir; j++) all_local_subid_N[j + cum] = local_subs[idxs[j]];
8939566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(sub_schurs->is_dir, &idxs));
894683d3df6SStefano Zampini cum += n_dir;
89532fe681dSStefano Zampini if (!sub_schurs->gdsw) factor_workaround = PETSC_TRUE;
896d62866d3SStefano Zampini }
897683d3df6SStefano Zampini /* include the primal vertices in the Schur complement */
898367aa537SStefano Zampini if (exact_schur && sub_schurs->is_vertices && (compute_Stilda || benign_n)) {
899683d3df6SStefano Zampini PetscInt n_v;
900683d3df6SStefano Zampini
9019566063dSJacob Faibussowitsch PetscCall(ISGetLocalSize(sub_schurs->is_vertices, &n_v));
902683d3df6SStefano Zampini if (n_v) {
903683d3df6SStefano Zampini const PetscInt *idxs;
904683d3df6SStefano Zampini
9059566063dSJacob Faibussowitsch PetscCall(ISGetIndices(sub_schurs->is_vertices, &idxs));
9069566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(all_local_idx_N + cum, idxs, n_v));
907791bdc09SStefano Zampini if (multi_element)
908791bdc09SStefano Zampini for (PetscInt j = 0; j < n_v; j++) all_local_subid_N[j + cum] = local_subs[idxs[j]];
9099566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(sub_schurs->is_vertices, &idxs));
910683d3df6SStefano Zampini cum += n_v;
91132fe681dSStefano Zampini if (!sub_schurs->gdsw) factor_workaround = PETSC_TRUE;
9123fc34f97SStefano Zampini schur_has_vertices = PETSC_TRUE;
913683d3df6SStefano Zampini }
914683d3df6SStefano Zampini }
915683d3df6SStefano Zampini size_schur = cum - n_I;
9169566063dSJacob Faibussowitsch PetscCall(ISCreateGeneral(PETSC_COMM_SELF, cum, all_local_idx_N, PETSC_OWN_POINTER, &is_A_all));
9177ebab0bbSStefano Zampini #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
918b470e4b4SRichard Tran Mills oldpin = sub_schurs->A->boundtocpu;
9199566063dSJacob Faibussowitsch PetscCall(MatBindToCPU(sub_schurs->A, PETSC_TRUE));
9207ebab0bbSStefano Zampini #endif
921683d3df6SStefano Zampini if (cum == n) {
9229566063dSJacob Faibussowitsch PetscCall(ISSetPermutation(is_A_all));
9239566063dSJacob Faibussowitsch PetscCall(MatPermute(sub_schurs->A, is_A_all, is_A_all, &A));
924683d3df6SStefano Zampini } else {
9259566063dSJacob Faibussowitsch PetscCall(MatCreateSubMatrix(sub_schurs->A, is_A_all, is_A_all, MAT_INITIAL_MATRIX, &A));
926683d3df6SStefano Zampini }
9277ebab0bbSStefano Zampini #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
9289566063dSJacob Faibussowitsch PetscCall(MatBindToCPU(sub_schurs->A, oldpin));
9297ebab0bbSStefano Zampini #endif
93026cc229bSBarry Smith PetscCall(MatSetOptionsPrefixFactor(A, sub_schurs->prefix));
93126cc229bSBarry Smith PetscCall(MatAppendOptionsPrefixFactor(A, "sub_schurs_"));
932791bdc09SStefano Zampini /* subsets ordered last */
933791bdc09SStefano Zampini PetscCall(ISCreateStride(PETSC_COMM_SELF, size_schur, n_I, 1, &is_schur));
934791bdc09SStefano Zampini
935791bdc09SStefano Zampini if (multi_element) {
936791bdc09SStefano Zampini PetscInt *idx_sub;
937791bdc09SStefano Zampini
938791bdc09SStefano Zampini PetscCall(PetscMalloc3(n_local_subs, &is_sub_all, n_local_subs, &is_sub_schur_all, n_local_subs, &is_sub_schur));
939791bdc09SStefano Zampini PetscCall(PetscMalloc1(n + size_schur, &idx_sub));
940791bdc09SStefano Zampini for (PetscInt sub = 0; sub < n_local_subs; sub++) {
941791bdc09SStefano Zampini PetscInt size_sub = 0, size_schur_sub = 0, size_I_sub;
942791bdc09SStefano Zampini
943791bdc09SStefano Zampini for (PetscInt j = 0; j < n_I; j++)
944791bdc09SStefano Zampini if (all_local_subid_N[j] == sub) idx_sub[size_sub++] = j;
945791bdc09SStefano Zampini size_I_sub = size_sub;
946791bdc09SStefano Zampini for (PetscInt j = n_I; j < n_I + size_schur; j++)
947791bdc09SStefano Zampini if (all_local_subid_N[j] == sub) {
948791bdc09SStefano Zampini idx_sub[size_sub++] = j;
949791bdc09SStefano Zampini idx_sub[n + size_schur_sub++] = j - n_I;
950791bdc09SStefano Zampini }
951791bdc09SStefano Zampini
952791bdc09SStefano Zampini PetscCall(ISCreateGeneral(PETSC_COMM_SELF, size_sub, idx_sub, PETSC_COPY_VALUES, &is_sub_all[sub]));
953791bdc09SStefano Zampini PetscCall(ISCreateGeneral(PETSC_COMM_SELF, size_schur_sub, idx_sub + n, PETSC_COPY_VALUES, &is_sub_schur[sub]));
954791bdc09SStefano Zampini PetscCall(ISCreateStride(PETSC_COMM_SELF, size_schur_sub, size_I_sub, 1, &is_sub_schur_all[sub]));
955791bdc09SStefano Zampini }
956791bdc09SStefano Zampini PetscCall(PetscFree(idx_sub));
957791bdc09SStefano Zampini }
958ca92afb2SStefano Zampini
959ca92afb2SStefano Zampini /* if we actually change the basis for the pressures, LDL^T factors will use a lot of memory
9607ebab0bbSStefano Zampini this is a workaround */
961ca92afb2SStefano Zampini if (benign_n) {
9627ebab0bbSStefano Zampini Vec v, benign_AIIm1_ones;
963ca92afb2SStefano Zampini ISLocalToGlobalMapping N_to_reor;
964ca92afb2SStefano Zampini IS is_p0, is_p0_p;
9655cbda25cSStefano Zampini PetscScalar *cs_AIB, *AIIm1_data;
9665cbda25cSStefano Zampini PetscInt sizeA;
967ca92afb2SStefano Zampini
9689566063dSJacob Faibussowitsch PetscCall(ISLocalToGlobalMappingCreateIS(is_A_all, &N_to_reor));
9699566063dSJacob Faibussowitsch PetscCall(ISCreateGeneral(PETSC_COMM_SELF, benign_n, benign_p0_lidx, PETSC_COPY_VALUES, &is_p0));
9709566063dSJacob Faibussowitsch PetscCall(ISGlobalToLocalMappingApplyIS(N_to_reor, IS_GTOLM_DROP, is_p0, &is_p0_p));
9719566063dSJacob Faibussowitsch PetscCall(ISDestroy(&is_p0));
9729566063dSJacob Faibussowitsch PetscCall(MatCreateVecs(A, &v, &benign_AIIm1_ones));
9739566063dSJacob Faibussowitsch PetscCall(VecGetSize(v, &sizeA));
9749566063dSJacob Faibussowitsch PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, sizeA, benign_n, NULL, &benign_AIIm1_ones_mat));
9759566063dSJacob Faibussowitsch PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, size_schur, benign_n, NULL, &cs_AIB_mat));
9769566063dSJacob Faibussowitsch PetscCall(MatDenseGetArray(cs_AIB_mat, &cs_AIB));
9779566063dSJacob Faibussowitsch PetscCall(MatDenseGetArray(benign_AIIm1_ones_mat, &AIIm1_data));
9789566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(benign_n, &is_p_r));
979ca92afb2SStefano Zampini /* compute colsum of A_IB restricted to pressures */
980ca92afb2SStefano Zampini for (i = 0; i < benign_n; i++) {
9817ebab0bbSStefano Zampini const PetscScalar *array;
982ca92afb2SStefano Zampini const PetscInt *idxs;
983ca92afb2SStefano Zampini PetscInt j, nz;
984ca92afb2SStefano Zampini
9859566063dSJacob Faibussowitsch PetscCall(ISGlobalToLocalMappingApplyIS(N_to_reor, IS_GTOLM_DROP, benign_zerodiag_subs[i], &is_p_r[i]));
9869566063dSJacob Faibussowitsch PetscCall(ISGetLocalSize(is_p_r[i], &nz));
9879566063dSJacob Faibussowitsch PetscCall(ISGetIndices(is_p_r[i], &idxs));
9885cbda25cSStefano Zampini for (j = 0; j < nz; j++) AIIm1_data[idxs[j] + sizeA * i] = 1.;
9899566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(is_p_r[i], &idxs));
9909566063dSJacob Faibussowitsch PetscCall(VecPlaceArray(benign_AIIm1_ones, AIIm1_data + sizeA * i));
9919566063dSJacob Faibussowitsch PetscCall(MatMult(A, benign_AIIm1_ones, v));
9929566063dSJacob Faibussowitsch PetscCall(VecResetArray(benign_AIIm1_ones));
9939566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(v, &array));
99422db5ddcSStefano Zampini for (j = 0; j < size_schur; j++) {
99522db5ddcSStefano Zampini #if defined(PETSC_USE_COMPLEX)
99622db5ddcSStefano Zampini cs_AIB[i * size_schur + j] = (PetscRealPart(array[j + n_I]) / nz + PETSC_i * (PetscImaginaryPart(array[j + n_I]) / nz));
99722db5ddcSStefano Zampini #else
99822db5ddcSStefano Zampini cs_AIB[i * size_schur + j] = array[j + n_I] / nz;
99922db5ddcSStefano Zampini #endif
100022db5ddcSStefano Zampini }
10019566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(v, &array));
1002ca92afb2SStefano Zampini }
10039566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreArray(cs_AIB_mat, &cs_AIB));
10049566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreArray(benign_AIIm1_ones_mat, &AIIm1_data));
10059566063dSJacob Faibussowitsch PetscCall(VecDestroy(&v));
10069566063dSJacob Faibussowitsch PetscCall(VecDestroy(&benign_AIIm1_ones));
10079566063dSJacob Faibussowitsch PetscCall(MatSetOption(A, MAT_KEEP_NONZERO_PATTERN, PETSC_FALSE));
10089566063dSJacob Faibussowitsch PetscCall(MatSetOption(A, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_FALSE));
10099566063dSJacob Faibussowitsch PetscCall(MatSetOption(A, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE));
10109566063dSJacob Faibussowitsch PetscCall(MatZeroRowsColumnsIS(A, is_p0_p, 1.0, NULL, NULL));
10119566063dSJacob Faibussowitsch PetscCall(ISDestroy(&is_p0_p));
10129566063dSJacob Faibussowitsch PetscCall(ISLocalToGlobalMappingDestroy(&N_to_reor));
1013ca92afb2SStefano Zampini }
10149566063dSJacob Faibussowitsch PetscCall(MatSetOption(A, MAT_SYMMETRIC, sub_schurs->is_symmetric));
10159566063dSJacob Faibussowitsch PetscCall(MatSetOption(A, MAT_HERMITIAN, sub_schurs->is_hermitian));
10169566063dSJacob Faibussowitsch PetscCall(MatSetOption(A, MAT_SPD, sub_schurs->is_posdef));
1017883469d8SStefano Zampini
101811955456SStefano Zampini /* for complexes, symmetric and hermitian at the same time implies null imaginary part */
101911955456SStefano Zampini use_cholesky = (PetscBool)((use_potr || use_sytr) && sub_schurs->is_hermitian && sub_schurs->is_symmetric);
102011955456SStefano Zampini
1021683d3df6SStefano Zampini /* when using the benign subspace trick, the local Schur complements are SPD */
102235d0533cSStefano Zampini /* MKL_PARDISO does not handle well the computation of a Schur complement from a symmetric indefinite factorization
102335d0533cSStefano Zampini Use LU and adapt pivoting perturbation (still, solution is not as accurate as with using MUMPS) */
102435d0533cSStefano Zampini if (benign_trick) {
102535d0533cSStefano Zampini sub_schurs->is_posdef = PETSC_TRUE;
10269566063dSJacob Faibussowitsch PetscCall(PetscStrcmp(sub_schurs->mat_solver_type, MATSOLVERMKL_PARDISO, &flg));
102735d0533cSStefano Zampini if (flg) use_cholesky = PETSC_FALSE;
102835d0533cSStefano Zampini }
102979329b78SStefano Zampini if (sub_schurs->mat_factor_type == MAT_FACTOR_NONE) sub_schurs->mat_factor_type = use_cholesky ? MAT_FACTOR_CHOLESKY : MAT_FACTOR_LU;
1030d47842beSStefano Zampini
1031791bdc09SStefano Zampini if (n_I && !multi_element) {
10327ebab0bbSStefano Zampini char stype[64];
10334ba54290SStefano Zampini PetscBool gpu = PETSC_FALSE;
10345a05ddb0SStefano Zampini
103579329b78SStefano Zampini PetscCall(MatGetFactor(A, sub_schurs->mat_solver_type, sub_schurs->mat_factor_type, &F));
103603e5aca4SStefano Zampini PetscCheck(F, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MatGetFactor not supported by matrix instance of type %s. Rerun with \"-info :mat | grep MatGetFactor_\" for additional information", ((PetscObject)A)->type_name);
10379566063dSJacob Faibussowitsch PetscCall(MatSetErrorIfFailure(A, PETSC_TRUE));
103835d0533cSStefano Zampini #if defined(PETSC_HAVE_MKL_PARDISO)
10399566063dSJacob Faibussowitsch if (benign_trick) PetscCall(MatMkl_PardisoSetCntl(F, 10, 10));
104035d0533cSStefano Zampini #endif
10419566063dSJacob Faibussowitsch PetscCall(MatFactorSetSchurIS(F, is_schur));
1042883469d8SStefano Zampini
1043883469d8SStefano Zampini /* factorization step */
104479329b78SStefano Zampini switch (sub_schurs->mat_factor_type) {
104579329b78SStefano Zampini case MAT_FACTOR_CHOLESKY:
10469566063dSJacob Faibussowitsch PetscCall(MatCholeskyFactorSymbolic(F, A, NULL, NULL));
104779578405SBarry Smith /* be sure that icntl 19 is not set by command line */
10489566063dSJacob Faibussowitsch PetscCall(MatMumpsSetIcntl(F, 19, 2));
10499566063dSJacob Faibussowitsch PetscCall(MatCholeskyFactorNumeric(F, A, NULL));
1050a0b0af32SStefano Zampini S_lower_triangular = PETSC_TRUE;
105179329b78SStefano Zampini break;
105279329b78SStefano Zampini case MAT_FACTOR_LU:
10539566063dSJacob Faibussowitsch PetscCall(MatLUFactorSymbolic(F, A, NULL, NULL, NULL));
105479578405SBarry Smith /* be sure that icntl 19 is not set by command line */
10559566063dSJacob Faibussowitsch PetscCall(MatMumpsSetIcntl(F, 19, 3));
10569566063dSJacob Faibussowitsch PetscCall(MatLUFactorNumeric(F, A, NULL));
105779329b78SStefano Zampini break;
105879329b78SStefano Zampini default:
105979329b78SStefano Zampini SETERRQ(PetscObjectComm((PetscObject)F), PETSC_ERR_SUP, "Unsupported factor type %s", MatFactorTypes[sub_schurs->mat_factor_type]);
1060883469d8SStefano Zampini }
10619566063dSJacob Faibussowitsch PetscCall(MatViewFromOptions(F, (PetscObject)A, "-mat_factor_view"));
1062883469d8SStefano Zampini
10633b03f7bbSStefano Zampini if (matl_dbg_viewer) {
106411955456SStefano Zampini Mat S;
106511955456SStefano Zampini IS is;
106611955456SStefano Zampini
10679566063dSJacob Faibussowitsch PetscCall(PetscObjectSetName((PetscObject)A, "A"));
10689566063dSJacob Faibussowitsch PetscCall(MatView(A, matl_dbg_viewer));
10699566063dSJacob Faibussowitsch PetscCall(MatFactorCreateSchurComplement(F, &S, NULL));
10709566063dSJacob Faibussowitsch PetscCall(PetscObjectSetName((PetscObject)S, "S"));
10719566063dSJacob Faibussowitsch PetscCall(MatView(S, matl_dbg_viewer));
10729566063dSJacob Faibussowitsch PetscCall(MatDestroy(&S));
10739566063dSJacob Faibussowitsch PetscCall(ISCreateStride(PETSC_COMM_SELF, n_I, 0, 1, &is));
10749566063dSJacob Faibussowitsch PetscCall(PetscObjectSetName((PetscObject)is, "I"));
10759566063dSJacob Faibussowitsch PetscCall(ISView(is, matl_dbg_viewer));
10769566063dSJacob Faibussowitsch PetscCall(ISDestroy(&is));
10779566063dSJacob Faibussowitsch PetscCall(ISCreateStride(PETSC_COMM_SELF, size_schur, n_I, 1, &is));
10789566063dSJacob Faibussowitsch PetscCall(PetscObjectSetName((PetscObject)is, "B"));
10799566063dSJacob Faibussowitsch PetscCall(ISView(is, matl_dbg_viewer));
10809566063dSJacob Faibussowitsch PetscCall(ISDestroy(&is));
10819566063dSJacob Faibussowitsch PetscCall(PetscObjectSetName((PetscObject)is_A_all, "IA"));
10829566063dSJacob Faibussowitsch PetscCall(ISView(is_A_all, matl_dbg_viewer));
108332fe681dSStefano Zampini for (i = 0, cum = 0; i < sub_schurs->n_subs; i++) {
108432fe681dSStefano Zampini IS is;
108532fe681dSStefano Zampini char name[16];
108632fe681dSStefano Zampini
108732fe681dSStefano Zampini PetscCall(PetscSNPrintf(name, sizeof(name), "IE%" PetscInt_FMT, i));
108832fe681dSStefano Zampini PetscCall(ISGetLocalSize(sub_schurs->is_subs[i], &subset_size));
108932fe681dSStefano Zampini PetscCall(ISCreateStride(PETSC_COMM_SELF, subset_size, cum, 1, &is));
109032fe681dSStefano Zampini PetscCall(PetscObjectSetName((PetscObject)is, name));
109132fe681dSStefano Zampini PetscCall(ISView(is, matl_dbg_viewer));
109232fe681dSStefano Zampini PetscCall(ISDestroy(&is));
109332fe681dSStefano Zampini if (sub_schurs->change) {
109432fe681dSStefano Zampini Mat T;
109532fe681dSStefano Zampini
109632fe681dSStefano Zampini PetscCall(PetscSNPrintf(name, sizeof(name), "TE%" PetscInt_FMT, i));
109732fe681dSStefano Zampini PetscCall(KSPGetOperators(sub_schurs->change[i], &T, NULL));
109832fe681dSStefano Zampini PetscCall(PetscObjectSetName((PetscObject)T, name));
109932fe681dSStefano Zampini PetscCall(MatView(T, matl_dbg_viewer));
110032fe681dSStefano Zampini PetscCall(PetscSNPrintf(name, sizeof(name), "ITE%" PetscInt_FMT, i));
110132fe681dSStefano Zampini PetscCall(PetscObjectSetName((PetscObject)sub_schurs->change_primal_sub[i], name));
110232fe681dSStefano Zampini PetscCall(ISView(sub_schurs->change_primal_sub[i], matl_dbg_viewer));
110332fe681dSStefano Zampini }
110432fe681dSStefano Zampini cum += subset_size;
110532fe681dSStefano Zampini }
110632fe681dSStefano Zampini PetscCall(PetscViewerFlush(matl_dbg_viewer));
110711955456SStefano Zampini }
110811955456SStefano Zampini
1109883469d8SStefano Zampini /* get explicit Schur Complement computed during numeric factorization */
11109566063dSJacob Faibussowitsch PetscCall(MatFactorGetSchurComplement(F, &S_all, NULL));
11119566063dSJacob Faibussowitsch PetscCall(PetscStrncpy(stype, MATSEQDENSE, sizeof(stype)));
11124ba54290SStefano Zampini #if defined(PETSC_HAVE_CUDA)
11139566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompareAny((PetscObject)A, &gpu, MATSEQAIJVIENNACL, MATSEQAIJCUSPARSE, ""));
11144ba54290SStefano Zampini #endif
11151baa6e33SBarry Smith if (gpu) PetscCall(PetscStrncpy(stype, MATSEQDENSECUDA, sizeof(stype)));
11169566063dSJacob Faibussowitsch PetscCall(PetscOptionsGetString(NULL, sub_schurs->prefix, "-sub_schurs_schur_mat_type", stype, sizeof(stype), NULL));
11179566063dSJacob Faibussowitsch PetscCall(MatConvert(S_all, stype, MAT_INPLACE_MATRIX, &S_all));
11189566063dSJacob Faibussowitsch PetscCall(MatSetOption(S_all, MAT_SPD, sub_schurs->is_posdef));
11199566063dSJacob Faibussowitsch PetscCall(MatSetOption(S_all, MAT_HERMITIAN, sub_schurs->is_hermitian));
11209566063dSJacob Faibussowitsch PetscCall(MatGetType(S_all, &Stype));
1121b3cb21ddSStefano Zampini
1122d62866d3SStefano Zampini /* we can reuse the solvers if we are not using the economic version */
1123791bdc09SStefano Zampini reuse_solvers = (PetscBool)(reuse_solvers && !economic && !sub_schurs->graph->multi_element);
112432fe681dSStefano Zampini if (!sub_schurs->gdsw) {
1125683d3df6SStefano Zampini factor_workaround = (PetscBool)(reuse_solvers && factor_workaround);
11269371c9d4SSatish Balay if (!sub_schurs->is_posdef && factor_workaround && compute_Stilda && size_active_schur) reuse_solvers = factor_workaround = PETSC_FALSE;
112732fe681dSStefano Zampini }
1128df4d28bfSStefano Zampini solver_S = PETSC_TRUE;
1129ca92afb2SStefano Zampini
113072b8c272SStefano Zampini /* update the Schur complement with the change of basis on the pressures */
1131ca92afb2SStefano Zampini if (benign_n) {
11327ebab0bbSStefano Zampini const PetscScalar *cs_AIB;
11337ebab0bbSStefano Zampini PetscScalar *S_data, *AIIm1_data;
11343b03f7bbSStefano Zampini Mat S2 = NULL, S3 = NULL; /* dbg */
11353b03f7bbSStefano Zampini PetscScalar *S2_data, *S3_data; /* dbg */
11367ebab0bbSStefano Zampini Vec v, benign_AIIm1_ones;
11375cbda25cSStefano Zampini PetscInt sizeA;
1138ca92afb2SStefano Zampini
11399566063dSJacob Faibussowitsch PetscCall(MatDenseGetArray(S_all, &S_data));
11409566063dSJacob Faibussowitsch PetscCall(MatCreateVecs(A, &v, &benign_AIIm1_ones));
11419566063dSJacob Faibussowitsch PetscCall(VecGetSize(v, &sizeA));
11429566063dSJacob Faibussowitsch PetscCall(MatMumpsSetIcntl(F, 26, 0));
1143ca92afb2SStefano Zampini #if defined(PETSC_HAVE_MKL_PARDISO)
11449566063dSJacob Faibussowitsch PetscCall(MatMkl_PardisoSetCntl(F, 70, 1));
1145ca92afb2SStefano Zampini #endif
11469566063dSJacob Faibussowitsch PetscCall(MatDenseGetArrayRead(cs_AIB_mat, &cs_AIB));
11479566063dSJacob Faibussowitsch PetscCall(MatDenseGetArray(benign_AIIm1_ones_mat, &AIIm1_data));
11483b03f7bbSStefano Zampini if (matl_dbg_viewer) {
11499566063dSJacob Faibussowitsch PetscCall(MatDuplicate(S_all, MAT_DO_NOT_COPY_VALUES, &S2));
11509566063dSJacob Faibussowitsch PetscCall(MatDuplicate(S_all, MAT_DO_NOT_COPY_VALUES, &S3));
11519566063dSJacob Faibussowitsch PetscCall(MatDenseGetArray(S2, &S2_data));
11529566063dSJacob Faibussowitsch PetscCall(MatDenseGetArray(S3, &S3_data));
11533b03f7bbSStefano Zampini }
1154ca92afb2SStefano Zampini for (i = 0; i < benign_n; i++) {
11553b03f7bbSStefano Zampini PetscScalar *array, sum = 0., one = 1., *sums;
1156ca92afb2SStefano Zampini const PetscInt *idxs;
11573b03f7bbSStefano Zampini PetscInt k, j, nz;
115847484b83SStefano Zampini PetscBLASInt B_k, B_n;
1159ca92afb2SStefano Zampini
11609566063dSJacob Faibussowitsch PetscCall(PetscCalloc1(benign_n, &sums));
11619566063dSJacob Faibussowitsch PetscCall(VecPlaceArray(benign_AIIm1_ones, AIIm1_data + sizeA * i));
11629566063dSJacob Faibussowitsch PetscCall(VecCopy(benign_AIIm1_ones, v));
11639566063dSJacob Faibussowitsch PetscCall(MatSolve(F, v, benign_AIIm1_ones));
11649566063dSJacob Faibussowitsch PetscCall(MatMult(A, benign_AIIm1_ones, v));
11659566063dSJacob Faibussowitsch PetscCall(VecResetArray(benign_AIIm1_ones));
11663b03f7bbSStefano Zampini /* p0 dofs (eliminated) are excluded from the sums */
11673b03f7bbSStefano Zampini for (k = 0; k < benign_n; k++) {
11689566063dSJacob Faibussowitsch PetscCall(ISGetLocalSize(is_p_r[k], &nz));
11699566063dSJacob Faibussowitsch PetscCall(ISGetIndices(is_p_r[k], &idxs));
11703b03f7bbSStefano Zampini for (j = 0; j < nz - 1; j++) sums[k] -= AIIm1_data[idxs[j] + sizeA * i];
11719566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(is_p_r[k], &idxs));
11723b03f7bbSStefano Zampini }
11739566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(v, (const PetscScalar **)&array));
11743b03f7bbSStefano Zampini if (matl_dbg_viewer) {
11753b03f7bbSStefano Zampini Vec vv;
11763b03f7bbSStefano Zampini char name[16];
11773b03f7bbSStefano Zampini
11789566063dSJacob Faibussowitsch PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, size_schur, array + n_I, &vv));
117963a3b9bcSJacob Faibussowitsch PetscCall(PetscSNPrintf(name, sizeof(name), "Pvs%" PetscInt_FMT, i));
11809566063dSJacob Faibussowitsch PetscCall(PetscObjectSetName((PetscObject)vv, name));
11819566063dSJacob Faibussowitsch PetscCall(VecView(vv, matl_dbg_viewer));
11823b03f7bbSStefano Zampini }
118347484b83SStefano Zampini /* perform sparse rank updates on symmetric Schur (TODO: move outside of the loop?) */
118447484b83SStefano Zampini /* cs_AIB already scaled by 1./nz */
118547484b83SStefano Zampini B_k = 1;
1186f9635d15SStefano Zampini PetscCall(PetscBLASIntCast(size_schur, &B_n));
11873b03f7bbSStefano Zampini for (k = 0; k < benign_n; k++) {
11883b03f7bbSStefano Zampini sum = sums[k];
11893b03f7bbSStefano Zampini
11903b03f7bbSStefano Zampini if (PetscAbsScalar(sum) == 0.0) continue;
11913b03f7bbSStefano Zampini if (k == i) {
1192f9635d15SStefano Zampini if (B_n) PetscCallBLAS("BLASsyrk", BLASsyrk_("L", "N", &B_n, &B_k, &sum, cs_AIB + i * size_schur, &B_n, &one, S_data, &B_n));
1193f9635d15SStefano Zampini if (matl_dbg_viewer && B_n) PetscCallBLAS("BLASsyrk", BLASsyrk_("L", "N", &B_n, &B_k, &sum, cs_AIB + i * size_schur, &B_n, &one, S3_data, &B_n));
11943b03f7bbSStefano Zampini } else { /* XXX Is it correct to use symmetric rank-2 update with half of the sum? */
11953b03f7bbSStefano Zampini sum /= 2.0;
1196f9635d15SStefano Zampini if (B_n) PetscCallBLAS("BLASsyr2k", BLASsyr2k_("L", "N", &B_n, &B_k, &sum, cs_AIB + k * size_schur, &B_n, cs_AIB + i * size_schur, &B_n, &one, S_data, &B_n));
1197f9635d15SStefano Zampini if (matl_dbg_viewer && B_n) PetscCallBLAS("BLASsyr2k", BLASsyr2k_("L", "N", &B_n, &B_k, &sum, cs_AIB + k * size_schur, &B_n, cs_AIB + i * size_schur, &B_n, &one, S3_data, &B_n));
11983b03f7bbSStefano Zampini }
11993b03f7bbSStefano Zampini }
12005cbda25cSStefano Zampini sum = 1.;
1201f9635d15SStefano Zampini if (B_n) PetscCallBLAS("BLASsyr2k", BLASsyr2k_("L", "N", &B_n, &B_k, &sum, array + n_I, &B_n, cs_AIB + i * size_schur, &B_n, &one, S_data, &B_n));
1202f9635d15SStefano Zampini if (matl_dbg_viewer && B_n) PetscCallBLAS("BLASsyr2k", BLASsyr2k_("L", "N", &B_n, &B_k, &sum, array + n_I, &B_n, cs_AIB + i * size_schur, &B_n, &one, S2_data, &B_n));
12039566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(v, (const PetscScalar **)&array));
12045cbda25cSStefano Zampini /* set p0 entry of AIIm1_ones to zero */
12059566063dSJacob Faibussowitsch PetscCall(ISGetLocalSize(is_p_r[i], &nz));
12069566063dSJacob Faibussowitsch PetscCall(ISGetIndices(is_p_r[i], &idxs));
1207282d6408SStefano Zampini for (j = 0; j < benign_n; j++) AIIm1_data[idxs[nz - 1] + sizeA * j] = 0.;
12089566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(is_p_r[i], &idxs));
12099566063dSJacob Faibussowitsch PetscCall(PetscFree(sums));
12103b03f7bbSStefano Zampini }
12119566063dSJacob Faibussowitsch PetscCall(VecDestroy(&benign_AIIm1_ones));
12123b03f7bbSStefano Zampini if (matl_dbg_viewer) {
12139566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreArray(S2, &S2_data));
12149566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreArray(S3, &S3_data));
1215ca92afb2SStefano Zampini }
12165e116b59SBarry Smith if (!S_lower_triangular) { /* I need to expand the upper triangular data (column-oriented) */
1217a7414863SStefano Zampini PetscInt k, j;
1218a7414863SStefano Zampini for (k = 0; k < size_schur; k++) {
1219ad540459SPierre Jolivet for (j = k; j < size_schur; j++) S_data[j * size_schur + k] = PetscConj(S_data[k * size_schur + j]);
1220a7414863SStefano Zampini }
1221a7414863SStefano Zampini }
1222a7414863SStefano Zampini
12235cbda25cSStefano Zampini /* restore defaults */
12249566063dSJacob Faibussowitsch PetscCall(MatMumpsSetIcntl(F, 26, -1));
12255cbda25cSStefano Zampini #if defined(PETSC_HAVE_MKL_PARDISO)
12269566063dSJacob Faibussowitsch PetscCall(MatMkl_PardisoSetCntl(F, 70, 0));
12275cbda25cSStefano Zampini #endif
12289566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreArrayRead(cs_AIB_mat, &cs_AIB));
12299566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreArray(benign_AIIm1_ones_mat, &AIIm1_data));
12309566063dSJacob Faibussowitsch PetscCall(VecDestroy(&v));
12319566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreArray(S_all, &S_data));
12323b03f7bbSStefano Zampini if (matl_dbg_viewer) {
12333b03f7bbSStefano Zampini Mat S;
12343b03f7bbSStefano Zampini
12359566063dSJacob Faibussowitsch PetscCall(MatFactorRestoreSchurComplement(F, &S_all, MAT_FACTOR_SCHUR_UNFACTORED));
12369566063dSJacob Faibussowitsch PetscCall(MatFactorCreateSchurComplement(F, &S, NULL));
12379566063dSJacob Faibussowitsch PetscCall(PetscObjectSetName((PetscObject)S, "Sb"));
12389566063dSJacob Faibussowitsch PetscCall(MatView(S, matl_dbg_viewer));
12399566063dSJacob Faibussowitsch PetscCall(MatDestroy(&S));
12409566063dSJacob Faibussowitsch PetscCall(PetscObjectSetName((PetscObject)S2, "S2P"));
12419566063dSJacob Faibussowitsch PetscCall(MatView(S2, matl_dbg_viewer));
12429566063dSJacob Faibussowitsch PetscCall(PetscObjectSetName((PetscObject)S3, "S3P"));
12439566063dSJacob Faibussowitsch PetscCall(MatView(S3, matl_dbg_viewer));
12449566063dSJacob Faibussowitsch PetscCall(PetscObjectSetName((PetscObject)cs_AIB_mat, "cs"));
12459566063dSJacob Faibussowitsch PetscCall(MatView(cs_AIB_mat, matl_dbg_viewer));
12469566063dSJacob Faibussowitsch PetscCall(MatFactorGetSchurComplement(F, &S_all, NULL));
12473b03f7bbSStefano Zampini }
12489566063dSJacob Faibussowitsch PetscCall(MatDestroy(&S2));
12499566063dSJacob Faibussowitsch PetscCall(MatDestroy(&S3));
1250ca92afb2SStefano Zampini }
1251a3df083aSStefano Zampini if (!reuse_solvers) {
125248a46eb9SPierre Jolivet for (i = 0; i < benign_n; i++) PetscCall(ISDestroy(&is_p_r[i]));
12539566063dSJacob Faibussowitsch PetscCall(PetscFree(is_p_r));
12549566063dSJacob Faibussowitsch PetscCall(MatDestroy(&cs_AIB_mat));
12559566063dSJacob Faibussowitsch PetscCall(MatDestroy(&benign_AIIm1_ones_mat));
1256a3df083aSStefano Zampini }
1257791bdc09SStefano Zampini } else if (multi_element) { /* MUMPS does not support sparse Schur complements. Loop over local subs */
1258791bdc09SStefano Zampini PetscInt *nnz;
1259791bdc09SStefano Zampini const PetscInt *idxs;
1260791bdc09SStefano Zampini PetscInt size_schur_sub;
1261791bdc09SStefano Zampini
1262791bdc09SStefano Zampini PetscCall(PetscCalloc1(size_schur, &nnz));
1263791bdc09SStefano Zampini for (PetscInt sub = 0; sub < n_local_subs; sub++) {
1264791bdc09SStefano Zampini PetscCall(ISGetLocalSize(is_sub_schur[sub], &size_schur_sub));
1265791bdc09SStefano Zampini PetscCall(ISGetIndices(is_sub_schur[sub], &idxs));
1266791bdc09SStefano Zampini for (PetscInt j = 0; j < size_schur_sub; j++) nnz[idxs[j]] = size_schur_sub;
1267791bdc09SStefano Zampini PetscCall(ISRestoreIndices(is_sub_schur[sub], &idxs));
1268791bdc09SStefano Zampini }
1269791bdc09SStefano Zampini PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, size_schur, size_schur, 0, nnz, &S_all));
1270791bdc09SStefano Zampini PetscCall(MatSetOption(S_all, MAT_ROW_ORIENTED, sub_schurs->is_hermitian));
1271791bdc09SStefano Zampini PetscCall(PetscFree(nnz));
1272791bdc09SStefano Zampini
1273791bdc09SStefano Zampini for (PetscInt sub = 0; sub < n_local_subs; sub++) {
1274791bdc09SStefano Zampini Mat Asub, Ssub;
1275791bdc09SStefano Zampini const PetscScalar *vals;
1276f7b1b682SStefano Zampini PetscInt size_all_sub;
1277791bdc09SStefano Zampini
1278f7b1b682SStefano Zampini F = NULL;
1279f7b1b682SStefano Zampini PetscCall(ISGetLocalSize(is_sub_schur[sub], &size_schur_sub));
1280f7b1b682SStefano Zampini PetscCall(ISGetLocalSize(is_sub_all[sub], &size_all_sub));
1281791bdc09SStefano Zampini PetscCall(MatCreateSubMatrix(A, is_sub_all[sub], is_sub_all[sub], MAT_INITIAL_MATRIX, &Asub));
1282f7b1b682SStefano Zampini if (size_schur_sub == size_all_sub) {
1283f7b1b682SStefano Zampini /* we can't use MatFactor when size_schur == size_of_the_problem */
1284f7b1b682SStefano Zampini PetscCall(MatConvert(Asub, MATDENSE, MAT_INITIAL_MATRIX, &Ssub));
1285f7b1b682SStefano Zampini } else {
1286791bdc09SStefano Zampini PetscCall(MatGetFactor(Asub, sub_schurs->mat_solver_type, sub_schurs->mat_factor_type, &F));
1287791bdc09SStefano Zampini PetscCheck(F, PetscObjectComm((PetscObject)Asub), PETSC_ERR_SUP, "MatGetFactor not supported by matrix instance of type %s. Rerun with \"-info :mat | grep MatGetFactor_\" for additional information", ((PetscObject)Asub)->type_name);
1288791bdc09SStefano Zampini PetscCall(MatSetErrorIfFailure(Asub, PETSC_TRUE));
1289791bdc09SStefano Zampini #if defined(PETSC_HAVE_MKL_PARDISO)
1290791bdc09SStefano Zampini if (benign_trick) PetscCall(MatMkl_PardisoSetCntl(F, 10, 10));
1291791bdc09SStefano Zampini #endif
1292791bdc09SStefano Zampini /* subsets ordered last */
1293791bdc09SStefano Zampini PetscCall(MatFactorSetSchurIS(F, is_sub_schur_all[sub]));
1294791bdc09SStefano Zampini
1295791bdc09SStefano Zampini /* factorization step */
1296791bdc09SStefano Zampini switch (sub_schurs->mat_factor_type) {
1297791bdc09SStefano Zampini case MAT_FACTOR_CHOLESKY:
1298791bdc09SStefano Zampini PetscCall(MatCholeskyFactorSymbolic(F, Asub, NULL, NULL));
1299791bdc09SStefano Zampini /* be sure that icntl 19 is not set by command line */
1300791bdc09SStefano Zampini PetscCall(MatMumpsSetIcntl(F, 19, 2));
1301791bdc09SStefano Zampini PetscCall(MatCholeskyFactorNumeric(F, Asub, NULL));
1302791bdc09SStefano Zampini S_lower_triangular = PETSC_TRUE;
1303791bdc09SStefano Zampini break;
1304791bdc09SStefano Zampini case MAT_FACTOR_LU:
1305791bdc09SStefano Zampini PetscCall(MatLUFactorSymbolic(F, Asub, NULL, NULL, NULL));
1306791bdc09SStefano Zampini /* be sure that icntl 19 is not set by command line */
1307791bdc09SStefano Zampini PetscCall(MatMumpsSetIcntl(F, 19, 3));
1308791bdc09SStefano Zampini PetscCall(MatLUFactorNumeric(F, Asub, NULL));
1309791bdc09SStefano Zampini break;
1310791bdc09SStefano Zampini default:
1311791bdc09SStefano Zampini SETERRQ(PetscObjectComm((PetscObject)F), PETSC_ERR_SUP, "Unsupported factor type %s", MatFactorTypes[sub_schurs->mat_factor_type]);
1312791bdc09SStefano Zampini }
1313791bdc09SStefano Zampini PetscCall(MatFactorCreateSchurComplement(F, &Ssub, NULL));
1314f7b1b682SStefano Zampini }
1315f7b1b682SStefano Zampini PetscCall(MatDestroy(&Asub));
1316791bdc09SStefano Zampini PetscCall(MatDenseGetArrayRead(Ssub, &vals));
1317791bdc09SStefano Zampini PetscCall(ISGetIndices(is_sub_schur[sub], &idxs));
1318791bdc09SStefano Zampini PetscCall(MatSetValues(S_all, size_schur_sub, idxs, size_schur_sub, idxs, vals, INSERT_VALUES));
1319791bdc09SStefano Zampini PetscCall(ISRestoreIndices(is_sub_schur[sub], &idxs));
1320791bdc09SStefano Zampini PetscCall(MatDenseRestoreArrayRead(Ssub, &vals));
1321791bdc09SStefano Zampini PetscCall(MatDestroy(&Ssub));
1322791bdc09SStefano Zampini PetscCall(MatDestroy(&F));
1323791bdc09SStefano Zampini }
1324791bdc09SStefano Zampini PetscCall(MatAssemblyBegin(S_all, MAT_FINAL_ASSEMBLY));
1325791bdc09SStefano Zampini PetscCall(MatAssemblyEnd(S_all, MAT_FINAL_ASSEMBLY));
1326791bdc09SStefano Zampini PetscCall(MatSetOption(S_all, MAT_SPD, sub_schurs->is_posdef));
1327791bdc09SStefano Zampini PetscCall(MatSetOption(S_all, MAT_HERMITIAN, sub_schurs->is_hermitian));
1328791bdc09SStefano Zampini Stype = MATDENSE;
1329791bdc09SStefano Zampini reuse_solvers = PETSC_FALSE;
1330791bdc09SStefano Zampini factor_workaround = PETSC_FALSE;
1331791bdc09SStefano Zampini solver_S = PETSC_FALSE;
1332df4d28bfSStefano Zampini } else { /* we can't use MatFactor when size_schur == size_of_the_problem */
13339566063dSJacob Faibussowitsch PetscCall(MatConvert(A, MATSEQDENSE, MAT_INITIAL_MATRIX, &S_all));
13349566063dSJacob Faibussowitsch PetscCall(MatGetType(S_all, &Stype));
1335683d3df6SStefano Zampini reuse_solvers = PETSC_FALSE; /* TODO: why we can't reuse the solvers here? */
1336166598c1SStefano Zampini factor_workaround = PETSC_FALSE;
1337df4d28bfSStefano Zampini solver_S = PETSC_FALSE;
1338be83ff47SStefano Zampini }
1339be83ff47SStefano Zampini
1340be83ff47SStefano Zampini if (reuse_solvers) {
134132fe681dSStefano Zampini Mat A_II, pA_II, Afake;
134253892102SStefano Zampini Vec vec1_B;
1343df4d28bfSStefano Zampini PCBDDCReuseSolvers msolv_ctx;
13443462e049SStefano Zampini PetscInt n_R;
1345d5574798SStefano Zampini
1346df4d28bfSStefano Zampini if (sub_schurs->reuse_solver) {
13479566063dSJacob Faibussowitsch PetscCall(PCBDDCReuseSolversReset(sub_schurs->reuse_solver));
1348e28d306cSStefano Zampini } else {
13499566063dSJacob Faibussowitsch PetscCall(PetscNew(&sub_schurs->reuse_solver));
1350d62866d3SStefano Zampini }
1351df4d28bfSStefano Zampini msolv_ctx = sub_schurs->reuse_solver;
135232fe681dSStefano Zampini PetscCall(MatSchurComplementGetSubMatrices(sub_schurs->S, &A_II, &pA_II, NULL, NULL, NULL));
13539566063dSJacob Faibussowitsch PetscCall(PetscObjectReference((PetscObject)F));
1354d5574798SStefano Zampini msolv_ctx->F = F;
13559566063dSJacob Faibussowitsch PetscCall(MatCreateVecs(F, &msolv_ctx->sol, NULL));
1356683d3df6SStefano Zampini /* currently PETSc has no support for MatSolve(F,x,x), so cheat and let rhs and sol share the same memory */
1357683d3df6SStefano Zampini {
1358683d3df6SStefano Zampini PetscScalar *array;
1359683d3df6SStefano Zampini PetscInt n;
1360683d3df6SStefano Zampini
13619566063dSJacob Faibussowitsch PetscCall(VecGetLocalSize(msolv_ctx->sol, &n));
13629566063dSJacob Faibussowitsch PetscCall(VecGetArray(msolv_ctx->sol, &array));
13639566063dSJacob Faibussowitsch PetscCall(VecCreateSeqWithArray(PetscObjectComm((PetscObject)msolv_ctx->sol), 1, n, array, &msolv_ctx->rhs));
13649566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(msolv_ctx->sol, &array));
1365683d3df6SStefano Zampini }
13663fc34f97SStefano Zampini msolv_ctx->has_vertices = schur_has_vertices;
1367d62866d3SStefano Zampini
1368d62866d3SStefano Zampini /* interior solver */
13699566063dSJacob Faibussowitsch PetscCall(PCCreate(PetscObjectComm((PetscObject)A_II), &msolv_ctx->interior_solver));
137032fe681dSStefano Zampini PetscCall(PCSetOperators(msolv_ctx->interior_solver, A_II, pA_II));
13719566063dSJacob Faibussowitsch PetscCall(PCSetType(msolv_ctx->interior_solver, PCSHELL));
13729566063dSJacob Faibussowitsch PetscCall(PCShellSetName(msolv_ctx->interior_solver, "Interior solver (w/o Schur factorization)"));
13739566063dSJacob Faibussowitsch PetscCall(PCShellSetContext(msolv_ctx->interior_solver, msolv_ctx));
13749566063dSJacob Faibussowitsch PetscCall(PCShellSetView(msolv_ctx->interior_solver, PCBDDCReuseSolvers_View));
13759566063dSJacob Faibussowitsch PetscCall(PCShellSetApply(msolv_ctx->interior_solver, PCBDDCReuseSolvers_Interior));
13769566063dSJacob Faibussowitsch PetscCall(PCShellSetApplyTranspose(msolv_ctx->interior_solver, PCBDDCReuseSolvers_InteriorTranspose));
137732fe681dSStefano Zampini if (sub_schurs->gdsw) PetscCall(PCShellSetDestroy(msolv_ctx->interior_solver, PCBDDCReuseSolvers_Destroy));
1378d62866d3SStefano Zampini
1379d62866d3SStefano Zampini /* correction solver */
138032fe681dSStefano Zampini if (!sub_schurs->gdsw) {
13819566063dSJacob Faibussowitsch PetscCall(PCCreate(PetscObjectComm((PetscObject)A_II), &msolv_ctx->correction_solver));
13829566063dSJacob Faibussowitsch PetscCall(PCSetType(msolv_ctx->correction_solver, PCSHELL));
13839566063dSJacob Faibussowitsch PetscCall(PCShellSetName(msolv_ctx->correction_solver, "Correction solver (with Schur factorization)"));
13849566063dSJacob Faibussowitsch PetscCall(PCShellSetContext(msolv_ctx->correction_solver, msolv_ctx));
13859566063dSJacob Faibussowitsch PetscCall(PCShellSetView(msolv_ctx->interior_solver, PCBDDCReuseSolvers_View));
13869566063dSJacob Faibussowitsch PetscCall(PCShellSetApply(msolv_ctx->correction_solver, PCBDDCReuseSolvers_Correction));
13879566063dSJacob Faibussowitsch PetscCall(PCShellSetApplyTranspose(msolv_ctx->correction_solver, PCBDDCReuseSolvers_CorrectionTranspose));
138853892102SStefano Zampini
138953892102SStefano Zampini /* scatter and vecs for Schur complement solver */
13909566063dSJacob Faibussowitsch PetscCall(MatCreateVecs(S_all, &msolv_ctx->sol_B, &msolv_ctx->rhs_B));
13919566063dSJacob Faibussowitsch PetscCall(MatCreateVecs(sub_schurs->S, &vec1_B, NULL));
13923fc34f97SStefano Zampini if (!schur_has_vertices) {
13939566063dSJacob Faibussowitsch PetscCall(ISGlobalToLocalMappingApplyIS(sub_schurs->BtoNmap, IS_GTOLM_DROP, is_A_all, &msolv_ctx->is_B));
13949566063dSJacob Faibussowitsch PetscCall(VecScatterCreate(vec1_B, msolv_ctx->is_B, msolv_ctx->sol_B, NULL, &msolv_ctx->correction_scatter_B));
13959566063dSJacob Faibussowitsch PetscCall(PetscObjectReference((PetscObject)is_A_all));
139653892102SStefano Zampini msolv_ctx->is_R = is_A_all;
1397683d3df6SStefano Zampini } else {
1398683d3df6SStefano Zampini IS is_B_all;
1399683d3df6SStefano Zampini const PetscInt *idxs;
1400683d3df6SStefano Zampini PetscInt dual, n_v, n;
1401683d3df6SStefano Zampini
14029566063dSJacob Faibussowitsch PetscCall(ISGetLocalSize(sub_schurs->is_vertices, &n_v));
1403683d3df6SStefano Zampini dual = size_schur - n_v;
14049566063dSJacob Faibussowitsch PetscCall(ISGetLocalSize(is_A_all, &n));
14059566063dSJacob Faibussowitsch PetscCall(ISGetIndices(is_A_all, &idxs));
14069566063dSJacob Faibussowitsch PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)is_A_all), dual, idxs + n_I, PETSC_COPY_VALUES, &is_B_all));
14079566063dSJacob Faibussowitsch PetscCall(ISGlobalToLocalMappingApplyIS(sub_schurs->BtoNmap, IS_GTOLM_DROP, is_B_all, &msolv_ctx->is_B));
14089566063dSJacob Faibussowitsch PetscCall(ISDestroy(&is_B_all));
14099566063dSJacob Faibussowitsch PetscCall(ISCreateStride(PetscObjectComm((PetscObject)is_A_all), dual, 0, 1, &is_B_all));
14109566063dSJacob Faibussowitsch PetscCall(VecScatterCreate(vec1_B, msolv_ctx->is_B, msolv_ctx->sol_B, is_B_all, &msolv_ctx->correction_scatter_B));
14119566063dSJacob Faibussowitsch PetscCall(ISDestroy(&is_B_all));
14129566063dSJacob Faibussowitsch PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)is_A_all), n - n_v, idxs, PETSC_COPY_VALUES, &msolv_ctx->is_R));
14139566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(is_A_all, &idxs));
1414683d3df6SStefano Zampini }
14159566063dSJacob Faibussowitsch PetscCall(ISGetLocalSize(msolv_ctx->is_R, &n_R));
14169566063dSJacob Faibussowitsch PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, n_R, n_R, 0, NULL, &Afake));
14179566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(Afake, MAT_FINAL_ASSEMBLY));
14189566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(Afake, MAT_FINAL_ASSEMBLY));
14199566063dSJacob Faibussowitsch PetscCall(PCSetOperators(msolv_ctx->correction_solver, Afake, Afake));
14209566063dSJacob Faibussowitsch PetscCall(MatDestroy(&Afake));
14219566063dSJacob Faibussowitsch PetscCall(VecDestroy(&vec1_B));
142232fe681dSStefano Zampini }
1423ca92afb2SStefano Zampini /* communicate benign info to solver context */
1424ca92afb2SStefano Zampini if (benign_n) {
14255cbda25cSStefano Zampini PetscScalar *array;
14265cbda25cSStefano Zampini
1427ca92afb2SStefano Zampini msolv_ctx->benign_n = benign_n;
1428ca92afb2SStefano Zampini msolv_ctx->benign_zerodiag_subs = is_p_r;
14299566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(benign_n, &msolv_ctx->benign_save_vals));
14305cbda25cSStefano Zampini msolv_ctx->benign_csAIB = cs_AIB_mat;
14319566063dSJacob Faibussowitsch PetscCall(MatCreateVecs(cs_AIB_mat, &msolv_ctx->benign_corr_work, NULL));
14329566063dSJacob Faibussowitsch PetscCall(VecGetArray(msolv_ctx->benign_corr_work, &array));
14339566063dSJacob Faibussowitsch PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, size_schur, array, &msolv_ctx->benign_dummy_schur_vec));
14349566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(msolv_ctx->benign_corr_work, &array));
14355cbda25cSStefano Zampini msolv_ctx->benign_AIIm1ones = benign_AIIm1_ones_mat;
1436ca92afb2SStefano Zampini }
1437ada6e2d7SStefano Zampini } else {
14381baa6e33SBarry Smith if (sub_schurs->reuse_solver) PetscCall(PCBDDCReuseSolversReset(sub_schurs->reuse_solver));
14399566063dSJacob Faibussowitsch PetscCall(PetscFree(sub_schurs->reuse_solver));
1440d5574798SStefano Zampini }
14419566063dSJacob Faibussowitsch PetscCall(MatDestroy(&A));
14429566063dSJacob Faibussowitsch PetscCall(ISDestroy(&is_A_all));
14435db18549SStefano Zampini
1444be83ff47SStefano Zampini /* Work arrays */
14459566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(max_subset_size * max_subset_size, &work));
1446d2627357SStefano Zampini
1447be83ff47SStefano Zampini /* S_Ej_all */
1448791bdc09SStefano Zampini PetscInt *idx_work = NULL;
1449be83ff47SStefano Zampini cum = cum2 = 0;
1450791bdc09SStefano Zampini if (!multi_element) PetscCall(MatDenseGetArrayRead(S_all, &rS_data));
1451791bdc09SStefano Zampini else PetscCall(PetscMalloc1(max_subset_size, &idx_work));
14529566063dSJacob Faibussowitsch PetscCall(MatSeqAIJGetArray(sub_schurs->S_Ej_all, &SEj_arr));
145348a46eb9SPierre Jolivet if (sub_schurs->sum_S_Ej_inv_all) PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all, &SEjinv_arr));
145448a46eb9SPierre Jolivet if (sub_schurs->gdsw) PetscCall(MatCreateSubMatrices(sub_schurs->A, sub_schurs->n_subs, sub_schurs->is_subs, sub_schurs->is_subs, MAT_INITIAL_MATRIX, &gdswA));
145565d8bf0aSStefano Zampini for (i = 0; i < sub_schurs->n_subs; i++) {
145632fe681dSStefano Zampini /* get S_E (or K^i_EE for GDSW) */
14579566063dSJacob Faibussowitsch PetscCall(ISGetLocalSize(sub_schurs->is_subs[i], &subset_size));
145832fe681dSStefano Zampini if (sub_schurs->gdsw) {
145932fe681dSStefano Zampini Mat T;
146032fe681dSStefano Zampini
146132fe681dSStefano Zampini PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, subset_size, subset_size, work, &T));
146232fe681dSStefano Zampini PetscCall(MatConvert(gdswA[i], MATDENSE, MAT_REUSE_MATRIX, &T));
146332fe681dSStefano Zampini PetscCall(MatDestroy(&T));
146432fe681dSStefano Zampini } else {
1465791bdc09SStefano Zampini if (multi_element) { /* transpose copy to workspace */
1466791bdc09SStefano Zampini // XXX CSR directly?
1467791bdc09SStefano Zampini for (PetscInt j = 0; j < subset_size; j++) idx_work[j] = cum + j;
1468791bdc09SStefano Zampini PetscCall(MatGetValues(S_all, subset_size, idx_work, subset_size, idx_work, work));
1469791bdc09SStefano Zampini if (!sub_schurs->is_hermitian) {
1470791bdc09SStefano Zampini for (PetscInt k = 0; k < subset_size; k++) {
1471791bdc09SStefano Zampini for (PetscInt j = k; j < subset_size; j++) {
1472791bdc09SStefano Zampini PetscScalar t = work[k * subset_size + j];
1473791bdc09SStefano Zampini work[k * subset_size + j] = work[j * subset_size + k];
1474791bdc09SStefano Zampini work[j * subset_size + k] = t;
1475791bdc09SStefano Zampini }
1476791bdc09SStefano Zampini }
1477791bdc09SStefano Zampini }
1478791bdc09SStefano Zampini } else if (S_lower_triangular) { /* I need to expand the upper triangular data (column-oriented) */
1479791bdc09SStefano Zampini for (PetscInt k = 0; k < subset_size; k++) {
1480791bdc09SStefano Zampini for (PetscInt j = k; j < subset_size; j++) {
14811683a169SBarry Smith work[k * subset_size + j] = rS_data[cum2 + k * size_schur + j];
14821683a169SBarry Smith work[j * subset_size + k] = PetscConj(rS_data[cum2 + k * size_schur + j]);
1483be83ff47SStefano Zampini }
1484be83ff47SStefano Zampini }
148506a4e24aSStefano Zampini } else { /* just copy to workspace */
1486791bdc09SStefano Zampini for (PetscInt k = 0; k < subset_size; k++) {
1487791bdc09SStefano Zampini for (PetscInt j = 0; j < subset_size; j++) work[k * subset_size + j] = rS_data[cum2 + k * size_schur + j];
1488be83ff47SStefano Zampini }
14899087bf02SStefano Zampini }
149032fe681dSStefano Zampini }
14915a95e1ceSStefano Zampini /* insert S_E values */
1492b7ab4a40SStefano Zampini if (sub_schurs->change) {
14938760537fSStefano Zampini Mat change_sub, SEj, T;
149472b8c272SStefano Zampini
149572b8c272SStefano Zampini /* change basis */
14969566063dSJacob Faibussowitsch PetscCall(KSPGetOperators(sub_schurs->change[i], &change_sub, NULL));
14979566063dSJacob Faibussowitsch PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, subset_size, subset_size, work, &SEj));
14988760537fSStefano Zampini if (!sub_schurs->change_with_qr) { /* currently there's no support for PtAP with P SeqAIJ */
14998760537fSStefano Zampini Mat T2;
15009566063dSJacob Faibussowitsch PetscCall(MatTransposeMatMult(change_sub, SEj, MAT_INITIAL_MATRIX, 1.0, &T2));
15019566063dSJacob Faibussowitsch PetscCall(MatMatMult(T2, change_sub, MAT_INITIAL_MATRIX, 1.0, &T));
15029566063dSJacob Faibussowitsch PetscCall(MatConvert(T, MATSEQDENSE, MAT_INPLACE_MATRIX, &T));
15039566063dSJacob Faibussowitsch PetscCall(MatDestroy(&T2));
15048760537fSStefano Zampini } else {
15059566063dSJacob Faibussowitsch PetscCall(MatPtAP(SEj, change_sub, MAT_INITIAL_MATRIX, 1.0, &T));
150672b8c272SStefano Zampini }
15079566063dSJacob Faibussowitsch PetscCall(MatCopy(T, SEj, SAME_NONZERO_PATTERN));
15089566063dSJacob Faibussowitsch PetscCall(MatDestroy(&T));
15099566063dSJacob Faibussowitsch PetscCall(MatZeroRowsColumnsIS(SEj, sub_schurs->change_primal_sub[i], 1.0, NULL, NULL));
15109566063dSJacob Faibussowitsch PetscCall(MatDestroy(&SEj));
151172b8c272SStefano Zampini }
15129566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(SEj_arr, work, subset_size * subset_size));
1513862806e4SStefano Zampini if (compute_Stilda) {
151432fe681dSStefano Zampini if (deluxe) { /* if adaptivity is requested, invert S_E blocks */
15157ebab0bbSStefano Zampini Mat M;
15167ebab0bbSStefano Zampini const PetscScalar *vals;
15177ebab0bbSStefano Zampini PetscBool isdense, isdensecuda;
1518f4f7d9d6SStefano Zampini
15199566063dSJacob Faibussowitsch PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, subset_size, subset_size, work, &M));
15209566063dSJacob Faibussowitsch PetscCall(MatSetOption(M, MAT_SPD, sub_schurs->is_posdef));
15219566063dSJacob Faibussowitsch PetscCall(MatSetOption(M, MAT_HERMITIAN, sub_schurs->is_hermitian));
152248a46eb9SPierre Jolivet if (!PetscBTLookup(sub_schurs->is_edge, i)) PetscCall(MatSetType(M, Stype));
15239566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)M, MATSEQDENSE, &isdense));
15249566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)M, MATSEQDENSECUDA, &isdensecuda));
152579329b78SStefano Zampini switch (sub_schurs->mat_factor_type) {
152679329b78SStefano Zampini case MAT_FACTOR_CHOLESKY:
15279566063dSJacob Faibussowitsch PetscCall(MatCholeskyFactor(M, NULL, NULL));
152879329b78SStefano Zampini break;
152979329b78SStefano Zampini case MAT_FACTOR_LU:
15309566063dSJacob Faibussowitsch PetscCall(MatLUFactor(M, NULL, NULL, NULL));
153179329b78SStefano Zampini break;
153279329b78SStefano Zampini default:
153379329b78SStefano Zampini SETERRQ(PetscObjectComm((PetscObject)F), PETSC_ERR_SUP, "Unsupported factor type %s", MatFactorTypes[sub_schurs->mat_factor_type]);
15342972d61bSStefano Zampini }
15357ebab0bbSStefano Zampini if (isdense) {
15369566063dSJacob Faibussowitsch PetscCall(MatSeqDenseInvertFactors_Private(M));
15377ebab0bbSStefano Zampini #if defined(PETSC_HAVE_CUDA)
15387ebab0bbSStefano Zampini } else if (isdensecuda) {
15394742e46bSJacob Faibussowitsch PetscCall(MatSeqDenseCUDAInvertFactors_Internal(M));
15407ebab0bbSStefano Zampini #endif
154198921bdaSJacob Faibussowitsch } else SETERRQ(PetscObjectComm((PetscObject)M), PETSC_ERR_SUP, "Not implemented for type %s", Stype);
15429566063dSJacob Faibussowitsch PetscCall(MatDenseGetArrayRead(M, &vals));
15439566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(SEjinv_arr, vals, subset_size * subset_size));
15449566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreArrayRead(M, &vals));
15459566063dSJacob Faibussowitsch PetscCall(MatDestroy(&M));
154632fe681dSStefano Zampini } else if (scaling) { /* not using deluxe */
15479d54b7f4SStefano Zampini Mat SEj;
15489d54b7f4SStefano Zampini Vec D;
15499d54b7f4SStefano Zampini PetscScalar *array;
15509d54b7f4SStefano Zampini
15519566063dSJacob Faibussowitsch PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, subset_size, subset_size, work, &SEj));
15529566063dSJacob Faibussowitsch PetscCall(VecGetArray(Dall, &array));
15539566063dSJacob Faibussowitsch PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, subset_size, array + cum, &D));
15549566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(Dall, &array));
15559566063dSJacob Faibussowitsch PetscCall(VecShift(D, -1.));
15569566063dSJacob Faibussowitsch PetscCall(MatDiagonalScale(SEj, D, D));
15579566063dSJacob Faibussowitsch PetscCall(MatDestroy(&SEj));
15589566063dSJacob Faibussowitsch PetscCall(VecDestroy(&D));
15599566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(SEj_arr, work, subset_size * subset_size));
15609d54b7f4SStefano Zampini }
156132fe681dSStefano Zampini }
1562be83ff47SStefano Zampini cum += subset_size;
1563be83ff47SStefano Zampini cum2 += subset_size * (size_schur + 1);
156404c5b2e6SStefano Zampini SEj_arr += subset_size * subset_size;
156504c5b2e6SStefano Zampini if (SEjinv_arr) SEjinv_arr += subset_size * subset_size;
1566be83ff47SStefano Zampini }
156748a46eb9SPierre Jolivet if (sub_schurs->gdsw) PetscCall(MatDestroySubMatrices(sub_schurs->n_subs, &gdswA));
1568791bdc09SStefano Zampini if (!multi_element) PetscCall(MatDenseRestoreArrayRead(S_all, &rS_data));
15699566063dSJacob Faibussowitsch PetscCall(MatSeqAIJRestoreArray(sub_schurs->S_Ej_all, &SEj_arr));
157048a46eb9SPierre Jolivet if (sub_schurs->sum_S_Ej_inv_all) PetscCall(MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all, &SEjinv_arr));
157148a46eb9SPierre Jolivet if (solver_S) PetscCall(MatFactorRestoreSchurComplement(F, &S_all, MAT_FACTOR_SCHUR_UNFACTORED));
1572683d3df6SStefano Zampini
15737ebab0bbSStefano Zampini /* may prevent from unneeded copies, since MUMPS or MKL_Pardiso always use CPU memory
15747ebab0bbSStefano Zampini however, preliminary tests indicate using GPUs is still faster in the solve phase */
15757ebab0bbSStefano Zampini #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
15767ebab0bbSStefano Zampini if (reuse_solvers) {
15777ebab0bbSStefano Zampini Mat St;
15787ebab0bbSStefano Zampini MatFactorSchurStatus st;
15797ebab0bbSStefano Zampini
158035d0533cSStefano Zampini flg = PETSC_FALSE;
15819566063dSJacob Faibussowitsch PetscCall(PetscOptionsGetBool(NULL, sub_schurs->prefix, "-sub_schurs_schur_pin_to_cpu", &flg, NULL));
15829566063dSJacob Faibussowitsch PetscCall(MatFactorGetSchurComplement(F, &St, &st));
15839566063dSJacob Faibussowitsch PetscCall(MatBindToCPU(St, flg));
15849566063dSJacob Faibussowitsch PetscCall(MatFactorRestoreSchurComplement(F, &St, st));
15857ebab0bbSStefano Zampini }
15867ebab0bbSStefano Zampini #endif
15877ebab0bbSStefano Zampini
1588683d3df6SStefano Zampini schur_factor = NULL;
158945951f25SStefano Zampini if (compute_Stilda && size_active_schur) {
15909d54b7f4SStefano Zampini if (sub_schurs->n_subs == 1 && size_schur == size_active_schur && deluxe) { /* we already computed the inverse */
159132fe681dSStefano Zampini PetscCall(MatSeqAIJGetArrayWrite(sub_schurs->sum_S_Ej_tilda_all, &SEjinv_arr));
15929566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(SEjinv_arr, work, size_schur * size_schur));
159332fe681dSStefano Zampini PetscCall(MatSeqAIJRestoreArrayWrite(sub_schurs->sum_S_Ej_tilda_all, &SEjinv_arr));
15944a6c6b0dSStefano Zampini } else {
1595683d3df6SStefano Zampini Mat S_all_inv = NULL;
15967ebab0bbSStefano Zampini
159732fe681dSStefano Zampini if (solver_S && !sub_schurs->gdsw) {
1598683d3df6SStefano Zampini /* for adaptive selection we need S^-1; for solver reusage we need S_\Delta\Delta^-1.
1599683d3df6SStefano Zampini The latter is not the principal subminor for S^-1. However, the factors can be reused since S_\Delta\Delta is the leading principal submatrix of S */
16003fc34f97SStefano Zampini if (factor_workaround) { /* invert without calling MatFactorInvertSchurComplement, since we are hacking */
1601683d3df6SStefano Zampini PetscScalar *data;
1602683d3df6SStefano Zampini PetscInt nd = 0;
16036dba178dSStefano Zampini
16047a46b595SBarry Smith PetscCheck(use_potr, PETSC_COMM_SELF, PETSC_ERR_SUP, "Factor update not yet implemented for non SPD matrices");
16059566063dSJacob Faibussowitsch PetscCall(MatFactorGetSchurComplement(F, &S_all_inv, NULL));
16069566063dSJacob Faibussowitsch PetscCall(MatDenseGetArray(S_all_inv, &data));
1607683d3df6SStefano Zampini if (sub_schurs->is_dir) { /* dirichlet dofs could have different scalings */
16089566063dSJacob Faibussowitsch PetscCall(ISGetLocalSize(sub_schurs->is_dir, &nd));
1609683d3df6SStefano Zampini }
16103fc34f97SStefano Zampini
16113fc34f97SStefano Zampini /* factor and invert activedofs and vertices (dirichlet dofs does not contribute) */
16123fc34f97SStefano Zampini if (schur_has_vertices) {
16133fc34f97SStefano Zampini Mat M;
16143fc34f97SStefano Zampini PetscScalar *tdata;
16153fc34f97SStefano Zampini PetscInt nv = 0, news;
16163fc34f97SStefano Zampini
16179566063dSJacob Faibussowitsch PetscCall(ISGetLocalSize(sub_schurs->is_vertices, &nv));
16183fc34f97SStefano Zampini news = size_active_schur + nv;
16199566063dSJacob Faibussowitsch PetscCall(PetscCalloc1(news * news, &tdata));
1620683d3df6SStefano Zampini for (i = 0; i < size_active_schur; i++) {
16219566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(tdata + i * (news + 1), data + i * (size_schur + 1), size_active_schur - i));
16229566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(tdata + i * (news + 1) + size_active_schur - i, data + i * size_schur + size_active_schur + nd, nv));
16233fc34f97SStefano Zampini }
16243fc34f97SStefano Zampini for (i = 0; i < nv; i++) {
16253fc34f97SStefano Zampini PetscInt k = i + size_active_schur;
16269566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(tdata + k * (news + 1), data + (k + nd) * (size_schur + 1), nv - i));
16273fc34f97SStefano Zampini }
16283fc34f97SStefano Zampini
16299566063dSJacob Faibussowitsch PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, news, news, tdata, &M));
16309566063dSJacob Faibussowitsch PetscCall(MatSetOption(M, MAT_SPD, PETSC_TRUE));
16319566063dSJacob Faibussowitsch PetscCall(MatCholeskyFactor(M, NULL, NULL));
16323fc34f97SStefano Zampini /* save the factors */
16333fc34f97SStefano Zampini cum = 0;
16349566063dSJacob Faibussowitsch PetscCall(PetscMalloc1((size_active_schur * (size_active_schur + 1)) / 2 + nd, &schur_factor));
16353fc34f97SStefano Zampini for (i = 0; i < size_active_schur; i++) {
16369566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(schur_factor + cum, tdata + i * (news + 1), size_active_schur - i));
1637683d3df6SStefano Zampini cum += size_active_schur - i;
1638683d3df6SStefano Zampini }
16393fc34f97SStefano Zampini for (i = 0; i < nd; i++) schur_factor[cum + i] = PetscSqrtReal(PetscRealPart(data[(i + size_active_schur) * (size_schur + 1)]));
1640791bdc09SStefano Zampini S_all_inv->ops->solve = M->ops->solve;
1641791bdc09SStefano Zampini S_all_inv->ops->matsolve = M->ops->matsolve;
1642791bdc09SStefano Zampini S_all_inv->ops->solvetranspose = M->ops->solvetranspose;
1643791bdc09SStefano Zampini S_all_inv->ops->matsolvetranspose = M->ops->matsolvetranspose;
1644791bdc09SStefano Zampini S_all_inv->factortype = MAT_FACTOR_CHOLESKY;
16459566063dSJacob Faibussowitsch PetscCall(MatSeqDenseInvertFactors_Private(M));
16463fc34f97SStefano Zampini /* move back just the active dofs to the Schur complement */
164748a46eb9SPierre Jolivet for (i = 0; i < size_active_schur; i++) PetscCall(PetscArraycpy(data + i * size_schur, tdata + i * news, size_active_schur));
16489566063dSJacob Faibussowitsch PetscCall(PetscFree(tdata));
16499566063dSJacob Faibussowitsch PetscCall(MatDestroy(&M));
16503fc34f97SStefano Zampini } else { /* we can factorize and invert just the activedofs */
16513fc34f97SStefano Zampini Mat M;
16525002105bSStefano Zampini PetscScalar *aux;
16533fc34f97SStefano Zampini
16549566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nd, &aux));
16555002105bSStefano Zampini for (i = 0; i < nd; i++) aux[i] = 1.0 / data[(i + size_active_schur) * (size_schur + 1)];
16569566063dSJacob Faibussowitsch PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, size_active_schur, size_active_schur, data, &M));
16579566063dSJacob Faibussowitsch PetscCall(MatDenseSetLDA(M, size_schur));
16589566063dSJacob Faibussowitsch PetscCall(MatSetOption(M, MAT_SPD, PETSC_TRUE));
16599566063dSJacob Faibussowitsch PetscCall(MatCholeskyFactor(M, NULL, NULL));
16609566063dSJacob Faibussowitsch PetscCall(MatSeqDenseInvertFactors_Private(M));
16619566063dSJacob Faibussowitsch PetscCall(MatDestroy(&M));
16629566063dSJacob Faibussowitsch PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, size_schur, nd, data + size_active_schur * size_schur, &M));
16639566063dSJacob Faibussowitsch PetscCall(MatZeroEntries(M));
16649566063dSJacob Faibussowitsch PetscCall(MatDestroy(&M));
16659566063dSJacob Faibussowitsch PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, nd, size_schur, data + size_active_schur, &M));
16669566063dSJacob Faibussowitsch PetscCall(MatDenseSetLDA(M, size_schur));
16679566063dSJacob Faibussowitsch PetscCall(MatZeroEntries(M));
16689566063dSJacob Faibussowitsch PetscCall(MatDestroy(&M));
16695002105bSStefano Zampini for (i = 0; i < nd; i++) data[(i + size_active_schur) * (size_schur + 1)] = aux[i];
16709566063dSJacob Faibussowitsch PetscCall(PetscFree(aux));
16713fc34f97SStefano Zampini }
16729566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreArray(S_all_inv, &data));
16733fc34f97SStefano Zampini } else { /* use MatFactor calls to invert S */
16749566063dSJacob Faibussowitsch PetscCall(MatFactorInvertSchurComplement(F));
16759566063dSJacob Faibussowitsch PetscCall(MatFactorGetSchurComplement(F, &S_all_inv, NULL));
1676683d3df6SStefano Zampini }
167732fe681dSStefano Zampini } else if (!sub_schurs->gdsw) { /* we need to invert explicitly since we are not using MatFactor for S */
1678791bdc09SStefano Zampini if (multi_element) {
1679791bdc09SStefano Zampini PetscCall(MatDuplicate(S_all, MAT_DO_NOT_COPY_VALUES, &S_all_inv));
1680791bdc09SStefano Zampini PetscCall(MatSetOption(S_all_inv, MAT_ROW_ORIENTED, sub_schurs->is_hermitian));
1681791bdc09SStefano Zampini for (PetscInt sub = 0; sub < n_local_subs; sub++) {
1682791bdc09SStefano Zampini const PetscScalar *vals;
1683791bdc09SStefano Zampini const PetscInt *idxs;
1684791bdc09SStefano Zampini PetscInt size_schur_sub;
1685791bdc09SStefano Zampini Mat M;
1686791bdc09SStefano Zampini
1687791bdc09SStefano Zampini PetscCall(MatCreateSubMatrix(S_all, is_sub_schur[sub], is_sub_schur[sub], MAT_INITIAL_MATRIX, &M));
1688791bdc09SStefano Zampini PetscCall(MatConvert(M, MATDENSE, MAT_INPLACE_MATRIX, &M));
1689791bdc09SStefano Zampini PetscCall(MatSetOption(M, MAT_SPD, sub_schurs->is_posdef));
1690791bdc09SStefano Zampini PetscCall(MatSetOption(M, MAT_HERMITIAN, sub_schurs->is_hermitian));
1691791bdc09SStefano Zampini switch (sub_schurs->mat_factor_type) {
1692791bdc09SStefano Zampini case MAT_FACTOR_CHOLESKY:
1693791bdc09SStefano Zampini PetscCall(MatCholeskyFactor(M, NULL, NULL));
1694791bdc09SStefano Zampini break;
1695791bdc09SStefano Zampini case MAT_FACTOR_LU:
1696791bdc09SStefano Zampini PetscCall(MatLUFactor(M, NULL, NULL, NULL));
1697791bdc09SStefano Zampini break;
1698791bdc09SStefano Zampini default:
1699791bdc09SStefano Zampini SETERRQ(PetscObjectComm((PetscObject)F), PETSC_ERR_SUP, "Unsupported factor type %s", MatFactorTypes[sub_schurs->mat_factor_type]);
1700791bdc09SStefano Zampini }
1701791bdc09SStefano Zampini PetscCall(MatSeqDenseInvertFactors_Private(M));
1702791bdc09SStefano Zampini PetscCall(MatDenseGetArrayRead(M, &vals));
1703791bdc09SStefano Zampini PetscCall(ISGetLocalSize(is_sub_schur[sub], &size_schur_sub));
1704791bdc09SStefano Zampini PetscCall(ISGetIndices(is_sub_schur[sub], &idxs));
1705791bdc09SStefano Zampini PetscCall(MatSetValues(S_all_inv, size_schur_sub, idxs, size_schur_sub, idxs, vals, INSERT_VALUES));
1706791bdc09SStefano Zampini PetscCall(ISRestoreIndices(is_sub_schur[sub], &idxs));
1707791bdc09SStefano Zampini PetscCall(MatDenseRestoreArrayRead(M, &vals));
1708791bdc09SStefano Zampini PetscCall(MatDestroy(&M));
1709791bdc09SStefano Zampini }
1710791bdc09SStefano Zampini PetscCall(MatAssemblyBegin(S_all_inv, MAT_FINAL_ASSEMBLY));
1711791bdc09SStefano Zampini PetscCall(MatAssemblyEnd(S_all_inv, MAT_FINAL_ASSEMBLY));
1712791bdc09SStefano Zampini } else {
17139566063dSJacob Faibussowitsch PetscCall(PetscObjectReference((PetscObject)S_all));
1714683d3df6SStefano Zampini S_all_inv = S_all;
17159566063dSJacob Faibussowitsch PetscCall(MatDenseGetArray(S_all_inv, &S_data));
17169566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(size_schur, &B_N));
17179566063dSJacob Faibussowitsch PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
1718f4f7d9d6SStefano Zampini if (use_potr) {
1719792fecdfSBarry Smith PetscCallBLAS("LAPACKpotrf", LAPACKpotrf_("L", &B_N, S_data, &B_N, &B_ierr));
1720835f2295SStefano Zampini PetscCheck(!B_ierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in POTRF Lapack routine %" PetscBLASInt_FMT, B_ierr);
1721792fecdfSBarry Smith PetscCallBLAS("LAPACKpotri", LAPACKpotri_("L", &B_N, S_data, &B_N, &B_ierr));
1722835f2295SStefano Zampini PetscCheck(!B_ierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in POTRI Lapack routine %" PetscBLASInt_FMT, B_ierr);
1723f4f7d9d6SStefano Zampini } else if (use_sytr) {
1724792fecdfSBarry Smith PetscCallBLAS("LAPACKsytrf", LAPACKsytrf_("L", &B_N, S_data, &B_N, pivots, Bwork, &B_lwork, &B_ierr));
1725835f2295SStefano Zampini PetscCheck(!B_ierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in SYTRF Lapack routine %" PetscBLASInt_FMT, B_ierr);
1726792fecdfSBarry Smith PetscCallBLAS("LAPACKsytri", LAPACKsytri_("L", &B_N, S_data, &B_N, pivots, Bwork, &B_ierr));
1727835f2295SStefano Zampini PetscCheck(!B_ierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in SYTRI Lapack routine %" PetscBLASInt_FMT, B_ierr);
1728d6462365SStefano Zampini } else {
1729792fecdfSBarry Smith PetscCallBLAS("LAPACKgetrf", LAPACKgetrf_(&B_N, &B_N, S_data, &B_N, pivots, &B_ierr));
1730835f2295SStefano Zampini PetscCheck(!B_ierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in GETRF Lapack routine %" PetscBLASInt_FMT, B_ierr);
1731792fecdfSBarry Smith PetscCallBLAS("LAPACKgetri", LAPACKgetri_(&B_N, S_data, &B_N, pivots, Bwork, &B_lwork, &B_ierr));
1732835f2295SStefano Zampini PetscCheck(!B_ierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in GETRI Lapack routine %" PetscBLASInt_FMT, B_ierr);
1733be83ff47SStefano Zampini }
17349566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(1.0 * size_schur * size_schur * size_schur));
17359566063dSJacob Faibussowitsch PetscCall(PetscFPTrapPop());
17369566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreArray(S_all_inv, &S_data));
1737791bdc09SStefano Zampini }
173832fe681dSStefano Zampini } else if (sub_schurs->gdsw) {
173932fe681dSStefano Zampini Mat tS, tX, SEj, S_II, S_IE, S_EE;
174032fe681dSStefano Zampini KSP pS_II;
174132fe681dSStefano Zampini PC pS_II_pc;
174232fe681dSStefano Zampini IS EE, II;
174332fe681dSStefano Zampini PetscInt nS;
174432fe681dSStefano Zampini
174532fe681dSStefano Zampini PetscCall(MatFactorCreateSchurComplement(F, &tS, NULL));
174632fe681dSStefano Zampini PetscCall(MatGetSize(tS, &nS, NULL));
174732fe681dSStefano Zampini PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all, &SEjinv_arr));
174832fe681dSStefano Zampini for (i = 0, cum = 0; i < sub_schurs->n_subs; i++) { /* naive implementation */
174932fe681dSStefano Zampini PetscCall(ISGetLocalSize(sub_schurs->is_subs[i], &subset_size));
175032fe681dSStefano Zampini PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, subset_size, subset_size, SEjinv_arr, &SEj));
175132fe681dSStefano Zampini
175232fe681dSStefano Zampini PetscCall(ISCreateStride(PETSC_COMM_SELF, subset_size, cum, 1, &EE));
175332fe681dSStefano Zampini PetscCall(ISComplement(EE, 0, nS, &II));
175432fe681dSStefano Zampini PetscCall(MatCreateSubMatrix(tS, II, II, MAT_INITIAL_MATRIX, &S_II));
175532fe681dSStefano Zampini PetscCall(MatCreateSubMatrix(tS, II, EE, MAT_INITIAL_MATRIX, &S_IE));
175632fe681dSStefano Zampini PetscCall(MatCreateSubMatrix(tS, EE, EE, MAT_INITIAL_MATRIX, &S_EE));
175732fe681dSStefano Zampini PetscCall(ISDestroy(&II));
175832fe681dSStefano Zampini PetscCall(ISDestroy(&EE));
175932fe681dSStefano Zampini
176032fe681dSStefano Zampini PetscCall(KSPCreate(PETSC_COMM_SELF, &pS_II));
17613821be0aSBarry Smith PetscCall(KSPSetNestLevel(pS_II, 1)); /* do not have direct access to a PC to provide the level of nesting of the KSP */
176232fe681dSStefano Zampini PetscCall(KSPSetType(pS_II, KSPPREONLY));
176332fe681dSStefano Zampini PetscCall(KSPGetPC(pS_II, &pS_II_pc));
176432fe681dSStefano Zampini PetscCall(PCSetType(pS_II_pc, PCSVD));
176532fe681dSStefano Zampini PetscCall(KSPSetOptionsPrefix(pS_II, sub_schurs->prefix));
176632fe681dSStefano Zampini PetscCall(KSPAppendOptionsPrefix(pS_II, "pseudo_"));
176732fe681dSStefano Zampini PetscCall(KSPSetOperators(pS_II, S_II, S_II));
176832fe681dSStefano Zampini PetscCall(MatDestroy(&S_II));
176932fe681dSStefano Zampini PetscCall(KSPSetFromOptions(pS_II));
177032fe681dSStefano Zampini PetscCall(KSPSetUp(pS_II));
177132fe681dSStefano Zampini PetscCall(MatDuplicate(S_IE, MAT_DO_NOT_COPY_VALUES, &tX));
177232fe681dSStefano Zampini PetscCall(KSPMatSolve(pS_II, S_IE, tX));
177332fe681dSStefano Zampini PetscCall(KSPDestroy(&pS_II));
177432fe681dSStefano Zampini
1775fb842aefSJose E. Roman PetscCall(MatTransposeMatMult(S_IE, tX, MAT_REUSE_MATRIX, PETSC_DETERMINE, &SEj));
177632fe681dSStefano Zampini PetscCall(MatDestroy(&S_IE));
177732fe681dSStefano Zampini PetscCall(MatDestroy(&tX));
177832fe681dSStefano Zampini PetscCall(MatAYPX(SEj, -1, S_EE, SAME_NONZERO_PATTERN));
177932fe681dSStefano Zampini PetscCall(MatDestroy(&S_EE));
178032fe681dSStefano Zampini
178132fe681dSStefano Zampini PetscCall(MatDestroy(&SEj));
178232fe681dSStefano Zampini cum += subset_size;
178332fe681dSStefano Zampini SEjinv_arr += subset_size * subset_size;
178432fe681dSStefano Zampini }
178532fe681dSStefano Zampini PetscCall(MatDestroy(&tS));
178632fe681dSStefano Zampini PetscCall(MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all, &SEjinv_arr));
1787be83ff47SStefano Zampini }
1788be83ff47SStefano Zampini /* S_Ej_tilda_all */
1789be83ff47SStefano Zampini cum = cum2 = 0;
179032fe681dSStefano Zampini rS_data = NULL;
1791791bdc09SStefano Zampini if (S_all_inv && !multi_element) PetscCall(MatDenseGetArrayRead(S_all_inv, &rS_data));
179232fe681dSStefano Zampini PetscCall(MatSeqAIJGetArrayWrite(sub_schurs->sum_S_Ej_tilda_all, &SEjinv_arr));
1793be83ff47SStefano Zampini for (i = 0; i < sub_schurs->n_subs; i++) {
1794be83ff47SStefano Zampini PetscInt j;
1795862806e4SStefano Zampini
17969566063dSJacob Faibussowitsch PetscCall(ISGetLocalSize(sub_schurs->is_subs[i], &subset_size));
1797be83ff47SStefano Zampini /* get (St^-1)_E */
179872b8c272SStefano Zampini /* Unless we are changing the variables, I don't need to expand to upper triangular since St^-1
179906a4e24aSStefano Zampini will be properly accessed later during adaptive selection */
1800791bdc09SStefano Zampini if (multi_element) { /* transpose copy to workspace */
1801791bdc09SStefano Zampini // XXX CSR directly?
1802791bdc09SStefano Zampini for (PetscInt j = 0; j < subset_size; j++) idx_work[j] = cum + j;
1803791bdc09SStefano Zampini PetscCall(MatGetValues(S_all_inv, subset_size, idx_work, subset_size, idx_work, work));
1804791bdc09SStefano Zampini if (!sub_schurs->is_hermitian) {
1805791bdc09SStefano Zampini for (PetscInt k = 0; k < subset_size; k++) {
1806791bdc09SStefano Zampini for (PetscInt j = k; j < subset_size; j++) {
1807791bdc09SStefano Zampini PetscScalar t = work[k * subset_size + j];
1808791bdc09SStefano Zampini work[k * subset_size + j] = work[j * subset_size + k];
1809791bdc09SStefano Zampini work[j * subset_size + k] = t;
1810791bdc09SStefano Zampini }
1811791bdc09SStefano Zampini }
1812791bdc09SStefano Zampini }
1813791bdc09SStefano Zampini } else if (rS_data) {
1814a0b0af32SStefano Zampini if (S_lower_triangular) {
1815be83ff47SStefano Zampini PetscInt k;
1816b7ab4a40SStefano Zampini if (sub_schurs->change) {
1817be83ff47SStefano Zampini for (k = 0; k < subset_size; k++) {
1818be83ff47SStefano Zampini for (j = k; j < subset_size; j++) {
18191683a169SBarry Smith work[k * subset_size + j] = rS_data[cum2 + k * size_schur + j];
18206c3e6151SStefano Zampini work[j * subset_size + k] = work[k * subset_size + j];
1821be83ff47SStefano Zampini }
1822be83ff47SStefano Zampini }
182372b8c272SStefano Zampini } else {
182472b8c272SStefano Zampini for (k = 0; k < subset_size; k++) {
1825ad540459SPierre Jolivet for (j = k; j < subset_size; j++) work[k * subset_size + j] = rS_data[cum2 + k * size_schur + j];
182672b8c272SStefano Zampini }
182772b8c272SStefano Zampini }
182872b8c272SStefano Zampini } else {
1829be83ff47SStefano Zampini PetscInt k;
1830be83ff47SStefano Zampini for (k = 0; k < subset_size; k++) {
1831ad540459SPierre Jolivet for (j = 0; j < subset_size; j++) work[k * subset_size + j] = rS_data[cum2 + k * size_schur + j];
1832be83ff47SStefano Zampini }
1833be83ff47SStefano Zampini }
183432fe681dSStefano Zampini }
1835b7ab4a40SStefano Zampini if (sub_schurs->change) {
18368760537fSStefano Zampini Mat change_sub, SEj, T;
183732fe681dSStefano Zampini PetscScalar val = sub_schurs->gdsw ? PETSC_SMALL : 1. / PETSC_SMALL;
183872b8c272SStefano Zampini
183972b8c272SStefano Zampini /* change basis */
18409566063dSJacob Faibussowitsch PetscCall(KSPGetOperators(sub_schurs->change[i], &change_sub, NULL));
1841791bdc09SStefano Zampini PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, subset_size, subset_size, (rS_data || multi_element) ? work : SEjinv_arr, &SEj));
18428760537fSStefano Zampini if (!sub_schurs->change_with_qr) { /* currently there's no support for PtAP with P SeqAIJ */
18438760537fSStefano Zampini Mat T2;
18449566063dSJacob Faibussowitsch PetscCall(MatTransposeMatMult(change_sub, SEj, MAT_INITIAL_MATRIX, 1.0, &T2));
18459566063dSJacob Faibussowitsch PetscCall(MatMatMult(T2, change_sub, MAT_INITIAL_MATRIX, 1.0, &T));
18469566063dSJacob Faibussowitsch PetscCall(MatDestroy(&T2));
18479566063dSJacob Faibussowitsch PetscCall(MatConvert(T, MATSEQDENSE, MAT_INPLACE_MATRIX, &T));
18488760537fSStefano Zampini } else {
18499566063dSJacob Faibussowitsch PetscCall(MatPtAP(SEj, change_sub, MAT_INITIAL_MATRIX, 1.0, &T));
185072b8c272SStefano Zampini }
18519566063dSJacob Faibussowitsch PetscCall(MatCopy(T, SEj, SAME_NONZERO_PATTERN));
18529566063dSJacob Faibussowitsch PetscCall(MatDestroy(&T));
185332fe681dSStefano Zampini PetscCall(MatZeroRowsColumnsIS(SEj, sub_schurs->change_primal_sub[i], val, NULL, NULL));
18549566063dSJacob Faibussowitsch PetscCall(MatDestroy(&SEj));
185572b8c272SStefano Zampini }
1856791bdc09SStefano Zampini if (rS_data || multi_element) PetscCall(PetscArraycpy(SEjinv_arr, work, subset_size * subset_size));
1857be83ff47SStefano Zampini cum += subset_size;
1858be83ff47SStefano Zampini cum2 += subset_size * (size_schur + 1);
185904c5b2e6SStefano Zampini SEjinv_arr += subset_size * subset_size;
1860883469d8SStefano Zampini }
186132fe681dSStefano Zampini PetscCall(MatSeqAIJRestoreArrayWrite(sub_schurs->sum_S_Ej_tilda_all, &SEjinv_arr));
186232fe681dSStefano Zampini if (S_all_inv) {
1863791bdc09SStefano Zampini if (!multi_element) PetscCall(MatDenseRestoreArrayRead(S_all_inv, &rS_data));
1864df4d28bfSStefano Zampini if (solver_S) {
18653fc34f97SStefano Zampini if (schur_has_vertices) {
18669566063dSJacob Faibussowitsch PetscCall(MatFactorRestoreSchurComplement(F, &S_all_inv, MAT_FACTOR_SCHUR_FACTORED));
18673fc34f97SStefano Zampini } else {
18689566063dSJacob Faibussowitsch PetscCall(MatFactorRestoreSchurComplement(F, &S_all_inv, MAT_FACTOR_SCHUR_INVERTED));
18695db18549SStefano Zampini }
18703fc34f97SStefano Zampini }
187132fe681dSStefano Zampini }
18729566063dSJacob Faibussowitsch PetscCall(MatDestroy(&S_all_inv));
1873683d3df6SStefano Zampini }
1874683d3df6SStefano Zampini
18753fc34f97SStefano Zampini /* move back factors if needed */
187632fe681dSStefano Zampini if (schur_has_vertices && factor_workaround && !sub_schurs->gdsw) {
1877683d3df6SStefano Zampini Mat S_tmp;
18783fc34f97SStefano Zampini PetscInt nd = 0;
1879683d3df6SStefano Zampini PetscScalar *data;
1880683d3df6SStefano Zampini
18810fdf79fbSJacob Faibussowitsch PetscCheck(use_potr, PETSC_COMM_SELF, PETSC_ERR_SUP, "Factor update not yet implemented for non SPD matrices");
18820fdf79fbSJacob Faibussowitsch PetscCheck(solver_S, PETSC_COMM_SELF, PETSC_ERR_PLIB, "This should not happen");
18830fdf79fbSJacob Faibussowitsch PetscCall(MatFactorGetSchurComplement(F, &S_tmp, NULL));
18849566063dSJacob Faibussowitsch PetscCall(MatDenseGetArray(S_tmp, &data));
18859566063dSJacob Faibussowitsch PetscCall(PetscArrayzero(data, size_schur * size_schur));
1886683d3df6SStefano Zampini
1887683d3df6SStefano Zampini if (S_lower_triangular) {
1888683d3df6SStefano Zampini cum = 0;
1889683d3df6SStefano Zampini for (i = 0; i < size_active_schur; i++) {
18909566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(data + i * (size_schur + 1), schur_factor + cum, size_active_schur - i));
1891683d3df6SStefano Zampini cum += size_active_schur - i;
1892683d3df6SStefano Zampini }
1893683d3df6SStefano Zampini } else {
18949566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(data, schur_factor, size_schur * size_schur));
1895683d3df6SStefano Zampini }
1896683d3df6SStefano Zampini if (sub_schurs->is_dir) {
18979566063dSJacob Faibussowitsch PetscCall(ISGetLocalSize(sub_schurs->is_dir, &nd));
1898ad540459SPierre Jolivet for (i = 0; i < nd; i++) data[(i + size_active_schur) * (size_schur + 1)] = schur_factor[cum + i];
1899683d3df6SStefano Zampini }
19006dba178dSStefano Zampini /* workaround: since I cannot modify the matrices used inside the solvers for the forward and backward substitutions,
1901683d3df6SStefano Zampini set the diagonal entry of the Schur factor to a very large value */
1902ad540459SPierre Jolivet for (i = size_active_schur + nd; i < size_schur; i++) data[i * (size_schur + 1)] = infty;
19039566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreArray(S_tmp, &data));
19049566063dSJacob Faibussowitsch PetscCall(MatFactorRestoreSchurComplement(F, &S_tmp, MAT_FACTOR_SCHUR_FACTORED));
19059087bf02SStefano Zampini }
190632fe681dSStefano Zampini } else if (factor_workaround && !sub_schurs->gdsw) { /* we need to eliminate any unneeded coupling */
1907367aa537SStefano Zampini PetscScalar *data;
1908367aa537SStefano Zampini PetscInt nd = 0;
1909367aa537SStefano Zampini
1910367aa537SStefano Zampini if (sub_schurs->is_dir) { /* dirichlet dofs could have different scalings */
19119566063dSJacob Faibussowitsch PetscCall(ISGetLocalSize(sub_schurs->is_dir, &nd));
1912367aa537SStefano Zampini }
19139566063dSJacob Faibussowitsch PetscCall(MatFactorGetSchurComplement(F, &S_all, NULL));
19149566063dSJacob Faibussowitsch PetscCall(MatDenseGetArray(S_all, &data));
191548a46eb9SPierre Jolivet for (i = 0; i < size_active_schur; i++) PetscCall(PetscArrayzero(data + i * size_schur + size_active_schur, size_schur - size_active_schur));
1916367aa537SStefano Zampini for (i = size_active_schur + nd; i < size_schur; i++) {
19179566063dSJacob Faibussowitsch PetscCall(PetscArrayzero(data + i * size_schur + size_active_schur, size_schur - size_active_schur));
19186c3e6151SStefano Zampini data[i * (size_schur + 1)] = infty;
1919367aa537SStefano Zampini }
19209566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreArray(S_all, &data));
19219566063dSJacob Faibussowitsch PetscCall(MatFactorRestoreSchurComplement(F, &S_all, MAT_FACTOR_SCHUR_UNFACTORED));
19224a6c6b0dSStefano Zampini }
1923791bdc09SStefano Zampini PetscCall(PetscFree(idx_work));
19249566063dSJacob Faibussowitsch PetscCall(PetscFree(work));
19259566063dSJacob Faibussowitsch PetscCall(PetscFree(schur_factor));
19269566063dSJacob Faibussowitsch PetscCall(VecDestroy(&Dall));
1927791bdc09SStefano Zampini PetscCall(ISDestroy(&is_schur));
1928791bdc09SStefano Zampini if (multi_element) {
1929791bdc09SStefano Zampini for (PetscInt sub = 0; sub < n_local_subs; sub++) {
1930791bdc09SStefano Zampini PetscCall(ISDestroy(&is_sub_all[sub]));
1931791bdc09SStefano Zampini PetscCall(ISDestroy(&is_sub_schur_all[sub]));
1932791bdc09SStefano Zampini PetscCall(ISDestroy(&is_sub_schur[sub]));
1933791bdc09SStefano Zampini }
1934791bdc09SStefano Zampini PetscCall(PetscFree3(is_sub_all, is_sub_schur_all, is_sub_schur));
1935791bdc09SStefano Zampini }
19364a6c6b0dSStefano Zampini }
19379566063dSJacob Faibussowitsch PetscCall(ISDestroy(&is_I_layer));
19389566063dSJacob Faibussowitsch PetscCall(MatDestroy(&S_all));
19399566063dSJacob Faibussowitsch PetscCall(MatDestroy(&A_BB));
19409566063dSJacob Faibussowitsch PetscCall(MatDestroy(&A_IB));
19419566063dSJacob Faibussowitsch PetscCall(MatDestroy(&A_BI));
19429566063dSJacob Faibussowitsch PetscCall(MatDestroy(&F));
19436afe12f5SStefano Zampini
19449566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(sub_schurs->S_Ej_all, MAT_FINAL_ASSEMBLY));
19459566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(sub_schurs->S_Ej_all, MAT_FINAL_ASSEMBLY));
19465a95e1ceSStefano Zampini if (compute_Stilda) {
19479566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(sub_schurs->sum_S_Ej_tilda_all, MAT_FINAL_ASSEMBLY));
19489566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(sub_schurs->sum_S_Ej_tilda_all, MAT_FINAL_ASSEMBLY));
19499d54b7f4SStefano Zampini if (deluxe) {
19509566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(sub_schurs->sum_S_Ej_inv_all, MAT_FINAL_ASSEMBLY));
19519566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(sub_schurs->sum_S_Ej_inv_all, MAT_FINAL_ASSEMBLY));
195208122e43SStefano Zampini }
19539d54b7f4SStefano Zampini }
19546afe12f5SStefano Zampini
19555db18549SStefano Zampini /* Get local part of (\sum_j S_Ej) */
195648a46eb9SPierre Jolivet if (!sub_schurs->sum_S_Ej_all) PetscCall(MatDuplicate(sub_schurs->S_Ej_all, MAT_DO_NOT_COPY_VALUES, &sub_schurs->sum_S_Ej_all));
19579566063dSJacob Faibussowitsch PetscCall(VecSet(gstash, 0.0));
19589566063dSJacob Faibussowitsch PetscCall(MatSeqAIJGetArray(sub_schurs->S_Ej_all, &stasharray));
19599566063dSJacob Faibussowitsch PetscCall(VecPlaceArray(lstash, stasharray));
19609566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(sstash, lstash, gstash, ADD_VALUES, SCATTER_FORWARD));
19619566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(sstash, lstash, gstash, ADD_VALUES, SCATTER_FORWARD));
19629566063dSJacob Faibussowitsch PetscCall(MatSeqAIJRestoreArray(sub_schurs->S_Ej_all, &stasharray));
19639566063dSJacob Faibussowitsch PetscCall(VecResetArray(lstash));
19649566063dSJacob Faibussowitsch PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_all, &stasharray));
19659566063dSJacob Faibussowitsch PetscCall(VecPlaceArray(lstash, stasharray));
19669566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(sstash, gstash, lstash, INSERT_VALUES, SCATTER_REVERSE));
19679566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(sstash, gstash, lstash, INSERT_VALUES, SCATTER_REVERSE));
19689566063dSJacob Faibussowitsch PetscCall(MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_all, &stasharray));
19699566063dSJacob Faibussowitsch PetscCall(VecResetArray(lstash));
197008122e43SStefano Zampini
1971f6f667cfSStefano Zampini /* Get local part of (\sum_j S^-1_Ej) (\sum_j St^-1_Ej) */
19725a95e1ceSStefano Zampini if (compute_Stilda) {
19739566063dSJacob Faibussowitsch PetscCall(VecSet(gstash, 0.0));
19749566063dSJacob Faibussowitsch PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all, &stasharray));
19759566063dSJacob Faibussowitsch PetscCall(VecPlaceArray(lstash, stasharray));
19769566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(sstash, lstash, gstash, ADD_VALUES, SCATTER_FORWARD));
19779566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(sstash, lstash, gstash, ADD_VALUES, SCATTER_FORWARD));
19789566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(sstash, gstash, lstash, INSERT_VALUES, SCATTER_REVERSE));
19799566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(sstash, gstash, lstash, INSERT_VALUES, SCATTER_REVERSE));
19809566063dSJacob Faibussowitsch PetscCall(MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all, &stasharray));
19819566063dSJacob Faibussowitsch PetscCall(VecResetArray(lstash));
19829d54b7f4SStefano Zampini if (deluxe) {
19839566063dSJacob Faibussowitsch PetscCall(VecSet(gstash, 0.0));
19849566063dSJacob Faibussowitsch PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all, &stasharray));
19859566063dSJacob Faibussowitsch PetscCall(VecPlaceArray(lstash, stasharray));
19869566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(sstash, lstash, gstash, ADD_VALUES, SCATTER_FORWARD));
19879566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(sstash, lstash, gstash, ADD_VALUES, SCATTER_FORWARD));
19889566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(sstash, gstash, lstash, INSERT_VALUES, SCATTER_REVERSE));
19899566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(sstash, gstash, lstash, INSERT_VALUES, SCATTER_REVERSE));
19909566063dSJacob Faibussowitsch PetscCall(MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all, &stasharray));
19919566063dSJacob Faibussowitsch PetscCall(VecResetArray(lstash));
199232fe681dSStefano Zampini } else if (!sub_schurs->gdsw) {
19939d54b7f4SStefano Zampini PetscScalar *array;
19949d54b7f4SStefano Zampini PetscInt cum;
19959d54b7f4SStefano Zampini
19969566063dSJacob Faibussowitsch PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all, &array));
19979d54b7f4SStefano Zampini cum = 0;
19989d54b7f4SStefano Zampini for (i = 0; i < sub_schurs->n_subs; i++) {
19999566063dSJacob Faibussowitsch PetscCall(ISGetLocalSize(sub_schurs->is_subs[i], &subset_size));
20009566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(subset_size, &B_N));
20019566063dSJacob Faibussowitsch PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
2002f4f7d9d6SStefano Zampini if (use_potr) {
2003792fecdfSBarry Smith PetscCallBLAS("LAPACKpotrf", LAPACKpotrf_("L", &B_N, array + cum, &B_N, &B_ierr));
2004835f2295SStefano Zampini PetscCheck(!B_ierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in POTRF Lapack routine %" PetscBLASInt_FMT, B_ierr);
2005792fecdfSBarry Smith PetscCallBLAS("LAPACKpotri", LAPACKpotri_("L", &B_N, array + cum, &B_N, &B_ierr));
2006835f2295SStefano Zampini PetscCheck(!B_ierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in POTRI Lapack routine %" PetscBLASInt_FMT, B_ierr);
2007f4f7d9d6SStefano Zampini } else if (use_sytr) {
2008792fecdfSBarry Smith PetscCallBLAS("LAPACKsytrf", LAPACKsytrf_("L", &B_N, array + cum, &B_N, pivots, Bwork, &B_lwork, &B_ierr));
2009835f2295SStefano Zampini PetscCheck(!B_ierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in SYTRF Lapack routine %" PetscBLASInt_FMT, B_ierr);
2010792fecdfSBarry Smith PetscCallBLAS("LAPACKsytri", LAPACKsytri_("L", &B_N, array + cum, &B_N, pivots, Bwork, &B_ierr));
2011835f2295SStefano Zampini PetscCheck(!B_ierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in SYTRI Lapack routine %" PetscBLASInt_FMT, B_ierr);
2012f4f7d9d6SStefano Zampini } else {
2013792fecdfSBarry Smith PetscCallBLAS("LAPACKgetrf", LAPACKgetrf_(&B_N, &B_N, array + cum, &B_N, pivots, &B_ierr));
2014835f2295SStefano Zampini PetscCheck(!B_ierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in GETRF Lapack routine %" PetscBLASInt_FMT, B_ierr);
2015792fecdfSBarry Smith PetscCallBLAS("LAPACKgetri", LAPACKgetri_(&B_N, array + cum, &B_N, pivots, Bwork, &B_lwork, &B_ierr));
2016835f2295SStefano Zampini PetscCheck(!B_ierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in GETRI Lapack routine %" PetscBLASInt_FMT, B_ierr);
2017f4f7d9d6SStefano Zampini }
20189566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(1.0 * subset_size * subset_size * subset_size));
20199566063dSJacob Faibussowitsch PetscCall(PetscFPTrapPop());
20209d54b7f4SStefano Zampini cum += subset_size * subset_size;
20219d54b7f4SStefano Zampini }
20229566063dSJacob Faibussowitsch PetscCall(MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all, &array));
20239566063dSJacob Faibussowitsch PetscCall(PetscObjectReference((PetscObject)sub_schurs->sum_S_Ej_all));
20249566063dSJacob Faibussowitsch PetscCall(MatDestroy(&sub_schurs->sum_S_Ej_inv_all));
20259d54b7f4SStefano Zampini sub_schurs->sum_S_Ej_inv_all = sub_schurs->sum_S_Ej_all;
20269d54b7f4SStefano Zampini }
202708122e43SStefano Zampini }
20289566063dSJacob Faibussowitsch PetscCall(VecDestroy(&lstash));
20299566063dSJacob Faibussowitsch PetscCall(VecDestroy(&gstash));
20309566063dSJacob Faibussowitsch PetscCall(VecScatterDestroy(&sstash));
203157a87bf3SStefano Zampini
20323b03f7bbSStefano Zampini if (matl_dbg_viewer) {
203311955456SStefano Zampini if (sub_schurs->S_Ej_all) {
20349566063dSJacob Faibussowitsch PetscCall(PetscObjectSetName((PetscObject)sub_schurs->S_Ej_all, "SE"));
20359566063dSJacob Faibussowitsch PetscCall(MatView(sub_schurs->S_Ej_all, matl_dbg_viewer));
203611955456SStefano Zampini }
203711955456SStefano Zampini if (sub_schurs->sum_S_Ej_all) {
20389566063dSJacob Faibussowitsch PetscCall(PetscObjectSetName((PetscObject)sub_schurs->sum_S_Ej_all, "SSE"));
20399566063dSJacob Faibussowitsch PetscCall(MatView(sub_schurs->sum_S_Ej_all, matl_dbg_viewer));
204011955456SStefano Zampini }
204111955456SStefano Zampini if (sub_schurs->sum_S_Ej_inv_all) {
20429566063dSJacob Faibussowitsch PetscCall(PetscObjectSetName((PetscObject)sub_schurs->sum_S_Ej_inv_all, "SSEm"));
20439566063dSJacob Faibussowitsch PetscCall(MatView(sub_schurs->sum_S_Ej_inv_all, matl_dbg_viewer));
204411955456SStefano Zampini }
204511955456SStefano Zampini if (sub_schurs->sum_S_Ej_tilda_all) {
20469566063dSJacob Faibussowitsch PetscCall(PetscObjectSetName((PetscObject)sub_schurs->sum_S_Ej_tilda_all, "SSEt"));
20479566063dSJacob Faibussowitsch PetscCall(MatView(sub_schurs->sum_S_Ej_tilda_all, matl_dbg_viewer));
204811955456SStefano Zampini }
204911955456SStefano Zampini }
20503202ece2SStefano Zampini
205179329b78SStefano Zampini /* when not explicit, we need to set the factor type */
205279329b78SStefano Zampini if (sub_schurs->mat_factor_type == MAT_FACTOR_NONE) sub_schurs->mat_factor_type = sub_schurs->is_hermitian ? MAT_FACTOR_CHOLESKY : MAT_FACTOR_LU;
205379329b78SStefano Zampini
20545a95e1ceSStefano Zampini /* free workspace */
205551ab8ad6SStefano Zampini if (matl_dbg_viewer) PetscCall(PetscViewerFlush(matl_dbg_viewer));
205651ab8ad6SStefano Zampini if (sub_schurs->debug) PetscCallMPI(MPI_Barrier(comm_n));
20579566063dSJacob Faibussowitsch PetscCall(PetscViewerDestroy(&matl_dbg_viewer));
20589566063dSJacob Faibussowitsch PetscCall(PetscFree2(Bwork, pivots));
20599566063dSJacob Faibussowitsch PetscCall(PetscCommDestroy(&comm_n));
2060791bdc09SStefano Zampini PetscCall(PetscFree(all_local_subid_N));
20613ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
2062b1b3d7a2SStefano Zampini }
2063b1b3d7a2SStefano Zampini
PCBDDCSubSchursInit(PCBDDCSubSchurs sub_schurs,const char * prefix,IS is_I,IS is_B,PCBDDCGraph graph,ISLocalToGlobalMapping BtoNmap,PetscBool copycc,PetscBool gdsw)2064d71ae5a4SJacob Faibussowitsch PetscErrorCode PCBDDCSubSchursInit(PCBDDCSubSchurs sub_schurs, const char *prefix, IS is_I, IS is_B, PCBDDCGraph graph, ISLocalToGlobalMapping BtoNmap, PetscBool copycc, PetscBool gdsw)
2065d71ae5a4SJacob Faibussowitsch {
20669bb4a8caSStefano Zampini IS *faces, *edges, *all_cc, vertices;
206732fe681dSStefano Zampini PetscInt s, i, n_faces, n_edges, n_all_cc;
2068365a3a41SStefano Zampini PetscBool is_sorted, ispardiso, ismumps;
2069b1b3d7a2SStefano Zampini
2070b1b3d7a2SStefano Zampini PetscFunctionBegin;
20719566063dSJacob Faibussowitsch PetscCall(ISSorted(is_I, &is_sorted));
207228b400f6SJacob Faibussowitsch PetscCheck(is_sorted, PetscObjectComm((PetscObject)is_I), PETSC_ERR_PLIB, "IS for I dofs should be shorted");
20739566063dSJacob Faibussowitsch PetscCall(ISSorted(is_B, &is_sorted));
207428b400f6SJacob Faibussowitsch PetscCheck(is_sorted, PetscObjectComm((PetscObject)is_B), PETSC_ERR_PLIB, "IS for B dofs should be shorted");
2075b1b3d7a2SStefano Zampini
2076b1b3d7a2SStefano Zampini /* reset any previous data */
20779566063dSJacob Faibussowitsch PetscCall(PCBDDCSubSchursReset(sub_schurs));
2078b1b3d7a2SStefano Zampini
207932fe681dSStefano Zampini sub_schurs->gdsw = gdsw;
2080791bdc09SStefano Zampini sub_schurs->graph = graph;
208132fe681dSStefano Zampini
20825a95e1ceSStefano Zampini /* get index sets for faces and edges (already sorted by global ordering) */
20839566063dSJacob Faibussowitsch PetscCall(PCBDDCGraphGetCandidatesIS(graph, &n_faces, &faces, &n_edges, &edges, &vertices));
2084b1b3d7a2SStefano Zampini n_all_cc = n_faces + n_edges;
20859566063dSJacob Faibussowitsch PetscCall(PetscBTCreate(n_all_cc, &sub_schurs->is_edge));
20869566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(n_all_cc, &all_cc));
208732fe681dSStefano Zampini n_all_cc = 0;
2088b1b3d7a2SStefano Zampini for (i = 0; i < n_faces; i++) {
208932fe681dSStefano Zampini PetscCall(ISGetSize(faces[i], &s));
209032fe681dSStefano Zampini if (!s) continue;
20918b6046baSStefano Zampini if (copycc) {
209232fe681dSStefano Zampini PetscCall(ISDuplicate(faces[i], &all_cc[n_all_cc]));
20938b6046baSStefano Zampini } else {
20949566063dSJacob Faibussowitsch PetscCall(PetscObjectReference((PetscObject)faces[i]));
209532fe681dSStefano Zampini all_cc[n_all_cc] = faces[i];
2096b1b3d7a2SStefano Zampini }
209732fe681dSStefano Zampini n_all_cc++;
20988b6046baSStefano Zampini }
2099b1b3d7a2SStefano Zampini for (i = 0; i < n_edges; i++) {
210032fe681dSStefano Zampini PetscCall(ISGetSize(edges[i], &s));
210132fe681dSStefano Zampini if (!s) continue;
21028b6046baSStefano Zampini if (copycc) {
210332fe681dSStefano Zampini PetscCall(ISDuplicate(edges[i], &all_cc[n_all_cc]));
21048b6046baSStefano Zampini } else {
21059566063dSJacob Faibussowitsch PetscCall(PetscObjectReference((PetscObject)edges[i]));
210632fe681dSStefano Zampini all_cc[n_all_cc] = edges[i];
21078b6046baSStefano Zampini }
210832fe681dSStefano Zampini PetscCall(PetscBTSet(sub_schurs->is_edge, n_all_cc));
210932fe681dSStefano Zampini n_all_cc++;
2110b1b3d7a2SStefano Zampini }
21119566063dSJacob Faibussowitsch PetscCall(PetscObjectReference((PetscObject)vertices));
2112c8272957SStefano Zampini sub_schurs->is_vertices = vertices;
21139566063dSJacob Faibussowitsch PetscCall(PCBDDCGraphRestoreCandidatesIS(graph, &n_faces, &faces, &n_edges, &edges, &vertices));
2114d62866d3SStefano Zampini sub_schurs->is_dir = NULL;
21159566063dSJacob Faibussowitsch PetscCall(PCBDDCGraphGetDirichletDofsB(graph, &sub_schurs->is_dir));
2116b1b3d7a2SStefano Zampini
2117df4d28bfSStefano Zampini /* Determine if MatFactor can be used */
21189566063dSJacob Faibussowitsch PetscCall(PetscStrallocpy(prefix, &sub_schurs->prefix));
2119883469d8SStefano Zampini #if defined(PETSC_HAVE_MUMPS)
21209566063dSJacob Faibussowitsch PetscCall(PetscStrncpy(sub_schurs->mat_solver_type, MATSOLVERMUMPS, sizeof(sub_schurs->mat_solver_type)));
212188113c35SStefano Zampini #elif defined(PETSC_HAVE_MKL_PARDISO)
21229566063dSJacob Faibussowitsch PetscCall(PetscStrncpy(sub_schurs->mat_solver_type, MATSOLVERMKL_PARDISO, sizeof(sub_schurs->mat_solver_type)));
212388113c35SStefano Zampini #else
21249566063dSJacob Faibussowitsch PetscCall(PetscStrncpy(sub_schurs->mat_solver_type, MATSOLVERPETSC, sizeof(sub_schurs->mat_solver_type)));
2125df4d28bfSStefano Zampini #endif
212679329b78SStefano Zampini sub_schurs->mat_factor_type = MAT_FACTOR_NONE;
2127*fc2fb351SPierre Jolivet sub_schurs->is_hermitian = PetscDefined(USE_COMPLEX) ? PETSC_FALSE : PETSC_TRUE; /* Hermitian Cholesky is not supported by PETSc and external packages */
212888113c35SStefano Zampini sub_schurs->is_posdef = PETSC_TRUE;
212911955456SStefano Zampini sub_schurs->is_symmetric = PETSC_TRUE;
21307f9db97bSStefano Zampini sub_schurs->debug = PETSC_FALSE;
2131991c41b4SStefano Zampini sub_schurs->restrict_comm = PETSC_FALSE;
2132d0609cedSBarry Smith PetscOptionsBegin(PetscObjectComm((PetscObject)graph->l2gmap), sub_schurs->prefix, "BDDC sub_schurs options", "PC");
21339566063dSJacob Faibussowitsch PetscCall(PetscOptionsString("-sub_schurs_mat_solver_type", "Specific direct solver to use", NULL, sub_schurs->mat_solver_type, sub_schurs->mat_solver_type, sizeof(sub_schurs->mat_solver_type), NULL));
213479329b78SStefano Zampini PetscCall(PetscOptionsEnum("-sub_schurs_mat_factor_type", "Factor type to use. Use MAT_FACTOR_NONE for automatic selection", NULL, MatFactorTypes, (PetscEnum)sub_schurs->mat_factor_type, (PetscEnum *)&sub_schurs->mat_factor_type, NULL));
21359566063dSJacob Faibussowitsch PetscCall(PetscOptionsBool("-sub_schurs_symmetric", "Symmetric problem", NULL, sub_schurs->is_symmetric, &sub_schurs->is_symmetric, NULL));
21369566063dSJacob Faibussowitsch PetscCall(PetscOptionsBool("-sub_schurs_hermitian", "Hermitian problem", NULL, sub_schurs->is_hermitian, &sub_schurs->is_hermitian, NULL));
21379566063dSJacob Faibussowitsch PetscCall(PetscOptionsBool("-sub_schurs_posdef", "Positive definite problem", NULL, sub_schurs->is_posdef, &sub_schurs->is_posdef, NULL));
21389566063dSJacob Faibussowitsch PetscCall(PetscOptionsBool("-sub_schurs_restrictcomm", "Restrict communicator on active processes only", NULL, sub_schurs->restrict_comm, &sub_schurs->restrict_comm, NULL));
21399566063dSJacob Faibussowitsch PetscCall(PetscOptionsBool("-sub_schurs_debug", "Debug output", NULL, sub_schurs->debug, &sub_schurs->debug, NULL));
2140d0609cedSBarry Smith PetscOptionsEnd();
21419566063dSJacob Faibussowitsch PetscCall(PetscStrcmp(sub_schurs->mat_solver_type, MATSOLVERMUMPS, &ismumps));
21429566063dSJacob Faibussowitsch PetscCall(PetscStrcmp(sub_schurs->mat_solver_type, MATSOLVERMKL_PARDISO, &ispardiso));
2143365a3a41SStefano Zampini sub_schurs->schur_explicit = (PetscBool)(ispardiso || ismumps);
2144b1b3d7a2SStefano Zampini
2145a678f235SPierre Jolivet /* for reals, symmetric and Hermitian are synonyms */
214611955456SStefano Zampini #if !defined(PETSC_USE_COMPLEX)
214711955456SStefano Zampini sub_schurs->is_symmetric = (PetscBool)(sub_schurs->is_symmetric && sub_schurs->is_hermitian);
214811955456SStefano Zampini sub_schurs->is_hermitian = sub_schurs->is_symmetric;
214911955456SStefano Zampini #endif
215011955456SStefano Zampini
21519566063dSJacob Faibussowitsch PetscCall(PetscObjectReference((PetscObject)is_I));
2152b1b3d7a2SStefano Zampini sub_schurs->is_I = is_I;
21539566063dSJacob Faibussowitsch PetscCall(PetscObjectReference((PetscObject)is_B));
2154b1b3d7a2SStefano Zampini sub_schurs->is_B = is_B;
21559566063dSJacob Faibussowitsch PetscCall(PetscObjectReference((PetscObject)graph->l2gmap));
21565db18549SStefano Zampini sub_schurs->l2gmap = graph->l2gmap;
21579566063dSJacob Faibussowitsch PetscCall(PetscObjectReference((PetscObject)BtoNmap));
21585db18549SStefano Zampini sub_schurs->BtoNmap = BtoNmap;
21595a95e1ceSStefano Zampini sub_schurs->n_subs = n_all_cc;
2160b1b3d7a2SStefano Zampini sub_schurs->is_subs = all_cc;
2161b96c3477SStefano Zampini sub_schurs->S_Ej_all = NULL;
2162b96c3477SStefano Zampini sub_schurs->sum_S_Ej_all = NULL;
216308122e43SStefano Zampini sub_schurs->sum_S_Ej_inv_all = NULL;
2164b96c3477SStefano Zampini sub_schurs->sum_S_Ej_tilda_all = NULL;
2165b96c3477SStefano Zampini sub_schurs->is_Ej_all = NULL;
21663ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
2167b1b3d7a2SStefano Zampini }
2168b1b3d7a2SStefano Zampini
PCBDDCSubSchursCreate(PCBDDCSubSchurs * sub_schurs)2169d71ae5a4SJacob Faibussowitsch PetscErrorCode PCBDDCSubSchursCreate(PCBDDCSubSchurs *sub_schurs)
2170d71ae5a4SJacob Faibussowitsch {
217134a97f8cSStefano Zampini PCBDDCSubSchurs schurs_ctx;
217234a97f8cSStefano Zampini
217334a97f8cSStefano Zampini PetscFunctionBegin;
21749566063dSJacob Faibussowitsch PetscCall(PetscNew(&schurs_ctx));
21755ff63025SStefano Zampini schurs_ctx->n_subs = 0;
217634a97f8cSStefano Zampini *sub_schurs = schurs_ctx;
21773ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
217834a97f8cSStefano Zampini }
217934a97f8cSStefano Zampini
PCBDDCSubSchursReset(PCBDDCSubSchurs sub_schurs)2180d71ae5a4SJacob Faibussowitsch PetscErrorCode PCBDDCSubSchursReset(PCBDDCSubSchurs sub_schurs)
2181d71ae5a4SJacob Faibussowitsch {
218234a97f8cSStefano Zampini PetscFunctionBegin;
21833ba16761SJacob Faibussowitsch if (!sub_schurs) PetscFunctionReturn(PETSC_SUCCESS);
2184791bdc09SStefano Zampini sub_schurs->graph = NULL;
21859566063dSJacob Faibussowitsch PetscCall(PetscFree(sub_schurs->prefix));
21869566063dSJacob Faibussowitsch PetscCall(MatDestroy(&sub_schurs->A));
21879566063dSJacob Faibussowitsch PetscCall(MatDestroy(&sub_schurs->S));
21889566063dSJacob Faibussowitsch PetscCall(ISDestroy(&sub_schurs->is_I));
21899566063dSJacob Faibussowitsch PetscCall(ISDestroy(&sub_schurs->is_B));
21909566063dSJacob Faibussowitsch PetscCall(ISLocalToGlobalMappingDestroy(&sub_schurs->l2gmap));
21919566063dSJacob Faibussowitsch PetscCall(ISLocalToGlobalMappingDestroy(&sub_schurs->BtoNmap));
21929566063dSJacob Faibussowitsch PetscCall(MatDestroy(&sub_schurs->S_Ej_all));
21939566063dSJacob Faibussowitsch PetscCall(MatDestroy(&sub_schurs->sum_S_Ej_all));
21949566063dSJacob Faibussowitsch PetscCall(MatDestroy(&sub_schurs->sum_S_Ej_inv_all));
21959566063dSJacob Faibussowitsch PetscCall(MatDestroy(&sub_schurs->sum_S_Ej_tilda_all));
21969566063dSJacob Faibussowitsch PetscCall(ISDestroy(&sub_schurs->is_Ej_all));
21979566063dSJacob Faibussowitsch PetscCall(ISDestroy(&sub_schurs->is_vertices));
21989566063dSJacob Faibussowitsch PetscCall(ISDestroy(&sub_schurs->is_dir));
21999566063dSJacob Faibussowitsch PetscCall(PetscBTDestroy(&sub_schurs->is_edge));
2200791bdc09SStefano Zampini for (PetscInt i = 0; i < sub_schurs->n_subs; i++) PetscCall(ISDestroy(&sub_schurs->is_subs[i]));
22011baa6e33SBarry Smith if (sub_schurs->n_subs) PetscCall(PetscFree(sub_schurs->is_subs));
22021baa6e33SBarry Smith if (sub_schurs->reuse_solver) PetscCall(PCBDDCReuseSolversReset(sub_schurs->reuse_solver));
22039566063dSJacob Faibussowitsch PetscCall(PetscFree(sub_schurs->reuse_solver));
220472b8c272SStefano Zampini if (sub_schurs->change) {
2205791bdc09SStefano Zampini for (PetscInt i = 0; i < sub_schurs->n_subs; i++) {
22069566063dSJacob Faibussowitsch PetscCall(KSPDestroy(&sub_schurs->change[i]));
22079566063dSJacob Faibussowitsch PetscCall(ISDestroy(&sub_schurs->change_primal_sub[i]));
220872b8c272SStefano Zampini }
220972b8c272SStefano Zampini }
22109566063dSJacob Faibussowitsch PetscCall(PetscFree(sub_schurs->change));
22119566063dSJacob Faibussowitsch PetscCall(PetscFree(sub_schurs->change_primal_sub));
221234a97f8cSStefano Zampini sub_schurs->n_subs = 0;
22133ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
221434a97f8cSStefano Zampini }
221534a97f8cSStefano Zampini
PCBDDCSubSchursDestroy(PCBDDCSubSchurs * sub_schurs)2216d71ae5a4SJacob Faibussowitsch PetscErrorCode PCBDDCSubSchursDestroy(PCBDDCSubSchurs *sub_schurs)
2217d71ae5a4SJacob Faibussowitsch {
2218aea80f77Sstefano_zampini PetscFunctionBegin;
22199566063dSJacob Faibussowitsch PetscCall(PCBDDCSubSchursReset(*sub_schurs));
22209566063dSJacob Faibussowitsch PetscCall(PetscFree(*sub_schurs));
22213ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
2222aea80f77Sstefano_zampini }
2223aea80f77Sstefano_zampini
PCBDDCAdjGetNextLayer_Private(PetscInt * queue_tip,PetscInt n_prev,PetscBT touched,PetscInt * xadj,PetscInt * adjncy,PetscInt * n_added)2224d71ae5a4SJacob Faibussowitsch static inline PetscErrorCode PCBDDCAdjGetNextLayer_Private(PetscInt *queue_tip, PetscInt n_prev, PetscBT touched, PetscInt *xadj, PetscInt *adjncy, PetscInt *n_added)
2225d71ae5a4SJacob Faibussowitsch {
222634a97f8cSStefano Zampini PetscInt i, j, n;
222734a97f8cSStefano Zampini
222834a97f8cSStefano Zampini PetscFunctionBegin;
222934a97f8cSStefano Zampini n = 0;
223034a97f8cSStefano Zampini for (i = -n_prev; i < 0; i++) {
223134a97f8cSStefano Zampini PetscInt start_dof = queue_tip[i];
223234a97f8cSStefano Zampini for (j = xadj[start_dof]; j < xadj[start_dof + 1]; j++) {
223334a97f8cSStefano Zampini PetscInt dof = adjncy[j];
223434a97f8cSStefano Zampini if (!PetscBTLookup(touched, dof)) {
22359566063dSJacob Faibussowitsch PetscCall(PetscBTSet(touched, dof));
223634a97f8cSStefano Zampini queue_tip[n] = dof;
223734a97f8cSStefano Zampini n++;
223834a97f8cSStefano Zampini }
223934a97f8cSStefano Zampini }
224034a97f8cSStefano Zampini }
224134a97f8cSStefano Zampini *n_added = n;
22423ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
224334a97f8cSStefano Zampini }
2244