xref: /petsc/src/ksp/pc/impls/bddc/bddcscalingbasic.c (revision 2d776b4963042cdf8a412ba09e923aa51facd799)
1 #include <petsc/private/pcbddcimpl.h>
2 #include <petsc/private/pcbddcprivateimpl.h>
3 #include <petscblaslapack.h>
4 #include <../src/mat/impls/dense/seq/dense.h>
5 
6 /* prototypes for deluxe functions */
7 static PetscErrorCode PCBDDCScalingCreate_Deluxe(PC);
8 static PetscErrorCode PCBDDCScalingDestroy_Deluxe(PC);
9 static PetscErrorCode PCBDDCScalingSetUp_Deluxe(PC);
10 static PetscErrorCode PCBDDCScalingSetUp_Deluxe_Private(PC);
11 static PetscErrorCode PCBDDCScalingReset_Deluxe_Solvers(PCBDDCDeluxeScaling);
12 
13 static PetscErrorCode PCBDDCMatTransposeMatSolve_SeqDense(Mat A, Mat B, Mat X)
14 {
15   Mat_SeqDense      *mat = (Mat_SeqDense *)A->data;
16   const PetscScalar *b;
17   PetscScalar       *x;
18   PetscInt           n;
19   PetscBLASInt       nrhs, info, m;
20   PetscBool          flg;
21 
22   PetscFunctionBegin;
23   PetscCall(PetscBLASIntCast(A->rmap->n, &m));
24   PetscCall(PetscObjectTypeCompareAny((PetscObject)B, &flg, MATSEQDENSE, MATMPIDENSE, NULL));
25   PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Matrix B must be MATDENSE matrix");
26   PetscCall(PetscObjectTypeCompareAny((PetscObject)X, &flg, MATSEQDENSE, MATMPIDENSE, NULL));
27   PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Matrix X must be MATDENSE matrix");
28 
29   PetscCall(MatGetSize(B, NULL, &n));
30   PetscCall(PetscBLASIntCast(n, &nrhs));
31   PetscCall(MatDenseGetArrayRead(B, &b));
32   PetscCall(MatDenseGetArray(X, &x));
33   PetscCall(PetscArraycpy(x, b, m * nrhs));
34   PetscCall(MatDenseRestoreArrayRead(B, &b));
35 
36   if (A->factortype == MAT_FACTOR_LU) {
37     PetscCallBLAS("LAPACKgetrs", LAPACKgetrs_("T", &m, &nrhs, mat->v, &mat->lda, mat->pivots, x, &m, &info));
38     PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_LIB, "GETRS - Bad solve");
39   } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Only LU factor supported");
40 
41   PetscCall(MatDenseRestoreArray(X, &x));
42   PetscCall(PetscLogFlops(nrhs * (2.0 * m * m - m)));
43   PetscFunctionReturn(0);
44 }
45 
46 static PetscErrorCode PCBDDCScalingExtension_Basic(PC pc, Vec local_interface_vector, Vec global_vector)
47 {
48   PC_IS   *pcis   = (PC_IS *)pc->data;
49   PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
50 
51   PetscFunctionBegin;
52   /* Apply partition of unity */
53   PetscCall(VecPointwiseMult(pcbddc->work_scaling, pcis->D, local_interface_vector));
54   PetscCall(VecSet(global_vector, 0.0));
55   PetscCall(VecScatterBegin(pcis->global_to_B, pcbddc->work_scaling, global_vector, ADD_VALUES, SCATTER_REVERSE));
56   PetscCall(VecScatterEnd(pcis->global_to_B, pcbddc->work_scaling, global_vector, ADD_VALUES, SCATTER_REVERSE));
57   PetscFunctionReturn(0);
58 }
59 
60 static PetscErrorCode PCBDDCScalingExtension_Deluxe(PC pc, Vec x, Vec y)
61 {
62   PC_IS              *pcis       = (PC_IS *)pc->data;
63   PC_BDDC            *pcbddc     = (PC_BDDC *)pc->data;
64   PCBDDCDeluxeScaling deluxe_ctx = pcbddc->deluxe_ctx;
65 
66   PetscFunctionBegin;
67   PetscCall(VecSet(pcbddc->work_scaling, 0.0));
68   PetscCall(VecSet(y, 0.0));
69   if (deluxe_ctx->n_simple) { /* scale deluxe vertices using diagonal scaling */
70     PetscInt           i;
71     const PetscScalar *array_x, *array_D;
72     PetscScalar       *array;
73     PetscCall(VecGetArrayRead(x, &array_x));
74     PetscCall(VecGetArrayRead(pcis->D, &array_D));
75     PetscCall(VecGetArray(pcbddc->work_scaling, &array));
76     for (i = 0; i < deluxe_ctx->n_simple; i++) array[deluxe_ctx->idx_simple_B[i]] = array_x[deluxe_ctx->idx_simple_B[i]] * array_D[deluxe_ctx->idx_simple_B[i]];
77     PetscCall(VecRestoreArray(pcbddc->work_scaling, &array));
78     PetscCall(VecRestoreArrayRead(pcis->D, &array_D));
79     PetscCall(VecRestoreArrayRead(x, &array_x));
80   }
81   /* sequential part : all problems and Schur applications collapsed into a single matrix vector multiplication or a matvec and a solve */
82   if (deluxe_ctx->seq_mat) {
83     PetscInt i;
84     for (i = 0; i < deluxe_ctx->seq_n; i++) {
85       if (deluxe_ctx->change) {
86         PetscCall(VecScatterBegin(deluxe_ctx->seq_scctx[i], x, deluxe_ctx->seq_work2[i], INSERT_VALUES, SCATTER_FORWARD));
87         PetscCall(VecScatterEnd(deluxe_ctx->seq_scctx[i], x, deluxe_ctx->seq_work2[i], INSERT_VALUES, SCATTER_FORWARD));
88         if (deluxe_ctx->change_with_qr) {
89           Mat change;
90 
91           PetscCall(KSPGetOperators(deluxe_ctx->change[i], &change, NULL));
92           PetscCall(MatMultTranspose(change, deluxe_ctx->seq_work2[i], deluxe_ctx->seq_work1[i]));
93         } else {
94           PetscCall(KSPSolve(deluxe_ctx->change[i], deluxe_ctx->seq_work2[i], deluxe_ctx->seq_work1[i]));
95         }
96       } else {
97         PetscCall(VecScatterBegin(deluxe_ctx->seq_scctx[i], x, deluxe_ctx->seq_work1[i], INSERT_VALUES, SCATTER_FORWARD));
98         PetscCall(VecScatterEnd(deluxe_ctx->seq_scctx[i], x, deluxe_ctx->seq_work1[i], INSERT_VALUES, SCATTER_FORWARD));
99       }
100       PetscCall(MatMultTranspose(deluxe_ctx->seq_mat[i], deluxe_ctx->seq_work1[i], deluxe_ctx->seq_work2[i]));
101       if (deluxe_ctx->seq_mat_inv_sum[i]) {
102         PetscScalar *x;
103 
104         PetscCall(VecGetArray(deluxe_ctx->seq_work2[i], &x));
105         PetscCall(VecPlaceArray(deluxe_ctx->seq_work1[i], x));
106         PetscCall(VecRestoreArray(deluxe_ctx->seq_work2[i], &x));
107         PetscCall(MatSolveTranspose(deluxe_ctx->seq_mat_inv_sum[i], deluxe_ctx->seq_work1[i], deluxe_ctx->seq_work2[i]));
108         PetscCall(VecResetArray(deluxe_ctx->seq_work1[i]));
109       }
110       if (deluxe_ctx->change) {
111         Mat change;
112 
113         PetscCall(KSPGetOperators(deluxe_ctx->change[i], &change, NULL));
114         PetscCall(MatMult(change, deluxe_ctx->seq_work2[i], deluxe_ctx->seq_work1[i]));
115         PetscCall(VecScatterBegin(deluxe_ctx->seq_scctx[i], deluxe_ctx->seq_work1[i], pcbddc->work_scaling, INSERT_VALUES, SCATTER_REVERSE));
116         PetscCall(VecScatterEnd(deluxe_ctx->seq_scctx[i], deluxe_ctx->seq_work1[i], pcbddc->work_scaling, INSERT_VALUES, SCATTER_REVERSE));
117       } else {
118         PetscCall(VecScatterBegin(deluxe_ctx->seq_scctx[i], deluxe_ctx->seq_work2[i], pcbddc->work_scaling, INSERT_VALUES, SCATTER_REVERSE));
119         PetscCall(VecScatterEnd(deluxe_ctx->seq_scctx[i], deluxe_ctx->seq_work2[i], pcbddc->work_scaling, INSERT_VALUES, SCATTER_REVERSE));
120       }
121     }
122   }
123   /* put local boundary part in global vector */
124   PetscCall(VecScatterBegin(pcis->global_to_B, pcbddc->work_scaling, y, ADD_VALUES, SCATTER_REVERSE));
125   PetscCall(VecScatterEnd(pcis->global_to_B, pcbddc->work_scaling, y, ADD_VALUES, SCATTER_REVERSE));
126   PetscFunctionReturn(0);
127 }
128 
129 PetscErrorCode PCBDDCScalingExtension(PC pc, Vec local_interface_vector, Vec global_vector)
130 {
131   PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
132 
133   PetscFunctionBegin;
134   PetscValidHeaderSpecific(pc, PC_CLASSID, 1);
135   PetscValidHeaderSpecific(local_interface_vector, VEC_CLASSID, 2);
136   PetscValidHeaderSpecific(global_vector, VEC_CLASSID, 3);
137   PetscCheck(local_interface_vector != pcbddc->work_scaling, PETSC_COMM_SELF, PETSC_ERR_SUP, "Local vector cannot be pcbddc->work_scaling!");
138   PetscUseMethod(pc, "PCBDDCScalingExtension_C", (PC, Vec, Vec), (pc, local_interface_vector, global_vector));
139   PetscFunctionReturn(0);
140 }
141 
142 static PetscErrorCode PCBDDCScalingRestriction_Basic(PC pc, Vec global_vector, Vec local_interface_vector)
143 {
144   PC_IS *pcis = (PC_IS *)pc->data;
145 
146   PetscFunctionBegin;
147   PetscCall(VecScatterBegin(pcis->global_to_B, global_vector, local_interface_vector, INSERT_VALUES, SCATTER_FORWARD));
148   PetscCall(VecScatterEnd(pcis->global_to_B, global_vector, local_interface_vector, INSERT_VALUES, SCATTER_FORWARD));
149   /* Apply partition of unity */
150   PetscCall(VecPointwiseMult(local_interface_vector, pcis->D, local_interface_vector));
151   PetscFunctionReturn(0);
152 }
153 
154 static PetscErrorCode PCBDDCScalingRestriction_Deluxe(PC pc, Vec x, Vec y)
155 {
156   PC_IS              *pcis       = (PC_IS *)pc->data;
157   PC_BDDC            *pcbddc     = (PC_BDDC *)pc->data;
158   PCBDDCDeluxeScaling deluxe_ctx = pcbddc->deluxe_ctx;
159 
160   PetscFunctionBegin;
161   /* get local boundary part of global vector */
162   PetscCall(VecScatterBegin(pcis->global_to_B, x, y, INSERT_VALUES, SCATTER_FORWARD));
163   PetscCall(VecScatterEnd(pcis->global_to_B, x, y, INSERT_VALUES, SCATTER_FORWARD));
164   if (deluxe_ctx->n_simple) { /* scale deluxe vertices using diagonal scaling */
165     PetscInt           i;
166     PetscScalar       *array_y;
167     const PetscScalar *array_D;
168     PetscCall(VecGetArray(y, &array_y));
169     PetscCall(VecGetArrayRead(pcis->D, &array_D));
170     for (i = 0; i < deluxe_ctx->n_simple; i++) array_y[deluxe_ctx->idx_simple_B[i]] *= array_D[deluxe_ctx->idx_simple_B[i]];
171     PetscCall(VecRestoreArrayRead(pcis->D, &array_D));
172     PetscCall(VecRestoreArray(y, &array_y));
173   }
174   /* sequential part : all problems and Schur applications collapsed into a single matrix vector multiplication or a matvec and a solve */
175   if (deluxe_ctx->seq_mat) {
176     PetscInt i;
177     for (i = 0; i < deluxe_ctx->seq_n; i++) {
178       if (deluxe_ctx->change) {
179         Mat change;
180 
181         PetscCall(VecScatterBegin(deluxe_ctx->seq_scctx[i], y, deluxe_ctx->seq_work2[i], INSERT_VALUES, SCATTER_FORWARD));
182         PetscCall(VecScatterEnd(deluxe_ctx->seq_scctx[i], y, deluxe_ctx->seq_work2[i], INSERT_VALUES, SCATTER_FORWARD));
183         PetscCall(KSPGetOperators(deluxe_ctx->change[i], &change, NULL));
184         PetscCall(MatMultTranspose(change, deluxe_ctx->seq_work2[i], deluxe_ctx->seq_work1[i]));
185       } else {
186         PetscCall(VecScatterBegin(deluxe_ctx->seq_scctx[i], y, deluxe_ctx->seq_work1[i], INSERT_VALUES, SCATTER_FORWARD));
187         PetscCall(VecScatterEnd(deluxe_ctx->seq_scctx[i], y, deluxe_ctx->seq_work1[i], INSERT_VALUES, SCATTER_FORWARD));
188       }
189       if (deluxe_ctx->seq_mat_inv_sum[i]) {
190         PetscScalar *x;
191 
192         PetscCall(VecGetArray(deluxe_ctx->seq_work1[i], &x));
193         PetscCall(VecPlaceArray(deluxe_ctx->seq_work2[i], x));
194         PetscCall(VecRestoreArray(deluxe_ctx->seq_work1[i], &x));
195         PetscCall(MatSolve(deluxe_ctx->seq_mat_inv_sum[i], deluxe_ctx->seq_work1[i], deluxe_ctx->seq_work2[i]));
196         PetscCall(VecResetArray(deluxe_ctx->seq_work2[i]));
197       }
198       PetscCall(MatMult(deluxe_ctx->seq_mat[i], deluxe_ctx->seq_work1[i], deluxe_ctx->seq_work2[i]));
199       if (deluxe_ctx->change) {
200         if (deluxe_ctx->change_with_qr) {
201           Mat change;
202 
203           PetscCall(KSPGetOperators(deluxe_ctx->change[i], &change, NULL));
204           PetscCall(MatMult(change, deluxe_ctx->seq_work2[i], deluxe_ctx->seq_work1[i]));
205         } else {
206           PetscCall(KSPSolveTranspose(deluxe_ctx->change[i], deluxe_ctx->seq_work2[i], deluxe_ctx->seq_work1[i]));
207         }
208         PetscCall(VecScatterBegin(deluxe_ctx->seq_scctx[i], deluxe_ctx->seq_work1[i], y, INSERT_VALUES, SCATTER_REVERSE));
209         PetscCall(VecScatterEnd(deluxe_ctx->seq_scctx[i], deluxe_ctx->seq_work1[i], y, INSERT_VALUES, SCATTER_REVERSE));
210       } else {
211         PetscCall(VecScatterBegin(deluxe_ctx->seq_scctx[i], deluxe_ctx->seq_work2[i], y, INSERT_VALUES, SCATTER_REVERSE));
212         PetscCall(VecScatterEnd(deluxe_ctx->seq_scctx[i], deluxe_ctx->seq_work2[i], y, INSERT_VALUES, SCATTER_REVERSE));
213       }
214     }
215   }
216   PetscFunctionReturn(0);
217 }
218 
219 PetscErrorCode PCBDDCScalingRestriction(PC pc, Vec global_vector, Vec local_interface_vector)
220 {
221   PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
222 
223   PetscFunctionBegin;
224   PetscValidHeaderSpecific(pc, PC_CLASSID, 1);
225   PetscValidHeaderSpecific(global_vector, VEC_CLASSID, 2);
226   PetscValidHeaderSpecific(local_interface_vector, VEC_CLASSID, 3);
227   PetscCheck(local_interface_vector != pcbddc->work_scaling, PETSC_COMM_SELF, PETSC_ERR_SUP, "Local vector cannot be pcbddc->work_scaling!");
228   PetscUseMethod(pc, "PCBDDCScalingRestriction_C", (PC, Vec, Vec), (pc, global_vector, local_interface_vector));
229   PetscFunctionReturn(0);
230 }
231 
232 PetscErrorCode PCBDDCScalingSetUp(PC pc)
233 {
234   PC_IS   *pcis   = (PC_IS *)pc->data;
235   PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
236 
237   PetscFunctionBegin;
238   PetscValidHeaderSpecific(pc, PC_CLASSID, 1);
239   PetscCall(PetscLogEventBegin(PC_BDDC_Scaling[pcbddc->current_level], pc, 0, 0, 0));
240   /* create work vector for the operator */
241   PetscCall(VecDestroy(&pcbddc->work_scaling));
242   PetscCall(VecDuplicate(pcis->vec1_B, &pcbddc->work_scaling));
243   /* always rebuild pcis->D */
244   if (pcis->use_stiffness_scaling) {
245     PetscScalar *a;
246     PetscInt     i, n;
247 
248     PetscCall(MatGetDiagonal(pcbddc->local_mat, pcis->vec1_N));
249     PetscCall(VecScatterBegin(pcis->N_to_B, pcis->vec1_N, pcis->D, INSERT_VALUES, SCATTER_FORWARD));
250     PetscCall(VecScatterEnd(pcis->N_to_B, pcis->vec1_N, pcis->D, INSERT_VALUES, SCATTER_FORWARD));
251     PetscCall(VecAbs(pcis->D));
252     PetscCall(VecGetLocalSize(pcis->D, &n));
253     PetscCall(VecGetArray(pcis->D, &a));
254     for (i = 0; i < n; i++)
255       if (PetscAbsScalar(a[i]) < PETSC_SMALL) a[i] = 1.0;
256     PetscCall(VecRestoreArray(pcis->D, &a));
257   }
258   PetscCall(VecSet(pcis->vec1_global, 0.0));
259   PetscCall(VecScatterBegin(pcis->global_to_B, pcis->D, pcis->vec1_global, ADD_VALUES, SCATTER_REVERSE));
260   PetscCall(VecScatterEnd(pcis->global_to_B, pcis->D, pcis->vec1_global, ADD_VALUES, SCATTER_REVERSE));
261   PetscCall(VecScatterBegin(pcis->global_to_B, pcis->vec1_global, pcis->vec1_B, INSERT_VALUES, SCATTER_FORWARD));
262   PetscCall(VecScatterEnd(pcis->global_to_B, pcis->vec1_global, pcis->vec1_B, INSERT_VALUES, SCATTER_FORWARD));
263   PetscCall(VecPointwiseDivide(pcis->D, pcis->D, pcis->vec1_B));
264   /* now setup */
265   if (pcbddc->use_deluxe_scaling) {
266     if (!pcbddc->deluxe_ctx) PetscCall(PCBDDCScalingCreate_Deluxe(pc));
267     PetscCall(PCBDDCScalingSetUp_Deluxe(pc));
268     PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCBDDCScalingRestriction_C", PCBDDCScalingRestriction_Deluxe));
269     PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCBDDCScalingExtension_C", PCBDDCScalingExtension_Deluxe));
270   } else {
271     PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCBDDCScalingRestriction_C", PCBDDCScalingRestriction_Basic));
272     PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCBDDCScalingExtension_C", PCBDDCScalingExtension_Basic));
273   }
274   PetscCall(PetscLogEventEnd(PC_BDDC_Scaling[pcbddc->current_level], pc, 0, 0, 0));
275 
276   /* test */
277   if (pcbddc->dbg_flag) {
278     Mat         B0_B  = NULL;
279     Vec         B0_Bv = NULL, B0_Bv2 = NULL;
280     Vec         vec2_global;
281     PetscViewer viewer = pcbddc->dbg_viewer;
282     PetscReal   error;
283 
284     /* extension -> from local to parallel */
285     PetscCall(VecSet(pcis->vec1_global, 0.0));
286     PetscCall(VecSetRandom(pcis->vec1_B, NULL));
287     PetscCall(VecScatterBegin(pcis->global_to_B, pcis->vec1_B, pcis->vec1_global, ADD_VALUES, SCATTER_REVERSE));
288     PetscCall(VecScatterEnd(pcis->global_to_B, pcis->vec1_B, pcis->vec1_global, ADD_VALUES, SCATTER_REVERSE));
289     PetscCall(VecDuplicate(pcis->vec1_global, &vec2_global));
290     PetscCall(VecCopy(pcis->vec1_global, vec2_global));
291     PetscCall(VecScatterBegin(pcis->global_to_B, pcis->vec1_global, pcis->vec1_B, INSERT_VALUES, SCATTER_FORWARD));
292     PetscCall(VecScatterEnd(pcis->global_to_B, pcis->vec1_global, pcis->vec1_B, INSERT_VALUES, SCATTER_FORWARD));
293     if (pcbddc->benign_n) {
294       IS is_dummy;
295 
296       PetscCall(ISCreateStride(PETSC_COMM_SELF, pcbddc->benign_n, 0, 1, &is_dummy));
297       PetscCall(MatCreateSubMatrix(pcbddc->benign_B0, is_dummy, pcis->is_B_local, MAT_INITIAL_MATRIX, &B0_B));
298       PetscCall(ISDestroy(&is_dummy));
299       PetscCall(MatCreateVecs(B0_B, NULL, &B0_Bv));
300       PetscCall(VecDuplicate(B0_Bv, &B0_Bv2));
301       PetscCall(MatMult(B0_B, pcis->vec1_B, B0_Bv));
302     }
303     PetscCall(PCBDDCScalingExtension(pc, pcis->vec1_B, pcis->vec1_global));
304     if (pcbddc->benign_saddle_point) {
305       PetscReal errorl = 0.;
306       PetscCall(VecScatterBegin(pcis->global_to_B, pcis->vec1_global, pcis->vec1_B, INSERT_VALUES, SCATTER_FORWARD));
307       PetscCall(VecScatterEnd(pcis->global_to_B, pcis->vec1_global, pcis->vec1_B, INSERT_VALUES, SCATTER_FORWARD));
308       if (pcbddc->benign_n) {
309         PetscCall(MatMult(B0_B, pcis->vec1_B, B0_Bv2));
310         PetscCall(VecAXPY(B0_Bv, -1.0, B0_Bv2));
311         PetscCall(VecNorm(B0_Bv, NORM_INFINITY, &errorl));
312       }
313       PetscCallMPI(MPI_Allreduce(&errorl, &error, 1, MPIU_REAL, MPI_SUM, PetscObjectComm((PetscObject)pc)));
314       PetscCall(PetscViewerASCIIPrintf(viewer, "Error benign extension %1.14e\n", (double)error));
315     }
316     PetscCall(VecAXPY(pcis->vec1_global, -1.0, vec2_global));
317     PetscCall(VecNorm(pcis->vec1_global, NORM_INFINITY, &error));
318     PetscCall(PetscViewerASCIIPrintf(viewer, "Error scaling extension %1.14e\n", (double)error));
319     PetscCall(VecDestroy(&vec2_global));
320 
321     /* restriction -> from parallel to local */
322     PetscCall(VecSet(pcis->vec1_global, 0.0));
323     PetscCall(VecSetRandom(pcis->vec1_B, NULL));
324     PetscCall(VecScatterBegin(pcis->global_to_B, pcis->vec1_B, pcis->vec1_global, ADD_VALUES, SCATTER_REVERSE));
325     PetscCall(VecScatterEnd(pcis->global_to_B, pcis->vec1_B, pcis->vec1_global, ADD_VALUES, SCATTER_REVERSE));
326     PetscCall(PCBDDCScalingRestriction(pc, pcis->vec1_global, pcis->vec1_B));
327     PetscCall(VecScale(pcis->vec1_B, -1.0));
328     PetscCall(VecScatterBegin(pcis->global_to_B, pcis->vec1_B, pcis->vec1_global, ADD_VALUES, SCATTER_REVERSE));
329     PetscCall(VecScatterEnd(pcis->global_to_B, pcis->vec1_B, pcis->vec1_global, ADD_VALUES, SCATTER_REVERSE));
330     PetscCall(VecNorm(pcis->vec1_global, NORM_INFINITY, &error));
331     PetscCall(PetscViewerASCIIPrintf(viewer, "Error scaling restriction %1.14e\n", (double)error));
332     PetscCall(MatDestroy(&B0_B));
333     PetscCall(VecDestroy(&B0_Bv));
334     PetscCall(VecDestroy(&B0_Bv2));
335   }
336   PetscFunctionReturn(0);
337 }
338 
339 PetscErrorCode PCBDDCScalingDestroy(PC pc)
340 {
341   PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
342 
343   PetscFunctionBegin;
344   if (pcbddc->deluxe_ctx) PetscCall(PCBDDCScalingDestroy_Deluxe(pc));
345   PetscCall(VecDestroy(&pcbddc->work_scaling));
346   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCBDDCScalingRestriction_C", NULL));
347   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCBDDCScalingExtension_C", NULL));
348   PetscFunctionReturn(0);
349 }
350 
351 static PetscErrorCode PCBDDCScalingCreate_Deluxe(PC pc)
352 {
353   PC_BDDC            *pcbddc = (PC_BDDC *)pc->data;
354   PCBDDCDeluxeScaling deluxe_ctx;
355 
356   PetscFunctionBegin;
357   PetscCall(PetscNew(&deluxe_ctx));
358   pcbddc->deluxe_ctx = deluxe_ctx;
359   PetscFunctionReturn(0);
360 }
361 
362 static PetscErrorCode PCBDDCScalingDestroy_Deluxe(PC pc)
363 {
364   PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
365 
366   PetscFunctionBegin;
367   PetscCall(PCBDDCScalingReset_Deluxe_Solvers(pcbddc->deluxe_ctx));
368   PetscCall(PetscFree(pcbddc->deluxe_ctx));
369   PetscFunctionReturn(0);
370 }
371 
372 static PetscErrorCode PCBDDCScalingReset_Deluxe_Solvers(PCBDDCDeluxeScaling deluxe_ctx)
373 {
374   PetscInt i;
375 
376   PetscFunctionBegin;
377   PetscCall(PetscFree(deluxe_ctx->idx_simple_B));
378   deluxe_ctx->n_simple = 0;
379   for (i = 0; i < deluxe_ctx->seq_n; i++) {
380     PetscCall(VecScatterDestroy(&deluxe_ctx->seq_scctx[i]));
381     PetscCall(VecDestroy(&deluxe_ctx->seq_work1[i]));
382     PetscCall(VecDestroy(&deluxe_ctx->seq_work2[i]));
383     PetscCall(MatDestroy(&deluxe_ctx->seq_mat[i]));
384     PetscCall(MatDestroy(&deluxe_ctx->seq_mat_inv_sum[i]));
385   }
386   PetscCall(PetscFree5(deluxe_ctx->seq_scctx, deluxe_ctx->seq_work1, deluxe_ctx->seq_work2, deluxe_ctx->seq_mat, deluxe_ctx->seq_mat_inv_sum));
387   PetscCall(PetscFree(deluxe_ctx->workspace));
388   deluxe_ctx->seq_n = 0;
389   PetscFunctionReturn(0);
390 }
391 
392 static PetscErrorCode PCBDDCScalingSetUp_Deluxe(PC pc)
393 {
394   PC_IS              *pcis       = (PC_IS *)pc->data;
395   PC_BDDC            *pcbddc     = (PC_BDDC *)pc->data;
396   PCBDDCDeluxeScaling deluxe_ctx = pcbddc->deluxe_ctx;
397   PCBDDCSubSchurs     sub_schurs = pcbddc->sub_schurs;
398 
399   PetscFunctionBegin;
400   /* reset data structures if the topology has changed */
401   if (pcbddc->recompute_topography) PetscCall(PCBDDCScalingReset_Deluxe_Solvers(deluxe_ctx));
402 
403   /* Compute data structures to solve sequential problems */
404   PetscCall(PCBDDCScalingSetUp_Deluxe_Private(pc));
405 
406   /* diagonal scaling on interface dofs not contained in cc */
407   if (sub_schurs->is_vertices || sub_schurs->is_dir) {
408     PetscInt n_com, n_dir;
409     n_com = 0;
410     if (sub_schurs->is_vertices) PetscCall(ISGetLocalSize(sub_schurs->is_vertices, &n_com));
411     n_dir = 0;
412     if (sub_schurs->is_dir) PetscCall(ISGetLocalSize(sub_schurs->is_dir, &n_dir));
413     if (!deluxe_ctx->n_simple) {
414       deluxe_ctx->n_simple = n_dir + n_com;
415       PetscCall(PetscMalloc1(deluxe_ctx->n_simple, &deluxe_ctx->idx_simple_B));
416       if (sub_schurs->is_vertices) {
417         PetscInt        nmap;
418         const PetscInt *idxs;
419 
420         PetscCall(ISGetIndices(sub_schurs->is_vertices, &idxs));
421         PetscCall(ISGlobalToLocalMappingApply(pcis->BtoNmap, IS_GTOLM_DROP, n_com, idxs, &nmap, deluxe_ctx->idx_simple_B));
422         PetscCheck(nmap == n_com, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Error when mapping simply scaled dofs (is_vertices)! %" PetscInt_FMT " != %" PetscInt_FMT, nmap, n_com);
423         PetscCall(ISRestoreIndices(sub_schurs->is_vertices, &idxs));
424       }
425       if (sub_schurs->is_dir) {
426         PetscInt        nmap;
427         const PetscInt *idxs;
428 
429         PetscCall(ISGetIndices(sub_schurs->is_dir, &idxs));
430         PetscCall(ISGlobalToLocalMappingApply(pcis->BtoNmap, IS_GTOLM_DROP, n_dir, idxs, &nmap, deluxe_ctx->idx_simple_B + n_com));
431         PetscCheck(nmap == n_dir, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Error when mapping simply scaled dofs (sub_schurs->is_dir)! %" PetscInt_FMT " != %" PetscInt_FMT, nmap, n_dir);
432         PetscCall(ISRestoreIndices(sub_schurs->is_dir, &idxs));
433       }
434       PetscCall(PetscSortInt(deluxe_ctx->n_simple, deluxe_ctx->idx_simple_B));
435     } else {
436       PetscCheck(deluxe_ctx->n_simple == n_dir + n_com, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Number of simply scaled dofs %" PetscInt_FMT " is different from the previous one computed %" PetscInt_FMT, n_dir + n_com, deluxe_ctx->n_simple);
437     }
438   } else {
439     deluxe_ctx->n_simple     = 0;
440     deluxe_ctx->idx_simple_B = NULL;
441   }
442   PetscFunctionReturn(0);
443 }
444 
445 static PetscErrorCode PCBDDCScalingSetUp_Deluxe_Private(PC pc)
446 {
447   PC_BDDC            *pcbddc     = (PC_BDDC *)pc->data;
448   PCBDDCDeluxeScaling deluxe_ctx = pcbddc->deluxe_ctx;
449   PCBDDCSubSchurs     sub_schurs = pcbddc->sub_schurs;
450   PetscScalar        *matdata, *matdata2;
451   PetscInt            i, max_subset_size, cum, cum2;
452   const PetscInt     *idxs;
453   PetscBool           newsetup = PETSC_FALSE;
454 
455   PetscFunctionBegin;
456   PetscCheck(sub_schurs, PetscObjectComm((PetscObject)pc), PETSC_ERR_PLIB, "Missing PCBDDCSubSchurs");
457   if (!sub_schurs->n_subs) PetscFunctionReturn(0);
458 
459   /* Allocate arrays for subproblems */
460   if (!deluxe_ctx->seq_n) {
461     deluxe_ctx->seq_n = sub_schurs->n_subs;
462     PetscCall(PetscCalloc5(deluxe_ctx->seq_n, &deluxe_ctx->seq_scctx, deluxe_ctx->seq_n, &deluxe_ctx->seq_work1, deluxe_ctx->seq_n, &deluxe_ctx->seq_work2, deluxe_ctx->seq_n, &deluxe_ctx->seq_mat, deluxe_ctx->seq_n, &deluxe_ctx->seq_mat_inv_sum));
463     newsetup = PETSC_TRUE;
464   } else PetscCheck(deluxe_ctx->seq_n == sub_schurs->n_subs, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Number of deluxe subproblems %" PetscInt_FMT " is different from the sub_schurs %" PetscInt_FMT, deluxe_ctx->seq_n, sub_schurs->n_subs);
465 
466   /* the change of basis is just a reference to sub_schurs->change (if any) */
467   deluxe_ctx->change         = sub_schurs->change;
468   deluxe_ctx->change_with_qr = sub_schurs->change_with_qr;
469 
470   /* Create objects for deluxe */
471   max_subset_size = 0;
472   for (i = 0; i < sub_schurs->n_subs; i++) {
473     PetscInt subset_size;
474     PetscCall(ISGetLocalSize(sub_schurs->is_subs[i], &subset_size));
475     max_subset_size = PetscMax(subset_size, max_subset_size);
476   }
477   if (newsetup) PetscCall(PetscMalloc1(2 * max_subset_size, &deluxe_ctx->workspace));
478   cum = cum2 = 0;
479   PetscCall(ISGetIndices(sub_schurs->is_Ej_all, &idxs));
480   PetscCall(MatSeqAIJGetArray(sub_schurs->S_Ej_all, &matdata));
481   PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_all, &matdata2));
482   for (i = 0; i < deluxe_ctx->seq_n; i++) {
483     PetscInt subset_size;
484 
485     PetscCall(ISGetLocalSize(sub_schurs->is_subs[i], &subset_size));
486     if (newsetup) {
487       IS sub;
488       /* work vectors */
489       PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, subset_size, deluxe_ctx->workspace, &deluxe_ctx->seq_work1[i]));
490       PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, subset_size, deluxe_ctx->workspace + subset_size, &deluxe_ctx->seq_work2[i]));
491 
492       /* scatters */
493       PetscCall(ISCreateGeneral(PETSC_COMM_SELF, subset_size, idxs + cum, PETSC_COPY_VALUES, &sub));
494       PetscCall(VecScatterCreate(pcbddc->work_scaling, sub, deluxe_ctx->seq_work1[i], NULL, &deluxe_ctx->seq_scctx[i]));
495       PetscCall(ISDestroy(&sub));
496     }
497 
498     /* S_E_j */
499     PetscCall(MatDestroy(&deluxe_ctx->seq_mat[i]));
500     PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, subset_size, subset_size, matdata + cum2, &deluxe_ctx->seq_mat[i]));
501 
502     /* \sum_k S^k_E_j */
503     PetscCall(MatDestroy(&deluxe_ctx->seq_mat_inv_sum[i]));
504     PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, subset_size, subset_size, matdata2 + cum2, &deluxe_ctx->seq_mat_inv_sum[i]));
505     PetscCall(MatSetOption(deluxe_ctx->seq_mat_inv_sum[i], MAT_SPD, sub_schurs->is_posdef));
506     PetscCall(MatSetOption(deluxe_ctx->seq_mat_inv_sum[i], MAT_HERMITIAN, sub_schurs->is_hermitian));
507     if (sub_schurs->is_hermitian) {
508       PetscCall(MatCholeskyFactor(deluxe_ctx->seq_mat_inv_sum[i], NULL, NULL));
509     } else {
510       PetscCall(MatLUFactor(deluxe_ctx->seq_mat_inv_sum[i], NULL, NULL, NULL));
511     }
512     if (pcbddc->deluxe_singlemat) {
513       Mat X, Y;
514       if (!sub_schurs->is_hermitian) {
515         PetscCall(MatTranspose(deluxe_ctx->seq_mat[i], MAT_INITIAL_MATRIX, &X));
516       } else {
517         PetscCall(PetscObjectReference((PetscObject)deluxe_ctx->seq_mat[i]));
518         X = deluxe_ctx->seq_mat[i];
519       }
520       PetscCall(MatDuplicate(X, MAT_DO_NOT_COPY_VALUES, &Y));
521       if (!sub_schurs->is_hermitian) {
522         PetscCall(PCBDDCMatTransposeMatSolve_SeqDense(deluxe_ctx->seq_mat_inv_sum[i], X, Y));
523       } else {
524         PetscCall(MatMatSolve(deluxe_ctx->seq_mat_inv_sum[i], X, Y));
525       }
526 
527       PetscCall(MatDestroy(&deluxe_ctx->seq_mat_inv_sum[i]));
528       PetscCall(MatDestroy(&deluxe_ctx->seq_mat[i]));
529       PetscCall(MatDestroy(&X));
530       if (deluxe_ctx->change) {
531         Mat C, CY;
532         PetscCheck(deluxe_ctx->change_with_qr, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only QR based change of basis");
533         PetscCall(KSPGetOperators(deluxe_ctx->change[i], &C, NULL));
534         PetscCall(MatMatMult(C, Y, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &CY));
535         PetscCall(MatMatTransposeMult(CY, C, MAT_REUSE_MATRIX, PETSC_DEFAULT, &Y));
536         PetscCall(MatDestroy(&CY));
537         PetscCall(MatProductClear(Y)); /* clear internal matproduct structure of Y since CY is destroyed */
538       }
539       PetscCall(MatTranspose(Y, MAT_INPLACE_MATRIX, &Y));
540       deluxe_ctx->seq_mat[i] = Y;
541     }
542     cum += subset_size;
543     cum2 += subset_size * subset_size;
544   }
545   PetscCall(ISRestoreIndices(sub_schurs->is_Ej_all, &idxs));
546   PetscCall(MatSeqAIJRestoreArray(sub_schurs->S_Ej_all, &matdata));
547   PetscCall(MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_all, &matdata2));
548   if (pcbddc->deluxe_singlemat) {
549     deluxe_ctx->change         = NULL;
550     deluxe_ctx->change_with_qr = PETSC_FALSE;
551   }
552 
553   if (deluxe_ctx->change && !deluxe_ctx->change_with_qr) {
554     for (i = 0; i < deluxe_ctx->seq_n; i++) {
555       if (newsetup) {
556         PC pc;
557 
558         PetscCall(KSPGetPC(deluxe_ctx->change[i], &pc));
559         PetscCall(PCSetType(pc, PCLU));
560         PetscCall(KSPSetFromOptions(deluxe_ctx->change[i]));
561       }
562       PetscCall(KSPSetUp(deluxe_ctx->change[i]));
563     }
564   }
565   PetscFunctionReturn(0);
566 }
567