xref: /petsc/src/ksp/pc/impls/bddc/bddcscalingbasic.c (revision a69119a591a03a9d906b29c0a4e9802e4d7c9795)
1 #include <petsc/private/pcbddcimpl.h>
2 #include <petsc/private/pcbddcprivateimpl.h>
3 #include <petscblaslapack.h>
4 #include <../src/mat/impls/dense/seq/dense.h>
5 
6 /* prototypes for deluxe functions */
7 static PetscErrorCode PCBDDCScalingCreate_Deluxe(PC);
8 static PetscErrorCode PCBDDCScalingDestroy_Deluxe(PC);
9 static PetscErrorCode PCBDDCScalingSetUp_Deluxe(PC);
10 static PetscErrorCode PCBDDCScalingSetUp_Deluxe_Private(PC);
11 static PetscErrorCode PCBDDCScalingReset_Deluxe_Solvers(PCBDDCDeluxeScaling);
12 
13 static PetscErrorCode PCBDDCMatTransposeMatSolve_SeqDense(Mat A, Mat B, Mat X) {
14   Mat_SeqDense      *mat = (Mat_SeqDense *)A->data;
15   const PetscScalar *b;
16   PetscScalar       *x;
17   PetscInt           n;
18   PetscBLASInt       nrhs, info, m;
19   PetscBool          flg;
20 
21   PetscFunctionBegin;
22   PetscCall(PetscBLASIntCast(A->rmap->n, &m));
23   PetscCall(PetscObjectTypeCompareAny((PetscObject)B, &flg, MATSEQDENSE, MATMPIDENSE, NULL));
24   PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Matrix B must be MATDENSE matrix");
25   PetscCall(PetscObjectTypeCompareAny((PetscObject)X, &flg, MATSEQDENSE, MATMPIDENSE, NULL));
26   PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Matrix X must be MATDENSE matrix");
27 
28   PetscCall(MatGetSize(B, NULL, &n));
29   PetscCall(PetscBLASIntCast(n, &nrhs));
30   PetscCall(MatDenseGetArrayRead(B, &b));
31   PetscCall(MatDenseGetArray(X, &x));
32   PetscCall(PetscArraycpy(x, b, m * nrhs));
33   PetscCall(MatDenseRestoreArrayRead(B, &b));
34 
35   if (A->factortype == MAT_FACTOR_LU) {
36     PetscCallBLAS("LAPACKgetrs", LAPACKgetrs_("T", &m, &nrhs, mat->v, &mat->lda, mat->pivots, x, &m, &info));
37     PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_LIB, "GETRS - Bad solve");
38   } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Only LU factor supported");
39 
40   PetscCall(MatDenseRestoreArray(X, &x));
41   PetscCall(PetscLogFlops(nrhs * (2.0 * m * m - m)));
42   PetscFunctionReturn(0);
43 }
44 
45 static PetscErrorCode PCBDDCScalingExtension_Basic(PC pc, Vec local_interface_vector, Vec global_vector) {
46   PC_IS   *pcis   = (PC_IS *)pc->data;
47   PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
48 
49   PetscFunctionBegin;
50   /* Apply partition of unity */
51   PetscCall(VecPointwiseMult(pcbddc->work_scaling, pcis->D, local_interface_vector));
52   PetscCall(VecSet(global_vector, 0.0));
53   PetscCall(VecScatterBegin(pcis->global_to_B, pcbddc->work_scaling, global_vector, ADD_VALUES, SCATTER_REVERSE));
54   PetscCall(VecScatterEnd(pcis->global_to_B, pcbddc->work_scaling, global_vector, ADD_VALUES, SCATTER_REVERSE));
55   PetscFunctionReturn(0);
56 }
57 
58 static PetscErrorCode PCBDDCScalingExtension_Deluxe(PC pc, Vec x, Vec y) {
59   PC_IS              *pcis       = (PC_IS *)pc->data;
60   PC_BDDC            *pcbddc     = (PC_BDDC *)pc->data;
61   PCBDDCDeluxeScaling deluxe_ctx = pcbddc->deluxe_ctx;
62 
63   PetscFunctionBegin;
64   PetscCall(VecSet(pcbddc->work_scaling, 0.0));
65   PetscCall(VecSet(y, 0.0));
66   if (deluxe_ctx->n_simple) { /* scale deluxe vertices using diagonal scaling */
67     PetscInt           i;
68     const PetscScalar *array_x, *array_D;
69     PetscScalar       *array;
70     PetscCall(VecGetArrayRead(x, &array_x));
71     PetscCall(VecGetArrayRead(pcis->D, &array_D));
72     PetscCall(VecGetArray(pcbddc->work_scaling, &array));
73     for (i = 0; i < deluxe_ctx->n_simple; i++) array[deluxe_ctx->idx_simple_B[i]] = array_x[deluxe_ctx->idx_simple_B[i]] * array_D[deluxe_ctx->idx_simple_B[i]];
74     PetscCall(VecRestoreArray(pcbddc->work_scaling, &array));
75     PetscCall(VecRestoreArrayRead(pcis->D, &array_D));
76     PetscCall(VecRestoreArrayRead(x, &array_x));
77   }
78   /* sequential part : all problems and Schur applications collapsed into a single matrix vector multiplication or a matvec and a solve */
79   if (deluxe_ctx->seq_mat) {
80     PetscInt i;
81     for (i = 0; i < deluxe_ctx->seq_n; i++) {
82       if (deluxe_ctx->change) {
83         PetscCall(VecScatterBegin(deluxe_ctx->seq_scctx[i], x, deluxe_ctx->seq_work2[i], INSERT_VALUES, SCATTER_FORWARD));
84         PetscCall(VecScatterEnd(deluxe_ctx->seq_scctx[i], x, deluxe_ctx->seq_work2[i], INSERT_VALUES, SCATTER_FORWARD));
85         if (deluxe_ctx->change_with_qr) {
86           Mat change;
87 
88           PetscCall(KSPGetOperators(deluxe_ctx->change[i], &change, NULL));
89           PetscCall(MatMultTranspose(change, deluxe_ctx->seq_work2[i], deluxe_ctx->seq_work1[i]));
90         } else {
91           PetscCall(KSPSolve(deluxe_ctx->change[i], deluxe_ctx->seq_work2[i], deluxe_ctx->seq_work1[i]));
92         }
93       } else {
94         PetscCall(VecScatterBegin(deluxe_ctx->seq_scctx[i], x, deluxe_ctx->seq_work1[i], INSERT_VALUES, SCATTER_FORWARD));
95         PetscCall(VecScatterEnd(deluxe_ctx->seq_scctx[i], x, deluxe_ctx->seq_work1[i], INSERT_VALUES, SCATTER_FORWARD));
96       }
97       PetscCall(MatMultTranspose(deluxe_ctx->seq_mat[i], deluxe_ctx->seq_work1[i], deluxe_ctx->seq_work2[i]));
98       if (deluxe_ctx->seq_mat_inv_sum[i]) {
99         PetscScalar *x;
100 
101         PetscCall(VecGetArray(deluxe_ctx->seq_work2[i], &x));
102         PetscCall(VecPlaceArray(deluxe_ctx->seq_work1[i], x));
103         PetscCall(VecRestoreArray(deluxe_ctx->seq_work2[i], &x));
104         PetscCall(MatSolveTranspose(deluxe_ctx->seq_mat_inv_sum[i], deluxe_ctx->seq_work1[i], deluxe_ctx->seq_work2[i]));
105         PetscCall(VecResetArray(deluxe_ctx->seq_work1[i]));
106       }
107       if (deluxe_ctx->change) {
108         Mat change;
109 
110         PetscCall(KSPGetOperators(deluxe_ctx->change[i], &change, NULL));
111         PetscCall(MatMult(change, deluxe_ctx->seq_work2[i], deluxe_ctx->seq_work1[i]));
112         PetscCall(VecScatterBegin(deluxe_ctx->seq_scctx[i], deluxe_ctx->seq_work1[i], pcbddc->work_scaling, INSERT_VALUES, SCATTER_REVERSE));
113         PetscCall(VecScatterEnd(deluxe_ctx->seq_scctx[i], deluxe_ctx->seq_work1[i], pcbddc->work_scaling, INSERT_VALUES, SCATTER_REVERSE));
114       } else {
115         PetscCall(VecScatterBegin(deluxe_ctx->seq_scctx[i], deluxe_ctx->seq_work2[i], pcbddc->work_scaling, INSERT_VALUES, SCATTER_REVERSE));
116         PetscCall(VecScatterEnd(deluxe_ctx->seq_scctx[i], deluxe_ctx->seq_work2[i], pcbddc->work_scaling, INSERT_VALUES, SCATTER_REVERSE));
117       }
118     }
119   }
120   /* put local boundary part in global vector */
121   PetscCall(VecScatterBegin(pcis->global_to_B, pcbddc->work_scaling, y, ADD_VALUES, SCATTER_REVERSE));
122   PetscCall(VecScatterEnd(pcis->global_to_B, pcbddc->work_scaling, y, ADD_VALUES, SCATTER_REVERSE));
123   PetscFunctionReturn(0);
124 }
125 
126 PetscErrorCode PCBDDCScalingExtension(PC pc, Vec local_interface_vector, Vec global_vector) {
127   PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
128 
129   PetscFunctionBegin;
130   PetscValidHeaderSpecific(pc, PC_CLASSID, 1);
131   PetscValidHeaderSpecific(local_interface_vector, VEC_CLASSID, 2);
132   PetscValidHeaderSpecific(global_vector, VEC_CLASSID, 3);
133   PetscCheck(local_interface_vector != pcbddc->work_scaling, PETSC_COMM_SELF, PETSC_ERR_SUP, "Local vector cannot be pcbddc->work_scaling!");
134   PetscUseMethod(pc, "PCBDDCScalingExtension_C", (PC, Vec, Vec), (pc, local_interface_vector, global_vector));
135   PetscFunctionReturn(0);
136 }
137 
138 static PetscErrorCode PCBDDCScalingRestriction_Basic(PC pc, Vec global_vector, Vec local_interface_vector) {
139   PC_IS *pcis = (PC_IS *)pc->data;
140 
141   PetscFunctionBegin;
142   PetscCall(VecScatterBegin(pcis->global_to_B, global_vector, local_interface_vector, INSERT_VALUES, SCATTER_FORWARD));
143   PetscCall(VecScatterEnd(pcis->global_to_B, global_vector, local_interface_vector, INSERT_VALUES, SCATTER_FORWARD));
144   /* Apply partition of unity */
145   PetscCall(VecPointwiseMult(local_interface_vector, pcis->D, local_interface_vector));
146   PetscFunctionReturn(0);
147 }
148 
149 static PetscErrorCode PCBDDCScalingRestriction_Deluxe(PC pc, Vec x, Vec y) {
150   PC_IS              *pcis       = (PC_IS *)pc->data;
151   PC_BDDC            *pcbddc     = (PC_BDDC *)pc->data;
152   PCBDDCDeluxeScaling deluxe_ctx = pcbddc->deluxe_ctx;
153 
154   PetscFunctionBegin;
155   /* get local boundary part of global vector */
156   PetscCall(VecScatterBegin(pcis->global_to_B, x, y, INSERT_VALUES, SCATTER_FORWARD));
157   PetscCall(VecScatterEnd(pcis->global_to_B, x, y, INSERT_VALUES, SCATTER_FORWARD));
158   if (deluxe_ctx->n_simple) { /* scale deluxe vertices using diagonal scaling */
159     PetscInt           i;
160     PetscScalar       *array_y;
161     const PetscScalar *array_D;
162     PetscCall(VecGetArray(y, &array_y));
163     PetscCall(VecGetArrayRead(pcis->D, &array_D));
164     for (i = 0; i < deluxe_ctx->n_simple; i++) array_y[deluxe_ctx->idx_simple_B[i]] *= array_D[deluxe_ctx->idx_simple_B[i]];
165     PetscCall(VecRestoreArrayRead(pcis->D, &array_D));
166     PetscCall(VecRestoreArray(y, &array_y));
167   }
168   /* sequential part : all problems and Schur applications collapsed into a single matrix vector multiplication or a matvec and a solve */
169   if (deluxe_ctx->seq_mat) {
170     PetscInt i;
171     for (i = 0; i < deluxe_ctx->seq_n; i++) {
172       if (deluxe_ctx->change) {
173         Mat change;
174 
175         PetscCall(VecScatterBegin(deluxe_ctx->seq_scctx[i], y, deluxe_ctx->seq_work2[i], INSERT_VALUES, SCATTER_FORWARD));
176         PetscCall(VecScatterEnd(deluxe_ctx->seq_scctx[i], y, deluxe_ctx->seq_work2[i], INSERT_VALUES, SCATTER_FORWARD));
177         PetscCall(KSPGetOperators(deluxe_ctx->change[i], &change, NULL));
178         PetscCall(MatMultTranspose(change, deluxe_ctx->seq_work2[i], deluxe_ctx->seq_work1[i]));
179       } else {
180         PetscCall(VecScatterBegin(deluxe_ctx->seq_scctx[i], y, deluxe_ctx->seq_work1[i], INSERT_VALUES, SCATTER_FORWARD));
181         PetscCall(VecScatterEnd(deluxe_ctx->seq_scctx[i], y, deluxe_ctx->seq_work1[i], INSERT_VALUES, SCATTER_FORWARD));
182       }
183       if (deluxe_ctx->seq_mat_inv_sum[i]) {
184         PetscScalar *x;
185 
186         PetscCall(VecGetArray(deluxe_ctx->seq_work1[i], &x));
187         PetscCall(VecPlaceArray(deluxe_ctx->seq_work2[i], x));
188         PetscCall(VecRestoreArray(deluxe_ctx->seq_work1[i], &x));
189         PetscCall(MatSolve(deluxe_ctx->seq_mat_inv_sum[i], deluxe_ctx->seq_work1[i], deluxe_ctx->seq_work2[i]));
190         PetscCall(VecResetArray(deluxe_ctx->seq_work2[i]));
191       }
192       PetscCall(MatMult(deluxe_ctx->seq_mat[i], deluxe_ctx->seq_work1[i], deluxe_ctx->seq_work2[i]));
193       if (deluxe_ctx->change) {
194         if (deluxe_ctx->change_with_qr) {
195           Mat change;
196 
197           PetscCall(KSPGetOperators(deluxe_ctx->change[i], &change, NULL));
198           PetscCall(MatMult(change, deluxe_ctx->seq_work2[i], deluxe_ctx->seq_work1[i]));
199         } else {
200           PetscCall(KSPSolveTranspose(deluxe_ctx->change[i], deluxe_ctx->seq_work2[i], deluxe_ctx->seq_work1[i]));
201         }
202         PetscCall(VecScatterBegin(deluxe_ctx->seq_scctx[i], deluxe_ctx->seq_work1[i], y, INSERT_VALUES, SCATTER_REVERSE));
203         PetscCall(VecScatterEnd(deluxe_ctx->seq_scctx[i], deluxe_ctx->seq_work1[i], y, INSERT_VALUES, SCATTER_REVERSE));
204       } else {
205         PetscCall(VecScatterBegin(deluxe_ctx->seq_scctx[i], deluxe_ctx->seq_work2[i], y, INSERT_VALUES, SCATTER_REVERSE));
206         PetscCall(VecScatterEnd(deluxe_ctx->seq_scctx[i], deluxe_ctx->seq_work2[i], y, INSERT_VALUES, SCATTER_REVERSE));
207       }
208     }
209   }
210   PetscFunctionReturn(0);
211 }
212 
213 PetscErrorCode PCBDDCScalingRestriction(PC pc, Vec global_vector, Vec local_interface_vector) {
214   PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
215 
216   PetscFunctionBegin;
217   PetscValidHeaderSpecific(pc, PC_CLASSID, 1);
218   PetscValidHeaderSpecific(global_vector, VEC_CLASSID, 2);
219   PetscValidHeaderSpecific(local_interface_vector, VEC_CLASSID, 3);
220   PetscCheck(local_interface_vector != pcbddc->work_scaling, PETSC_COMM_SELF, PETSC_ERR_SUP, "Local vector cannot be pcbddc->work_scaling!");
221   PetscUseMethod(pc, "PCBDDCScalingRestriction_C", (PC, Vec, Vec), (pc, global_vector, local_interface_vector));
222   PetscFunctionReturn(0);
223 }
224 
225 PetscErrorCode PCBDDCScalingSetUp(PC pc) {
226   PC_IS   *pcis   = (PC_IS *)pc->data;
227   PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
228 
229   PetscFunctionBegin;
230   PetscValidHeaderSpecific(pc, PC_CLASSID, 1);
231   PetscCall(PetscLogEventBegin(PC_BDDC_Scaling[pcbddc->current_level], pc, 0, 0, 0));
232   /* create work vector for the operator */
233   PetscCall(VecDestroy(&pcbddc->work_scaling));
234   PetscCall(VecDuplicate(pcis->vec1_B, &pcbddc->work_scaling));
235   /* always rebuild pcis->D */
236   if (pcis->use_stiffness_scaling) {
237     PetscScalar *a;
238     PetscInt     i, n;
239 
240     PetscCall(MatGetDiagonal(pcbddc->local_mat, pcis->vec1_N));
241     PetscCall(VecScatterBegin(pcis->N_to_B, pcis->vec1_N, pcis->D, INSERT_VALUES, SCATTER_FORWARD));
242     PetscCall(VecScatterEnd(pcis->N_to_B, pcis->vec1_N, pcis->D, INSERT_VALUES, SCATTER_FORWARD));
243     PetscCall(VecAbs(pcis->D));
244     PetscCall(VecGetLocalSize(pcis->D, &n));
245     PetscCall(VecGetArray(pcis->D, &a));
246     for (i = 0; i < n; i++)
247       if (PetscAbsScalar(a[i]) < PETSC_SMALL) a[i] = 1.0;
248     PetscCall(VecRestoreArray(pcis->D, &a));
249   }
250   PetscCall(VecSet(pcis->vec1_global, 0.0));
251   PetscCall(VecScatterBegin(pcis->global_to_B, pcis->D, pcis->vec1_global, ADD_VALUES, SCATTER_REVERSE));
252   PetscCall(VecScatterEnd(pcis->global_to_B, pcis->D, pcis->vec1_global, ADD_VALUES, SCATTER_REVERSE));
253   PetscCall(VecScatterBegin(pcis->global_to_B, pcis->vec1_global, pcis->vec1_B, INSERT_VALUES, SCATTER_FORWARD));
254   PetscCall(VecScatterEnd(pcis->global_to_B, pcis->vec1_global, pcis->vec1_B, INSERT_VALUES, SCATTER_FORWARD));
255   PetscCall(VecPointwiseDivide(pcis->D, pcis->D, pcis->vec1_B));
256   /* now setup */
257   if (pcbddc->use_deluxe_scaling) {
258     if (!pcbddc->deluxe_ctx) PetscCall(PCBDDCScalingCreate_Deluxe(pc));
259     PetscCall(PCBDDCScalingSetUp_Deluxe(pc));
260     PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCBDDCScalingRestriction_C", PCBDDCScalingRestriction_Deluxe));
261     PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCBDDCScalingExtension_C", PCBDDCScalingExtension_Deluxe));
262   } else {
263     PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCBDDCScalingRestriction_C", PCBDDCScalingRestriction_Basic));
264     PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCBDDCScalingExtension_C", PCBDDCScalingExtension_Basic));
265   }
266   PetscCall(PetscLogEventEnd(PC_BDDC_Scaling[pcbddc->current_level], pc, 0, 0, 0));
267 
268   /* test */
269   if (pcbddc->dbg_flag) {
270     Mat         B0_B  = NULL;
271     Vec         B0_Bv = NULL, B0_Bv2 = NULL;
272     Vec         vec2_global;
273     PetscViewer viewer = pcbddc->dbg_viewer;
274     PetscReal   error;
275 
276     /* extension -> from local to parallel */
277     PetscCall(VecSet(pcis->vec1_global, 0.0));
278     PetscCall(VecSetRandom(pcis->vec1_B, NULL));
279     PetscCall(VecScatterBegin(pcis->global_to_B, pcis->vec1_B, pcis->vec1_global, ADD_VALUES, SCATTER_REVERSE));
280     PetscCall(VecScatterEnd(pcis->global_to_B, pcis->vec1_B, pcis->vec1_global, ADD_VALUES, SCATTER_REVERSE));
281     PetscCall(VecDuplicate(pcis->vec1_global, &vec2_global));
282     PetscCall(VecCopy(pcis->vec1_global, vec2_global));
283     PetscCall(VecScatterBegin(pcis->global_to_B, pcis->vec1_global, pcis->vec1_B, INSERT_VALUES, SCATTER_FORWARD));
284     PetscCall(VecScatterEnd(pcis->global_to_B, pcis->vec1_global, pcis->vec1_B, INSERT_VALUES, SCATTER_FORWARD));
285     if (pcbddc->benign_n) {
286       IS is_dummy;
287 
288       PetscCall(ISCreateStride(PETSC_COMM_SELF, pcbddc->benign_n, 0, 1, &is_dummy));
289       PetscCall(MatCreateSubMatrix(pcbddc->benign_B0, is_dummy, pcis->is_B_local, MAT_INITIAL_MATRIX, &B0_B));
290       PetscCall(ISDestroy(&is_dummy));
291       PetscCall(MatCreateVecs(B0_B, NULL, &B0_Bv));
292       PetscCall(VecDuplicate(B0_Bv, &B0_Bv2));
293       PetscCall(MatMult(B0_B, pcis->vec1_B, B0_Bv));
294     }
295     PetscCall(PCBDDCScalingExtension(pc, pcis->vec1_B, pcis->vec1_global));
296     if (pcbddc->benign_saddle_point) {
297       PetscReal errorl = 0.;
298       PetscCall(VecScatterBegin(pcis->global_to_B, pcis->vec1_global, pcis->vec1_B, INSERT_VALUES, SCATTER_FORWARD));
299       PetscCall(VecScatterEnd(pcis->global_to_B, pcis->vec1_global, pcis->vec1_B, INSERT_VALUES, SCATTER_FORWARD));
300       if (pcbddc->benign_n) {
301         PetscCall(MatMult(B0_B, pcis->vec1_B, B0_Bv2));
302         PetscCall(VecAXPY(B0_Bv, -1.0, B0_Bv2));
303         PetscCall(VecNorm(B0_Bv, NORM_INFINITY, &errorl));
304       }
305       PetscCallMPI(MPI_Allreduce(&errorl, &error, 1, MPIU_REAL, MPI_SUM, PetscObjectComm((PetscObject)pc)));
306       PetscCall(PetscViewerASCIIPrintf(viewer, "Error benign extension %1.14e\n", (double)error));
307     }
308     PetscCall(VecAXPY(pcis->vec1_global, -1.0, vec2_global));
309     PetscCall(VecNorm(pcis->vec1_global, NORM_INFINITY, &error));
310     PetscCall(PetscViewerASCIIPrintf(viewer, "Error scaling extension %1.14e\n", (double)error));
311     PetscCall(VecDestroy(&vec2_global));
312 
313     /* restriction -> from parallel to local */
314     PetscCall(VecSet(pcis->vec1_global, 0.0));
315     PetscCall(VecSetRandom(pcis->vec1_B, NULL));
316     PetscCall(VecScatterBegin(pcis->global_to_B, pcis->vec1_B, pcis->vec1_global, ADD_VALUES, SCATTER_REVERSE));
317     PetscCall(VecScatterEnd(pcis->global_to_B, pcis->vec1_B, pcis->vec1_global, ADD_VALUES, SCATTER_REVERSE));
318     PetscCall(PCBDDCScalingRestriction(pc, pcis->vec1_global, pcis->vec1_B));
319     PetscCall(VecScale(pcis->vec1_B, -1.0));
320     PetscCall(VecScatterBegin(pcis->global_to_B, pcis->vec1_B, pcis->vec1_global, ADD_VALUES, SCATTER_REVERSE));
321     PetscCall(VecScatterEnd(pcis->global_to_B, pcis->vec1_B, pcis->vec1_global, ADD_VALUES, SCATTER_REVERSE));
322     PetscCall(VecNorm(pcis->vec1_global, NORM_INFINITY, &error));
323     PetscCall(PetscViewerASCIIPrintf(viewer, "Error scaling restriction %1.14e\n", (double)error));
324     PetscCall(MatDestroy(&B0_B));
325     PetscCall(VecDestroy(&B0_Bv));
326     PetscCall(VecDestroy(&B0_Bv2));
327   }
328   PetscFunctionReturn(0);
329 }
330 
331 PetscErrorCode PCBDDCScalingDestroy(PC pc) {
332   PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
333 
334   PetscFunctionBegin;
335   if (pcbddc->deluxe_ctx) PetscCall(PCBDDCScalingDestroy_Deluxe(pc));
336   PetscCall(VecDestroy(&pcbddc->work_scaling));
337   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCBDDCScalingRestriction_C", NULL));
338   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCBDDCScalingExtension_C", NULL));
339   PetscFunctionReturn(0);
340 }
341 
342 static PetscErrorCode PCBDDCScalingCreate_Deluxe(PC pc) {
343   PC_BDDC            *pcbddc = (PC_BDDC *)pc->data;
344   PCBDDCDeluxeScaling deluxe_ctx;
345 
346   PetscFunctionBegin;
347   PetscCall(PetscNew(&deluxe_ctx));
348   pcbddc->deluxe_ctx = deluxe_ctx;
349   PetscFunctionReturn(0);
350 }
351 
352 static PetscErrorCode PCBDDCScalingDestroy_Deluxe(PC pc) {
353   PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
354 
355   PetscFunctionBegin;
356   PetscCall(PCBDDCScalingReset_Deluxe_Solvers(pcbddc->deluxe_ctx));
357   PetscCall(PetscFree(pcbddc->deluxe_ctx));
358   PetscFunctionReturn(0);
359 }
360 
361 static PetscErrorCode PCBDDCScalingReset_Deluxe_Solvers(PCBDDCDeluxeScaling deluxe_ctx) {
362   PetscInt i;
363 
364   PetscFunctionBegin;
365   PetscCall(PetscFree(deluxe_ctx->idx_simple_B));
366   deluxe_ctx->n_simple = 0;
367   for (i = 0; i < deluxe_ctx->seq_n; i++) {
368     PetscCall(VecScatterDestroy(&deluxe_ctx->seq_scctx[i]));
369     PetscCall(VecDestroy(&deluxe_ctx->seq_work1[i]));
370     PetscCall(VecDestroy(&deluxe_ctx->seq_work2[i]));
371     PetscCall(MatDestroy(&deluxe_ctx->seq_mat[i]));
372     PetscCall(MatDestroy(&deluxe_ctx->seq_mat_inv_sum[i]));
373   }
374   PetscCall(PetscFree5(deluxe_ctx->seq_scctx, deluxe_ctx->seq_work1, deluxe_ctx->seq_work2, deluxe_ctx->seq_mat, deluxe_ctx->seq_mat_inv_sum));
375   PetscCall(PetscFree(deluxe_ctx->workspace));
376   deluxe_ctx->seq_n = 0;
377   PetscFunctionReturn(0);
378 }
379 
380 static PetscErrorCode PCBDDCScalingSetUp_Deluxe(PC pc) {
381   PC_IS              *pcis       = (PC_IS *)pc->data;
382   PC_BDDC            *pcbddc     = (PC_BDDC *)pc->data;
383   PCBDDCDeluxeScaling deluxe_ctx = pcbddc->deluxe_ctx;
384   PCBDDCSubSchurs     sub_schurs = pcbddc->sub_schurs;
385 
386   PetscFunctionBegin;
387   /* reset data structures if the topology has changed */
388   if (pcbddc->recompute_topography) PetscCall(PCBDDCScalingReset_Deluxe_Solvers(deluxe_ctx));
389 
390   /* Compute data structures to solve sequential problems */
391   PetscCall(PCBDDCScalingSetUp_Deluxe_Private(pc));
392 
393   /* diagonal scaling on interface dofs not contained in cc */
394   if (sub_schurs->is_vertices || sub_schurs->is_dir) {
395     PetscInt n_com, n_dir;
396     n_com = 0;
397     if (sub_schurs->is_vertices) PetscCall(ISGetLocalSize(sub_schurs->is_vertices, &n_com));
398     n_dir = 0;
399     if (sub_schurs->is_dir) PetscCall(ISGetLocalSize(sub_schurs->is_dir, &n_dir));
400     if (!deluxe_ctx->n_simple) {
401       deluxe_ctx->n_simple = n_dir + n_com;
402       PetscCall(PetscMalloc1(deluxe_ctx->n_simple, &deluxe_ctx->idx_simple_B));
403       if (sub_schurs->is_vertices) {
404         PetscInt        nmap;
405         const PetscInt *idxs;
406 
407         PetscCall(ISGetIndices(sub_schurs->is_vertices, &idxs));
408         PetscCall(ISGlobalToLocalMappingApply(pcis->BtoNmap, IS_GTOLM_DROP, n_com, idxs, &nmap, deluxe_ctx->idx_simple_B));
409         PetscCheck(nmap == n_com, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Error when mapping simply scaled dofs (is_vertices)! %" PetscInt_FMT " != %" PetscInt_FMT, nmap, n_com);
410         PetscCall(ISRestoreIndices(sub_schurs->is_vertices, &idxs));
411       }
412       if (sub_schurs->is_dir) {
413         PetscInt        nmap;
414         const PetscInt *idxs;
415 
416         PetscCall(ISGetIndices(sub_schurs->is_dir, &idxs));
417         PetscCall(ISGlobalToLocalMappingApply(pcis->BtoNmap, IS_GTOLM_DROP, n_dir, idxs, &nmap, deluxe_ctx->idx_simple_B + n_com));
418         PetscCheck(nmap == n_dir, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Error when mapping simply scaled dofs (sub_schurs->is_dir)! %" PetscInt_FMT " != %" PetscInt_FMT, nmap, n_dir);
419         PetscCall(ISRestoreIndices(sub_schurs->is_dir, &idxs));
420       }
421       PetscCall(PetscSortInt(deluxe_ctx->n_simple, deluxe_ctx->idx_simple_B));
422     } else {
423       PetscCheck(deluxe_ctx->n_simple == n_dir + n_com, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Number of simply scaled dofs %" PetscInt_FMT " is different from the previous one computed %" PetscInt_FMT, n_dir + n_com, deluxe_ctx->n_simple);
424     }
425   } else {
426     deluxe_ctx->n_simple     = 0;
427     deluxe_ctx->idx_simple_B = NULL;
428   }
429   PetscFunctionReturn(0);
430 }
431 
432 static PetscErrorCode PCBDDCScalingSetUp_Deluxe_Private(PC pc) {
433   PC_BDDC            *pcbddc     = (PC_BDDC *)pc->data;
434   PCBDDCDeluxeScaling deluxe_ctx = pcbddc->deluxe_ctx;
435   PCBDDCSubSchurs     sub_schurs = pcbddc->sub_schurs;
436   PetscScalar        *matdata, *matdata2;
437   PetscInt            i, max_subset_size, cum, cum2;
438   const PetscInt     *idxs;
439   PetscBool           newsetup = PETSC_FALSE;
440 
441   PetscFunctionBegin;
442   PetscCheck(sub_schurs, PetscObjectComm((PetscObject)pc), PETSC_ERR_PLIB, "Missing PCBDDCSubSchurs");
443   if (!sub_schurs->n_subs) PetscFunctionReturn(0);
444 
445   /* Allocate arrays for subproblems */
446   if (!deluxe_ctx->seq_n) {
447     deluxe_ctx->seq_n = sub_schurs->n_subs;
448     PetscCall(PetscCalloc5(deluxe_ctx->seq_n, &deluxe_ctx->seq_scctx, deluxe_ctx->seq_n, &deluxe_ctx->seq_work1, deluxe_ctx->seq_n, &deluxe_ctx->seq_work2, deluxe_ctx->seq_n, &deluxe_ctx->seq_mat, deluxe_ctx->seq_n, &deluxe_ctx->seq_mat_inv_sum));
449     newsetup = PETSC_TRUE;
450   } else PetscCheck(deluxe_ctx->seq_n == sub_schurs->n_subs, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Number of deluxe subproblems %" PetscInt_FMT " is different from the sub_schurs %" PetscInt_FMT, deluxe_ctx->seq_n, sub_schurs->n_subs);
451 
452   /* the change of basis is just a reference to sub_schurs->change (if any) */
453   deluxe_ctx->change         = sub_schurs->change;
454   deluxe_ctx->change_with_qr = sub_schurs->change_with_qr;
455 
456   /* Create objects for deluxe */
457   max_subset_size = 0;
458   for (i = 0; i < sub_schurs->n_subs; i++) {
459     PetscInt subset_size;
460     PetscCall(ISGetLocalSize(sub_schurs->is_subs[i], &subset_size));
461     max_subset_size = PetscMax(subset_size, max_subset_size);
462   }
463   if (newsetup) PetscCall(PetscMalloc1(2 * max_subset_size, &deluxe_ctx->workspace));
464   cum = cum2 = 0;
465   PetscCall(ISGetIndices(sub_schurs->is_Ej_all, &idxs));
466   PetscCall(MatSeqAIJGetArray(sub_schurs->S_Ej_all, &matdata));
467   PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_all, &matdata2));
468   for (i = 0; i < deluxe_ctx->seq_n; i++) {
469     PetscInt subset_size;
470 
471     PetscCall(ISGetLocalSize(sub_schurs->is_subs[i], &subset_size));
472     if (newsetup) {
473       IS sub;
474       /* work vectors */
475       PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, subset_size, deluxe_ctx->workspace, &deluxe_ctx->seq_work1[i]));
476       PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, subset_size, deluxe_ctx->workspace + subset_size, &deluxe_ctx->seq_work2[i]));
477 
478       /* scatters */
479       PetscCall(ISCreateGeneral(PETSC_COMM_SELF, subset_size, idxs + cum, PETSC_COPY_VALUES, &sub));
480       PetscCall(VecScatterCreate(pcbddc->work_scaling, sub, deluxe_ctx->seq_work1[i], NULL, &deluxe_ctx->seq_scctx[i]));
481       PetscCall(ISDestroy(&sub));
482     }
483 
484     /* S_E_j */
485     PetscCall(MatDestroy(&deluxe_ctx->seq_mat[i]));
486     PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, subset_size, subset_size, matdata + cum2, &deluxe_ctx->seq_mat[i]));
487 
488     /* \sum_k S^k_E_j */
489     PetscCall(MatDestroy(&deluxe_ctx->seq_mat_inv_sum[i]));
490     PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, subset_size, subset_size, matdata2 + cum2, &deluxe_ctx->seq_mat_inv_sum[i]));
491     PetscCall(MatSetOption(deluxe_ctx->seq_mat_inv_sum[i], MAT_SPD, sub_schurs->is_posdef));
492     PetscCall(MatSetOption(deluxe_ctx->seq_mat_inv_sum[i], MAT_HERMITIAN, sub_schurs->is_hermitian));
493     if (sub_schurs->is_hermitian) {
494       PetscCall(MatCholeskyFactor(deluxe_ctx->seq_mat_inv_sum[i], NULL, NULL));
495     } else {
496       PetscCall(MatLUFactor(deluxe_ctx->seq_mat_inv_sum[i], NULL, NULL, NULL));
497     }
498     if (pcbddc->deluxe_singlemat) {
499       Mat X, Y;
500       if (!sub_schurs->is_hermitian) {
501         PetscCall(MatTranspose(deluxe_ctx->seq_mat[i], MAT_INITIAL_MATRIX, &X));
502       } else {
503         PetscCall(PetscObjectReference((PetscObject)deluxe_ctx->seq_mat[i]));
504         X = deluxe_ctx->seq_mat[i];
505       }
506       PetscCall(MatDuplicate(X, MAT_DO_NOT_COPY_VALUES, &Y));
507       if (!sub_schurs->is_hermitian) {
508         PetscCall(PCBDDCMatTransposeMatSolve_SeqDense(deluxe_ctx->seq_mat_inv_sum[i], X, Y));
509       } else {
510         PetscCall(MatMatSolve(deluxe_ctx->seq_mat_inv_sum[i], X, Y));
511       }
512 
513       PetscCall(MatDestroy(&deluxe_ctx->seq_mat_inv_sum[i]));
514       PetscCall(MatDestroy(&deluxe_ctx->seq_mat[i]));
515       PetscCall(MatDestroy(&X));
516       if (deluxe_ctx->change) {
517         Mat C, CY;
518         PetscCheck(deluxe_ctx->change_with_qr, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only QR based change of basis");
519         PetscCall(KSPGetOperators(deluxe_ctx->change[i], &C, NULL));
520         PetscCall(MatMatMult(C, Y, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &CY));
521         PetscCall(MatMatTransposeMult(CY, C, MAT_REUSE_MATRIX, PETSC_DEFAULT, &Y));
522         PetscCall(MatDestroy(&CY));
523         PetscCall(MatProductClear(Y)); /* clear internal matproduct structure of Y since CY is destroyed */
524       }
525       PetscCall(MatTranspose(Y, MAT_INPLACE_MATRIX, &Y));
526       deluxe_ctx->seq_mat[i] = Y;
527     }
528     cum += subset_size;
529     cum2 += subset_size * subset_size;
530   }
531   PetscCall(ISRestoreIndices(sub_schurs->is_Ej_all, &idxs));
532   PetscCall(MatSeqAIJRestoreArray(sub_schurs->S_Ej_all, &matdata));
533   PetscCall(MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_all, &matdata2));
534   if (pcbddc->deluxe_singlemat) {
535     deluxe_ctx->change         = NULL;
536     deluxe_ctx->change_with_qr = PETSC_FALSE;
537   }
538 
539   if (deluxe_ctx->change && !deluxe_ctx->change_with_qr) {
540     for (i = 0; i < deluxe_ctx->seq_n; i++) {
541       if (newsetup) {
542         PC pc;
543 
544         PetscCall(KSPGetPC(deluxe_ctx->change[i], &pc));
545         PetscCall(PCSetType(pc, PCLU));
546         PetscCall(KSPSetFromOptions(deluxe_ctx->change[i]));
547       }
548       PetscCall(KSPSetUp(deluxe_ctx->change[i]));
549     }
550   }
551   PetscFunctionReturn(0);
552 }
553