xref: /petsc/src/ksp/pc/impls/bddc/bddcscalingbasic.c (revision 57d508425293f0bb93f59574d14951d8faac9af8)
1 #include <petsc/private/pcbddcimpl.h>
2 #include <petsc/private/pcbddcprivateimpl.h>
3 #include <petscblaslapack.h>
4 #include <../src/mat/impls/dense/seq/dense.h>
5 
6 /* prototypes for deluxe functions */
7 static PetscErrorCode PCBDDCScalingCreate_Deluxe(PC);
8 static PetscErrorCode PCBDDCScalingDestroy_Deluxe(PC);
9 static PetscErrorCode PCBDDCScalingSetUp_Deluxe(PC);
10 static PetscErrorCode PCBDDCScalingSetUp_Deluxe_Private(PC);
11 static PetscErrorCode PCBDDCScalingReset_Deluxe_Solvers(PCBDDCDeluxeScaling);
12 
13 static PetscErrorCode PCBDDCMatTransposeMatSolve_SeqDense(Mat A, Mat B, Mat X)
14 {
15   Mat_SeqDense      *mat = (Mat_SeqDense *)A->data;
16   const PetscScalar *b;
17   PetscScalar       *x;
18   PetscInt           n;
19   PetscBLASInt       nrhs, info, m;
20   PetscBool          flg;
21 
22   PetscFunctionBegin;
23   PetscCall(PetscBLASIntCast(A->rmap->n, &m));
24   PetscCall(PetscObjectTypeCompareAny((PetscObject)B, &flg, MATSEQDENSE, MATMPIDENSE, NULL));
25   PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Matrix B must be MATDENSE matrix");
26   PetscCall(PetscObjectTypeCompareAny((PetscObject)X, &flg, MATSEQDENSE, MATMPIDENSE, NULL));
27   PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Matrix X must be MATDENSE matrix");
28 
29   PetscCall(MatGetSize(B, NULL, &n));
30   PetscCall(PetscBLASIntCast(n, &nrhs));
31   PetscCall(MatDenseGetArrayRead(B, &b));
32   PetscCall(MatDenseGetArray(X, &x));
33   PetscCall(PetscArraycpy(x, b, m * nrhs));
34   PetscCall(MatDenseRestoreArrayRead(B, &b));
35 
36   PetscCheck(A->factortype == MAT_FACTOR_LU, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only LU factor supported");
37   PetscCallBLAS("LAPACKgetrs", LAPACKgetrs_("T", &m, &nrhs, mat->v, &mat->lda, mat->pivots, x, &m, &info));
38   PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_LIB, "GETRS - Bad solve");
39 
40   PetscCall(MatDenseRestoreArray(X, &x));
41   PetscCall(PetscLogFlops(nrhs * (2.0 * m * m - m)));
42   PetscFunctionReturn(PETSC_SUCCESS);
43 }
44 
45 static PetscErrorCode PCBDDCScalingExtension_Basic(PC pc, Vec local_interface_vector, Vec global_vector)
46 {
47   PC_IS   *pcis   = (PC_IS *)pc->data;
48   PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
49 
50   PetscFunctionBegin;
51   /* Apply partition of unity */
52   PetscCall(VecPointwiseMult(pcbddc->work_scaling, pcis->D, local_interface_vector));
53   PetscCall(VecSet(global_vector, 0.0));
54   PetscCall(VecScatterBegin(pcis->global_to_B, pcbddc->work_scaling, global_vector, ADD_VALUES, SCATTER_REVERSE));
55   PetscCall(VecScatterEnd(pcis->global_to_B, pcbddc->work_scaling, global_vector, ADD_VALUES, SCATTER_REVERSE));
56   PetscFunctionReturn(PETSC_SUCCESS);
57 }
58 
59 static PetscErrorCode PCBDDCScalingExtension_Deluxe(PC pc, Vec x, Vec y)
60 {
61   PC_IS              *pcis       = (PC_IS *)pc->data;
62   PC_BDDC            *pcbddc     = (PC_BDDC *)pc->data;
63   PCBDDCDeluxeScaling deluxe_ctx = pcbddc->deluxe_ctx;
64 
65   PetscFunctionBegin;
66   PetscCall(VecSet(pcbddc->work_scaling, 0.0));
67   PetscCall(VecSet(y, 0.0));
68   if (deluxe_ctx->n_simple) { /* scale deluxe vertices using diagonal scaling */
69     PetscInt           i;
70     const PetscScalar *array_x, *array_D;
71     PetscScalar       *array;
72     PetscCall(VecGetArrayRead(x, &array_x));
73     PetscCall(VecGetArrayRead(pcis->D, &array_D));
74     PetscCall(VecGetArray(pcbddc->work_scaling, &array));
75     for (i = 0; i < deluxe_ctx->n_simple; i++) array[deluxe_ctx->idx_simple_B[i]] = array_x[deluxe_ctx->idx_simple_B[i]] * array_D[deluxe_ctx->idx_simple_B[i]];
76     PetscCall(VecRestoreArray(pcbddc->work_scaling, &array));
77     PetscCall(VecRestoreArrayRead(pcis->D, &array_D));
78     PetscCall(VecRestoreArrayRead(x, &array_x));
79   }
80   /* sequential part : all problems and Schur applications collapsed into a single matrix-vector multiplication or a matvec and a solve */
81   if (deluxe_ctx->seq_mat) {
82     PetscInt i;
83     for (i = 0; i < deluxe_ctx->seq_n; i++) {
84       if (deluxe_ctx->change) {
85         PetscCall(VecScatterBegin(deluxe_ctx->seq_scctx[i], x, deluxe_ctx->seq_work2[i], INSERT_VALUES, SCATTER_FORWARD));
86         PetscCall(VecScatterEnd(deluxe_ctx->seq_scctx[i], x, deluxe_ctx->seq_work2[i], INSERT_VALUES, SCATTER_FORWARD));
87         if (deluxe_ctx->change_with_qr) {
88           Mat change;
89 
90           PetscCall(KSPGetOperators(deluxe_ctx->change[i], &change, NULL));
91           PetscCall(MatMultTranspose(change, deluxe_ctx->seq_work2[i], deluxe_ctx->seq_work1[i]));
92         } else {
93           PetscCall(KSPSolve(deluxe_ctx->change[i], deluxe_ctx->seq_work2[i], deluxe_ctx->seq_work1[i]));
94         }
95       } else {
96         PetscCall(VecScatterBegin(deluxe_ctx->seq_scctx[i], x, deluxe_ctx->seq_work1[i], INSERT_VALUES, SCATTER_FORWARD));
97         PetscCall(VecScatterEnd(deluxe_ctx->seq_scctx[i], x, deluxe_ctx->seq_work1[i], INSERT_VALUES, SCATTER_FORWARD));
98       }
99       PetscCall(MatMultTranspose(deluxe_ctx->seq_mat[i], deluxe_ctx->seq_work1[i], deluxe_ctx->seq_work2[i]));
100       if (deluxe_ctx->seq_mat_inv_sum[i]) {
101         PetscScalar *x;
102 
103         PetscCall(VecGetArray(deluxe_ctx->seq_work2[i], &x));
104         PetscCall(VecPlaceArray(deluxe_ctx->seq_work1[i], x));
105         PetscCall(VecRestoreArray(deluxe_ctx->seq_work2[i], &x));
106         PetscCall(MatSolveTranspose(deluxe_ctx->seq_mat_inv_sum[i], deluxe_ctx->seq_work1[i], deluxe_ctx->seq_work2[i]));
107         PetscCall(VecResetArray(deluxe_ctx->seq_work1[i]));
108       }
109       if (deluxe_ctx->change) {
110         Mat change;
111 
112         PetscCall(KSPGetOperators(deluxe_ctx->change[i], &change, NULL));
113         PetscCall(MatMult(change, deluxe_ctx->seq_work2[i], deluxe_ctx->seq_work1[i]));
114         PetscCall(VecScatterBegin(deluxe_ctx->seq_scctx[i], deluxe_ctx->seq_work1[i], pcbddc->work_scaling, INSERT_VALUES, SCATTER_REVERSE));
115         PetscCall(VecScatterEnd(deluxe_ctx->seq_scctx[i], deluxe_ctx->seq_work1[i], pcbddc->work_scaling, INSERT_VALUES, SCATTER_REVERSE));
116       } else {
117         PetscCall(VecScatterBegin(deluxe_ctx->seq_scctx[i], deluxe_ctx->seq_work2[i], pcbddc->work_scaling, INSERT_VALUES, SCATTER_REVERSE));
118         PetscCall(VecScatterEnd(deluxe_ctx->seq_scctx[i], deluxe_ctx->seq_work2[i], pcbddc->work_scaling, INSERT_VALUES, SCATTER_REVERSE));
119       }
120     }
121   }
122   /* put local boundary part in global vector */
123   PetscCall(VecScatterBegin(pcis->global_to_B, pcbddc->work_scaling, y, ADD_VALUES, SCATTER_REVERSE));
124   PetscCall(VecScatterEnd(pcis->global_to_B, pcbddc->work_scaling, y, ADD_VALUES, SCATTER_REVERSE));
125   PetscFunctionReturn(PETSC_SUCCESS);
126 }
127 
128 PetscErrorCode PCBDDCScalingExtension(PC pc, Vec local_interface_vector, Vec global_vector)
129 {
130   PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
131 
132   PetscFunctionBegin;
133   PetscValidHeaderSpecific(pc, PC_CLASSID, 1);
134   PetscValidHeaderSpecific(local_interface_vector, VEC_CLASSID, 2);
135   PetscValidHeaderSpecific(global_vector, VEC_CLASSID, 3);
136   PetscCheck(local_interface_vector != pcbddc->work_scaling, PETSC_COMM_SELF, PETSC_ERR_SUP, "Local vector cannot be pcbddc->work_scaling!");
137   PetscUseMethod(pc, "PCBDDCScalingExtension_C", (PC, Vec, Vec), (pc, local_interface_vector, global_vector));
138   PetscFunctionReturn(PETSC_SUCCESS);
139 }
140 
141 static PetscErrorCode PCBDDCScalingRestriction_Basic(PC pc, Vec global_vector, Vec local_interface_vector)
142 {
143   PC_IS *pcis = (PC_IS *)pc->data;
144 
145   PetscFunctionBegin;
146   PetscCall(VecScatterBegin(pcis->global_to_B, global_vector, local_interface_vector, INSERT_VALUES, SCATTER_FORWARD));
147   PetscCall(VecScatterEnd(pcis->global_to_B, global_vector, local_interface_vector, INSERT_VALUES, SCATTER_FORWARD));
148   /* Apply partition of unity */
149   PetscCall(VecPointwiseMult(local_interface_vector, pcis->D, local_interface_vector));
150   PetscFunctionReturn(PETSC_SUCCESS);
151 }
152 
153 static PetscErrorCode PCBDDCScalingRestriction_Deluxe(PC pc, Vec x, Vec y)
154 {
155   PC_IS              *pcis       = (PC_IS *)pc->data;
156   PC_BDDC            *pcbddc     = (PC_BDDC *)pc->data;
157   PCBDDCDeluxeScaling deluxe_ctx = pcbddc->deluxe_ctx;
158 
159   PetscFunctionBegin;
160   /* get local boundary part of global vector */
161   PetscCall(VecScatterBegin(pcis->global_to_B, x, y, INSERT_VALUES, SCATTER_FORWARD));
162   PetscCall(VecScatterEnd(pcis->global_to_B, x, y, INSERT_VALUES, SCATTER_FORWARD));
163   if (deluxe_ctx->n_simple) { /* scale deluxe vertices using diagonal scaling */
164     PetscInt           i;
165     PetscScalar       *array_y;
166     const PetscScalar *array_D;
167     PetscCall(VecGetArray(y, &array_y));
168     PetscCall(VecGetArrayRead(pcis->D, &array_D));
169     for (i = 0; i < deluxe_ctx->n_simple; i++) array_y[deluxe_ctx->idx_simple_B[i]] *= array_D[deluxe_ctx->idx_simple_B[i]];
170     PetscCall(VecRestoreArrayRead(pcis->D, &array_D));
171     PetscCall(VecRestoreArray(y, &array_y));
172   }
173   /* sequential part : all problems and Schur applications collapsed into a single matrix-vector multiplication or a matvec and a solve */
174   if (deluxe_ctx->seq_mat) {
175     PetscInt i;
176     for (i = 0; i < deluxe_ctx->seq_n; i++) {
177       if (deluxe_ctx->change) {
178         Mat change;
179 
180         PetscCall(VecScatterBegin(deluxe_ctx->seq_scctx[i], y, deluxe_ctx->seq_work2[i], INSERT_VALUES, SCATTER_FORWARD));
181         PetscCall(VecScatterEnd(deluxe_ctx->seq_scctx[i], y, deluxe_ctx->seq_work2[i], INSERT_VALUES, SCATTER_FORWARD));
182         PetscCall(KSPGetOperators(deluxe_ctx->change[i], &change, NULL));
183         PetscCall(MatMultTranspose(change, deluxe_ctx->seq_work2[i], deluxe_ctx->seq_work1[i]));
184       } else {
185         PetscCall(VecScatterBegin(deluxe_ctx->seq_scctx[i], y, deluxe_ctx->seq_work1[i], INSERT_VALUES, SCATTER_FORWARD));
186         PetscCall(VecScatterEnd(deluxe_ctx->seq_scctx[i], y, deluxe_ctx->seq_work1[i], INSERT_VALUES, SCATTER_FORWARD));
187       }
188       if (deluxe_ctx->seq_mat_inv_sum[i]) {
189         PetscScalar *x;
190 
191         PetscCall(VecGetArray(deluxe_ctx->seq_work1[i], &x));
192         PetscCall(VecPlaceArray(deluxe_ctx->seq_work2[i], x));
193         PetscCall(VecRestoreArray(deluxe_ctx->seq_work1[i], &x));
194         PetscCall(MatSolve(deluxe_ctx->seq_mat_inv_sum[i], deluxe_ctx->seq_work1[i], deluxe_ctx->seq_work2[i]));
195         PetscCall(VecResetArray(deluxe_ctx->seq_work2[i]));
196       }
197       PetscCall(MatMult(deluxe_ctx->seq_mat[i], deluxe_ctx->seq_work1[i], deluxe_ctx->seq_work2[i]));
198       if (deluxe_ctx->change) {
199         if (deluxe_ctx->change_with_qr) {
200           Mat change;
201 
202           PetscCall(KSPGetOperators(deluxe_ctx->change[i], &change, NULL));
203           PetscCall(MatMult(change, deluxe_ctx->seq_work2[i], deluxe_ctx->seq_work1[i]));
204         } else {
205           PetscCall(KSPSolveTranspose(deluxe_ctx->change[i], deluxe_ctx->seq_work2[i], deluxe_ctx->seq_work1[i]));
206         }
207         PetscCall(VecScatterBegin(deluxe_ctx->seq_scctx[i], deluxe_ctx->seq_work1[i], y, INSERT_VALUES, SCATTER_REVERSE));
208         PetscCall(VecScatterEnd(deluxe_ctx->seq_scctx[i], deluxe_ctx->seq_work1[i], y, INSERT_VALUES, SCATTER_REVERSE));
209       } else {
210         PetscCall(VecScatterBegin(deluxe_ctx->seq_scctx[i], deluxe_ctx->seq_work2[i], y, INSERT_VALUES, SCATTER_REVERSE));
211         PetscCall(VecScatterEnd(deluxe_ctx->seq_scctx[i], deluxe_ctx->seq_work2[i], y, INSERT_VALUES, SCATTER_REVERSE));
212       }
213     }
214   }
215   PetscFunctionReturn(PETSC_SUCCESS);
216 }
217 
218 PetscErrorCode PCBDDCScalingRestriction(PC pc, Vec global_vector, Vec local_interface_vector)
219 {
220   PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
221 
222   PetscFunctionBegin;
223   PetscValidHeaderSpecific(pc, PC_CLASSID, 1);
224   PetscValidHeaderSpecific(global_vector, VEC_CLASSID, 2);
225   PetscValidHeaderSpecific(local_interface_vector, VEC_CLASSID, 3);
226   PetscCheck(local_interface_vector != pcbddc->work_scaling, PETSC_COMM_SELF, PETSC_ERR_SUP, "Local vector cannot be pcbddc->work_scaling!");
227   PetscUseMethod(pc, "PCBDDCScalingRestriction_C", (PC, Vec, Vec), (pc, global_vector, local_interface_vector));
228   PetscFunctionReturn(PETSC_SUCCESS);
229 }
230 
231 PetscErrorCode PCBDDCScalingSetUp(PC pc)
232 {
233   PC_IS   *pcis   = (PC_IS *)pc->data;
234   PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
235 
236   PetscFunctionBegin;
237   PetscValidHeaderSpecific(pc, PC_CLASSID, 1);
238   PetscCall(PetscLogEventBegin(PC_BDDC_Scaling[pcbddc->current_level], pc, 0, 0, 0));
239   /* create work vector for the operator */
240   PetscCall(VecDestroy(&pcbddc->work_scaling));
241   PetscCall(VecDuplicate(pcis->vec1_B, &pcbddc->work_scaling));
242   /* always rebuild pcis->D */
243   if (pcis->use_stiffness_scaling) {
244     PetscScalar *a;
245     PetscInt     i, n;
246 
247     PetscCall(MatGetDiagonal(pcbddc->local_mat, pcis->vec1_N));
248     PetscCall(VecScatterBegin(pcis->N_to_B, pcis->vec1_N, pcis->D, INSERT_VALUES, SCATTER_FORWARD));
249     PetscCall(VecScatterEnd(pcis->N_to_B, pcis->vec1_N, pcis->D, INSERT_VALUES, SCATTER_FORWARD));
250     PetscCall(VecAbs(pcis->D));
251     PetscCall(VecGetLocalSize(pcis->D, &n));
252     PetscCall(VecGetArray(pcis->D, &a));
253     for (i = 0; i < n; i++)
254       if (PetscAbsScalar(a[i]) < PETSC_SMALL) a[i] = 1.0;
255     PetscCall(VecRestoreArray(pcis->D, &a));
256   }
257   PetscCall(VecSet(pcis->vec1_global, 0.0));
258   PetscCall(VecScatterBegin(pcis->global_to_B, pcis->D, pcis->vec1_global, ADD_VALUES, SCATTER_REVERSE));
259   PetscCall(VecScatterEnd(pcis->global_to_B, pcis->D, pcis->vec1_global, ADD_VALUES, SCATTER_REVERSE));
260   PetscCall(VecScatterBegin(pcis->global_to_B, pcis->vec1_global, pcis->vec1_B, INSERT_VALUES, SCATTER_FORWARD));
261   PetscCall(VecScatterEnd(pcis->global_to_B, pcis->vec1_global, pcis->vec1_B, INSERT_VALUES, SCATTER_FORWARD));
262   PetscCall(VecPointwiseDivide(pcis->D, pcis->D, pcis->vec1_B));
263   /* now setup */
264   if (pcbddc->use_deluxe_scaling) {
265     if (!pcbddc->deluxe_ctx) PetscCall(PCBDDCScalingCreate_Deluxe(pc));
266     PetscCall(PCBDDCScalingSetUp_Deluxe(pc));
267     PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCBDDCScalingRestriction_C", PCBDDCScalingRestriction_Deluxe));
268     PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCBDDCScalingExtension_C", PCBDDCScalingExtension_Deluxe));
269   } else {
270     PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCBDDCScalingRestriction_C", PCBDDCScalingRestriction_Basic));
271     PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCBDDCScalingExtension_C", PCBDDCScalingExtension_Basic));
272   }
273   PetscCall(PetscLogEventEnd(PC_BDDC_Scaling[pcbddc->current_level], pc, 0, 0, 0));
274 
275   /* test */
276   if (pcbddc->dbg_flag) {
277     Mat         B0_B  = NULL;
278     Vec         B0_Bv = NULL, B0_Bv2 = NULL;
279     Vec         vec2_global;
280     PetscViewer viewer = pcbddc->dbg_viewer;
281     PetscReal   error;
282 
283     /* extension -> from local to parallel */
284     PetscCall(VecSet(pcis->vec1_global, 0.0));
285     PetscCall(VecSetRandom(pcis->vec1_B, NULL));
286     PetscCall(VecScatterBegin(pcis->global_to_B, pcis->vec1_B, pcis->vec1_global, ADD_VALUES, SCATTER_REVERSE));
287     PetscCall(VecScatterEnd(pcis->global_to_B, pcis->vec1_B, pcis->vec1_global, ADD_VALUES, SCATTER_REVERSE));
288     PetscCall(VecDuplicate(pcis->vec1_global, &vec2_global));
289     PetscCall(VecCopy(pcis->vec1_global, vec2_global));
290     PetscCall(VecScatterBegin(pcis->global_to_B, pcis->vec1_global, pcis->vec1_B, INSERT_VALUES, SCATTER_FORWARD));
291     PetscCall(VecScatterEnd(pcis->global_to_B, pcis->vec1_global, pcis->vec1_B, INSERT_VALUES, SCATTER_FORWARD));
292     if (pcbddc->benign_n) {
293       IS is_dummy;
294 
295       PetscCall(ISCreateStride(PETSC_COMM_SELF, pcbddc->benign_n, 0, 1, &is_dummy));
296       PetscCall(MatCreateSubMatrix(pcbddc->benign_B0, is_dummy, pcis->is_B_local, MAT_INITIAL_MATRIX, &B0_B));
297       PetscCall(ISDestroy(&is_dummy));
298       PetscCall(MatCreateVecs(B0_B, NULL, &B0_Bv));
299       PetscCall(VecDuplicate(B0_Bv, &B0_Bv2));
300       PetscCall(MatMult(B0_B, pcis->vec1_B, B0_Bv));
301     }
302     PetscCall(PCBDDCScalingExtension(pc, pcis->vec1_B, pcis->vec1_global));
303     if (pcbddc->benign_saddle_point) {
304       PetscReal errorl = 0.;
305       PetscCall(VecScatterBegin(pcis->global_to_B, pcis->vec1_global, pcis->vec1_B, INSERT_VALUES, SCATTER_FORWARD));
306       PetscCall(VecScatterEnd(pcis->global_to_B, pcis->vec1_global, pcis->vec1_B, INSERT_VALUES, SCATTER_FORWARD));
307       if (pcbddc->benign_n) {
308         PetscCall(MatMult(B0_B, pcis->vec1_B, B0_Bv2));
309         PetscCall(VecAXPY(B0_Bv, -1.0, B0_Bv2));
310         PetscCall(VecNorm(B0_Bv, NORM_INFINITY, &errorl));
311       }
312       PetscCallMPI(MPIU_Allreduce(&errorl, &error, 1, MPIU_REAL, MPI_SUM, PetscObjectComm((PetscObject)pc)));
313       PetscCall(PetscViewerASCIIPrintf(viewer, "Error benign extension %1.14e\n", (double)error));
314     }
315     PetscCall(VecAXPY(pcis->vec1_global, -1.0, vec2_global));
316     PetscCall(VecNorm(pcis->vec1_global, NORM_INFINITY, &error));
317     PetscCall(PetscViewerASCIIPrintf(viewer, "Error scaling extension %1.14e\n", (double)error));
318     PetscCall(VecDestroy(&vec2_global));
319 
320     /* restriction -> from parallel to local */
321     PetscCall(VecSet(pcis->vec1_global, 0.0));
322     PetscCall(VecSetRandom(pcis->vec1_B, NULL));
323     PetscCall(VecScatterBegin(pcis->global_to_B, pcis->vec1_B, pcis->vec1_global, ADD_VALUES, SCATTER_REVERSE));
324     PetscCall(VecScatterEnd(pcis->global_to_B, pcis->vec1_B, pcis->vec1_global, ADD_VALUES, SCATTER_REVERSE));
325     PetscCall(PCBDDCScalingRestriction(pc, pcis->vec1_global, pcis->vec1_B));
326     PetscCall(VecScale(pcis->vec1_B, -1.0));
327     PetscCall(VecScatterBegin(pcis->global_to_B, pcis->vec1_B, pcis->vec1_global, ADD_VALUES, SCATTER_REVERSE));
328     PetscCall(VecScatterEnd(pcis->global_to_B, pcis->vec1_B, pcis->vec1_global, ADD_VALUES, SCATTER_REVERSE));
329     PetscCall(VecNorm(pcis->vec1_global, NORM_INFINITY, &error));
330     PetscCall(PetscViewerASCIIPrintf(viewer, "Error scaling restriction %1.14e\n", (double)error));
331     PetscCall(MatDestroy(&B0_B));
332     PetscCall(VecDestroy(&B0_Bv));
333     PetscCall(VecDestroy(&B0_Bv2));
334   }
335   PetscFunctionReturn(PETSC_SUCCESS);
336 }
337 
338 PetscErrorCode PCBDDCScalingDestroy(PC pc)
339 {
340   PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
341 
342   PetscFunctionBegin;
343   if (pcbddc->deluxe_ctx) PetscCall(PCBDDCScalingDestroy_Deluxe(pc));
344   PetscCall(VecDestroy(&pcbddc->work_scaling));
345   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCBDDCScalingRestriction_C", NULL));
346   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCBDDCScalingExtension_C", NULL));
347   PetscFunctionReturn(PETSC_SUCCESS);
348 }
349 
350 static PetscErrorCode PCBDDCScalingCreate_Deluxe(PC pc)
351 {
352   PC_BDDC            *pcbddc = (PC_BDDC *)pc->data;
353   PCBDDCDeluxeScaling deluxe_ctx;
354 
355   PetscFunctionBegin;
356   PetscCall(PetscNew(&deluxe_ctx));
357   pcbddc->deluxe_ctx = deluxe_ctx;
358   PetscFunctionReturn(PETSC_SUCCESS);
359 }
360 
361 static PetscErrorCode PCBDDCScalingDestroy_Deluxe(PC pc)
362 {
363   PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
364 
365   PetscFunctionBegin;
366   PetscCall(PCBDDCScalingReset_Deluxe_Solvers(pcbddc->deluxe_ctx));
367   PetscCall(PetscFree(pcbddc->deluxe_ctx));
368   PetscFunctionReturn(PETSC_SUCCESS);
369 }
370 
371 static PetscErrorCode PCBDDCScalingReset_Deluxe_Solvers(PCBDDCDeluxeScaling deluxe_ctx)
372 {
373   PetscInt i;
374 
375   PetscFunctionBegin;
376   PetscCall(PetscFree(deluxe_ctx->idx_simple_B));
377   deluxe_ctx->n_simple = 0;
378   for (i = 0; i < deluxe_ctx->seq_n; i++) {
379     PetscCall(VecScatterDestroy(&deluxe_ctx->seq_scctx[i]));
380     PetscCall(VecDestroy(&deluxe_ctx->seq_work1[i]));
381     PetscCall(VecDestroy(&deluxe_ctx->seq_work2[i]));
382     PetscCall(MatDestroy(&deluxe_ctx->seq_mat[i]));
383     PetscCall(MatDestroy(&deluxe_ctx->seq_mat_inv_sum[i]));
384   }
385   PetscCall(PetscFree5(deluxe_ctx->seq_scctx, deluxe_ctx->seq_work1, deluxe_ctx->seq_work2, deluxe_ctx->seq_mat, deluxe_ctx->seq_mat_inv_sum));
386   PetscCall(PetscFree(deluxe_ctx->workspace));
387   deluxe_ctx->seq_n = 0;
388   PetscFunctionReturn(PETSC_SUCCESS);
389 }
390 
391 static PetscErrorCode PCBDDCScalingSetUp_Deluxe(PC pc)
392 {
393   PC_IS              *pcis       = (PC_IS *)pc->data;
394   PC_BDDC            *pcbddc     = (PC_BDDC *)pc->data;
395   PCBDDCDeluxeScaling deluxe_ctx = pcbddc->deluxe_ctx;
396   PCBDDCSubSchurs     sub_schurs = pcbddc->sub_schurs;
397 
398   PetscFunctionBegin;
399   /* reset data structures if the topology has changed */
400   if (pcbddc->recompute_topography) PetscCall(PCBDDCScalingReset_Deluxe_Solvers(deluxe_ctx));
401 
402   /* Compute data structures to solve sequential problems */
403   PetscCall(PCBDDCScalingSetUp_Deluxe_Private(pc));
404 
405   /* diagonal scaling on interface dofs not contained in cc */
406   if (sub_schurs->is_vertices || sub_schurs->is_dir) {
407     PetscInt n_com, n_dir;
408     n_com = 0;
409     if (sub_schurs->is_vertices) PetscCall(ISGetLocalSize(sub_schurs->is_vertices, &n_com));
410     n_dir = 0;
411     if (sub_schurs->is_dir) PetscCall(ISGetLocalSize(sub_schurs->is_dir, &n_dir));
412     if (!deluxe_ctx->n_simple) {
413       deluxe_ctx->n_simple = n_dir + n_com;
414       PetscCall(PetscMalloc1(deluxe_ctx->n_simple, &deluxe_ctx->idx_simple_B));
415       if (n_com) {
416         PetscInt        nmap;
417         const PetscInt *idxs;
418 
419         PetscCall(ISGetIndices(sub_schurs->is_vertices, &idxs));
420         PetscCall(ISGlobalToLocalMappingApply(pcis->BtoNmap, IS_GTOLM_DROP, n_com, idxs, &nmap, deluxe_ctx->idx_simple_B));
421         PetscCheck(nmap == n_com, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Error when mapping simply scaled dofs (is_vertices)! %" PetscInt_FMT " != %" PetscInt_FMT, nmap, n_com);
422         PetscCall(ISRestoreIndices(sub_schurs->is_vertices, &idxs));
423       }
424       if (n_dir) {
425         PetscInt        nmap;
426         const PetscInt *idxs;
427 
428         PetscCall(ISGetIndices(sub_schurs->is_dir, &idxs));
429         PetscCall(ISGlobalToLocalMappingApply(pcis->BtoNmap, IS_GTOLM_DROP, n_dir, idxs, &nmap, deluxe_ctx->idx_simple_B + n_com));
430         PetscCheck(nmap == n_dir, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Error when mapping simply scaled dofs (sub_schurs->is_dir)! %" PetscInt_FMT " != %" PetscInt_FMT, nmap, n_dir);
431         PetscCall(ISRestoreIndices(sub_schurs->is_dir, &idxs));
432       }
433       PetscCall(PetscSortInt(deluxe_ctx->n_simple, deluxe_ctx->idx_simple_B));
434     } else {
435       PetscCheck(deluxe_ctx->n_simple == n_dir + n_com, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Number of simply scaled dofs %" PetscInt_FMT " is different from the previous one computed %" PetscInt_FMT, n_dir + n_com, deluxe_ctx->n_simple);
436     }
437   } else {
438     deluxe_ctx->n_simple     = 0;
439     deluxe_ctx->idx_simple_B = NULL;
440   }
441   PetscFunctionReturn(PETSC_SUCCESS);
442 }
443 
444 static PetscErrorCode PCBDDCScalingSetUp_Deluxe_Private(PC pc)
445 {
446   PC_BDDC            *pcbddc     = (PC_BDDC *)pc->data;
447   PCBDDCDeluxeScaling deluxe_ctx = pcbddc->deluxe_ctx;
448   PCBDDCSubSchurs     sub_schurs = pcbddc->sub_schurs;
449   PetscScalar        *matdata, *matdata2;
450   PetscInt            i, max_subset_size, cum, cum2;
451   const PetscInt     *idxs;
452   PetscBool           newsetup = PETSC_FALSE;
453 
454   PetscFunctionBegin;
455   PetscCheck(sub_schurs, PetscObjectComm((PetscObject)pc), PETSC_ERR_PLIB, "Missing PCBDDCSubSchurs");
456   if (!sub_schurs->n_subs) PetscFunctionReturn(PETSC_SUCCESS);
457 
458   /* Allocate arrays for subproblems */
459   if (!deluxe_ctx->seq_n) {
460     deluxe_ctx->seq_n = sub_schurs->n_subs;
461     PetscCall(PetscCalloc5(deluxe_ctx->seq_n, &deluxe_ctx->seq_scctx, deluxe_ctx->seq_n, &deluxe_ctx->seq_work1, deluxe_ctx->seq_n, &deluxe_ctx->seq_work2, deluxe_ctx->seq_n, &deluxe_ctx->seq_mat, deluxe_ctx->seq_n, &deluxe_ctx->seq_mat_inv_sum));
462     newsetup = PETSC_TRUE;
463   } else PetscCheck(deluxe_ctx->seq_n == sub_schurs->n_subs, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Number of deluxe subproblems %" PetscInt_FMT " is different from the sub_schurs %" PetscInt_FMT, deluxe_ctx->seq_n, sub_schurs->n_subs);
464 
465   /* the change of basis is just a reference to sub_schurs->change (if any) */
466   deluxe_ctx->change         = sub_schurs->change;
467   deluxe_ctx->change_with_qr = sub_schurs->change_with_qr;
468 
469   /* Create objects for deluxe */
470   max_subset_size = 0;
471   for (i = 0; i < sub_schurs->n_subs; i++) {
472     PetscInt subset_size;
473     PetscCall(ISGetLocalSize(sub_schurs->is_subs[i], &subset_size));
474     max_subset_size = PetscMax(subset_size, max_subset_size);
475   }
476   if (newsetup) PetscCall(PetscMalloc1(2 * max_subset_size, &deluxe_ctx->workspace));
477   cum = cum2 = 0;
478   PetscCall(ISGetIndices(sub_schurs->is_Ej_all, &idxs));
479   PetscCall(MatSeqAIJGetArray(sub_schurs->S_Ej_all, &matdata));
480   PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_all, &matdata2));
481   for (i = 0; i < deluxe_ctx->seq_n; i++) {
482     PetscInt subset_size;
483 
484     PetscCall(ISGetLocalSize(sub_schurs->is_subs[i], &subset_size));
485     if (newsetup) {
486       IS sub;
487       /* work vectors */
488       PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, subset_size, deluxe_ctx->workspace, &deluxe_ctx->seq_work1[i]));
489       PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, subset_size, deluxe_ctx->workspace + subset_size, &deluxe_ctx->seq_work2[i]));
490 
491       /* scatters */
492       PetscCall(ISCreateGeneral(PETSC_COMM_SELF, subset_size, idxs + cum, PETSC_COPY_VALUES, &sub));
493       PetscCall(VecScatterCreate(pcbddc->work_scaling, sub, deluxe_ctx->seq_work1[i], NULL, &deluxe_ctx->seq_scctx[i]));
494       PetscCall(ISDestroy(&sub));
495     }
496 
497     /* S_E_j */
498     PetscCall(MatDestroy(&deluxe_ctx->seq_mat[i]));
499     PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, subset_size, subset_size, matdata + cum2, &deluxe_ctx->seq_mat[i]));
500 
501     /* \sum_k S^k_E_j */
502     PetscCall(MatDestroy(&deluxe_ctx->seq_mat_inv_sum[i]));
503     PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, subset_size, subset_size, matdata2 + cum2, &deluxe_ctx->seq_mat_inv_sum[i]));
504     PetscCall(MatSetOption(deluxe_ctx->seq_mat_inv_sum[i], MAT_SPD, sub_schurs->is_posdef));
505     PetscCall(MatSetOption(deluxe_ctx->seq_mat_inv_sum[i], MAT_HERMITIAN, sub_schurs->is_hermitian));
506     switch (sub_schurs->mat_factor_type) {
507     case MAT_FACTOR_CHOLESKY:
508       PetscCall(MatCholeskyFactor(deluxe_ctx->seq_mat_inv_sum[i], NULL, NULL));
509       break;
510     case MAT_FACTOR_LU:
511       PetscCall(MatLUFactor(deluxe_ctx->seq_mat_inv_sum[i], NULL, NULL, NULL));
512       break;
513     default:
514       SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Unsupported factor type %s", MatFactorTypes[sub_schurs->mat_factor_type]);
515     }
516     if (pcbddc->deluxe_singlemat) {
517       Mat X, Y;
518       if (!sub_schurs->is_hermitian) {
519         PetscCall(MatTranspose(deluxe_ctx->seq_mat[i], MAT_INITIAL_MATRIX, &X));
520       } else {
521         PetscCall(PetscObjectReference((PetscObject)deluxe_ctx->seq_mat[i]));
522         X = deluxe_ctx->seq_mat[i];
523       }
524       PetscCall(MatDuplicate(X, MAT_DO_NOT_COPY_VALUES, &Y));
525       if (!sub_schurs->is_hermitian) {
526         PetscCall(PCBDDCMatTransposeMatSolve_SeqDense(deluxe_ctx->seq_mat_inv_sum[i], X, Y));
527       } else {
528         PetscCall(MatMatSolve(deluxe_ctx->seq_mat_inv_sum[i], X, Y));
529       }
530       PetscCall(MatDestroy(&deluxe_ctx->seq_mat_inv_sum[i]));
531       PetscCall(MatDestroy(&deluxe_ctx->seq_mat[i]));
532       PetscCall(MatDestroy(&X));
533       if (deluxe_ctx->change) {
534         Mat C, CY;
535         PetscCheck(deluxe_ctx->change_with_qr, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only QR based change of basis");
536         PetscCall(KSPGetOperators(deluxe_ctx->change[i], &C, NULL));
537         PetscCall(MatMatMult(C, Y, MAT_INITIAL_MATRIX, PETSC_CURRENT, &CY));
538         PetscCall(MatMatTransposeMult(CY, C, MAT_REUSE_MATRIX, PETSC_CURRENT, &Y));
539         PetscCall(MatDestroy(&CY));
540         PetscCall(MatProductClear(Y)); /* clear internal matproduct structure of Y since CY is destroyed */
541       }
542       PetscCall(MatTranspose(Y, MAT_INPLACE_MATRIX, &Y));
543       deluxe_ctx->seq_mat[i] = Y;
544     }
545     cum += subset_size;
546     cum2 += subset_size * subset_size;
547   }
548   PetscCall(ISRestoreIndices(sub_schurs->is_Ej_all, &idxs));
549   PetscCall(MatSeqAIJRestoreArray(sub_schurs->S_Ej_all, &matdata));
550   PetscCall(MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_all, &matdata2));
551   if (pcbddc->deluxe_singlemat) {
552     deluxe_ctx->change         = NULL;
553     deluxe_ctx->change_with_qr = PETSC_FALSE;
554   }
555 
556   if (deluxe_ctx->change && !deluxe_ctx->change_with_qr) {
557     for (i = 0; i < deluxe_ctx->seq_n; i++) {
558       if (newsetup) {
559         PC pc;
560 
561         PetscCall(KSPGetPC(deluxe_ctx->change[i], &pc));
562         PetscCall(PCSetType(pc, PCLU));
563         PetscCall(KSPSetFromOptions(deluxe_ctx->change[i]));
564       }
565       PetscCall(KSPSetUp(deluxe_ctx->change[i]));
566     }
567   }
568   PetscFunctionReturn(PETSC_SUCCESS);
569 }
570