xref: /petsc/src/ksp/pc/impls/bddc/bddcscalingbasic.c (revision 3307d110e72ee4e6d2468971620073eb5ff93529)
1 #include <../src/ksp/pc/impls/bddc/bddc.h>
2 #include <../src/ksp/pc/impls/bddc/bddcprivate.h>
3 #include <petscblaslapack.h>
4 #include <../src/mat/impls/dense/seq/dense.h>
5 
6 /* prototypes for deluxe functions */
7 static PetscErrorCode PCBDDCScalingCreate_Deluxe(PC);
8 static PetscErrorCode PCBDDCScalingDestroy_Deluxe(PC);
9 static PetscErrorCode PCBDDCScalingSetUp_Deluxe(PC);
10 static PetscErrorCode PCBDDCScalingSetUp_Deluxe_Private(PC);
11 static PetscErrorCode PCBDDCScalingReset_Deluxe_Solvers(PCBDDCDeluxeScaling);
12 
13 static PetscErrorCode PCBDDCMatTransposeMatSolve_SeqDense(Mat A,Mat B,Mat X)
14 {
15   Mat_SeqDense      *mat = (Mat_SeqDense*)A->data;
16   const PetscScalar *b;
17   PetscScalar       *x;
18   PetscInt          n;
19   PetscBLASInt      nrhs,info,m;
20   PetscBool         flg;
21 
22   PetscFunctionBegin;
23   PetscCall(PetscBLASIntCast(A->rmap->n,&m));
24   PetscCall(PetscObjectTypeCompareAny((PetscObject)B,&flg,MATSEQDENSE,MATMPIDENSE,NULL));
25   PetscCheck(flg,PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix B must be MATDENSE matrix");
26   PetscCall(PetscObjectTypeCompareAny((PetscObject)X,&flg,MATSEQDENSE,MATMPIDENSE,NULL));
27   PetscCheck(flg,PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix X must be MATDENSE matrix");
28 
29   PetscCall(MatGetSize(B,NULL,&n));
30   PetscCall(PetscBLASIntCast(n,&nrhs));
31   PetscCall(MatDenseGetArrayRead(B,&b));
32   PetscCall(MatDenseGetArray(X,&x));
33   PetscCall(PetscArraycpy(x,b,m*nrhs));
34   PetscCall(MatDenseRestoreArrayRead(B,&b));
35 
36   if (A->factortype == MAT_FACTOR_LU) {
37     PetscStackCallBLAS("LAPACKgetrs",LAPACKgetrs_("T",&m,&nrhs,mat->v,&mat->lda,mat->pivots,x,&m,&info));
38     PetscCheck(!info,PETSC_COMM_SELF,PETSC_ERR_LIB,"GETRS - Bad solve");
39   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only LU factor supported");
40 
41   PetscCall(MatDenseRestoreArray(X,&x));
42   PetscCall(PetscLogFlops(nrhs*(2.0*m*m - m)));
43   PetscFunctionReturn(0);
44 }
45 
46 static PetscErrorCode PCBDDCScalingExtension_Basic(PC pc, Vec local_interface_vector, Vec global_vector)
47 {
48   PC_IS*         pcis = (PC_IS*)pc->data;
49   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
50 
51   PetscFunctionBegin;
52   /* Apply partition of unity */
53   PetscCall(VecPointwiseMult(pcbddc->work_scaling,pcis->D,local_interface_vector));
54   PetscCall(VecSet(global_vector,0.0));
55   PetscCall(VecScatterBegin(pcis->global_to_B,pcbddc->work_scaling,global_vector,ADD_VALUES,SCATTER_REVERSE));
56   PetscCall(VecScatterEnd(pcis->global_to_B,pcbddc->work_scaling,global_vector,ADD_VALUES,SCATTER_REVERSE));
57   PetscFunctionReturn(0);
58 }
59 
60 static PetscErrorCode PCBDDCScalingExtension_Deluxe(PC pc, Vec x, Vec y)
61 {
62   PC_IS*              pcis=(PC_IS*)pc->data;
63   PC_BDDC*            pcbddc=(PC_BDDC*)pc->data;
64   PCBDDCDeluxeScaling deluxe_ctx = pcbddc->deluxe_ctx;
65 
66   PetscFunctionBegin;
67   PetscCall(VecSet(pcbddc->work_scaling,0.0));
68   PetscCall(VecSet(y,0.0));
69   if (deluxe_ctx->n_simple) { /* scale deluxe vertices using diagonal scaling */
70     PetscInt          i;
71     const PetscScalar *array_x,*array_D;
72     PetscScalar       *array;
73     PetscCall(VecGetArrayRead(x,&array_x));
74     PetscCall(VecGetArrayRead(pcis->D,&array_D));
75     PetscCall(VecGetArray(pcbddc->work_scaling,&array));
76     for (i=0;i<deluxe_ctx->n_simple;i++) {
77       array[deluxe_ctx->idx_simple_B[i]] = array_x[deluxe_ctx->idx_simple_B[i]]*array_D[deluxe_ctx->idx_simple_B[i]];
78     }
79     PetscCall(VecRestoreArray(pcbddc->work_scaling,&array));
80     PetscCall(VecRestoreArrayRead(pcis->D,&array_D));
81     PetscCall(VecRestoreArrayRead(x,&array_x));
82   }
83   /* sequential part : all problems and Schur applications collapsed into a single matrix vector multiplication or a matvec and a solve */
84   if (deluxe_ctx->seq_mat) {
85     PetscInt i;
86     for (i=0;i<deluxe_ctx->seq_n;i++) {
87       if (deluxe_ctx->change) {
88         PetscCall(VecScatterBegin(deluxe_ctx->seq_scctx[i],x,deluxe_ctx->seq_work2[i],INSERT_VALUES,SCATTER_FORWARD));
89         PetscCall(VecScatterEnd(deluxe_ctx->seq_scctx[i],x,deluxe_ctx->seq_work2[i],INSERT_VALUES,SCATTER_FORWARD));
90         if (deluxe_ctx->change_with_qr) {
91           Mat change;
92 
93           PetscCall(KSPGetOperators(deluxe_ctx->change[i],&change,NULL));
94           PetscCall(MatMultTranspose(change,deluxe_ctx->seq_work2[i],deluxe_ctx->seq_work1[i]));
95         } else {
96           PetscCall(KSPSolve(deluxe_ctx->change[i],deluxe_ctx->seq_work2[i],deluxe_ctx->seq_work1[i]));
97         }
98       } else {
99         PetscCall(VecScatterBegin(deluxe_ctx->seq_scctx[i],x,deluxe_ctx->seq_work1[i],INSERT_VALUES,SCATTER_FORWARD));
100         PetscCall(VecScatterEnd(deluxe_ctx->seq_scctx[i],x,deluxe_ctx->seq_work1[i],INSERT_VALUES,SCATTER_FORWARD));
101       }
102       PetscCall(MatMultTranspose(deluxe_ctx->seq_mat[i],deluxe_ctx->seq_work1[i],deluxe_ctx->seq_work2[i]));
103       if (deluxe_ctx->seq_mat_inv_sum[i]) {
104         PetscScalar *x;
105 
106         PetscCall(VecGetArray(deluxe_ctx->seq_work2[i],&x));
107         PetscCall(VecPlaceArray(deluxe_ctx->seq_work1[i],x));
108         PetscCall(VecRestoreArray(deluxe_ctx->seq_work2[i],&x));
109         PetscCall(MatSolveTranspose(deluxe_ctx->seq_mat_inv_sum[i],deluxe_ctx->seq_work1[i],deluxe_ctx->seq_work2[i]));
110         PetscCall(VecResetArray(deluxe_ctx->seq_work1[i]));
111       }
112       if (deluxe_ctx->change) {
113         Mat change;
114 
115         PetscCall(KSPGetOperators(deluxe_ctx->change[i],&change,NULL));
116         PetscCall(MatMult(change,deluxe_ctx->seq_work2[i],deluxe_ctx->seq_work1[i]));
117         PetscCall(VecScatterBegin(deluxe_ctx->seq_scctx[i],deluxe_ctx->seq_work1[i],pcbddc->work_scaling,INSERT_VALUES,SCATTER_REVERSE));
118         PetscCall(VecScatterEnd(deluxe_ctx->seq_scctx[i],deluxe_ctx->seq_work1[i],pcbddc->work_scaling,INSERT_VALUES,SCATTER_REVERSE));
119       } else {
120         PetscCall(VecScatterBegin(deluxe_ctx->seq_scctx[i],deluxe_ctx->seq_work2[i],pcbddc->work_scaling,INSERT_VALUES,SCATTER_REVERSE));
121         PetscCall(VecScatterEnd(deluxe_ctx->seq_scctx[i],deluxe_ctx->seq_work2[i],pcbddc->work_scaling,INSERT_VALUES,SCATTER_REVERSE));
122       }
123     }
124   }
125   /* put local boundary part in global vector */
126   PetscCall(VecScatterBegin(pcis->global_to_B,pcbddc->work_scaling,y,ADD_VALUES,SCATTER_REVERSE));
127   PetscCall(VecScatterEnd(pcis->global_to_B,pcbddc->work_scaling,y,ADD_VALUES,SCATTER_REVERSE));
128   PetscFunctionReturn(0);
129 }
130 
131 PetscErrorCode PCBDDCScalingExtension(PC pc, Vec local_interface_vector, Vec global_vector)
132 {
133   PC_BDDC        *pcbddc=(PC_BDDC*)pc->data;
134 
135   PetscFunctionBegin;
136   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
137   PetscValidHeaderSpecific(local_interface_vector,VEC_CLASSID,2);
138   PetscValidHeaderSpecific(global_vector,VEC_CLASSID,3);
139   PetscCheck(local_interface_vector != pcbddc->work_scaling,PETSC_COMM_SELF,PETSC_ERR_SUP,"Local vector cannot be pcbddc->work_scaling!");
140   PetscUseMethod(pc,"PCBDDCScalingExtension_C",(PC,Vec,Vec),(pc,local_interface_vector,global_vector));
141   PetscFunctionReturn(0);
142 }
143 
144 static PetscErrorCode PCBDDCScalingRestriction_Basic(PC pc, Vec global_vector, Vec local_interface_vector)
145 {
146   PC_IS          *pcis = (PC_IS*)pc->data;
147 
148   PetscFunctionBegin;
149   PetscCall(VecScatterBegin(pcis->global_to_B,global_vector,local_interface_vector,INSERT_VALUES,SCATTER_FORWARD));
150   PetscCall(VecScatterEnd(pcis->global_to_B,global_vector,local_interface_vector,INSERT_VALUES,SCATTER_FORWARD));
151   /* Apply partition of unity */
152   PetscCall(VecPointwiseMult(local_interface_vector,pcis->D,local_interface_vector));
153   PetscFunctionReturn(0);
154 }
155 
156 static PetscErrorCode PCBDDCScalingRestriction_Deluxe(PC pc, Vec x, Vec y)
157 {
158   PC_IS*              pcis=(PC_IS*)pc->data;
159   PC_BDDC*            pcbddc=(PC_BDDC*)pc->data;
160   PCBDDCDeluxeScaling deluxe_ctx = pcbddc->deluxe_ctx;
161 
162   PetscFunctionBegin;
163   /* get local boundary part of global vector */
164   PetscCall(VecScatterBegin(pcis->global_to_B,x,y,INSERT_VALUES,SCATTER_FORWARD));
165   PetscCall(VecScatterEnd(pcis->global_to_B,x,y,INSERT_VALUES,SCATTER_FORWARD));
166   if (deluxe_ctx->n_simple) { /* scale deluxe vertices using diagonal scaling */
167     PetscInt          i;
168     PetscScalar       *array_y;
169     const PetscScalar *array_D;
170     PetscCall(VecGetArray(y,&array_y));
171     PetscCall(VecGetArrayRead(pcis->D,&array_D));
172     for (i=0;i<deluxe_ctx->n_simple;i++) {
173       array_y[deluxe_ctx->idx_simple_B[i]] *= array_D[deluxe_ctx->idx_simple_B[i]];
174     }
175     PetscCall(VecRestoreArrayRead(pcis->D,&array_D));
176     PetscCall(VecRestoreArray(y,&array_y));
177   }
178   /* sequential part : all problems and Schur applications collapsed into a single matrix vector multiplication or a matvec and a solve */
179   if (deluxe_ctx->seq_mat) {
180     PetscInt i;
181     for (i=0;i<deluxe_ctx->seq_n;i++) {
182       if (deluxe_ctx->change) {
183         Mat change;
184 
185         PetscCall(VecScatterBegin(deluxe_ctx->seq_scctx[i],y,deluxe_ctx->seq_work2[i],INSERT_VALUES,SCATTER_FORWARD));
186         PetscCall(VecScatterEnd(deluxe_ctx->seq_scctx[i],y,deluxe_ctx->seq_work2[i],INSERT_VALUES,SCATTER_FORWARD));
187         PetscCall(KSPGetOperators(deluxe_ctx->change[i],&change,NULL));
188         PetscCall(MatMultTranspose(change,deluxe_ctx->seq_work2[i],deluxe_ctx->seq_work1[i]));
189       } else {
190         PetscCall(VecScatterBegin(deluxe_ctx->seq_scctx[i],y,deluxe_ctx->seq_work1[i],INSERT_VALUES,SCATTER_FORWARD));
191         PetscCall(VecScatterEnd(deluxe_ctx->seq_scctx[i],y,deluxe_ctx->seq_work1[i],INSERT_VALUES,SCATTER_FORWARD));
192       }
193       if (deluxe_ctx->seq_mat_inv_sum[i]) {
194         PetscScalar *x;
195 
196         PetscCall(VecGetArray(deluxe_ctx->seq_work1[i],&x));
197         PetscCall(VecPlaceArray(deluxe_ctx->seq_work2[i],x));
198         PetscCall(VecRestoreArray(deluxe_ctx->seq_work1[i],&x));
199         PetscCall(MatSolve(deluxe_ctx->seq_mat_inv_sum[i],deluxe_ctx->seq_work1[i],deluxe_ctx->seq_work2[i]));
200         PetscCall(VecResetArray(deluxe_ctx->seq_work2[i]));
201       }
202       PetscCall(MatMult(deluxe_ctx->seq_mat[i],deluxe_ctx->seq_work1[i],deluxe_ctx->seq_work2[i]));
203       if (deluxe_ctx->change) {
204         if (deluxe_ctx->change_with_qr) {
205           Mat change;
206 
207           PetscCall(KSPGetOperators(deluxe_ctx->change[i],&change,NULL));
208           PetscCall(MatMult(change,deluxe_ctx->seq_work2[i],deluxe_ctx->seq_work1[i]));
209         } else {
210           PetscCall(KSPSolveTranspose(deluxe_ctx->change[i],deluxe_ctx->seq_work2[i],deluxe_ctx->seq_work1[i]));
211         }
212         PetscCall(VecScatterBegin(deluxe_ctx->seq_scctx[i],deluxe_ctx->seq_work1[i],y,INSERT_VALUES,SCATTER_REVERSE));
213         PetscCall(VecScatterEnd(deluxe_ctx->seq_scctx[i],deluxe_ctx->seq_work1[i],y,INSERT_VALUES,SCATTER_REVERSE));
214       } else {
215         PetscCall(VecScatterBegin(deluxe_ctx->seq_scctx[i],deluxe_ctx->seq_work2[i],y,INSERT_VALUES,SCATTER_REVERSE));
216         PetscCall(VecScatterEnd(deluxe_ctx->seq_scctx[i],deluxe_ctx->seq_work2[i],y,INSERT_VALUES,SCATTER_REVERSE));
217       }
218     }
219   }
220   PetscFunctionReturn(0);
221 }
222 
223 PetscErrorCode PCBDDCScalingRestriction(PC pc, Vec global_vector, Vec local_interface_vector)
224 {
225   PC_BDDC        *pcbddc=(PC_BDDC*)pc->data;
226 
227   PetscFunctionBegin;
228   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
229   PetscValidHeaderSpecific(global_vector,VEC_CLASSID,2);
230   PetscValidHeaderSpecific(local_interface_vector,VEC_CLASSID,3);
231   PetscCheck(local_interface_vector != pcbddc->work_scaling,PETSC_COMM_SELF,PETSC_ERR_SUP,"Local vector cannot be pcbddc->work_scaling!");
232   PetscUseMethod(pc,"PCBDDCScalingRestriction_C",(PC,Vec,Vec),(pc,global_vector,local_interface_vector));
233   PetscFunctionReturn(0);
234 }
235 
236 PetscErrorCode PCBDDCScalingSetUp(PC pc)
237 {
238   PC_IS*         pcis=(PC_IS*)pc->data;
239   PC_BDDC*       pcbddc=(PC_BDDC*)pc->data;
240 
241   PetscFunctionBegin;
242   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
243   PetscCall(PetscLogEventBegin(PC_BDDC_Scaling[pcbddc->current_level],pc,0,0,0));
244   /* create work vector for the operator */
245   PetscCall(VecDestroy(&pcbddc->work_scaling));
246   PetscCall(VecDuplicate(pcis->vec1_B,&pcbddc->work_scaling));
247   /* always rebuild pcis->D */
248   if (pcis->use_stiffness_scaling) {
249     PetscScalar *a;
250     PetscInt    i,n;
251 
252     PetscCall(MatGetDiagonal(pcbddc->local_mat,pcis->vec1_N));
253     PetscCall(VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->D,INSERT_VALUES,SCATTER_FORWARD));
254     PetscCall(VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->D,INSERT_VALUES,SCATTER_FORWARD));
255     PetscCall(VecAbs(pcis->D));
256     PetscCall(VecGetLocalSize(pcis->D,&n));
257     PetscCall(VecGetArray(pcis->D,&a));
258     for (i=0;i<n;i++) if (PetscAbsScalar(a[i])<PETSC_SMALL) a[i] = 1.0;
259     PetscCall(VecRestoreArray(pcis->D,&a));
260   }
261   PetscCall(VecSet(pcis->vec1_global,0.0));
262   PetscCall(VecScatterBegin(pcis->global_to_B,pcis->D,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE));
263   PetscCall(VecScatterEnd(pcis->global_to_B,pcis->D,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE));
264   PetscCall(VecScatterBegin(pcis->global_to_B,pcis->vec1_global,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD));
265   PetscCall(VecScatterEnd(pcis->global_to_B,pcis->vec1_global,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD));
266   PetscCall(VecPointwiseDivide(pcis->D,pcis->D,pcis->vec1_B));
267   /* now setup */
268   if (pcbddc->use_deluxe_scaling) {
269     if (!pcbddc->deluxe_ctx) {
270       PetscCall(PCBDDCScalingCreate_Deluxe(pc));
271     }
272     PetscCall(PCBDDCScalingSetUp_Deluxe(pc));
273     PetscCall(PetscObjectComposeFunction((PetscObject)pc,"PCBDDCScalingRestriction_C",PCBDDCScalingRestriction_Deluxe));
274     PetscCall(PetscObjectComposeFunction((PetscObject)pc,"PCBDDCScalingExtension_C",PCBDDCScalingExtension_Deluxe));
275   } else {
276     PetscCall(PetscObjectComposeFunction((PetscObject)pc,"PCBDDCScalingRestriction_C",PCBDDCScalingRestriction_Basic));
277     PetscCall(PetscObjectComposeFunction((PetscObject)pc,"PCBDDCScalingExtension_C",PCBDDCScalingExtension_Basic));
278   }
279   PetscCall(PetscLogEventEnd(PC_BDDC_Scaling[pcbddc->current_level],pc,0,0,0));
280 
281   /* test */
282   if (pcbddc->dbg_flag) {
283     Mat         B0_B = NULL;
284     Vec         B0_Bv = NULL, B0_Bv2 = NULL;
285     Vec         vec2_global;
286     PetscViewer viewer = pcbddc->dbg_viewer;
287     PetscReal   error;
288 
289     /* extension -> from local to parallel */
290     PetscCall(VecSet(pcis->vec1_global,0.0));
291     PetscCall(VecSetRandom(pcis->vec1_B,NULL));
292     PetscCall(VecScatterBegin(pcis->global_to_B,pcis->vec1_B,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE));
293     PetscCall(VecScatterEnd(pcis->global_to_B,pcis->vec1_B,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE));
294     PetscCall(VecDuplicate(pcis->vec1_global,&vec2_global));
295     PetscCall(VecCopy(pcis->vec1_global,vec2_global));
296     PetscCall(VecScatterBegin(pcis->global_to_B,pcis->vec1_global,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD));
297     PetscCall(VecScatterEnd(pcis->global_to_B,pcis->vec1_global,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD));
298     if (pcbddc->benign_n) {
299       IS is_dummy;
300 
301       PetscCall(ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy));
302       PetscCall(MatCreateSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B));
303       PetscCall(ISDestroy(&is_dummy));
304       PetscCall(MatCreateVecs(B0_B,NULL,&B0_Bv));
305       PetscCall(VecDuplicate(B0_Bv,&B0_Bv2));
306       PetscCall(MatMult(B0_B,pcis->vec1_B,B0_Bv));
307     }
308     PetscCall(PCBDDCScalingExtension(pc,pcis->vec1_B,pcis->vec1_global));
309     if (pcbddc->benign_saddle_point) {
310       PetscReal errorl = 0.;
311       PetscCall(VecScatterBegin(pcis->global_to_B,pcis->vec1_global,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD));
312       PetscCall(VecScatterEnd(pcis->global_to_B,pcis->vec1_global,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD));
313       if (pcbddc->benign_n) {
314         PetscCall(MatMult(B0_B,pcis->vec1_B,B0_Bv2));
315         PetscCall(VecAXPY(B0_Bv,-1.0,B0_Bv2));
316         PetscCall(VecNorm(B0_Bv,NORM_INFINITY,&errorl));
317       }
318       PetscCallMPI(MPI_Allreduce(&errorl,&error,1,MPIU_REAL,MPI_SUM,PetscObjectComm((PetscObject)pc)));
319       PetscCall(PetscViewerASCIIPrintf(viewer,"Error benign extension %1.14e\n",(double)error));
320     }
321     PetscCall(VecAXPY(pcis->vec1_global,-1.0,vec2_global));
322     PetscCall(VecNorm(pcis->vec1_global,NORM_INFINITY,&error));
323     PetscCall(PetscViewerASCIIPrintf(viewer,"Error scaling extension %1.14e\n",(double)error));
324     PetscCall(VecDestroy(&vec2_global));
325 
326     /* restriction -> from parallel to local */
327     PetscCall(VecSet(pcis->vec1_global,0.0));
328     PetscCall(VecSetRandom(pcis->vec1_B,NULL));
329     PetscCall(VecScatterBegin(pcis->global_to_B,pcis->vec1_B,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE));
330     PetscCall(VecScatterEnd(pcis->global_to_B,pcis->vec1_B,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE));
331     PetscCall(PCBDDCScalingRestriction(pc,pcis->vec1_global,pcis->vec1_B));
332     PetscCall(VecScale(pcis->vec1_B,-1.0));
333     PetscCall(VecScatterBegin(pcis->global_to_B,pcis->vec1_B,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE));
334     PetscCall(VecScatterEnd(pcis->global_to_B,pcis->vec1_B,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE));
335     PetscCall(VecNorm(pcis->vec1_global,NORM_INFINITY,&error));
336     PetscCall(PetscViewerASCIIPrintf(viewer,"Error scaling restriction %1.14e\n",(double)error));
337     PetscCall(MatDestroy(&B0_B));
338     PetscCall(VecDestroy(&B0_Bv));
339     PetscCall(VecDestroy(&B0_Bv2));
340   }
341   PetscFunctionReturn(0);
342 }
343 
344 PetscErrorCode PCBDDCScalingDestroy(PC pc)
345 {
346   PC_BDDC*       pcbddc=(PC_BDDC*)pc->data;
347 
348   PetscFunctionBegin;
349   if (pcbddc->deluxe_ctx) {
350     PetscCall(PCBDDCScalingDestroy_Deluxe(pc));
351   }
352   PetscCall(VecDestroy(&pcbddc->work_scaling));
353   PetscCall(PetscObjectComposeFunction((PetscObject)pc,"PCBDDCScalingRestriction_C",NULL));
354   PetscCall(PetscObjectComposeFunction((PetscObject)pc,"PCBDDCScalingExtension_C",NULL));
355   PetscFunctionReturn(0);
356 }
357 
358 static PetscErrorCode PCBDDCScalingCreate_Deluxe(PC pc)
359 {
360   PC_BDDC*            pcbddc=(PC_BDDC*)pc->data;
361   PCBDDCDeluxeScaling deluxe_ctx;
362 
363   PetscFunctionBegin;
364   PetscCall(PetscNew(&deluxe_ctx));
365   pcbddc->deluxe_ctx = deluxe_ctx;
366   PetscFunctionReturn(0);
367 }
368 
369 static PetscErrorCode PCBDDCScalingDestroy_Deluxe(PC pc)
370 {
371   PC_BDDC*            pcbddc=(PC_BDDC*)pc->data;
372 
373   PetscFunctionBegin;
374   PetscCall(PCBDDCScalingReset_Deluxe_Solvers(pcbddc->deluxe_ctx));
375   PetscCall(PetscFree(pcbddc->deluxe_ctx));
376   PetscFunctionReturn(0);
377 }
378 
379 static PetscErrorCode PCBDDCScalingReset_Deluxe_Solvers(PCBDDCDeluxeScaling deluxe_ctx)
380 {
381   PetscInt       i;
382 
383   PetscFunctionBegin;
384   PetscCall(PetscFree(deluxe_ctx->idx_simple_B));
385   deluxe_ctx->n_simple = 0;
386   for (i=0;i<deluxe_ctx->seq_n;i++) {
387     PetscCall(VecScatterDestroy(&deluxe_ctx->seq_scctx[i]));
388     PetscCall(VecDestroy(&deluxe_ctx->seq_work1[i]));
389     PetscCall(VecDestroy(&deluxe_ctx->seq_work2[i]));
390     PetscCall(MatDestroy(&deluxe_ctx->seq_mat[i]));
391     PetscCall(MatDestroy(&deluxe_ctx->seq_mat_inv_sum[i]));
392   }
393   PetscCall(PetscFree5(deluxe_ctx->seq_scctx,deluxe_ctx->seq_work1,deluxe_ctx->seq_work2,deluxe_ctx->seq_mat,deluxe_ctx->seq_mat_inv_sum));
394   PetscCall(PetscFree(deluxe_ctx->workspace));
395   deluxe_ctx->seq_n = 0;
396   PetscFunctionReturn(0);
397 }
398 
399 static PetscErrorCode PCBDDCScalingSetUp_Deluxe(PC pc)
400 {
401   PC_IS               *pcis=(PC_IS*)pc->data;
402   PC_BDDC             *pcbddc=(PC_BDDC*)pc->data;
403   PCBDDCDeluxeScaling deluxe_ctx=pcbddc->deluxe_ctx;
404   PCBDDCSubSchurs     sub_schurs=pcbddc->sub_schurs;
405 
406   PetscFunctionBegin;
407   /* reset data structures if the topology has changed */
408   if (pcbddc->recompute_topography) {
409     PetscCall(PCBDDCScalingReset_Deluxe_Solvers(deluxe_ctx));
410   }
411 
412   /* Compute data structures to solve sequential problems */
413   PetscCall(PCBDDCScalingSetUp_Deluxe_Private(pc));
414 
415   /* diagonal scaling on interface dofs not contained in cc */
416   if (sub_schurs->is_vertices || sub_schurs->is_dir) {
417     PetscInt n_com,n_dir;
418     n_com = 0;
419     if (sub_schurs->is_vertices) {
420       PetscCall(ISGetLocalSize(sub_schurs->is_vertices,&n_com));
421     }
422     n_dir = 0;
423     if (sub_schurs->is_dir) {
424       PetscCall(ISGetLocalSize(sub_schurs->is_dir,&n_dir));
425     }
426     if (!deluxe_ctx->n_simple) {
427       deluxe_ctx->n_simple = n_dir + n_com;
428       PetscCall(PetscMalloc1(deluxe_ctx->n_simple,&deluxe_ctx->idx_simple_B));
429       if (sub_schurs->is_vertices) {
430         PetscInt       nmap;
431         const PetscInt *idxs;
432 
433         PetscCall(ISGetIndices(sub_schurs->is_vertices,&idxs));
434         PetscCall(ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_com,idxs,&nmap,deluxe_ctx->idx_simple_B));
435         PetscCheck(nmap == n_com,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error when mapping simply scaled dofs (is_vertices)! %" PetscInt_FMT " != %" PetscInt_FMT,nmap,n_com);
436         PetscCall(ISRestoreIndices(sub_schurs->is_vertices,&idxs));
437       }
438       if (sub_schurs->is_dir) {
439         PetscInt       nmap;
440         const PetscInt *idxs;
441 
442         PetscCall(ISGetIndices(sub_schurs->is_dir,&idxs));
443         PetscCall(ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_dir,idxs,&nmap,deluxe_ctx->idx_simple_B+n_com));
444         PetscCheck(nmap == n_dir,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error when mapping simply scaled dofs (sub_schurs->is_dir)! %" PetscInt_FMT " != %" PetscInt_FMT,nmap,n_dir);
445         PetscCall(ISRestoreIndices(sub_schurs->is_dir,&idxs));
446       }
447       PetscCall(PetscSortInt(deluxe_ctx->n_simple,deluxe_ctx->idx_simple_B));
448     } else {
449       PetscCheckFalse(deluxe_ctx->n_simple != n_dir + n_com,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Number of simply scaled dofs %" PetscInt_FMT " is different from the previous one computed %" PetscInt_FMT,n_dir + n_com,deluxe_ctx->n_simple);
450     }
451   } else {
452     deluxe_ctx->n_simple = 0;
453     deluxe_ctx->idx_simple_B = NULL;
454   }
455   PetscFunctionReturn(0);
456 }
457 
458 static PetscErrorCode PCBDDCScalingSetUp_Deluxe_Private(PC pc)
459 {
460   PC_BDDC                *pcbddc=(PC_BDDC*)pc->data;
461   PCBDDCDeluxeScaling    deluxe_ctx=pcbddc->deluxe_ctx;
462   PCBDDCSubSchurs        sub_schurs = pcbddc->sub_schurs;
463   PetscScalar            *matdata,*matdata2;
464   PetscInt               i,max_subset_size,cum,cum2;
465   const PetscInt         *idxs;
466   PetscBool              newsetup = PETSC_FALSE;
467 
468   PetscFunctionBegin;
469   PetscCheck(sub_schurs,PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Missing PCBDDCSubSchurs");
470   if (!sub_schurs->n_subs) PetscFunctionReturn(0);
471 
472   /* Allocate arrays for subproblems */
473   if (!deluxe_ctx->seq_n) {
474     deluxe_ctx->seq_n = sub_schurs->n_subs;
475     PetscCall(PetscCalloc5(deluxe_ctx->seq_n,&deluxe_ctx->seq_scctx,deluxe_ctx->seq_n,&deluxe_ctx->seq_work1,deluxe_ctx->seq_n,&deluxe_ctx->seq_work2,deluxe_ctx->seq_n,&deluxe_ctx->seq_mat,deluxe_ctx->seq_n,&deluxe_ctx->seq_mat_inv_sum));
476     newsetup = PETSC_TRUE;
477   } else PetscCheck(deluxe_ctx->seq_n == sub_schurs->n_subs,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Number of deluxe subproblems %" PetscInt_FMT " is different from the sub_schurs %" PetscInt_FMT,deluxe_ctx->seq_n,sub_schurs->n_subs);
478 
479   /* the change of basis is just a reference to sub_schurs->change (if any) */
480   deluxe_ctx->change         = sub_schurs->change;
481   deluxe_ctx->change_with_qr = sub_schurs->change_with_qr;
482 
483   /* Create objects for deluxe */
484   max_subset_size = 0;
485   for (i=0;i<sub_schurs->n_subs;i++) {
486     PetscInt subset_size;
487     PetscCall(ISGetLocalSize(sub_schurs->is_subs[i],&subset_size));
488     max_subset_size = PetscMax(subset_size,max_subset_size);
489   }
490   if (newsetup) {
491     PetscCall(PetscMalloc1(2*max_subset_size,&deluxe_ctx->workspace));
492   }
493   cum = cum2 = 0;
494   PetscCall(ISGetIndices(sub_schurs->is_Ej_all,&idxs));
495   PetscCall(MatSeqAIJGetArray(sub_schurs->S_Ej_all,&matdata));
496   PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_all,&matdata2));
497   for (i=0;i<deluxe_ctx->seq_n;i++) {
498     PetscInt     subset_size;
499 
500     PetscCall(ISGetLocalSize(sub_schurs->is_subs[i],&subset_size));
501     if (newsetup) {
502       IS  sub;
503       /* work vectors */
504       PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF,1,subset_size,deluxe_ctx->workspace,&deluxe_ctx->seq_work1[i]));
505       PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF,1,subset_size,deluxe_ctx->workspace+subset_size,&deluxe_ctx->seq_work2[i]));
506 
507       /* scatters */
508       PetscCall(ISCreateGeneral(PETSC_COMM_SELF,subset_size,idxs+cum,PETSC_COPY_VALUES,&sub));
509       PetscCall(VecScatterCreate(pcbddc->work_scaling,sub,deluxe_ctx->seq_work1[i],NULL,&deluxe_ctx->seq_scctx[i]));
510       PetscCall(ISDestroy(&sub));
511     }
512 
513     /* S_E_j */
514     PetscCall(MatDestroy(&deluxe_ctx->seq_mat[i]));
515     PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,subset_size,subset_size,matdata+cum2,&deluxe_ctx->seq_mat[i]));
516 
517     /* \sum_k S^k_E_j */
518     PetscCall(MatDestroy(&deluxe_ctx->seq_mat_inv_sum[i]));
519     PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,subset_size,subset_size,matdata2+cum2,&deluxe_ctx->seq_mat_inv_sum[i]));
520     PetscCall(MatSetOption(deluxe_ctx->seq_mat_inv_sum[i],MAT_SPD,sub_schurs->is_posdef));
521     PetscCall(MatSetOption(deluxe_ctx->seq_mat_inv_sum[i],MAT_HERMITIAN,sub_schurs->is_hermitian));
522     if (sub_schurs->is_hermitian) {
523       PetscCall(MatCholeskyFactor(deluxe_ctx->seq_mat_inv_sum[i],NULL,NULL));
524     } else {
525       PetscCall(MatLUFactor(deluxe_ctx->seq_mat_inv_sum[i],NULL,NULL,NULL));
526     }
527     if (pcbddc->deluxe_singlemat) {
528       Mat X,Y;
529       if (!sub_schurs->is_hermitian) {
530         PetscCall(MatTranspose(deluxe_ctx->seq_mat[i],MAT_INITIAL_MATRIX,&X));
531       } else {
532         PetscCall(PetscObjectReference((PetscObject)deluxe_ctx->seq_mat[i]));
533         X    = deluxe_ctx->seq_mat[i];
534       }
535       PetscCall(MatDuplicate(X,MAT_DO_NOT_COPY_VALUES,&Y));
536       if (!sub_schurs->is_hermitian) {
537         PetscCall(PCBDDCMatTransposeMatSolve_SeqDense(deluxe_ctx->seq_mat_inv_sum[i],X,Y));
538       } else {
539         PetscCall(MatMatSolve(deluxe_ctx->seq_mat_inv_sum[i],X,Y));
540       }
541 
542       PetscCall(MatDestroy(&deluxe_ctx->seq_mat_inv_sum[i]));
543       PetscCall(MatDestroy(&deluxe_ctx->seq_mat[i]));
544       PetscCall(MatDestroy(&X));
545       if (deluxe_ctx->change) {
546         Mat C,CY;
547         PetscCheck(deluxe_ctx->change_with_qr,PETSC_COMM_SELF,PETSC_ERR_SUP,"Only QR based change of basis");
548         PetscCall(KSPGetOperators(deluxe_ctx->change[i],&C,NULL));
549         PetscCall(MatMatMult(C,Y,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&CY));
550         PetscCall(MatMatTransposeMult(CY,C,MAT_REUSE_MATRIX,PETSC_DEFAULT,&Y));
551         PetscCall(MatDestroy(&CY));
552         PetscCall(MatProductClear(Y)); /* clear internal matproduct structure of Y since CY is destroyed */
553       }
554       PetscCall(MatTranspose(Y,MAT_INPLACE_MATRIX,&Y));
555       deluxe_ctx->seq_mat[i] = Y;
556     }
557     cum += subset_size;
558     cum2 += subset_size*subset_size;
559   }
560   PetscCall(ISRestoreIndices(sub_schurs->is_Ej_all,&idxs));
561   PetscCall(MatSeqAIJRestoreArray(sub_schurs->S_Ej_all,&matdata));
562   PetscCall(MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_all,&matdata2));
563   if (pcbddc->deluxe_singlemat) {
564     deluxe_ctx->change         = NULL;
565     deluxe_ctx->change_with_qr = PETSC_FALSE;
566   }
567 
568   if (deluxe_ctx->change && !deluxe_ctx->change_with_qr) {
569     for (i=0;i<deluxe_ctx->seq_n;i++) {
570       if (newsetup) {
571         PC pc;
572 
573         PetscCall(KSPGetPC(deluxe_ctx->change[i],&pc));
574         PetscCall(PCSetType(pc,PCLU));
575         PetscCall(KSPSetFromOptions(deluxe_ctx->change[i]));
576       }
577       PetscCall(KSPSetUp(deluxe_ctx->change[i]));
578     }
579   }
580   PetscFunctionReturn(0);
581 }
582