xref: /petsc/src/ksp/pc/impls/bddc/bddcscalingbasic.c (revision bef158480efac06de457f7a665168877ab3c2fd7)
1 #include <../src/ksp/pc/impls/bddc/bddc.h>
2 #include <../src/ksp/pc/impls/bddc/bddcprivate.h>
3 #include <petscblaslapack.h>
4 #include <../src/mat/impls/dense/seq/dense.h>
5 
6 /* prototypes for deluxe functions */
7 static PetscErrorCode PCBDDCScalingCreate_Deluxe(PC);
8 static PetscErrorCode PCBDDCScalingDestroy_Deluxe(PC);
9 static PetscErrorCode PCBDDCScalingSetUp_Deluxe(PC);
10 static PetscErrorCode PCBDDCScalingSetUp_Deluxe_Private(PC);
11 static PetscErrorCode PCBDDCScalingReset_Deluxe_Solvers(PCBDDCDeluxeScaling);
12 
13 static PetscErrorCode PCBDDCMatTransposeMatSolve_SeqDense(Mat A,Mat B,Mat X)
14 {
15   Mat_SeqDense      *mat = (Mat_SeqDense*)A->data;
16   PetscErrorCode    ierr;
17   const PetscScalar *b;
18   PetscScalar       *x;
19   PetscInt          n;
20   PetscBLASInt      nrhs,info,m;
21   PetscBool         flg;
22 
23   PetscFunctionBegin;
24   ierr = PetscBLASIntCast(A->rmap->n,&m);CHKERRQ(ierr);
25   ierr = PetscObjectTypeCompareAny((PetscObject)B,&flg,MATSEQDENSE,MATMPIDENSE,NULL);CHKERRQ(ierr);
26   if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix B must be MATDENSE matrix");
27   ierr = PetscObjectTypeCompareAny((PetscObject)X,&flg,MATSEQDENSE,MATMPIDENSE,NULL);CHKERRQ(ierr);
28   if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix X must be MATDENSE matrix");
29 
30   ierr = MatGetSize(B,NULL,&n);CHKERRQ(ierr);
31   ierr = PetscBLASIntCast(n,&nrhs);CHKERRQ(ierr);
32   ierr = MatDenseGetArrayRead(B,&b);CHKERRQ(ierr);
33   ierr = MatDenseGetArray(X,&x);CHKERRQ(ierr);
34   ierr = PetscArraycpy(x,b,m*nrhs);CHKERRQ(ierr);
35   ierr = MatDenseRestoreArrayRead(B,&b);CHKERRQ(ierr);
36 
37   if (A->factortype == MAT_FACTOR_LU) {
38     PetscStackCallBLAS("LAPACKgetrs",LAPACKgetrs_("T",&m,&nrhs,mat->v,&mat->lda,mat->pivots,x,&m,&info));
39     if (info) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"GETRS - Bad solve");
40   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only LU factor supported");
41 
42   ierr = MatDenseRestoreArray(X,&x);CHKERRQ(ierr);
43   ierr = PetscLogFlops(nrhs*(2.0*m*m - m));CHKERRQ(ierr);
44   PetscFunctionReturn(0);
45 }
46 
47 static PetscErrorCode PCBDDCScalingExtension_Basic(PC pc, Vec local_interface_vector, Vec global_vector)
48 {
49   PC_IS*         pcis = (PC_IS*)pc->data;
50   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
51   PetscErrorCode ierr;
52 
53   PetscFunctionBegin;
54   /* Apply partition of unity */
55   ierr = VecPointwiseMult(pcbddc->work_scaling,pcis->D,local_interface_vector);CHKERRQ(ierr);
56   ierr = VecSet(global_vector,0.0);CHKERRQ(ierr);
57   ierr = VecScatterBegin(pcis->global_to_B,pcbddc->work_scaling,global_vector,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
58   ierr = VecScatterEnd(pcis->global_to_B,pcbddc->work_scaling,global_vector,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
59   PetscFunctionReturn(0);
60 }
61 
62 static PetscErrorCode PCBDDCScalingExtension_Deluxe(PC pc, Vec x, Vec y)
63 {
64   PC_IS*              pcis=(PC_IS*)pc->data;
65   PC_BDDC*            pcbddc=(PC_BDDC*)pc->data;
66   PCBDDCDeluxeScaling deluxe_ctx = pcbddc->deluxe_ctx;
67   PetscErrorCode      ierr;
68 
69   PetscFunctionBegin;
70   ierr = VecSet(pcbddc->work_scaling,0.0);CHKERRQ(ierr);
71   ierr = VecSet(y,0.0);CHKERRQ(ierr);
72   if (deluxe_ctx->n_simple) { /* scale deluxe vertices using diagonal scaling */
73     PetscInt          i;
74     const PetscScalar *array_x,*array_D;
75     PetscScalar       *array;
76     ierr = VecGetArrayRead(x,&array_x);CHKERRQ(ierr);
77     ierr = VecGetArrayRead(pcis->D,&array_D);CHKERRQ(ierr);
78     ierr = VecGetArray(pcbddc->work_scaling,&array);CHKERRQ(ierr);
79     for (i=0;i<deluxe_ctx->n_simple;i++) {
80       array[deluxe_ctx->idx_simple_B[i]] = array_x[deluxe_ctx->idx_simple_B[i]]*array_D[deluxe_ctx->idx_simple_B[i]];
81     }
82     ierr = VecRestoreArray(pcbddc->work_scaling,&array);CHKERRQ(ierr);
83     ierr = VecRestoreArrayRead(pcis->D,&array_D);CHKERRQ(ierr);
84     ierr = VecRestoreArrayRead(x,&array_x);CHKERRQ(ierr);
85   }
86   /* sequential part : all problems and Schur applications collapsed into a single matrix vector multiplication or a matvec and a solve */
87   if (deluxe_ctx->seq_mat) {
88     PetscInt i;
89     for (i=0;i<deluxe_ctx->seq_n;i++) {
90       if (deluxe_ctx->change) {
91         ierr = VecScatterBegin(deluxe_ctx->seq_scctx[i],x,deluxe_ctx->seq_work2[i],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
92         ierr = VecScatterEnd(deluxe_ctx->seq_scctx[i],x,deluxe_ctx->seq_work2[i],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
93         if (deluxe_ctx->change_with_qr) {
94           Mat change;
95 
96           ierr = KSPGetOperators(deluxe_ctx->change[i],&change,NULL);CHKERRQ(ierr);
97           ierr = MatMultTranspose(change,deluxe_ctx->seq_work2[i],deluxe_ctx->seq_work1[i]);CHKERRQ(ierr);
98         } else {
99           ierr = KSPSolve(deluxe_ctx->change[i],deluxe_ctx->seq_work2[i],deluxe_ctx->seq_work1[i]);CHKERRQ(ierr);
100         }
101       } else {
102         ierr = VecScatterBegin(deluxe_ctx->seq_scctx[i],x,deluxe_ctx->seq_work1[i],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
103         ierr = VecScatterEnd(deluxe_ctx->seq_scctx[i],x,deluxe_ctx->seq_work1[i],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
104       }
105       ierr = MatMultTranspose(deluxe_ctx->seq_mat[i],deluxe_ctx->seq_work1[i],deluxe_ctx->seq_work2[i]);CHKERRQ(ierr);
106       if (deluxe_ctx->seq_mat_inv_sum[i]) {
107         PetscScalar *x;
108 
109         ierr = VecGetArray(deluxe_ctx->seq_work2[i],&x);CHKERRQ(ierr);
110         ierr = VecPlaceArray(deluxe_ctx->seq_work1[i],x);CHKERRQ(ierr);
111         ierr = VecRestoreArray(deluxe_ctx->seq_work2[i],&x);CHKERRQ(ierr);
112         ierr = MatSolveTranspose(deluxe_ctx->seq_mat_inv_sum[i],deluxe_ctx->seq_work1[i],deluxe_ctx->seq_work2[i]);CHKERRQ(ierr);
113         ierr = VecResetArray(deluxe_ctx->seq_work1[i]);CHKERRQ(ierr);
114       }
115       if (deluxe_ctx->change) {
116         Mat change;
117 
118         ierr = KSPGetOperators(deluxe_ctx->change[i],&change,NULL);CHKERRQ(ierr);
119         ierr = MatMult(change,deluxe_ctx->seq_work2[i],deluxe_ctx->seq_work1[i]);CHKERRQ(ierr);
120         ierr = VecScatterBegin(deluxe_ctx->seq_scctx[i],deluxe_ctx->seq_work1[i],pcbddc->work_scaling,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
121         ierr = VecScatterEnd(deluxe_ctx->seq_scctx[i],deluxe_ctx->seq_work1[i],pcbddc->work_scaling,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
122       } else {
123         ierr = VecScatterBegin(deluxe_ctx->seq_scctx[i],deluxe_ctx->seq_work2[i],pcbddc->work_scaling,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
124         ierr = VecScatterEnd(deluxe_ctx->seq_scctx[i],deluxe_ctx->seq_work2[i],pcbddc->work_scaling,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
125       }
126     }
127   }
128   /* put local boundary part in global vector */
129   ierr = VecScatterBegin(pcis->global_to_B,pcbddc->work_scaling,y,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
130   ierr = VecScatterEnd(pcis->global_to_B,pcbddc->work_scaling,y,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
131   PetscFunctionReturn(0);
132 }
133 
134 PetscErrorCode PCBDDCScalingExtension(PC pc, Vec local_interface_vector, Vec global_vector)
135 {
136   PC_BDDC        *pcbddc=(PC_BDDC*)pc->data;
137   PetscErrorCode ierr;
138 
139   PetscFunctionBegin;
140   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
141   PetscValidHeaderSpecific(local_interface_vector,VEC_CLASSID,2);
142   PetscValidHeaderSpecific(global_vector,VEC_CLASSID,3);
143   if (local_interface_vector == pcbddc->work_scaling) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Local vector cannot be pcbddc->work_scaling!");
144   ierr = PetscUseMethod(pc,"PCBDDCScalingExtension_C",(PC,Vec,Vec),(pc,local_interface_vector,global_vector));CHKERRQ(ierr);
145   PetscFunctionReturn(0);
146 }
147 
148 static PetscErrorCode PCBDDCScalingRestriction_Basic(PC pc, Vec global_vector, Vec local_interface_vector)
149 {
150   PetscErrorCode ierr;
151   PC_IS          *pcis = (PC_IS*)pc->data;
152 
153   PetscFunctionBegin;
154   ierr = VecScatterBegin(pcis->global_to_B,global_vector,local_interface_vector,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
155   ierr = VecScatterEnd(pcis->global_to_B,global_vector,local_interface_vector,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
156   /* Apply partition of unity */
157   ierr = VecPointwiseMult(local_interface_vector,pcis->D,local_interface_vector);CHKERRQ(ierr);
158   PetscFunctionReturn(0);
159 }
160 
161 static PetscErrorCode PCBDDCScalingRestriction_Deluxe(PC pc, Vec x, Vec y)
162 {
163   PC_IS*              pcis=(PC_IS*)pc->data;
164   PC_BDDC*            pcbddc=(PC_BDDC*)pc->data;
165   PCBDDCDeluxeScaling deluxe_ctx = pcbddc->deluxe_ctx;
166   PetscErrorCode      ierr;
167 
168   PetscFunctionBegin;
169   /* get local boundary part of global vector */
170   ierr = VecScatterBegin(pcis->global_to_B,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
171   ierr = VecScatterEnd(pcis->global_to_B,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
172   if (deluxe_ctx->n_simple) { /* scale deluxe vertices using diagonal scaling */
173     PetscInt          i;
174     PetscScalar       *array_y;
175     const PetscScalar *array_D;
176     ierr = VecGetArray(y,&array_y);CHKERRQ(ierr);
177     ierr = VecGetArrayRead(pcis->D,&array_D);CHKERRQ(ierr);
178     for (i=0;i<deluxe_ctx->n_simple;i++) {
179       array_y[deluxe_ctx->idx_simple_B[i]] *= array_D[deluxe_ctx->idx_simple_B[i]];
180     }
181     ierr = VecRestoreArrayRead(pcis->D,&array_D);CHKERRQ(ierr);
182     ierr = VecRestoreArray(y,&array_y);CHKERRQ(ierr);
183   }
184   /* sequential part : all problems and Schur applications collapsed into a single matrix vector multiplication or a matvec and a solve */
185   if (deluxe_ctx->seq_mat) {
186     PetscInt i;
187     for (i=0;i<deluxe_ctx->seq_n;i++) {
188       if (deluxe_ctx->change) {
189         Mat change;
190 
191         ierr = VecScatterBegin(deluxe_ctx->seq_scctx[i],y,deluxe_ctx->seq_work2[i],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
192         ierr = VecScatterEnd(deluxe_ctx->seq_scctx[i],y,deluxe_ctx->seq_work2[i],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
193         ierr = KSPGetOperators(deluxe_ctx->change[i],&change,NULL);CHKERRQ(ierr);
194         ierr = MatMultTranspose(change,deluxe_ctx->seq_work2[i],deluxe_ctx->seq_work1[i]);CHKERRQ(ierr);
195       } else {
196         ierr = VecScatterBegin(deluxe_ctx->seq_scctx[i],y,deluxe_ctx->seq_work1[i],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
197         ierr = VecScatterEnd(deluxe_ctx->seq_scctx[i],y,deluxe_ctx->seq_work1[i],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
198       }
199       if (deluxe_ctx->seq_mat_inv_sum[i]) {
200         PetscScalar *x;
201 
202         ierr = VecGetArray(deluxe_ctx->seq_work1[i],&x);CHKERRQ(ierr);
203         ierr = VecPlaceArray(deluxe_ctx->seq_work2[i],x);CHKERRQ(ierr);
204         ierr = VecRestoreArray(deluxe_ctx->seq_work1[i],&x);CHKERRQ(ierr);
205         ierr = MatSolve(deluxe_ctx->seq_mat_inv_sum[i],deluxe_ctx->seq_work1[i],deluxe_ctx->seq_work2[i]);CHKERRQ(ierr);
206         ierr = VecResetArray(deluxe_ctx->seq_work2[i]);CHKERRQ(ierr);
207       }
208       ierr = MatMult(deluxe_ctx->seq_mat[i],deluxe_ctx->seq_work1[i],deluxe_ctx->seq_work2[i]);CHKERRQ(ierr);
209       if (deluxe_ctx->change) {
210         if (deluxe_ctx->change_with_qr) {
211           Mat change;
212 
213           ierr = KSPGetOperators(deluxe_ctx->change[i],&change,NULL);CHKERRQ(ierr);
214           ierr = MatMult(change,deluxe_ctx->seq_work2[i],deluxe_ctx->seq_work1[i]);CHKERRQ(ierr);
215         } else {
216           ierr = KSPSolveTranspose(deluxe_ctx->change[i],deluxe_ctx->seq_work2[i],deluxe_ctx->seq_work1[i]);CHKERRQ(ierr);
217         }
218         ierr = VecScatterBegin(deluxe_ctx->seq_scctx[i],deluxe_ctx->seq_work1[i],y,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
219         ierr = VecScatterEnd(deluxe_ctx->seq_scctx[i],deluxe_ctx->seq_work1[i],y,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
220       } else {
221         ierr = VecScatterBegin(deluxe_ctx->seq_scctx[i],deluxe_ctx->seq_work2[i],y,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
222         ierr = VecScatterEnd(deluxe_ctx->seq_scctx[i],deluxe_ctx->seq_work2[i],y,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
223       }
224     }
225   }
226   PetscFunctionReturn(0);
227 }
228 
229 PetscErrorCode PCBDDCScalingRestriction(PC pc, Vec global_vector, Vec local_interface_vector)
230 {
231   PC_BDDC        *pcbddc=(PC_BDDC*)pc->data;
232   PetscErrorCode ierr;
233 
234   PetscFunctionBegin;
235   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
236   PetscValidHeaderSpecific(global_vector,VEC_CLASSID,2);
237   PetscValidHeaderSpecific(local_interface_vector,VEC_CLASSID,3);
238   if (local_interface_vector == pcbddc->work_scaling) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Local vector cannot be pcbddc->work_scaling!");
239   ierr = PetscUseMethod(pc,"PCBDDCScalingRestriction_C",(PC,Vec,Vec),(pc,global_vector,local_interface_vector));CHKERRQ(ierr);
240   PetscFunctionReturn(0);
241 }
242 
243 PetscErrorCode PCBDDCScalingSetUp(PC pc)
244 {
245   PC_IS*         pcis=(PC_IS*)pc->data;
246   PC_BDDC*       pcbddc=(PC_BDDC*)pc->data;
247   PetscErrorCode ierr;
248 
249   PetscFunctionBegin;
250   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
251   ierr = PetscLogEventBegin(PC_BDDC_Scaling[pcbddc->current_level],pc,0,0,0);CHKERRQ(ierr);
252   /* create work vector for the operator */
253   ierr = VecDestroy(&pcbddc->work_scaling);CHKERRQ(ierr);
254   ierr = VecDuplicate(pcis->vec1_B,&pcbddc->work_scaling);CHKERRQ(ierr);
255   /* always rebuild pcis->D */
256   if (pcis->use_stiffness_scaling) {
257     PetscScalar *a;
258     PetscInt    i,n;
259 
260     ierr = MatGetDiagonal(pcbddc->local_mat,pcis->vec1_N);CHKERRQ(ierr);
261     ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
262     ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
263     ierr = VecAbs(pcis->D);CHKERRQ(ierr);
264     ierr = VecGetLocalSize(pcis->D,&n);CHKERRQ(ierr);
265     ierr = VecGetArray(pcis->D,&a);CHKERRQ(ierr);
266     for (i=0;i<n;i++) if (PetscAbsScalar(a[i])<PETSC_SMALL) a[i] = 1.0;
267     ierr = VecRestoreArray(pcis->D,&a);CHKERRQ(ierr);
268   }
269   ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
270   ierr = VecScatterBegin(pcis->global_to_B,pcis->D,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
271   ierr = VecScatterEnd(pcis->global_to_B,pcis->D,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
272   ierr = VecScatterBegin(pcis->global_to_B,pcis->vec1_global,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
273   ierr = VecScatterEnd(pcis->global_to_B,pcis->vec1_global,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
274   ierr = VecPointwiseDivide(pcis->D,pcis->D,pcis->vec1_B);CHKERRQ(ierr);
275   /* now setup */
276   if (pcbddc->use_deluxe_scaling) {
277     if (!pcbddc->deluxe_ctx) {
278       ierr = PCBDDCScalingCreate_Deluxe(pc);CHKERRQ(ierr);
279     }
280     ierr = PCBDDCScalingSetUp_Deluxe(pc);CHKERRQ(ierr);
281     ierr = PetscObjectComposeFunction((PetscObject)pc,"PCBDDCScalingRestriction_C",PCBDDCScalingRestriction_Deluxe);CHKERRQ(ierr);
282     ierr = PetscObjectComposeFunction((PetscObject)pc,"PCBDDCScalingExtension_C",PCBDDCScalingExtension_Deluxe);CHKERRQ(ierr);
283   } else {
284     ierr = PetscObjectComposeFunction((PetscObject)pc,"PCBDDCScalingRestriction_C",PCBDDCScalingRestriction_Basic);CHKERRQ(ierr);
285     ierr = PetscObjectComposeFunction((PetscObject)pc,"PCBDDCScalingExtension_C",PCBDDCScalingExtension_Basic);CHKERRQ(ierr);
286   }
287   ierr = PetscLogEventEnd(PC_BDDC_Scaling[pcbddc->current_level],pc,0,0,0);CHKERRQ(ierr);
288 
289   /* test */
290   if (pcbddc->dbg_flag) {
291     Mat         B0_B = NULL;
292     Vec         B0_Bv = NULL, B0_Bv2 = NULL;
293     Vec         vec2_global;
294     PetscViewer viewer = pcbddc->dbg_viewer;
295     PetscReal   error;
296 
297     /* extension -> from local to parallel */
298     ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
299     ierr = VecSetRandom(pcis->vec1_B,NULL);CHKERRQ(ierr);
300     ierr = VecScatterBegin(pcis->global_to_B,pcis->vec1_B,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
301     ierr = VecScatterEnd(pcis->global_to_B,pcis->vec1_B,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
302     ierr = VecDuplicate(pcis->vec1_global,&vec2_global);CHKERRQ(ierr);
303     ierr = VecCopy(pcis->vec1_global,vec2_global);CHKERRQ(ierr);
304     ierr = VecScatterBegin(pcis->global_to_B,pcis->vec1_global,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
305     ierr = VecScatterEnd(pcis->global_to_B,pcis->vec1_global,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
306     if (pcbddc->benign_n) {
307       IS is_dummy;
308 
309       ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr);
310       ierr = MatCreateSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr);
311       ierr = ISDestroy(&is_dummy);CHKERRQ(ierr);
312       ierr = MatCreateVecs(B0_B,NULL,&B0_Bv);CHKERRQ(ierr);
313       ierr = VecDuplicate(B0_Bv,&B0_Bv2);CHKERRQ(ierr);
314       ierr = MatMult(B0_B,pcis->vec1_B,B0_Bv);CHKERRQ(ierr);
315     }
316     ierr = PCBDDCScalingExtension(pc,pcis->vec1_B,pcis->vec1_global);CHKERRQ(ierr);
317     if (pcbddc->benign_saddle_point) {
318       PetscReal errorl = 0.;
319       ierr = VecScatterBegin(pcis->global_to_B,pcis->vec1_global,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
320       ierr = VecScatterEnd(pcis->global_to_B,pcis->vec1_global,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
321       if (pcbddc->benign_n) {
322         ierr = MatMult(B0_B,pcis->vec1_B,B0_Bv2);CHKERRQ(ierr);
323         ierr = VecAXPY(B0_Bv,-1.0,B0_Bv2);CHKERRQ(ierr);
324         ierr = VecNorm(B0_Bv,NORM_INFINITY,&errorl);CHKERRQ(ierr);
325       }
326       ierr = MPI_Allreduce(&errorl,&error,1,MPIU_REAL,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
327       ierr = PetscViewerASCIIPrintf(viewer,"Error benign extension %1.14e\n",error);CHKERRQ(ierr);
328     }
329     ierr = VecAXPY(pcis->vec1_global,-1.0,vec2_global);CHKERRQ(ierr);
330     ierr = VecNorm(pcis->vec1_global,NORM_INFINITY,&error);CHKERRQ(ierr);
331     ierr = PetscViewerASCIIPrintf(viewer,"Error scaling extension %1.14e\n",error);CHKERRQ(ierr);
332     ierr = VecDestroy(&vec2_global);CHKERRQ(ierr);
333 
334     /* restriction -> from parallel to local */
335     ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
336     ierr = VecSetRandom(pcis->vec1_B,NULL);CHKERRQ(ierr);
337     ierr = VecScatterBegin(pcis->global_to_B,pcis->vec1_B,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
338     ierr = VecScatterEnd(pcis->global_to_B,pcis->vec1_B,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
339     ierr = PCBDDCScalingRestriction(pc,pcis->vec1_global,pcis->vec1_B);CHKERRQ(ierr);
340     ierr = VecScale(pcis->vec1_B,-1.0);CHKERRQ(ierr);
341     ierr = VecScatterBegin(pcis->global_to_B,pcis->vec1_B,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
342     ierr = VecScatterEnd(pcis->global_to_B,pcis->vec1_B,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
343     ierr = VecNorm(pcis->vec1_global,NORM_INFINITY,&error);CHKERRQ(ierr);
344     ierr = PetscViewerASCIIPrintf(viewer,"Error scaling restriction %1.14e\n",error);CHKERRQ(ierr);
345     ierr = MatDestroy(&B0_B);CHKERRQ(ierr);
346     ierr = VecDestroy(&B0_Bv);CHKERRQ(ierr);
347     ierr = VecDestroy(&B0_Bv2);CHKERRQ(ierr);
348   }
349   PetscFunctionReturn(0);
350 }
351 
352 PetscErrorCode PCBDDCScalingDestroy(PC pc)
353 {
354   PC_BDDC*       pcbddc=(PC_BDDC*)pc->data;
355   PetscErrorCode ierr;
356 
357   PetscFunctionBegin;
358   if (pcbddc->deluxe_ctx) {
359     ierr = PCBDDCScalingDestroy_Deluxe(pc);CHKERRQ(ierr);
360   }
361   ierr = VecDestroy(&pcbddc->work_scaling);CHKERRQ(ierr);
362   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCBDDCScalingRestriction_C",NULL);CHKERRQ(ierr);
363   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCBDDCScalingExtension_C",NULL);CHKERRQ(ierr);
364   PetscFunctionReturn(0);
365 }
366 
367 static PetscErrorCode PCBDDCScalingCreate_Deluxe(PC pc)
368 {
369   PC_BDDC*            pcbddc=(PC_BDDC*)pc->data;
370   PCBDDCDeluxeScaling deluxe_ctx;
371   PetscErrorCode      ierr;
372 
373   PetscFunctionBegin;
374   ierr = PetscNew(&deluxe_ctx);CHKERRQ(ierr);
375   pcbddc->deluxe_ctx = deluxe_ctx;
376   PetscFunctionReturn(0);
377 }
378 
379 static PetscErrorCode PCBDDCScalingDestroy_Deluxe(PC pc)
380 {
381   PC_BDDC*            pcbddc=(PC_BDDC*)pc->data;
382   PetscErrorCode      ierr;
383 
384   PetscFunctionBegin;
385   ierr = PCBDDCScalingReset_Deluxe_Solvers(pcbddc->deluxe_ctx);CHKERRQ(ierr);
386   ierr = PetscFree(pcbddc->deluxe_ctx);CHKERRQ(ierr);
387   PetscFunctionReturn(0);
388 }
389 
390 static PetscErrorCode PCBDDCScalingReset_Deluxe_Solvers(PCBDDCDeluxeScaling deluxe_ctx)
391 {
392   PetscInt       i;
393   PetscErrorCode ierr;
394 
395   PetscFunctionBegin;
396   ierr = PetscFree(deluxe_ctx->idx_simple_B);CHKERRQ(ierr);
397   deluxe_ctx->n_simple = 0;
398   for (i=0;i<deluxe_ctx->seq_n;i++) {
399     ierr = VecScatterDestroy(&deluxe_ctx->seq_scctx[i]);CHKERRQ(ierr);
400     ierr = VecDestroy(&deluxe_ctx->seq_work1[i]);CHKERRQ(ierr);
401     ierr = VecDestroy(&deluxe_ctx->seq_work2[i]);CHKERRQ(ierr);
402     ierr = MatDestroy(&deluxe_ctx->seq_mat[i]);CHKERRQ(ierr);
403     ierr = MatDestroy(&deluxe_ctx->seq_mat_inv_sum[i]);CHKERRQ(ierr);
404   }
405   ierr = PetscFree5(deluxe_ctx->seq_scctx,deluxe_ctx->seq_work1,deluxe_ctx->seq_work2,deluxe_ctx->seq_mat,deluxe_ctx->seq_mat_inv_sum);CHKERRQ(ierr);
406   ierr = PetscFree(deluxe_ctx->workspace);CHKERRQ(ierr);
407   deluxe_ctx->seq_n = 0;
408   PetscFunctionReturn(0);
409 }
410 
411 static PetscErrorCode PCBDDCScalingSetUp_Deluxe(PC pc)
412 {
413   PC_IS               *pcis=(PC_IS*)pc->data;
414   PC_BDDC             *pcbddc=(PC_BDDC*)pc->data;
415   PCBDDCDeluxeScaling deluxe_ctx=pcbddc->deluxe_ctx;
416   PCBDDCSubSchurs     sub_schurs=pcbddc->sub_schurs;
417   PetscErrorCode      ierr;
418 
419   PetscFunctionBegin;
420   /* reset data structures if the topology has changed */
421   if (pcbddc->recompute_topography) {
422     ierr = PCBDDCScalingReset_Deluxe_Solvers(deluxe_ctx);CHKERRQ(ierr);
423   }
424 
425   /* Compute data structures to solve sequential problems */
426   ierr = PCBDDCScalingSetUp_Deluxe_Private(pc);CHKERRQ(ierr);
427 
428   /* diagonal scaling on interface dofs not contained in cc */
429   if (sub_schurs->is_vertices || sub_schurs->is_dir) {
430     PetscInt n_com,n_dir;
431     n_com = 0;
432     if (sub_schurs->is_vertices) {
433       ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_com);CHKERRQ(ierr);
434     }
435     n_dir = 0;
436     if (sub_schurs->is_dir) {
437       ierr = ISGetLocalSize(sub_schurs->is_dir,&n_dir);CHKERRQ(ierr);
438     }
439     if (!deluxe_ctx->n_simple) {
440       deluxe_ctx->n_simple = n_dir + n_com;
441       ierr = PetscMalloc1(deluxe_ctx->n_simple,&deluxe_ctx->idx_simple_B);CHKERRQ(ierr);
442       if (sub_schurs->is_vertices) {
443         PetscInt       nmap;
444         const PetscInt *idxs;
445 
446         ierr = ISGetIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr);
447         ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_com,idxs,&nmap,deluxe_ctx->idx_simple_B);CHKERRQ(ierr);
448         if (nmap != n_com) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error when mapping simply scaled dofs (is_vertices)! %D != %D",nmap,n_com);
449         ierr = ISRestoreIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr);
450       }
451       if (sub_schurs->is_dir) {
452         PetscInt       nmap;
453         const PetscInt *idxs;
454 
455         ierr = ISGetIndices(sub_schurs->is_dir,&idxs);CHKERRQ(ierr);
456         ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_dir,idxs,&nmap,deluxe_ctx->idx_simple_B+n_com);CHKERRQ(ierr);
457         if (nmap != n_dir) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error when mapping simply scaled dofs (sub_schurs->is_dir)! %D != %D",nmap,n_dir);
458         ierr = ISRestoreIndices(sub_schurs->is_dir,&idxs);CHKERRQ(ierr);
459       }
460       ierr = PetscSortInt(deluxe_ctx->n_simple,deluxe_ctx->idx_simple_B);CHKERRQ(ierr);
461     } else {
462       if (deluxe_ctx->n_simple != n_dir + n_com) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Number of simply scaled dofs %D is different from the previous one computed %D",n_dir + n_com,deluxe_ctx->n_simple);
463     }
464   } else {
465     deluxe_ctx->n_simple = 0;
466     deluxe_ctx->idx_simple_B = NULL;
467   }
468   PetscFunctionReturn(0);
469 }
470 
471 static PetscErrorCode PCBDDCScalingSetUp_Deluxe_Private(PC pc)
472 {
473   PC_BDDC                *pcbddc=(PC_BDDC*)pc->data;
474   PCBDDCDeluxeScaling    deluxe_ctx=pcbddc->deluxe_ctx;
475   PCBDDCSubSchurs        sub_schurs = pcbddc->sub_schurs;
476   PetscScalar            *matdata,*matdata2;
477   PetscInt               i,max_subset_size,cum,cum2;
478   const PetscInt         *idxs;
479   PetscBool              newsetup = PETSC_FALSE;
480   PetscErrorCode         ierr;
481 
482   PetscFunctionBegin;
483   if (!sub_schurs) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Missing PCBDDCSubSchurs");
484   if (!sub_schurs->n_subs) PetscFunctionReturn(0);
485 
486   /* Allocate arrays for subproblems */
487   if (!deluxe_ctx->seq_n) {
488     deluxe_ctx->seq_n = sub_schurs->n_subs;
489     ierr = PetscCalloc5(deluxe_ctx->seq_n,&deluxe_ctx->seq_scctx,deluxe_ctx->seq_n,&deluxe_ctx->seq_work1,deluxe_ctx->seq_n,&deluxe_ctx->seq_work2,deluxe_ctx->seq_n,&deluxe_ctx->seq_mat,deluxe_ctx->seq_n,&deluxe_ctx->seq_mat_inv_sum);CHKERRQ(ierr);
490     newsetup = PETSC_TRUE;
491   } else if (deluxe_ctx->seq_n != sub_schurs->n_subs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Number of deluxe subproblems %D is different from the sub_schurs %D",deluxe_ctx->seq_n,sub_schurs->n_subs);
492 
493   /* the change of basis is just a reference to sub_schurs->change (if any) */
494   deluxe_ctx->change         = sub_schurs->change;
495   deluxe_ctx->change_with_qr = sub_schurs->change_with_qr;
496 
497   /* Create objects for deluxe */
498   max_subset_size = 0;
499   for (i=0;i<sub_schurs->n_subs;i++) {
500     PetscInt subset_size;
501     ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr);
502     max_subset_size = PetscMax(subset_size,max_subset_size);
503   }
504   if (newsetup) {
505     ierr = PetscMalloc1(2*max_subset_size,&deluxe_ctx->workspace);CHKERRQ(ierr);
506   }
507   cum = cum2 = 0;
508   ierr = ISGetIndices(sub_schurs->is_Ej_all,&idxs);CHKERRQ(ierr);
509   ierr = MatSeqAIJGetArray(sub_schurs->S_Ej_all,&matdata);CHKERRQ(ierr);
510   ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_all,&matdata2);CHKERRQ(ierr);
511   for (i=0;i<deluxe_ctx->seq_n;i++) {
512     PetscInt     subset_size;
513 
514     ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr);
515     if (newsetup) {
516       IS  sub;
517       /* work vectors */
518       ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,subset_size,deluxe_ctx->workspace,&deluxe_ctx->seq_work1[i]);CHKERRQ(ierr);
519       ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,subset_size,deluxe_ctx->workspace+subset_size,&deluxe_ctx->seq_work2[i]);CHKERRQ(ierr);
520 
521       /* scatters */
522       ierr = ISCreateGeneral(PETSC_COMM_SELF,subset_size,idxs+cum,PETSC_COPY_VALUES,&sub);CHKERRQ(ierr);
523       ierr = VecScatterCreate(pcbddc->work_scaling,sub,deluxe_ctx->seq_work1[i],NULL,&deluxe_ctx->seq_scctx[i]);CHKERRQ(ierr);
524       ierr = ISDestroy(&sub);CHKERRQ(ierr);
525     }
526 
527     /* S_E_j */
528     ierr = MatDestroy(&deluxe_ctx->seq_mat[i]);CHKERRQ(ierr);
529     ierr = MatCreateSeqDense(PETSC_COMM_SELF,subset_size,subset_size,matdata+cum2,&deluxe_ctx->seq_mat[i]);CHKERRQ(ierr);
530 
531     /* \sum_k S^k_E_j */
532     ierr = MatDestroy(&deluxe_ctx->seq_mat_inv_sum[i]);CHKERRQ(ierr);
533     ierr = MatCreateSeqDense(PETSC_COMM_SELF,subset_size,subset_size,matdata2+cum2,&deluxe_ctx->seq_mat_inv_sum[i]);CHKERRQ(ierr);
534     ierr = MatSetOption(deluxe_ctx->seq_mat_inv_sum[i],MAT_SPD,sub_schurs->is_posdef);CHKERRQ(ierr);
535     ierr = MatSetOption(deluxe_ctx->seq_mat_inv_sum[i],MAT_HERMITIAN,sub_schurs->is_hermitian);CHKERRQ(ierr);
536     if (sub_schurs->is_hermitian) {
537       ierr = MatCholeskyFactor(deluxe_ctx->seq_mat_inv_sum[i],NULL,NULL);CHKERRQ(ierr);
538     } else {
539       ierr = MatLUFactor(deluxe_ctx->seq_mat_inv_sum[i],NULL,NULL,NULL);CHKERRQ(ierr);
540     }
541     if (pcbddc->deluxe_singlemat) {
542       Mat X,Y;
543       if (!sub_schurs->is_hermitian) {
544         ierr = MatTranspose(deluxe_ctx->seq_mat[i],MAT_INITIAL_MATRIX,&X);CHKERRQ(ierr);
545       } else {
546         ierr = PetscObjectReference((PetscObject)deluxe_ctx->seq_mat[i]);CHKERRQ(ierr);
547         X    = deluxe_ctx->seq_mat[i];
548       }
549       ierr = MatDuplicate(X,MAT_DO_NOT_COPY_VALUES,&Y);CHKERRQ(ierr);
550       if (!sub_schurs->is_hermitian) {
551         ierr = PCBDDCMatTransposeMatSolve_SeqDense(deluxe_ctx->seq_mat_inv_sum[i],X,Y);CHKERRQ(ierr);
552       } else {
553         ierr = MatMatSolve(deluxe_ctx->seq_mat_inv_sum[i],X,Y);CHKERRQ(ierr);
554       }
555 
556       ierr = MatDestroy(&deluxe_ctx->seq_mat_inv_sum[i]);CHKERRQ(ierr);
557       ierr = MatDestroy(&deluxe_ctx->seq_mat[i]);CHKERRQ(ierr);
558       ierr = MatDestroy(&X);CHKERRQ(ierr);
559       if (deluxe_ctx->change) {
560         Mat C,CY;
561         if (!deluxe_ctx->change_with_qr) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only QR based change of basis");
562         ierr = KSPGetOperators(deluxe_ctx->change[i],&C,NULL);CHKERRQ(ierr);
563         ierr = MatMatMult(C,Y,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&CY);CHKERRQ(ierr);
564         ierr = MatMatTransposeMult(CY,C,MAT_REUSE_MATRIX,PETSC_DEFAULT,&Y);CHKERRQ(ierr);
565         ierr = MatDestroy(&CY);CHKERRQ(ierr);
566         ierr = MatProductClear(Y);CHKERRQ(ierr); /* clear internal matproduct structure of Y since CY is destroyed */
567       }
568       ierr = MatTranspose(Y,MAT_INPLACE_MATRIX,&Y);CHKERRQ(ierr);
569       deluxe_ctx->seq_mat[i] = Y;
570     }
571     cum += subset_size;
572     cum2 += subset_size*subset_size;
573   }
574   ierr = ISRestoreIndices(sub_schurs->is_Ej_all,&idxs);CHKERRQ(ierr);
575   ierr = MatSeqAIJRestoreArray(sub_schurs->S_Ej_all,&matdata);CHKERRQ(ierr);
576   ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_all,&matdata2);CHKERRQ(ierr);
577   if (pcbddc->deluxe_singlemat) {
578     deluxe_ctx->change         = NULL;
579     deluxe_ctx->change_with_qr = PETSC_FALSE;
580   }
581 
582   if (deluxe_ctx->change && !deluxe_ctx->change_with_qr) {
583     for (i=0;i<deluxe_ctx->seq_n;i++) {
584       if (newsetup) {
585         PC pc;
586 
587         ierr = KSPGetPC(deluxe_ctx->change[i],&pc);CHKERRQ(ierr);
588         ierr = PCSetType(pc,PCLU);CHKERRQ(ierr);
589         ierr = KSPSetFromOptions(deluxe_ctx->change[i]);CHKERRQ(ierr);
590       }
591       ierr = KSPSetUp(deluxe_ctx->change[i]);CHKERRQ(ierr);
592     }
593   }
594   PetscFunctionReturn(0);
595 }
596