xref: /petsc/src/ksp/pc/impls/bddc/bddcscalingbasic.c (revision 0aa1f76de5929987880f966cbffd27c853dc8e41)
1 #include <../src/ksp/pc/impls/bddc/bddc.h>
2 #include <../src/ksp/pc/impls/bddc/bddcprivate.h>
3 #include <petscblaslapack.h>
4 #include <../src/mat/impls/dense/seq/dense.h>
5 
6 /* prototypes for deluxe functions */
7 static PetscErrorCode PCBDDCScalingCreate_Deluxe(PC);
8 static PetscErrorCode PCBDDCScalingDestroy_Deluxe(PC);
9 static PetscErrorCode PCBDDCScalingSetUp_Deluxe(PC);
10 static PetscErrorCode PCBDDCScalingSetUp_Deluxe_Private(PC);
11 static PetscErrorCode PCBDDCScalingReset_Deluxe_Solvers(PCBDDCDeluxeScaling);
12 
13 static PetscErrorCode PCBDDCMatTransposeMatSolve_SeqDense(Mat A,Mat B,Mat X)
14 {
15   Mat_SeqDense   *mat = (Mat_SeqDense*)A->data;
16   PetscErrorCode ierr;
17   PetscScalar    *b,*x;
18   PetscInt       n;
19   PetscBLASInt   nrhs,info,m;
20   PetscBool      flg;
21 
22   PetscFunctionBegin;
23   ierr = PetscBLASIntCast(A->rmap->n,&m);CHKERRQ(ierr);
24   ierr = PetscObjectTypeCompareAny((PetscObject)B,&flg,MATSEQDENSE,MATMPIDENSE,NULL);CHKERRQ(ierr);
25   if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix B must be MATDENSE matrix");
26   ierr = PetscObjectTypeCompareAny((PetscObject)X,&flg,MATSEQDENSE,MATMPIDENSE,NULL);CHKERRQ(ierr);
27   if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix X must be MATDENSE matrix");
28 
29   ierr = MatGetSize(B,NULL,&n);CHKERRQ(ierr);
30   ierr = PetscBLASIntCast(n,&nrhs);CHKERRQ(ierr);
31   ierr = MatDenseGetArray(B,&b);CHKERRQ(ierr);
32   ierr = MatDenseGetArray(X,&x);CHKERRQ(ierr);
33 
34   ierr = PetscMemcpy(x,b,m*nrhs*sizeof(PetscScalar));CHKERRQ(ierr);
35 
36   if (A->factortype == MAT_FACTOR_LU) {
37 #if defined(PETSC_MISSING_LAPACK_GETRS)
38     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"GETRS - Lapack routine is unavailable.");
39 #else
40     PetscStackCallBLAS("LAPACKgetrs",LAPACKgetrs_("T",&m,&nrhs,mat->v,&mat->lda,mat->pivots,x,&m,&info));
41     if (info) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"GETRS - Bad solve");
42 #endif
43   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only LU factor supported");
44 
45   ierr = MatDenseRestoreArray(B,&b);CHKERRQ(ierr);
46   ierr = MatDenseRestoreArray(X,&x);CHKERRQ(ierr);
47   ierr = PetscLogFlops(nrhs*(2.0*m*m - m));CHKERRQ(ierr);
48   PetscFunctionReturn(0);
49 }
50 
51 static PetscErrorCode PCBDDCScalingExtension_Basic(PC pc, Vec local_interface_vector, Vec global_vector)
52 {
53   PC_IS*         pcis = (PC_IS*)pc->data;
54   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
55   PetscErrorCode ierr;
56 
57   PetscFunctionBegin;
58   /* Apply partition of unity */
59   ierr = VecPointwiseMult(pcbddc->work_scaling,pcis->D,local_interface_vector);CHKERRQ(ierr);
60   ierr = VecSet(global_vector,0.0);CHKERRQ(ierr);
61   ierr = VecScatterBegin(pcis->global_to_B,pcbddc->work_scaling,global_vector,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
62   ierr = VecScatterEnd(pcis->global_to_B,pcbddc->work_scaling,global_vector,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
63   PetscFunctionReturn(0);
64 }
65 
66 static PetscErrorCode PCBDDCScalingExtension_Deluxe(PC pc, Vec x, Vec y)
67 {
68   PC_IS*              pcis=(PC_IS*)pc->data;
69   PC_BDDC*            pcbddc=(PC_BDDC*)pc->data;
70   PCBDDCDeluxeScaling deluxe_ctx = pcbddc->deluxe_ctx;
71   PetscErrorCode      ierr;
72 
73   PetscFunctionBegin;
74   ierr = VecSet(pcbddc->work_scaling,0.0);CHKERRQ(ierr);
75   ierr = VecSet(y,0.0);CHKERRQ(ierr);
76   if (deluxe_ctx->n_simple) { /* scale deluxe vertices using diagonal scaling */
77     PetscInt          i;
78     const PetscScalar *array_x,*array_D;
79     PetscScalar       *array;
80     ierr = VecGetArrayRead(x,&array_x);CHKERRQ(ierr);
81     ierr = VecGetArrayRead(pcis->D,&array_D);CHKERRQ(ierr);
82     ierr = VecGetArray(pcbddc->work_scaling,&array);CHKERRQ(ierr);
83     for (i=0;i<deluxe_ctx->n_simple;i++) {
84       array[deluxe_ctx->idx_simple_B[i]] = array_x[deluxe_ctx->idx_simple_B[i]]*array_D[deluxe_ctx->idx_simple_B[i]];
85     }
86     ierr = VecRestoreArray(pcbddc->work_scaling,&array);CHKERRQ(ierr);
87     ierr = VecRestoreArrayRead(pcis->D,&array_D);CHKERRQ(ierr);
88     ierr = VecRestoreArrayRead(x,&array_x);CHKERRQ(ierr);
89   }
90   /* sequential part : all problems and Schur applications collapsed into a single matrix vector multiplication or a matvec and a solve */
91   if (deluxe_ctx->seq_mat) {
92     PetscInt i;
93     for (i=0;i<deluxe_ctx->seq_n;i++) {
94       if (deluxe_ctx->change) {
95         ierr = VecScatterBegin(deluxe_ctx->seq_scctx[i],x,deluxe_ctx->seq_work2[i],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
96         ierr = VecScatterEnd(deluxe_ctx->seq_scctx[i],x,deluxe_ctx->seq_work2[i],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
97         if (deluxe_ctx->change_with_qr) {
98           Mat change;
99 
100           ierr = KSPGetOperators(deluxe_ctx->change[i],&change,NULL);CHKERRQ(ierr);
101           ierr = MatMultTranspose(change,deluxe_ctx->seq_work2[i],deluxe_ctx->seq_work1[i]);CHKERRQ(ierr);
102         } else {
103           ierr = KSPSolve(deluxe_ctx->change[i],deluxe_ctx->seq_work2[i],deluxe_ctx->seq_work1[i]);CHKERRQ(ierr);
104         }
105       } else {
106         ierr = VecScatterBegin(deluxe_ctx->seq_scctx[i],x,deluxe_ctx->seq_work1[i],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
107         ierr = VecScatterEnd(deluxe_ctx->seq_scctx[i],x,deluxe_ctx->seq_work1[i],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
108       }
109       ierr = MatMultTranspose(deluxe_ctx->seq_mat[i],deluxe_ctx->seq_work1[i],deluxe_ctx->seq_work2[i]);CHKERRQ(ierr);
110       if (deluxe_ctx->seq_mat_inv_sum[i]) {
111         PetscScalar *x;
112 
113         ierr = VecGetArray(deluxe_ctx->seq_work2[i],&x);CHKERRQ(ierr);
114         ierr = VecPlaceArray(deluxe_ctx->seq_work1[i],x);CHKERRQ(ierr);
115         ierr = VecRestoreArray(deluxe_ctx->seq_work2[i],&x);CHKERRQ(ierr);
116         ierr = MatSolveTranspose(deluxe_ctx->seq_mat_inv_sum[i],deluxe_ctx->seq_work1[i],deluxe_ctx->seq_work2[i]);CHKERRQ(ierr);
117         ierr = VecResetArray(deluxe_ctx->seq_work1[i]);CHKERRQ(ierr);
118       }
119       if (deluxe_ctx->change) {
120         Mat change;
121 
122         ierr = KSPGetOperators(deluxe_ctx->change[i],&change,NULL);CHKERRQ(ierr);
123         ierr = MatMult(change,deluxe_ctx->seq_work2[i],deluxe_ctx->seq_work1[i]);CHKERRQ(ierr);
124         ierr = VecScatterBegin(deluxe_ctx->seq_scctx[i],deluxe_ctx->seq_work1[i],pcbddc->work_scaling,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
125         ierr = VecScatterEnd(deluxe_ctx->seq_scctx[i],deluxe_ctx->seq_work1[i],pcbddc->work_scaling,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
126       } else {
127         ierr = VecScatterBegin(deluxe_ctx->seq_scctx[i],deluxe_ctx->seq_work2[i],pcbddc->work_scaling,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
128         ierr = VecScatterEnd(deluxe_ctx->seq_scctx[i],deluxe_ctx->seq_work2[i],pcbddc->work_scaling,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
129       }
130     }
131   }
132   /* put local boundary part in global vector */
133   ierr = VecScatterBegin(pcis->global_to_B,pcbddc->work_scaling,y,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
134   ierr = VecScatterEnd(pcis->global_to_B,pcbddc->work_scaling,y,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
135   PetscFunctionReturn(0);
136 }
137 
138 PetscErrorCode PCBDDCScalingExtension(PC pc, Vec local_interface_vector, Vec global_vector)
139 {
140   PC_BDDC        *pcbddc=(PC_BDDC*)pc->data;
141   PetscErrorCode ierr;
142 
143   PetscFunctionBegin;
144   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
145   PetscValidHeaderSpecific(local_interface_vector,VEC_CLASSID,2);
146   PetscValidHeaderSpecific(global_vector,VEC_CLASSID,3);
147   if (local_interface_vector == pcbddc->work_scaling) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Local vector cannot be pcbddc->work_scaling!\n");
148   ierr = PetscUseMethod(pc,"PCBDDCScalingExtension_C",(PC,Vec,Vec),(pc,local_interface_vector,global_vector));CHKERRQ(ierr);
149   PetscFunctionReturn(0);
150 }
151 
152 static PetscErrorCode PCBDDCScalingRestriction_Basic(PC pc, Vec global_vector, Vec local_interface_vector)
153 {
154   PetscErrorCode ierr;
155   PC_IS          *pcis = (PC_IS*)pc->data;
156 
157   PetscFunctionBegin;
158   ierr = VecScatterBegin(pcis->global_to_B,global_vector,local_interface_vector,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
159   ierr = VecScatterEnd(pcis->global_to_B,global_vector,local_interface_vector,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
160   /* Apply partition of unity */
161   ierr = VecPointwiseMult(local_interface_vector,pcis->D,local_interface_vector);CHKERRQ(ierr);
162   PetscFunctionReturn(0);
163 }
164 
165 static PetscErrorCode PCBDDCScalingRestriction_Deluxe(PC pc, Vec x, Vec y)
166 {
167   PC_IS*              pcis=(PC_IS*)pc->data;
168   PC_BDDC*            pcbddc=(PC_BDDC*)pc->data;
169   PCBDDCDeluxeScaling deluxe_ctx = pcbddc->deluxe_ctx;
170   PetscErrorCode      ierr;
171 
172   PetscFunctionBegin;
173   /* get local boundary part of global vector */
174   ierr = VecScatterBegin(pcis->global_to_B,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
175   ierr = VecScatterEnd(pcis->global_to_B,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
176   if (deluxe_ctx->n_simple) { /* scale deluxe vertices using diagonal scaling */
177     PetscInt          i;
178     PetscScalar       *array_y;
179     const PetscScalar *array_D;
180     ierr = VecGetArray(y,&array_y);CHKERRQ(ierr);
181     ierr = VecGetArrayRead(pcis->D,&array_D);CHKERRQ(ierr);
182     for (i=0;i<deluxe_ctx->n_simple;i++) {
183       array_y[deluxe_ctx->idx_simple_B[i]] *= array_D[deluxe_ctx->idx_simple_B[i]];
184     }
185     ierr = VecRestoreArrayRead(pcis->D,&array_D);CHKERRQ(ierr);
186     ierr = VecRestoreArray(y,&array_y);CHKERRQ(ierr);
187   }
188   /* sequential part : all problems and Schur applications collapsed into a single matrix vector multiplication or a matvec and a solve */
189   if (deluxe_ctx->seq_mat) {
190     PetscInt i;
191     for (i=0;i<deluxe_ctx->seq_n;i++) {
192       if (deluxe_ctx->change) {
193         Mat change;
194 
195         ierr = VecScatterBegin(deluxe_ctx->seq_scctx[i],y,deluxe_ctx->seq_work2[i],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
196         ierr = VecScatterEnd(deluxe_ctx->seq_scctx[i],y,deluxe_ctx->seq_work2[i],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
197         ierr = KSPGetOperators(deluxe_ctx->change[i],&change,NULL);CHKERRQ(ierr);
198         ierr = MatMultTranspose(change,deluxe_ctx->seq_work2[i],deluxe_ctx->seq_work1[i]);CHKERRQ(ierr);
199       } else {
200         ierr = VecScatterBegin(deluxe_ctx->seq_scctx[i],y,deluxe_ctx->seq_work1[i],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
201         ierr = VecScatterEnd(deluxe_ctx->seq_scctx[i],y,deluxe_ctx->seq_work1[i],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
202       }
203       if (deluxe_ctx->seq_mat_inv_sum[i]) {
204         PetscScalar *x;
205 
206         ierr = VecGetArray(deluxe_ctx->seq_work1[i],&x);CHKERRQ(ierr);
207         ierr = VecPlaceArray(deluxe_ctx->seq_work2[i],x);CHKERRQ(ierr);
208         ierr = VecRestoreArray(deluxe_ctx->seq_work1[i],&x);CHKERRQ(ierr);
209         ierr = MatSolve(deluxe_ctx->seq_mat_inv_sum[i],deluxe_ctx->seq_work1[i],deluxe_ctx->seq_work2[i]);CHKERRQ(ierr);
210         ierr = VecResetArray(deluxe_ctx->seq_work2[i]);CHKERRQ(ierr);
211       }
212       ierr = MatMult(deluxe_ctx->seq_mat[i],deluxe_ctx->seq_work1[i],deluxe_ctx->seq_work2[i]);CHKERRQ(ierr);
213       if (deluxe_ctx->change) {
214         if (deluxe_ctx->change_with_qr) {
215           Mat change;
216 
217           ierr = KSPGetOperators(deluxe_ctx->change[i],&change,NULL);CHKERRQ(ierr);
218           ierr = MatMult(change,deluxe_ctx->seq_work2[i],deluxe_ctx->seq_work1[i]);CHKERRQ(ierr);
219         } else {
220           ierr = KSPSolveTranspose(deluxe_ctx->change[i],deluxe_ctx->seq_work2[i],deluxe_ctx->seq_work1[i]);CHKERRQ(ierr);
221         }
222         ierr = VecScatterBegin(deluxe_ctx->seq_scctx[i],deluxe_ctx->seq_work1[i],y,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
223         ierr = VecScatterEnd(deluxe_ctx->seq_scctx[i],deluxe_ctx->seq_work1[i],y,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
224       } else {
225         ierr = VecScatterBegin(deluxe_ctx->seq_scctx[i],deluxe_ctx->seq_work2[i],y,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
226         ierr = VecScatterEnd(deluxe_ctx->seq_scctx[i],deluxe_ctx->seq_work2[i],y,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
227       }
228     }
229   }
230   PetscFunctionReturn(0);
231 }
232 
233 PetscErrorCode PCBDDCScalingRestriction(PC pc, Vec global_vector, Vec local_interface_vector)
234 {
235   PC_BDDC        *pcbddc=(PC_BDDC*)pc->data;
236   PetscErrorCode ierr;
237 
238   PetscFunctionBegin;
239   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
240   PetscValidHeaderSpecific(global_vector,VEC_CLASSID,2);
241   PetscValidHeaderSpecific(local_interface_vector,VEC_CLASSID,3);
242   if (local_interface_vector == pcbddc->work_scaling) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Local vector cannot be pcbddc->work_scaling!\n");
243   ierr = PetscUseMethod(pc,"PCBDDCScalingRestriction_C",(PC,Vec,Vec),(pc,global_vector,local_interface_vector));CHKERRQ(ierr);
244   PetscFunctionReturn(0);
245 }
246 
247 PetscErrorCode PCBDDCScalingSetUp(PC pc)
248 {
249   PC_IS*         pcis=(PC_IS*)pc->data;
250   PC_BDDC*       pcbddc=(PC_BDDC*)pc->data;
251   PetscErrorCode ierr;
252 
253   PetscFunctionBegin;
254   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
255   /* create work vector for the operator */
256   ierr = VecDestroy(&pcbddc->work_scaling);CHKERRQ(ierr);
257   ierr = VecDuplicate(pcis->vec1_B,&pcbddc->work_scaling);CHKERRQ(ierr);
258   /* always rebuild pcis->D */
259   if (pcis->use_stiffness_scaling) {
260     PetscScalar *a;
261     PetscInt    i,n;
262 
263     ierr = MatGetDiagonal(pcbddc->local_mat,pcis->vec1_N);CHKERRQ(ierr);
264     ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
265     ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
266     ierr = VecGetLocalSize(pcis->D,&n);CHKERRQ(ierr);
267     ierr = VecGetArray(pcis->D,&a);CHKERRQ(ierr);
268     for (i=0;i<n;i++) if (PetscAbsScalar(a[i])<PETSC_SMALL) a[i] = 1.0;
269     ierr = VecRestoreArray(pcis->D,&a);CHKERRQ(ierr);
270   }
271   ierr = VecCopy(pcis->D,pcis->vec1_B);CHKERRQ(ierr);
272   ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
273   ierr = VecScatterBegin(pcis->global_to_B,pcis->vec1_B,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
274   ierr = VecScatterEnd(pcis->global_to_B,pcis->vec1_B,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
275   ierr = VecScatterBegin(pcis->global_to_B,pcis->vec1_global,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
276   ierr = VecScatterEnd(pcis->global_to_B,pcis->vec1_global,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
277   ierr = VecPointwiseDivide(pcis->D,pcis->D,pcis->vec1_B);CHKERRQ(ierr);
278   /* now setup */
279   if (pcbddc->use_deluxe_scaling) {
280     if (!pcbddc->deluxe_ctx) {
281       ierr = PCBDDCScalingCreate_Deluxe(pc);CHKERRQ(ierr);
282     }
283     ierr = PCBDDCScalingSetUp_Deluxe(pc);CHKERRQ(ierr);
284     ierr = PetscObjectComposeFunction((PetscObject)pc,"PCBDDCScalingRestriction_C",PCBDDCScalingRestriction_Deluxe);CHKERRQ(ierr);
285     ierr = PetscObjectComposeFunction((PetscObject)pc,"PCBDDCScalingExtension_C",PCBDDCScalingExtension_Deluxe);CHKERRQ(ierr);
286   } else {
287     ierr = PetscObjectComposeFunction((PetscObject)pc,"PCBDDCScalingRestriction_C",PCBDDCScalingRestriction_Basic);CHKERRQ(ierr);
288     ierr = PetscObjectComposeFunction((PetscObject)pc,"PCBDDCScalingExtension_C",PCBDDCScalingExtension_Basic);CHKERRQ(ierr);
289   }
290 
291   /* test */
292   if (pcbddc->dbg_flag) {
293     Mat         B0_B = NULL;
294     Vec         B0_Bv = NULL, B0_Bv2 = NULL;
295     Vec         vec2_global;
296     PetscViewer viewer = pcbddc->dbg_viewer;
297     PetscReal   error;
298 
299     /* extension -> from local to parallel */
300     ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
301     ierr = VecSetRandom(pcis->vec1_B,NULL);CHKERRQ(ierr);
302     ierr = VecScatterBegin(pcis->global_to_B,pcis->vec1_B,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
303     ierr = VecScatterEnd(pcis->global_to_B,pcis->vec1_B,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
304     ierr = VecDuplicate(pcis->vec1_global,&vec2_global);CHKERRQ(ierr);
305     ierr = VecCopy(pcis->vec1_global,vec2_global);CHKERRQ(ierr);
306     ierr = VecScatterBegin(pcis->global_to_B,pcis->vec1_global,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
307     ierr = VecScatterEnd(pcis->global_to_B,pcis->vec1_global,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
308     if (pcbddc->benign_n) {
309       IS is_dummy;
310 
311       ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr);
312       ierr = MatCreateSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr);
313       ierr = ISDestroy(&is_dummy);CHKERRQ(ierr);
314       ierr = MatCreateVecs(B0_B,NULL,&B0_Bv);CHKERRQ(ierr);
315       ierr = VecDuplicate(B0_Bv,&B0_Bv2);CHKERRQ(ierr);
316       ierr = MatMult(B0_B,pcis->vec1_B,B0_Bv);CHKERRQ(ierr);
317     }
318     ierr = PCBDDCScalingExtension(pc,pcis->vec1_B,pcis->vec1_global);CHKERRQ(ierr);
319     if (pcbddc->benign_saddle_point) {
320       PetscReal errorl = 0.;
321       ierr = VecScatterBegin(pcis->global_to_B,pcis->vec1_global,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
322       ierr = VecScatterEnd(pcis->global_to_B,pcis->vec1_global,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
323       if (pcbddc->benign_n) {
324         ierr = MatMult(B0_B,pcis->vec1_B,B0_Bv2);CHKERRQ(ierr);
325         ierr = VecAXPY(B0_Bv,-1.0,B0_Bv2);CHKERRQ(ierr);
326         ierr = VecNorm(B0_Bv,NORM_INFINITY,&errorl);CHKERRQ(ierr);
327       }
328       ierr = MPI_Allreduce(&errorl,&error,1,MPIU_REAL,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
329       ierr = PetscViewerASCIIPrintf(viewer,"Error benign extension %1.14e\n",error);CHKERRQ(ierr);
330     }
331     ierr = VecAXPY(pcis->vec1_global,-1.0,vec2_global);CHKERRQ(ierr);
332     ierr = VecNorm(pcis->vec1_global,NORM_INFINITY,&error);CHKERRQ(ierr);
333     ierr = PetscViewerASCIIPrintf(viewer,"Error scaling extension %1.14e\n",error);CHKERRQ(ierr);
334     ierr = VecDestroy(&vec2_global);CHKERRQ(ierr);
335 
336     /* restriction -> from parallel to local */
337     ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
338     ierr = VecSetRandom(pcis->vec1_B,NULL);CHKERRQ(ierr);
339     ierr = VecScatterBegin(pcis->global_to_B,pcis->vec1_B,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
340     ierr = VecScatterEnd(pcis->global_to_B,pcis->vec1_B,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
341     ierr = PCBDDCScalingRestriction(pc,pcis->vec1_global,pcis->vec1_B);CHKERRQ(ierr);
342     ierr = VecScale(pcis->vec1_B,-1.0);CHKERRQ(ierr);
343     ierr = VecScatterBegin(pcis->global_to_B,pcis->vec1_B,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
344     ierr = VecScatterEnd(pcis->global_to_B,pcis->vec1_B,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
345     ierr = VecNorm(pcis->vec1_global,NORM_INFINITY,&error);CHKERRQ(ierr);
346     ierr = PetscViewerASCIIPrintf(viewer,"Error scaling restriction %1.14e\n",error);CHKERRQ(ierr);
347     ierr = MatDestroy(&B0_B);CHKERRQ(ierr);
348     ierr = VecDestroy(&B0_Bv);CHKERRQ(ierr);
349     ierr = VecDestroy(&B0_Bv2);CHKERRQ(ierr);
350   }
351   PetscFunctionReturn(0);
352 }
353 
354 PetscErrorCode PCBDDCScalingDestroy(PC pc)
355 {
356   PC_BDDC*       pcbddc=(PC_BDDC*)pc->data;
357   PetscErrorCode ierr;
358 
359   PetscFunctionBegin;
360   if (pcbddc->deluxe_ctx) {
361     ierr = PCBDDCScalingDestroy_Deluxe(pc);CHKERRQ(ierr);
362   }
363   ierr = VecDestroy(&pcbddc->work_scaling);CHKERRQ(ierr);
364   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCBDDCScalingRestriction_C",NULL);CHKERRQ(ierr);
365   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCBDDCScalingExtension_C",NULL);CHKERRQ(ierr);
366   PetscFunctionReturn(0);
367 }
368 
369 static PetscErrorCode PCBDDCScalingCreate_Deluxe(PC pc)
370 {
371   PC_BDDC*            pcbddc=(PC_BDDC*)pc->data;
372   PCBDDCDeluxeScaling deluxe_ctx;
373   PetscErrorCode      ierr;
374 
375   PetscFunctionBegin;
376   ierr = PetscNew(&deluxe_ctx);CHKERRQ(ierr);
377   pcbddc->deluxe_ctx = deluxe_ctx;
378   PetscFunctionReturn(0);
379 }
380 
381 static PetscErrorCode PCBDDCScalingDestroy_Deluxe(PC pc)
382 {
383   PC_BDDC*            pcbddc=(PC_BDDC*)pc->data;
384   PetscErrorCode      ierr;
385 
386   PetscFunctionBegin;
387   ierr = PCBDDCScalingReset_Deluxe_Solvers(pcbddc->deluxe_ctx);CHKERRQ(ierr);
388   ierr = PetscFree(pcbddc->deluxe_ctx);CHKERRQ(ierr);
389   PetscFunctionReturn(0);
390 }
391 
392 static PetscErrorCode PCBDDCScalingReset_Deluxe_Solvers(PCBDDCDeluxeScaling deluxe_ctx)
393 {
394   PetscInt       i;
395   PetscErrorCode ierr;
396 
397   PetscFunctionBegin;
398   ierr = PetscFree(deluxe_ctx->idx_simple_B);CHKERRQ(ierr);
399   deluxe_ctx->n_simple = 0;
400   for (i=0;i<deluxe_ctx->seq_n;i++) {
401     ierr = VecScatterDestroy(&deluxe_ctx->seq_scctx[i]);CHKERRQ(ierr);
402     ierr = VecDestroy(&deluxe_ctx->seq_work1[i]);CHKERRQ(ierr);
403     ierr = VecDestroy(&deluxe_ctx->seq_work2[i]);CHKERRQ(ierr);
404     ierr = MatDestroy(&deluxe_ctx->seq_mat[i]);CHKERRQ(ierr);
405     ierr = MatDestroy(&deluxe_ctx->seq_mat_inv_sum[i]);CHKERRQ(ierr);
406   }
407   ierr = PetscFree5(deluxe_ctx->seq_scctx,deluxe_ctx->seq_work1,deluxe_ctx->seq_work2,deluxe_ctx->seq_mat,deluxe_ctx->seq_mat_inv_sum);CHKERRQ(ierr);
408   ierr = PetscFree(deluxe_ctx->workspace);CHKERRQ(ierr);
409   deluxe_ctx->seq_n = 0;
410   PetscFunctionReturn(0);
411 }
412 
413 static PetscErrorCode PCBDDCScalingSetUp_Deluxe(PC pc)
414 {
415   PC_IS               *pcis=(PC_IS*)pc->data;
416   PC_BDDC             *pcbddc=(PC_BDDC*)pc->data;
417   PCBDDCDeluxeScaling deluxe_ctx=pcbddc->deluxe_ctx;
418   PCBDDCSubSchurs     sub_schurs=pcbddc->sub_schurs;
419   PetscErrorCode      ierr;
420 
421   PetscFunctionBegin;
422   /* reset data structures if the topology has changed */
423   if (pcbddc->recompute_topography) {
424     ierr = PCBDDCScalingReset_Deluxe_Solvers(deluxe_ctx);CHKERRQ(ierr);
425   }
426 
427   /* Compute data structures to solve sequential problems */
428   ierr = PCBDDCScalingSetUp_Deluxe_Private(pc);CHKERRQ(ierr);
429 
430   /* diagonal scaling on interface dofs not contained in cc */
431   if (sub_schurs->is_vertices || sub_schurs->is_dir) {
432     PetscInt n_com,n_dir;
433     n_com = 0;
434     if (sub_schurs->is_vertices) {
435       ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_com);CHKERRQ(ierr);
436     }
437     n_dir = 0;
438     if (sub_schurs->is_dir) {
439       ierr = ISGetLocalSize(sub_schurs->is_dir,&n_dir);CHKERRQ(ierr);
440     }
441     if (!deluxe_ctx->n_simple) {
442       deluxe_ctx->n_simple = n_dir + n_com;
443       ierr = PetscMalloc1(deluxe_ctx->n_simple,&deluxe_ctx->idx_simple_B);CHKERRQ(ierr);
444       if (sub_schurs->is_vertices) {
445         PetscInt       nmap;
446         const PetscInt *idxs;
447 
448         ierr = ISGetIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr);
449         ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_com,idxs,&nmap,deluxe_ctx->idx_simple_B);CHKERRQ(ierr);
450         if (nmap != n_com) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error when mapping simply scaled dofs (is_vertices)! %d != %d",nmap,n_com);
451         ierr = ISRestoreIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr);
452       }
453       if (sub_schurs->is_dir) {
454         PetscInt       nmap;
455         const PetscInt *idxs;
456 
457         ierr = ISGetIndices(sub_schurs->is_dir,&idxs);CHKERRQ(ierr);
458         ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_dir,idxs,&nmap,deluxe_ctx->idx_simple_B+n_com);CHKERRQ(ierr);
459         if (nmap != n_dir) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error when mapping simply scaled dofs (sub_schurs->is_dir)! %d != %d",nmap,n_dir);
460         ierr = ISRestoreIndices(sub_schurs->is_dir,&idxs);CHKERRQ(ierr);
461       }
462       ierr = PetscSortInt(deluxe_ctx->n_simple,deluxe_ctx->idx_simple_B);CHKERRQ(ierr);
463     } else {
464       if (deluxe_ctx->n_simple != n_dir + n_com) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Number of simply scaled dofs %d is different from the previous one computed %d",n_dir + n_com,deluxe_ctx->n_simple);
465     }
466   } else {
467     deluxe_ctx->n_simple = 0;
468     deluxe_ctx->idx_simple_B = 0;
469   }
470   PetscFunctionReturn(0);
471 }
472 
473 static PetscErrorCode PCBDDCScalingSetUp_Deluxe_Private(PC pc)
474 {
475   PC_BDDC                *pcbddc=(PC_BDDC*)pc->data;
476   PCBDDCDeluxeScaling    deluxe_ctx=pcbddc->deluxe_ctx;
477   PCBDDCSubSchurs        sub_schurs = pcbddc->sub_schurs;
478   PetscScalar            *matdata,*matdata2;
479   PetscInt               i,max_subset_size,cum,cum2;
480   const PetscInt         *idxs;
481   PetscBool              newsetup = PETSC_FALSE;
482   PetscErrorCode         ierr;
483 
484   PetscFunctionBegin;
485   if (!sub_schurs->n_subs) {
486     PetscFunctionReturn(0);
487   }
488 
489   /* Allocate arrays for subproblems */
490   if (!deluxe_ctx->seq_n) {
491     deluxe_ctx->seq_n = sub_schurs->n_subs;
492     ierr = PetscCalloc5(deluxe_ctx->seq_n,&deluxe_ctx->seq_scctx,deluxe_ctx->seq_n,&deluxe_ctx->seq_work1,deluxe_ctx->seq_n,&deluxe_ctx->seq_work2,deluxe_ctx->seq_n,&deluxe_ctx->seq_mat,deluxe_ctx->seq_n,&deluxe_ctx->seq_mat_inv_sum);CHKERRQ(ierr);
493     newsetup = PETSC_TRUE;
494   } else if (deluxe_ctx->seq_n != sub_schurs->n_subs) {
495     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Number of deluxe subproblems %d is different from the sub_schurs %d",deluxe_ctx->seq_n,sub_schurs->n_subs);
496   }
497   /* the change of basis is just a reference to sub_schurs->change (if any) */
498   deluxe_ctx->change         = sub_schurs->change;
499   deluxe_ctx->change_with_qr = sub_schurs->change_with_qr;
500 
501   /* Create objects for deluxe */
502   max_subset_size = 0;
503   for (i=0;i<sub_schurs->n_subs;i++) {
504     PetscInt subset_size;
505     ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr);
506     max_subset_size = PetscMax(subset_size,max_subset_size);
507   }
508   if (newsetup) {
509     ierr = PetscMalloc1(2*max_subset_size,&deluxe_ctx->workspace);CHKERRQ(ierr);
510   }
511   cum = cum2 = 0;
512   ierr = ISGetIndices(sub_schurs->is_Ej_all,&idxs);CHKERRQ(ierr);
513   ierr = MatSeqAIJGetArray(sub_schurs->S_Ej_all,&matdata);CHKERRQ(ierr);
514   ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_all,&matdata2);CHKERRQ(ierr);
515   for (i=0;i<deluxe_ctx->seq_n;i++) {
516     PetscInt     subset_size;
517 
518     ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr);
519     if (newsetup) {
520       IS  sub;
521       /* work vectors */
522       ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,subset_size,deluxe_ctx->workspace,&deluxe_ctx->seq_work1[i]);CHKERRQ(ierr);
523       ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,subset_size,deluxe_ctx->workspace+subset_size,&deluxe_ctx->seq_work2[i]);CHKERRQ(ierr);
524 
525       /* scatters */
526       ierr = ISCreateGeneral(PETSC_COMM_SELF,subset_size,idxs+cum,PETSC_COPY_VALUES,&sub);CHKERRQ(ierr);
527       ierr = VecScatterCreate(pcbddc->work_scaling,sub,deluxe_ctx->seq_work1[i],NULL,&deluxe_ctx->seq_scctx[i]);CHKERRQ(ierr);
528       ierr = ISDestroy(&sub);CHKERRQ(ierr);
529     }
530 
531     /* S_E_j */
532     ierr = MatDestroy(&deluxe_ctx->seq_mat[i]);CHKERRQ(ierr);
533     ierr = MatCreateSeqDense(PETSC_COMM_SELF,subset_size,subset_size,matdata+cum2,&deluxe_ctx->seq_mat[i]);CHKERRQ(ierr);
534 
535     /* \sum_k S^k_E_j */
536     ierr = MatDestroy(&deluxe_ctx->seq_mat_inv_sum[i]);CHKERRQ(ierr);
537     ierr = MatCreateSeqDense(PETSC_COMM_SELF,subset_size,subset_size,matdata2+cum2,&deluxe_ctx->seq_mat_inv_sum[i]);CHKERRQ(ierr);
538     ierr = MatSetOption(deluxe_ctx->seq_mat_inv_sum[i],MAT_SPD,sub_schurs->is_posdef);CHKERRQ(ierr);
539     ierr = MatSetOption(deluxe_ctx->seq_mat_inv_sum[i],MAT_HERMITIAN,sub_schurs->is_hermitian);CHKERRQ(ierr);
540     if (sub_schurs->is_hermitian) {
541       ierr = MatCholeskyFactor(deluxe_ctx->seq_mat_inv_sum[i],NULL,NULL);CHKERRQ(ierr);
542     } else {
543       ierr = MatLUFactor(deluxe_ctx->seq_mat_inv_sum[i],NULL,NULL,NULL);CHKERRQ(ierr);
544     }
545     if (pcbddc->deluxe_singlemat) {
546       Mat X,Y;
547       if (!sub_schurs->is_hermitian) {
548         ierr = MatTranspose(deluxe_ctx->seq_mat[i],MAT_INITIAL_MATRIX,&X);CHKERRQ(ierr);
549       } else {
550         ierr = PetscObjectReference((PetscObject)deluxe_ctx->seq_mat[i]);CHKERRQ(ierr);
551         X    = deluxe_ctx->seq_mat[i];
552       }
553       ierr = MatDuplicate(X,MAT_DO_NOT_COPY_VALUES,&Y);CHKERRQ(ierr);
554       if (!sub_schurs->is_hermitian) {
555         ierr = PCBDDCMatTransposeMatSolve_SeqDense(deluxe_ctx->seq_mat_inv_sum[i],X,Y);CHKERRQ(ierr);
556       } else {
557         ierr = MatMatSolve(deluxe_ctx->seq_mat_inv_sum[i],X,Y);CHKERRQ(ierr);
558       }
559 
560       ierr = MatDestroy(&deluxe_ctx->seq_mat_inv_sum[i]);CHKERRQ(ierr);
561       ierr = MatDestroy(&deluxe_ctx->seq_mat[i]);CHKERRQ(ierr);
562       ierr = MatDestroy(&X);CHKERRQ(ierr);
563       if (deluxe_ctx->change) {
564         Mat C,CY;
565 
566         if (!deluxe_ctx->change_with_qr) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only QR based change of basis");
567         ierr = KSPGetOperators(deluxe_ctx->change[i],&C,NULL);CHKERRQ(ierr);
568         ierr = MatMatMult(C,Y,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&CY);CHKERRQ(ierr);
569         ierr = MatMatTransposeMult(CY,C,MAT_REUSE_MATRIX,PETSC_DEFAULT,&Y);CHKERRQ(ierr);
570         ierr = MatDestroy(&CY);CHKERRQ(ierr);
571       }
572       ierr = MatTranspose(Y,MAT_INPLACE_MATRIX,&Y);CHKERRQ(ierr);
573       deluxe_ctx->seq_mat[i] = Y;
574     }
575     cum += subset_size;
576     cum2 += subset_size*subset_size;
577   }
578   ierr = ISRestoreIndices(sub_schurs->is_Ej_all,&idxs);CHKERRQ(ierr);
579   ierr = MatSeqAIJRestoreArray(sub_schurs->S_Ej_all,&matdata);CHKERRQ(ierr);
580   ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_all,&matdata2);CHKERRQ(ierr);
581   if (pcbddc->deluxe_singlemat) {
582     deluxe_ctx->change         = NULL;
583     deluxe_ctx->change_with_qr = PETSC_FALSE;
584   }
585 
586   if (deluxe_ctx->change && !deluxe_ctx->change_with_qr) {
587     for (i=0;i<deluxe_ctx->seq_n;i++) {
588       if (newsetup) {
589         PC pc;
590 
591         ierr = KSPGetPC(deluxe_ctx->change[i],&pc);CHKERRQ(ierr);
592         ierr = PCSetType(pc,PCLU);CHKERRQ(ierr);
593         ierr = KSPSetFromOptions(deluxe_ctx->change[i]);CHKERRQ(ierr);
594       }
595       ierr = KSPSetUp(deluxe_ctx->change[i]);CHKERRQ(ierr);
596     }
597   }
598   PetscFunctionReturn(0);
599 }
600