xref: /petsc/src/ksp/pc/impls/bddc/bddcscalingbasic.c (revision 7ae38d14048e535a346ba522ffbdb8e04ba6fa16)
1 #include <../src/ksp/pc/impls/bddc/bddc.h>
2 #include <../src/ksp/pc/impls/bddc/bddcprivate.h>
3 #include <petscblaslapack.h>
4 #include <../src/mat/impls/dense/seq/dense.h>
5 
6 /* prototypes for deluxe functions */
7 static PetscErrorCode PCBDDCScalingCreate_Deluxe(PC);
8 static PetscErrorCode PCBDDCScalingDestroy_Deluxe(PC);
9 static PetscErrorCode PCBDDCScalingSetUp_Deluxe(PC);
10 static PetscErrorCode PCBDDCScalingSetUp_Deluxe_Private(PC);
11 static PetscErrorCode PCBDDCScalingReset_Deluxe_Solvers(PCBDDCDeluxeScaling);
12 
13 #undef __FUNCT__
14 #define __FUNCT__ "PCBDDCMatTransposeMatSolve_SeqDense"
15 static PetscErrorCode PCBDDCMatTransposeMatSolve_SeqDense(Mat A,Mat B,Mat X)
16 {
17   Mat_SeqDense   *mat = (Mat_SeqDense*)A->data;
18   PetscErrorCode ierr;
19   PetscScalar    *b,*x;
20   PetscInt       n;
21   PetscBLASInt   nrhs,info,m;
22   PetscBool      flg;
23 
24   PetscFunctionBegin;
25   ierr = PetscBLASIntCast(A->rmap->n,&m);CHKERRQ(ierr);
26   ierr = PetscObjectTypeCompareAny((PetscObject)B,&flg,MATSEQDENSE,MATMPIDENSE,NULL);CHKERRQ(ierr);
27   if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix B must be MATDENSE matrix");
28   ierr = PetscObjectTypeCompareAny((PetscObject)X,&flg,MATSEQDENSE,MATMPIDENSE,NULL);CHKERRQ(ierr);
29   if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix X must be MATDENSE matrix");
30 
31   ierr = MatGetSize(B,NULL,&n);CHKERRQ(ierr);
32   ierr = PetscBLASIntCast(n,&nrhs);CHKERRQ(ierr);
33   ierr = MatDenseGetArray(B,&b);CHKERRQ(ierr);
34   ierr = MatDenseGetArray(X,&x);CHKERRQ(ierr);
35 
36   ierr = PetscMemcpy(x,b,m*nrhs*sizeof(PetscScalar));CHKERRQ(ierr);
37 
38   if (A->factortype == MAT_FACTOR_LU) {
39 #if defined(PETSC_MISSING_LAPACK_GETRS)
40     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"GETRS - Lapack routine is unavailable.");
41 #else
42     PetscStackCallBLAS("LAPACKgetrs",LAPACKgetrs_("T",&m,&nrhs,mat->v,&mat->lda,mat->pivots,x,&m,&info));
43     if (info) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"GETRS - Bad solve");
44 #endif
45   } else if (A->factortype == MAT_FACTOR_CHOLESKY) {
46 #if defined(PETSC_MISSING_LAPACK_POTRS)
47     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"POTRS - Lapack routine is unavailable.");
48 #else
49     PetscStackCallBLAS("LAPACKpotrs",LAPACKpotrs_("L",&m,&nrhs,mat->v,&mat->lda,x,&m,&info));
50     if (info) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"POTRS Bad solve");
51 #endif
52   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix must be factored to solve");
53 
54   ierr = MatDenseRestoreArray(B,&b);CHKERRQ(ierr);
55   ierr = MatDenseRestoreArray(X,&x);CHKERRQ(ierr);
56   ierr = PetscLogFlops(nrhs*(2.0*m*m - m));CHKERRQ(ierr);
57   PetscFunctionReturn(0);
58 }
59 
60 
61 #undef __FUNCT__
62 #define __FUNCT__ "PCBDDCScalingExtension_Basic"
63 static PetscErrorCode PCBDDCScalingExtension_Basic(PC pc, Vec local_interface_vector, Vec global_vector)
64 {
65   PC_IS*         pcis = (PC_IS*)pc->data;
66   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
67   PetscErrorCode ierr;
68 
69   PetscFunctionBegin;
70   /* Apply partition of unity */
71   ierr = VecPointwiseMult(pcbddc->work_scaling,pcis->D,local_interface_vector);CHKERRQ(ierr);
72   ierr = VecSet(global_vector,0.0);CHKERRQ(ierr);
73   ierr = VecScatterBegin(pcis->global_to_B,pcbddc->work_scaling,global_vector,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
74   ierr = VecScatterEnd(pcis->global_to_B,pcbddc->work_scaling,global_vector,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
75   PetscFunctionReturn(0);
76 }
77 
78 #undef __FUNCT__
79 #define __FUNCT__ "PCBDDCScalingExtension_Deluxe"
80 static PetscErrorCode PCBDDCScalingExtension_Deluxe(PC pc, Vec x, Vec y)
81 {
82   PC_IS*              pcis=(PC_IS*)pc->data;
83   PC_BDDC*            pcbddc=(PC_BDDC*)pc->data;
84   PCBDDCDeluxeScaling deluxe_ctx = pcbddc->deluxe_ctx;
85   PetscErrorCode      ierr;
86 
87   PetscFunctionBegin;
88   ierr = VecSet(pcbddc->work_scaling,0.0);CHKERRQ(ierr);
89   ierr = VecSet(y,0.0);CHKERRQ(ierr);
90   if (deluxe_ctx->n_simple) { /* scale deluxe vertices using diagonal scaling */
91     PetscInt          i;
92     const PetscScalar *array_x,*array_D;
93     PetscScalar       *array;
94     ierr = VecGetArrayRead(x,&array_x);CHKERRQ(ierr);
95     ierr = VecGetArrayRead(pcis->D,&array_D);CHKERRQ(ierr);
96     ierr = VecGetArray(pcbddc->work_scaling,&array);CHKERRQ(ierr);
97     for (i=0;i<deluxe_ctx->n_simple;i++) {
98       array[deluxe_ctx->idx_simple_B[i]] = array_x[deluxe_ctx->idx_simple_B[i]]*array_D[deluxe_ctx->idx_simple_B[i]];
99     }
100     ierr = VecRestoreArray(pcbddc->work_scaling,&array);CHKERRQ(ierr);
101     ierr = VecRestoreArrayRead(pcis->D,&array_D);CHKERRQ(ierr);
102     ierr = VecRestoreArrayRead(x,&array_x);CHKERRQ(ierr);
103   }
104   /* sequential part : all problems and Schur applications collapsed into a single matrix vector multiplication or a matvec and a solve */
105   if (deluxe_ctx->seq_mat) {
106     PetscInt i;
107     for (i=0;i<deluxe_ctx->seq_n;i++) {
108       if (deluxe_ctx->change) {
109         ierr = VecScatterBegin(deluxe_ctx->seq_scctx[i],x,deluxe_ctx->seq_work2[i],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
110         ierr = VecScatterEnd(deluxe_ctx->seq_scctx[i],x,deluxe_ctx->seq_work2[i],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
111         if (deluxe_ctx->change_with_qr) {
112           Mat change;
113 
114           ierr = KSPGetOperators(deluxe_ctx->change[i],&change,NULL);CHKERRQ(ierr);
115           ierr = MatMultTranspose(change,deluxe_ctx->seq_work2[i],deluxe_ctx->seq_work1[i]);CHKERRQ(ierr);
116         } else {
117           ierr = KSPSolve(deluxe_ctx->change[i],deluxe_ctx->seq_work2[i],deluxe_ctx->seq_work1[i]);CHKERRQ(ierr);
118         }
119       } else {
120         ierr = VecScatterBegin(deluxe_ctx->seq_scctx[i],x,deluxe_ctx->seq_work1[i],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
121         ierr = VecScatterEnd(deluxe_ctx->seq_scctx[i],x,deluxe_ctx->seq_work1[i],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
122       }
123       ierr = MatMultTranspose(deluxe_ctx->seq_mat[i],deluxe_ctx->seq_work1[i],deluxe_ctx->seq_work2[i]);CHKERRQ(ierr);
124       if (deluxe_ctx->seq_mat_inv_sum[i]) {
125         PetscScalar *x;
126 
127         ierr = VecGetArray(deluxe_ctx->seq_work2[i],&x);CHKERRQ(ierr);
128         ierr = VecPlaceArray(deluxe_ctx->seq_work1[i],x);CHKERRQ(ierr);
129         ierr = VecRestoreArray(deluxe_ctx->seq_work2[i],&x);CHKERRQ(ierr);
130         ierr = MatSolveTranspose(deluxe_ctx->seq_mat_inv_sum[i],deluxe_ctx->seq_work1[i],deluxe_ctx->seq_work2[i]);CHKERRQ(ierr);
131         ierr = VecResetArray(deluxe_ctx->seq_work1[i]);CHKERRQ(ierr);
132       }
133       if (deluxe_ctx->change) {
134         Mat change;
135 
136         ierr = KSPGetOperators(deluxe_ctx->change[i],&change,NULL);CHKERRQ(ierr);
137         ierr = MatMult(change,deluxe_ctx->seq_work2[i],deluxe_ctx->seq_work1[i]);CHKERRQ(ierr);
138         ierr = VecScatterBegin(deluxe_ctx->seq_scctx[i],deluxe_ctx->seq_work1[i],pcbddc->work_scaling,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
139         ierr = VecScatterEnd(deluxe_ctx->seq_scctx[i],deluxe_ctx->seq_work1[i],pcbddc->work_scaling,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
140       } else {
141         ierr = VecScatterBegin(deluxe_ctx->seq_scctx[i],deluxe_ctx->seq_work2[i],pcbddc->work_scaling,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
142         ierr = VecScatterEnd(deluxe_ctx->seq_scctx[i],deluxe_ctx->seq_work2[i],pcbddc->work_scaling,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
143       }
144     }
145   }
146   /* put local boundary part in global vector */
147   ierr = VecScatterBegin(pcis->global_to_B,pcbddc->work_scaling,y,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
148   ierr = VecScatterEnd(pcis->global_to_B,pcbddc->work_scaling,y,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
149   PetscFunctionReturn(0);
150 }
151 
152 #undef __FUNCT__
153 #define __FUNCT__ "PCBDDCScalingExtension"
154 PetscErrorCode PCBDDCScalingExtension(PC pc, Vec local_interface_vector, Vec global_vector)
155 {
156   PC_BDDC        *pcbddc=(PC_BDDC*)pc->data;
157   PetscErrorCode ierr;
158 
159   PetscFunctionBegin;
160   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
161   PetscValidHeaderSpecific(local_interface_vector,VEC_CLASSID,2);
162   PetscValidHeaderSpecific(global_vector,VEC_CLASSID,3);
163   if (local_interface_vector == pcbddc->work_scaling) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Local vector cannot be pcbddc->work_scaling!\n");
164   ierr = PetscUseMethod(pc,"PCBDDCScalingExtension_C",(PC,Vec,Vec),(pc,local_interface_vector,global_vector));CHKERRQ(ierr);
165   PetscFunctionReturn(0);
166 }
167 
168 #undef __FUNCT__
169 #define __FUNCT__ "PCBDDCScalingRestriction_Basic"
170 static PetscErrorCode PCBDDCScalingRestriction_Basic(PC pc, Vec global_vector, Vec local_interface_vector)
171 {
172   PetscErrorCode ierr;
173   PC_IS          *pcis = (PC_IS*)pc->data;
174 
175   PetscFunctionBegin;
176   ierr = VecScatterBegin(pcis->global_to_B,global_vector,local_interface_vector,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
177   ierr = VecScatterEnd(pcis->global_to_B,global_vector,local_interface_vector,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
178   /* Apply partition of unity */
179   ierr = VecPointwiseMult(local_interface_vector,pcis->D,local_interface_vector);CHKERRQ(ierr);
180   PetscFunctionReturn(0);
181 }
182 
183 #undef __FUNCT__
184 #define __FUNCT__ "PCBDDCScalingRestriction_Deluxe"
185 static PetscErrorCode PCBDDCScalingRestriction_Deluxe(PC pc, Vec x, Vec y)
186 {
187   PC_IS*              pcis=(PC_IS*)pc->data;
188   PC_BDDC*            pcbddc=(PC_BDDC*)pc->data;
189   PCBDDCDeluxeScaling deluxe_ctx = pcbddc->deluxe_ctx;
190   PetscErrorCode      ierr;
191 
192   PetscFunctionBegin;
193   /* get local boundary part of global vector */
194   ierr = VecScatterBegin(pcis->global_to_B,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
195   ierr = VecScatterEnd(pcis->global_to_B,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
196   if (deluxe_ctx->n_simple) { /* scale deluxe vertices using diagonal scaling */
197     PetscInt          i;
198     PetscScalar       *array_y;
199     const PetscScalar *array_D;
200     ierr = VecGetArray(y,&array_y);CHKERRQ(ierr);
201     ierr = VecGetArrayRead(pcis->D,&array_D);CHKERRQ(ierr);
202     for (i=0;i<deluxe_ctx->n_simple;i++) {
203       array_y[deluxe_ctx->idx_simple_B[i]] *= array_D[deluxe_ctx->idx_simple_B[i]];
204     }
205     ierr = VecRestoreArrayRead(pcis->D,&array_D);CHKERRQ(ierr);
206     ierr = VecRestoreArray(y,&array_y);CHKERRQ(ierr);
207   }
208   /* sequential part : all problems and Schur applications collapsed into a single matrix vector multiplication or a matvec and a solve */
209   if (deluxe_ctx->seq_mat) {
210     PetscInt i;
211     for (i=0;i<deluxe_ctx->seq_n;i++) {
212       if (deluxe_ctx->change) {
213         Mat change;
214 
215         ierr = VecScatterBegin(deluxe_ctx->seq_scctx[i],y,deluxe_ctx->seq_work2[i],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
216         ierr = VecScatterEnd(deluxe_ctx->seq_scctx[i],y,deluxe_ctx->seq_work2[i],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
217         ierr = KSPGetOperators(deluxe_ctx->change[i],&change,NULL);CHKERRQ(ierr);
218         ierr = MatMultTranspose(change,deluxe_ctx->seq_work2[i],deluxe_ctx->seq_work1[i]);CHKERRQ(ierr);
219       } else {
220         ierr = VecScatterBegin(deluxe_ctx->seq_scctx[i],y,deluxe_ctx->seq_work1[i],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
221         ierr = VecScatterEnd(deluxe_ctx->seq_scctx[i],y,deluxe_ctx->seq_work1[i],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
222       }
223       if (deluxe_ctx->seq_mat_inv_sum[i]) {
224         PetscScalar *x;
225 
226         ierr = VecGetArray(deluxe_ctx->seq_work1[i],&x);CHKERRQ(ierr);
227         ierr = VecPlaceArray(deluxe_ctx->seq_work2[i],x);CHKERRQ(ierr);
228         ierr = VecRestoreArray(deluxe_ctx->seq_work1[i],&x);CHKERRQ(ierr);
229         ierr = MatSolve(deluxe_ctx->seq_mat_inv_sum[i],deluxe_ctx->seq_work1[i],deluxe_ctx->seq_work2[i]);CHKERRQ(ierr);
230         ierr = VecResetArray(deluxe_ctx->seq_work2[i]);CHKERRQ(ierr);
231       }
232       ierr = MatMult(deluxe_ctx->seq_mat[i],deluxe_ctx->seq_work1[i],deluxe_ctx->seq_work2[i]);CHKERRQ(ierr);
233       if (deluxe_ctx->change) {
234         if (deluxe_ctx->change_with_qr) {
235           Mat change;
236 
237           ierr = KSPGetOperators(deluxe_ctx->change[i],&change,NULL);CHKERRQ(ierr);
238           ierr = MatMult(change,deluxe_ctx->seq_work2[i],deluxe_ctx->seq_work1[i]);CHKERRQ(ierr);
239         } else {
240           ierr = KSPSolveTranspose(deluxe_ctx->change[i],deluxe_ctx->seq_work2[i],deluxe_ctx->seq_work1[i]);CHKERRQ(ierr);
241         }
242         ierr = VecScatterBegin(deluxe_ctx->seq_scctx[i],deluxe_ctx->seq_work1[i],y,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
243         ierr = VecScatterEnd(deluxe_ctx->seq_scctx[i],deluxe_ctx->seq_work1[i],y,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
244       } else {
245         ierr = VecScatterBegin(deluxe_ctx->seq_scctx[i],deluxe_ctx->seq_work2[i],y,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
246         ierr = VecScatterEnd(deluxe_ctx->seq_scctx[i],deluxe_ctx->seq_work2[i],y,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
247       }
248     }
249   }
250   PetscFunctionReturn(0);
251 }
252 
253 #undef __FUNCT__
254 #define __FUNCT__ "PCBDDCScalingRestriction"
255 PetscErrorCode PCBDDCScalingRestriction(PC pc, Vec global_vector, Vec local_interface_vector)
256 {
257   PC_BDDC        *pcbddc=(PC_BDDC*)pc->data;
258   PetscErrorCode ierr;
259 
260   PetscFunctionBegin;
261   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
262   PetscValidHeaderSpecific(global_vector,VEC_CLASSID,2);
263   PetscValidHeaderSpecific(local_interface_vector,VEC_CLASSID,3);
264   if (local_interface_vector == pcbddc->work_scaling) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Local vector cannot be pcbddc->work_scaling!\n");
265   ierr = PetscUseMethod(pc,"PCBDDCScalingRestriction_C",(PC,Vec,Vec),(pc,global_vector,local_interface_vector));CHKERRQ(ierr);
266   PetscFunctionReturn(0);
267 }
268 
269 #undef __FUNCT__
270 #define __FUNCT__ "PCBDDCScalingSetUp"
271 PetscErrorCode PCBDDCScalingSetUp(PC pc)
272 {
273   PC_IS*         pcis=(PC_IS*)pc->data;
274   PC_BDDC*       pcbddc=(PC_BDDC*)pc->data;
275   PetscErrorCode ierr;
276 
277   PetscFunctionBegin;
278   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
279   /* create work vector for the operator */
280   ierr = VecDestroy(&pcbddc->work_scaling);CHKERRQ(ierr);
281   ierr = VecDuplicate(pcis->vec1_B,&pcbddc->work_scaling);CHKERRQ(ierr);
282   /* always rebuild pcis->D */
283   if (pcis->use_stiffness_scaling) {
284     ierr = MatGetDiagonal(pcbddc->local_mat,pcis->vec1_N);CHKERRQ(ierr);
285     ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
286     ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
287   }
288   ierr = VecCopy(pcis->D,pcis->vec1_B);CHKERRQ(ierr);
289   ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
290   ierr = VecScatterBegin(pcis->global_to_B,pcis->vec1_B,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
291   ierr = VecScatterEnd(pcis->global_to_B,pcis->vec1_B,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
292   ierr = VecScatterBegin(pcis->global_to_B,pcis->vec1_global,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
293   ierr = VecScatterEnd(pcis->global_to_B,pcis->vec1_global,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
294   ierr = VecPointwiseDivide(pcis->D,pcis->D,pcis->vec1_B);CHKERRQ(ierr);
295   /* now setup */
296   if (pcbddc->use_deluxe_scaling) {
297     if (!pcbddc->deluxe_ctx) {
298       ierr = PCBDDCScalingCreate_Deluxe(pc);CHKERRQ(ierr);
299     }
300     ierr = PCBDDCScalingSetUp_Deluxe(pc);CHKERRQ(ierr);
301     ierr = PetscObjectComposeFunction((PetscObject)pc,"PCBDDCScalingRestriction_C",PCBDDCScalingRestriction_Deluxe);CHKERRQ(ierr);
302     ierr = PetscObjectComposeFunction((PetscObject)pc,"PCBDDCScalingExtension_C",PCBDDCScalingExtension_Deluxe);CHKERRQ(ierr);
303   } else {
304     ierr = PetscObjectComposeFunction((PetscObject)pc,"PCBDDCScalingRestriction_C",PCBDDCScalingRestriction_Basic);CHKERRQ(ierr);
305     ierr = PetscObjectComposeFunction((PetscObject)pc,"PCBDDCScalingExtension_C",PCBDDCScalingExtension_Basic);CHKERRQ(ierr);
306   }
307 
308   /* test */
309   if (pcbddc->dbg_flag) {
310     Mat         B0_B = NULL;
311     Vec         B0_Bv = NULL, B0_Bv2 = NULL;
312     Vec         vec2_global;
313     PetscViewer viewer = pcbddc->dbg_viewer;
314     PetscReal   error;
315 
316     /* extension -> from local to parallel */
317     ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
318     ierr = VecSetRandom(pcis->vec1_B,NULL);CHKERRQ(ierr);
319     ierr = VecScatterBegin(pcis->global_to_B,pcis->vec1_B,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
320     ierr = VecScatterEnd(pcis->global_to_B,pcis->vec1_B,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
321     ierr = VecDuplicate(pcis->vec1_global,&vec2_global);CHKERRQ(ierr);
322     ierr = VecCopy(pcis->vec1_global,vec2_global);CHKERRQ(ierr);
323     ierr = VecScatterBegin(pcis->global_to_B,pcis->vec1_global,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
324     ierr = VecScatterEnd(pcis->global_to_B,pcis->vec1_global,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
325     if (pcbddc->benign_n) {
326       IS is_dummy;
327 
328       ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr);
329       ierr = MatGetSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr);
330       ierr = ISDestroy(&is_dummy);CHKERRQ(ierr);
331       ierr = MatCreateVecs(B0_B,NULL,&B0_Bv);CHKERRQ(ierr);
332       ierr = VecDuplicate(B0_Bv,&B0_Bv2);CHKERRQ(ierr);
333       ierr = MatMult(B0_B,pcis->vec1_B,B0_Bv);CHKERRQ(ierr);
334     }
335     ierr = PCBDDCScalingExtension(pc,pcis->vec1_B,pcis->vec1_global);CHKERRQ(ierr);
336     if (pcbddc->benign_saddle_point) {
337       PetscReal errorl = 0.;
338       ierr = VecScatterBegin(pcis->global_to_B,pcis->vec1_global,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
339       ierr = VecScatterEnd(pcis->global_to_B,pcis->vec1_global,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
340       if (pcbddc->benign_n) {
341         ierr = MatMult(B0_B,pcis->vec1_B,B0_Bv2);CHKERRQ(ierr);
342         ierr = VecAXPY(B0_Bv,-1.0,B0_Bv2);CHKERRQ(ierr);
343         ierr = VecNorm(B0_Bv,NORM_INFINITY,&errorl);CHKERRQ(ierr);
344       }
345       ierr = MPI_Allreduce(&errorl,&error,1,MPIU_REAL,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
346       ierr = PetscViewerASCIIPrintf(viewer,"Error benign extension %1.14e\n",error);CHKERRQ(ierr);
347     }
348     ierr = VecAXPY(pcis->vec1_global,-1.0,vec2_global);CHKERRQ(ierr);
349     ierr = VecNorm(pcis->vec1_global,NORM_INFINITY,&error);CHKERRQ(ierr);
350     ierr = PetscViewerASCIIPrintf(viewer,"Error scaling extension %1.14e\n",error);CHKERRQ(ierr);
351     ierr = VecDestroy(&vec2_global);CHKERRQ(ierr);
352 
353     /* restriction -> from parallel to local */
354     ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
355     ierr = VecSetRandom(pcis->vec1_B,NULL);CHKERRQ(ierr);
356     ierr = VecScatterBegin(pcis->global_to_B,pcis->vec1_B,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
357     ierr = VecScatterEnd(pcis->global_to_B,pcis->vec1_B,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
358     ierr = PCBDDCScalingRestriction(pc,pcis->vec1_global,pcis->vec1_B);CHKERRQ(ierr);
359     ierr = VecScale(pcis->vec1_B,-1.0);CHKERRQ(ierr);
360     ierr = VecScatterBegin(pcis->global_to_B,pcis->vec1_B,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
361     ierr = VecScatterEnd(pcis->global_to_B,pcis->vec1_B,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
362     ierr = VecNorm(pcis->vec1_global,NORM_INFINITY,&error);CHKERRQ(ierr);
363     ierr = PetscViewerASCIIPrintf(viewer,"Error scaling restriction %1.14e\n",error);CHKERRQ(ierr);
364     ierr = MatDestroy(&B0_B);CHKERRQ(ierr);
365     ierr = VecDestroy(&B0_Bv);CHKERRQ(ierr);
366     ierr = VecDestroy(&B0_Bv2);CHKERRQ(ierr);
367   }
368   PetscFunctionReturn(0);
369 }
370 
371 #undef __FUNCT__
372 #define __FUNCT__ "PCBDDCScalingDestroy"
373 PetscErrorCode PCBDDCScalingDestroy(PC pc)
374 {
375   PC_BDDC*       pcbddc=(PC_BDDC*)pc->data;
376   PetscErrorCode ierr;
377 
378   PetscFunctionBegin;
379   if (pcbddc->deluxe_ctx) {
380     ierr = PCBDDCScalingDestroy_Deluxe(pc);CHKERRQ(ierr);
381   }
382   ierr = VecDestroy(&pcbddc->work_scaling);CHKERRQ(ierr);
383   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCBDDCScalingRestriction_C",NULL);CHKERRQ(ierr);
384   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCBDDCScalingExtension_C",NULL);CHKERRQ(ierr);
385   PetscFunctionReturn(0);
386 }
387 
388 #undef __FUNCT__
389 #define __FUNCT__ "PCBDDCScalingCreate_Deluxe"
390 static PetscErrorCode PCBDDCScalingCreate_Deluxe(PC pc)
391 {
392   PC_BDDC*            pcbddc=(PC_BDDC*)pc->data;
393   PCBDDCDeluxeScaling deluxe_ctx;
394   PetscErrorCode      ierr;
395 
396   PetscFunctionBegin;
397   ierr = PetscNew(&deluxe_ctx);CHKERRQ(ierr);
398   pcbddc->deluxe_ctx = deluxe_ctx;
399   PetscFunctionReturn(0);
400 }
401 
402 #undef __FUNCT__
403 #define __FUNCT__ "PCBDDCScalingDestroy_Deluxe"
404 static PetscErrorCode PCBDDCScalingDestroy_Deluxe(PC pc)
405 {
406   PC_BDDC*            pcbddc=(PC_BDDC*)pc->data;
407   PetscErrorCode      ierr;
408 
409   PetscFunctionBegin;
410   ierr = PCBDDCScalingReset_Deluxe_Solvers(pcbddc->deluxe_ctx);CHKERRQ(ierr);
411   ierr = PetscFree(pcbddc->deluxe_ctx);CHKERRQ(ierr);
412   PetscFunctionReturn(0);
413 }
414 
415 #undef __FUNCT__
416 #define __FUNCT__ "PCBDDCScalingReset_Deluxe_Solvers"
417 static PetscErrorCode PCBDDCScalingReset_Deluxe_Solvers(PCBDDCDeluxeScaling deluxe_ctx)
418 {
419   PetscInt       i;
420   PetscErrorCode ierr;
421 
422   PetscFunctionBegin;
423   ierr = PetscFree(deluxe_ctx->idx_simple_B);CHKERRQ(ierr);
424   deluxe_ctx->n_simple = 0;
425   for (i=0;i<deluxe_ctx->seq_n;i++) {
426     ierr = VecScatterDestroy(&deluxe_ctx->seq_scctx[i]);CHKERRQ(ierr);
427     ierr = VecDestroy(&deluxe_ctx->seq_work1[i]);CHKERRQ(ierr);
428     ierr = VecDestroy(&deluxe_ctx->seq_work2[i]);CHKERRQ(ierr);
429     ierr = MatDestroy(&deluxe_ctx->seq_mat[i]);CHKERRQ(ierr);
430     ierr = MatDestroy(&deluxe_ctx->seq_mat_inv_sum[i]);CHKERRQ(ierr);
431   }
432   ierr = PetscFree5(deluxe_ctx->seq_scctx,deluxe_ctx->seq_work1,deluxe_ctx->seq_work2,deluxe_ctx->seq_mat,deluxe_ctx->seq_mat_inv_sum);CHKERRQ(ierr);
433   ierr = PetscFree(deluxe_ctx->workspace);CHKERRQ(ierr);
434   deluxe_ctx->seq_n = 0;
435   PetscFunctionReturn(0);
436 }
437 
438 #undef __FUNCT__
439 #define __FUNCT__ "PCBDDCScalingSetUp_Deluxe"
440 static PetscErrorCode PCBDDCScalingSetUp_Deluxe(PC pc)
441 {
442   PC_IS               *pcis=(PC_IS*)pc->data;
443   PC_BDDC             *pcbddc=(PC_BDDC*)pc->data;
444   PCBDDCDeluxeScaling deluxe_ctx=pcbddc->deluxe_ctx;
445   PCBDDCSubSchurs     sub_schurs=pcbddc->sub_schurs;
446   PetscErrorCode      ierr;
447 
448   PetscFunctionBegin;
449   /* reset data structures if the topology has changed */
450   if (pcbddc->recompute_topography) {
451     ierr = PCBDDCScalingReset_Deluxe_Solvers(deluxe_ctx);CHKERRQ(ierr);
452   }
453 
454   /* Compute data structures to solve sequential problems */
455   ierr = PCBDDCScalingSetUp_Deluxe_Private(pc);CHKERRQ(ierr);
456 
457   /* diagonal scaling on interface dofs not contained in cc */
458   if (sub_schurs->is_vertices || sub_schurs->is_dir) {
459     PetscInt n_com,n_dir;
460     n_com = 0;
461     if (sub_schurs->is_vertices) {
462       ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_com);CHKERRQ(ierr);
463     }
464     n_dir = 0;
465     if (sub_schurs->is_dir) {
466       ierr = ISGetLocalSize(sub_schurs->is_dir,&n_dir);CHKERRQ(ierr);
467     }
468     if (!deluxe_ctx->n_simple) {
469       deluxe_ctx->n_simple = n_dir + n_com;
470       ierr = PetscMalloc1(deluxe_ctx->n_simple,&deluxe_ctx->idx_simple_B);CHKERRQ(ierr);
471       if (sub_schurs->is_vertices) {
472         PetscInt       nmap;
473         const PetscInt *idxs;
474 
475         ierr = ISGetIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr);
476         ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_com,idxs,&nmap,deluxe_ctx->idx_simple_B);CHKERRQ(ierr);
477         if (nmap != n_com) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error when mapping simply scaled dofs (is_vertices)! %d != %d",nmap,n_com);
478         ierr = ISRestoreIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr);
479       }
480       if (sub_schurs->is_dir) {
481         PetscInt       nmap;
482         const PetscInt *idxs;
483 
484         ierr = ISGetIndices(sub_schurs->is_dir,&idxs);CHKERRQ(ierr);
485         ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_dir,idxs,&nmap,deluxe_ctx->idx_simple_B+n_com);CHKERRQ(ierr);
486         if (nmap != n_dir) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error when mapping simply scaled dofs (sub_schurs->is_dir)! %d != %d",nmap,n_dir);
487         ierr = ISRestoreIndices(sub_schurs->is_dir,&idxs);CHKERRQ(ierr);
488       }
489       ierr = PetscSortInt(deluxe_ctx->n_simple,deluxe_ctx->idx_simple_B);CHKERRQ(ierr);
490     } else {
491       if (deluxe_ctx->n_simple != n_dir + n_com) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Number of simply scaled dofs %d is different from the previous one computed %d",n_dir + n_com,deluxe_ctx->n_simple);
492     }
493   } else {
494     deluxe_ctx->n_simple = 0;
495     deluxe_ctx->idx_simple_B = 0;
496   }
497   PetscFunctionReturn(0);
498 }
499 
500 #undef __FUNCT__
501 #define __FUNCT__ "PCBDDCScalingSetUp_Deluxe_Private"
502 static PetscErrorCode PCBDDCScalingSetUp_Deluxe_Private(PC pc)
503 {
504   PC_BDDC                *pcbddc=(PC_BDDC*)pc->data;
505   PCBDDCDeluxeScaling    deluxe_ctx=pcbddc->deluxe_ctx;
506   PCBDDCSubSchurs        sub_schurs = pcbddc->sub_schurs;
507   PetscScalar            *matdata,*matdata2;
508   PetscInt               i,max_subset_size,cum,cum2;
509   const PetscInt         *idxs;
510   PetscBool              newsetup = PETSC_FALSE;
511   PetscErrorCode         ierr;
512 
513   PetscFunctionBegin;
514   if (!sub_schurs->n_subs) {
515     PetscFunctionReturn(0);
516   }
517 
518   /* Allocate arrays for subproblems */
519   if (!deluxe_ctx->seq_n) {
520     deluxe_ctx->seq_n = sub_schurs->n_subs;
521     ierr = PetscCalloc5(deluxe_ctx->seq_n,&deluxe_ctx->seq_scctx,deluxe_ctx->seq_n,&deluxe_ctx->seq_work1,deluxe_ctx->seq_n,&deluxe_ctx->seq_work2,deluxe_ctx->seq_n,&deluxe_ctx->seq_mat,deluxe_ctx->seq_n,&deluxe_ctx->seq_mat_inv_sum);CHKERRQ(ierr);
522     newsetup = PETSC_TRUE;
523   } else if (deluxe_ctx->seq_n != sub_schurs->n_subs) {
524     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Number of deluxe subproblems %d is different from the sub_schurs %d",deluxe_ctx->seq_n,sub_schurs->n_subs);
525   }
526   deluxe_ctx->change_with_qr = sub_schurs->change_with_qr;
527 
528   /* Create objects for deluxe */
529   max_subset_size = 0;
530   for (i=0;i<sub_schurs->n_subs;i++) {
531     PetscInt subset_size;
532     ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr);
533     max_subset_size = PetscMax(subset_size,max_subset_size);
534   }
535   if (newsetup) {
536     ierr = PetscMalloc1(2*max_subset_size,&deluxe_ctx->workspace);CHKERRQ(ierr);
537   }
538   cum = cum2 = 0;
539   ierr = ISGetIndices(sub_schurs->is_Ej_all,&idxs);CHKERRQ(ierr);
540   ierr = MatSeqAIJGetArray(sub_schurs->S_Ej_all,&matdata);CHKERRQ(ierr);
541   ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_all,&matdata2);CHKERRQ(ierr);
542   for (i=0;i<deluxe_ctx->seq_n;i++) {
543     PetscInt     subset_size;
544 
545     ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr);
546     if (newsetup) {
547       IS  sub;
548       /* work vectors */
549       ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,subset_size,deluxe_ctx->workspace,&deluxe_ctx->seq_work1[i]);CHKERRQ(ierr);
550       ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,subset_size,deluxe_ctx->workspace+subset_size,&deluxe_ctx->seq_work2[i]);CHKERRQ(ierr);
551 
552       /* scatters */
553       ierr = ISCreateGeneral(PETSC_COMM_SELF,subset_size,idxs+cum,PETSC_COPY_VALUES,&sub);CHKERRQ(ierr);
554       ierr = VecScatterCreate(pcbddc->work_scaling,sub,deluxe_ctx->seq_work1[i],NULL,&deluxe_ctx->seq_scctx[i]);CHKERRQ(ierr);
555       ierr = ISDestroy(&sub);CHKERRQ(ierr);
556     }
557 
558     /* S_E_j */
559     ierr = MatDestroy(&deluxe_ctx->seq_mat[i]);CHKERRQ(ierr);
560     ierr = MatCreateSeqDense(PETSC_COMM_SELF,subset_size,subset_size,matdata+cum2,&deluxe_ctx->seq_mat[i]);CHKERRQ(ierr);
561 
562     /* \sum_k S^k_E_j */
563     ierr = MatDestroy(&deluxe_ctx->seq_mat_inv_sum[i]);CHKERRQ(ierr);
564     ierr = MatCreateSeqDense(PETSC_COMM_SELF,subset_size,subset_size,matdata2+cum2,&deluxe_ctx->seq_mat_inv_sum[i]);CHKERRQ(ierr);
565 
566     if (sub_schurs->is_hermitian && sub_schurs->is_posdef) {
567       ierr = MatCholeskyFactor(deluxe_ctx->seq_mat_inv_sum[i],NULL,NULL);CHKERRQ(ierr);
568     } else {
569       ierr = MatLUFactor(deluxe_ctx->seq_mat_inv_sum[i],NULL,NULL,NULL);CHKERRQ(ierr);
570     }
571     if (pcbddc->deluxe_singlemat) {
572       Mat X,Y;
573 
574       if (!sub_schurs->is_hermitian || !sub_schurs->is_posdef) {
575         ierr = MatTranspose(deluxe_ctx->seq_mat[i],MAT_INITIAL_MATRIX,&X);CHKERRQ(ierr);
576       } else {
577         ierr = PetscObjectReference((PetscObject)deluxe_ctx->seq_mat[i]);CHKERRQ(ierr);
578         X    = deluxe_ctx->seq_mat[i];
579       }
580       ierr = MatDuplicate(X,MAT_DO_NOT_COPY_VALUES,&Y);CHKERRQ(ierr);
581       if (!sub_schurs->is_hermitian || !sub_schurs->is_posdef) {
582         ierr = PCBDDCMatTransposeMatSolve_SeqDense(deluxe_ctx->seq_mat_inv_sum[i],X,Y);CHKERRQ(ierr);
583       } else {
584         ierr = MatMatSolve(deluxe_ctx->seq_mat_inv_sum[i],X,Y);CHKERRQ(ierr);
585       }
586 
587       ierr = MatDestroy(&deluxe_ctx->seq_mat_inv_sum[i]);CHKERRQ(ierr);
588       ierr = MatDestroy(&deluxe_ctx->seq_mat[i]);CHKERRQ(ierr);
589       ierr = MatDestroy(&X);CHKERRQ(ierr);
590       ierr = MatTranspose(Y,MAT_REUSE_MATRIX,&Y);CHKERRQ(ierr);
591       deluxe_ctx->seq_mat[i] = Y;
592     }
593     cum += subset_size;
594     cum2 += subset_size*subset_size;
595   }
596   ierr = ISRestoreIndices(sub_schurs->is_Ej_all,&idxs);CHKERRQ(ierr);
597   ierr = MatSeqAIJRestoreArray(sub_schurs->S_Ej_all,&matdata);CHKERRQ(ierr);
598   ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_all,&matdata2);CHKERRQ(ierr);
599 
600   /* the change of basis is just a reference to sub_schurs->change (if any) */
601   deluxe_ctx->change = sub_schurs->change;
602   if (deluxe_ctx->change && !deluxe_ctx->change_with_qr) {
603     for (i=0;i<deluxe_ctx->seq_n;i++) {
604       if (newsetup) {
605         PC pc;
606 
607         ierr = KSPGetPC(deluxe_ctx->change[i],&pc);CHKERRQ(ierr);
608         ierr = PCSetType(pc,PCLU);CHKERRQ(ierr);
609         ierr = KSPSetFromOptions(deluxe_ctx->change[i]);CHKERRQ(ierr);
610       }
611       ierr = KSPSetUp(deluxe_ctx->change[i]);CHKERRQ(ierr);
612     }
613   }
614   PetscFunctionReturn(0);
615 }
616