xref: /petsc/src/ksp/pc/impls/bddc/bddcscalingbasic.c (revision 9f4d3c52fa2fe0bb72fec4f4e85d8e495867af35)
1 #include <../src/ksp/pc/impls/bddc/bddc.h>
2 #include <../src/ksp/pc/impls/bddc/bddcprivate.h>
3 #include <petscblaslapack.h>
4 #include <../src/mat/impls/dense/seq/dense.h>
5 
6 /* prototypes for deluxe functions */
7 static PetscErrorCode PCBDDCScalingCreate_Deluxe(PC);
8 static PetscErrorCode PCBDDCScalingDestroy_Deluxe(PC);
9 static PetscErrorCode PCBDDCScalingSetUp_Deluxe(PC);
10 static PetscErrorCode PCBDDCScalingSetUp_Deluxe_Private(PC);
11 static PetscErrorCode PCBDDCScalingReset_Deluxe_Solvers(PCBDDCDeluxeScaling);
12 
13 static PetscErrorCode PCBDDCMatTransposeMatSolve_SeqDense(Mat A,Mat B,Mat X)
14 {
15   Mat_SeqDense   *mat = (Mat_SeqDense*)A->data;
16   PetscErrorCode ierr;
17   PetscScalar    *b,*x;
18   PetscInt       n;
19   PetscBLASInt   nrhs,info,m;
20   PetscBool      flg;
21 
22   PetscFunctionBegin;
23   ierr = PetscBLASIntCast(A->rmap->n,&m);CHKERRQ(ierr);
24   ierr = PetscObjectTypeCompareAny((PetscObject)B,&flg,MATSEQDENSE,MATMPIDENSE,NULL);CHKERRQ(ierr);
25   if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix B must be MATDENSE matrix");
26   ierr = PetscObjectTypeCompareAny((PetscObject)X,&flg,MATSEQDENSE,MATMPIDENSE,NULL);CHKERRQ(ierr);
27   if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix X must be MATDENSE matrix");
28 
29   ierr = MatGetSize(B,NULL,&n);CHKERRQ(ierr);
30   ierr = PetscBLASIntCast(n,&nrhs);CHKERRQ(ierr);
31   ierr = MatDenseGetArray(B,&b);CHKERRQ(ierr);
32   ierr = MatDenseGetArray(X,&x);CHKERRQ(ierr);
33 
34   ierr = PetscMemcpy(x,b,m*nrhs*sizeof(PetscScalar));CHKERRQ(ierr);
35 
36   if (A->factortype == MAT_FACTOR_LU) {
37 #if defined(PETSC_MISSING_LAPACK_GETRS)
38     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"GETRS - Lapack routine is unavailable.");
39 #else
40     PetscStackCallBLAS("LAPACKgetrs",LAPACKgetrs_("T",&m,&nrhs,mat->v,&mat->lda,mat->pivots,x,&m,&info));
41     if (info) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"GETRS - Bad solve");
42 #endif
43   } else if (A->factortype == MAT_FACTOR_CHOLESKY) {
44 #if defined(PETSC_MISSING_LAPACK_POTRS)
45     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"POTRS - Lapack routine is unavailable.");
46 #else
47     PetscStackCallBLAS("LAPACKpotrs",LAPACKpotrs_("L",&m,&nrhs,mat->v,&mat->lda,x,&m,&info));
48     if (info) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"POTRS Bad solve");
49 #endif
50   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix must be factored to solve");
51 
52   ierr = MatDenseRestoreArray(B,&b);CHKERRQ(ierr);
53   ierr = MatDenseRestoreArray(X,&x);CHKERRQ(ierr);
54   ierr = PetscLogFlops(nrhs*(2.0*m*m - m));CHKERRQ(ierr);
55   PetscFunctionReturn(0);
56 }
57 
58 
59 static PetscErrorCode PCBDDCScalingExtension_Basic(PC pc, Vec local_interface_vector, Vec global_vector)
60 {
61   PC_IS*         pcis = (PC_IS*)pc->data;
62   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
63   PetscErrorCode ierr;
64 
65   PetscFunctionBegin;
66   /* Apply partition of unity */
67   ierr = VecPointwiseMult(pcbddc->work_scaling,pcis->D,local_interface_vector);CHKERRQ(ierr);
68   ierr = VecSet(global_vector,0.0);CHKERRQ(ierr);
69   ierr = VecScatterBegin(pcis->global_to_B,pcbddc->work_scaling,global_vector,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
70   ierr = VecScatterEnd(pcis->global_to_B,pcbddc->work_scaling,global_vector,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
71   PetscFunctionReturn(0);
72 }
73 
74 static PetscErrorCode PCBDDCScalingExtension_Deluxe(PC pc, Vec x, Vec y)
75 {
76   PC_IS*              pcis=(PC_IS*)pc->data;
77   PC_BDDC*            pcbddc=(PC_BDDC*)pc->data;
78   PCBDDCDeluxeScaling deluxe_ctx = pcbddc->deluxe_ctx;
79   PetscErrorCode      ierr;
80 
81   PetscFunctionBegin;
82   ierr = VecSet(pcbddc->work_scaling,0.0);CHKERRQ(ierr);
83   ierr = VecSet(y,0.0);CHKERRQ(ierr);
84   if (deluxe_ctx->n_simple) { /* scale deluxe vertices using diagonal scaling */
85     PetscInt          i;
86     const PetscScalar *array_x,*array_D;
87     PetscScalar       *array;
88     ierr = VecGetArrayRead(x,&array_x);CHKERRQ(ierr);
89     ierr = VecGetArrayRead(pcis->D,&array_D);CHKERRQ(ierr);
90     ierr = VecGetArray(pcbddc->work_scaling,&array);CHKERRQ(ierr);
91     for (i=0;i<deluxe_ctx->n_simple;i++) {
92       array[deluxe_ctx->idx_simple_B[i]] = array_x[deluxe_ctx->idx_simple_B[i]]*array_D[deluxe_ctx->idx_simple_B[i]];
93     }
94     ierr = VecRestoreArray(pcbddc->work_scaling,&array);CHKERRQ(ierr);
95     ierr = VecRestoreArrayRead(pcis->D,&array_D);CHKERRQ(ierr);
96     ierr = VecRestoreArrayRead(x,&array_x);CHKERRQ(ierr);
97   }
98   /* sequential part : all problems and Schur applications collapsed into a single matrix vector multiplication or a matvec and a solve */
99   if (deluxe_ctx->seq_mat) {
100     PetscInt i;
101     for (i=0;i<deluxe_ctx->seq_n;i++) {
102       if (deluxe_ctx->change) {
103         ierr = VecScatterBegin(deluxe_ctx->seq_scctx[i],x,deluxe_ctx->seq_work2[i],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
104         ierr = VecScatterEnd(deluxe_ctx->seq_scctx[i],x,deluxe_ctx->seq_work2[i],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
105         if (deluxe_ctx->change_with_qr) {
106           Mat change;
107 
108           ierr = KSPGetOperators(deluxe_ctx->change[i],&change,NULL);CHKERRQ(ierr);
109           ierr = MatMultTranspose(change,deluxe_ctx->seq_work2[i],deluxe_ctx->seq_work1[i]);CHKERRQ(ierr);
110         } else {
111           ierr = KSPSolve(deluxe_ctx->change[i],deluxe_ctx->seq_work2[i],deluxe_ctx->seq_work1[i]);CHKERRQ(ierr);
112         }
113       } else {
114         ierr = VecScatterBegin(deluxe_ctx->seq_scctx[i],x,deluxe_ctx->seq_work1[i],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
115         ierr = VecScatterEnd(deluxe_ctx->seq_scctx[i],x,deluxe_ctx->seq_work1[i],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
116       }
117       ierr = MatMultTranspose(deluxe_ctx->seq_mat[i],deluxe_ctx->seq_work1[i],deluxe_ctx->seq_work2[i]);CHKERRQ(ierr);
118       if (deluxe_ctx->seq_mat_inv_sum[i]) {
119         PetscScalar *x;
120 
121         ierr = VecGetArray(deluxe_ctx->seq_work2[i],&x);CHKERRQ(ierr);
122         ierr = VecPlaceArray(deluxe_ctx->seq_work1[i],x);CHKERRQ(ierr);
123         ierr = VecRestoreArray(deluxe_ctx->seq_work2[i],&x);CHKERRQ(ierr);
124         ierr = MatSolveTranspose(deluxe_ctx->seq_mat_inv_sum[i],deluxe_ctx->seq_work1[i],deluxe_ctx->seq_work2[i]);CHKERRQ(ierr);
125         ierr = VecResetArray(deluxe_ctx->seq_work1[i]);CHKERRQ(ierr);
126       }
127       if (deluxe_ctx->change) {
128         Mat change;
129 
130         ierr = KSPGetOperators(deluxe_ctx->change[i],&change,NULL);CHKERRQ(ierr);
131         ierr = MatMult(change,deluxe_ctx->seq_work2[i],deluxe_ctx->seq_work1[i]);CHKERRQ(ierr);
132         ierr = VecScatterBegin(deluxe_ctx->seq_scctx[i],deluxe_ctx->seq_work1[i],pcbddc->work_scaling,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
133         ierr = VecScatterEnd(deluxe_ctx->seq_scctx[i],deluxe_ctx->seq_work1[i],pcbddc->work_scaling,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
134       } else {
135         ierr = VecScatterBegin(deluxe_ctx->seq_scctx[i],deluxe_ctx->seq_work2[i],pcbddc->work_scaling,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
136         ierr = VecScatterEnd(deluxe_ctx->seq_scctx[i],deluxe_ctx->seq_work2[i],pcbddc->work_scaling,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
137       }
138     }
139   }
140   /* put local boundary part in global vector */
141   ierr = VecScatterBegin(pcis->global_to_B,pcbddc->work_scaling,y,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
142   ierr = VecScatterEnd(pcis->global_to_B,pcbddc->work_scaling,y,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
143   PetscFunctionReturn(0);
144 }
145 
146 PetscErrorCode PCBDDCScalingExtension(PC pc, Vec local_interface_vector, Vec global_vector)
147 {
148   PC_BDDC        *pcbddc=(PC_BDDC*)pc->data;
149   PetscErrorCode ierr;
150 
151   PetscFunctionBegin;
152   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
153   PetscValidHeaderSpecific(local_interface_vector,VEC_CLASSID,2);
154   PetscValidHeaderSpecific(global_vector,VEC_CLASSID,3);
155   if (local_interface_vector == pcbddc->work_scaling) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Local vector cannot be pcbddc->work_scaling!\n");
156   ierr = PetscUseMethod(pc,"PCBDDCScalingExtension_C",(PC,Vec,Vec),(pc,local_interface_vector,global_vector));CHKERRQ(ierr);
157   PetscFunctionReturn(0);
158 }
159 
160 static PetscErrorCode PCBDDCScalingRestriction_Basic(PC pc, Vec global_vector, Vec local_interface_vector)
161 {
162   PetscErrorCode ierr;
163   PC_IS          *pcis = (PC_IS*)pc->data;
164 
165   PetscFunctionBegin;
166   ierr = VecScatterBegin(pcis->global_to_B,global_vector,local_interface_vector,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
167   ierr = VecScatterEnd(pcis->global_to_B,global_vector,local_interface_vector,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
168   /* Apply partition of unity */
169   ierr = VecPointwiseMult(local_interface_vector,pcis->D,local_interface_vector);CHKERRQ(ierr);
170   PetscFunctionReturn(0);
171 }
172 
173 static PetscErrorCode PCBDDCScalingRestriction_Deluxe(PC pc, Vec x, Vec y)
174 {
175   PC_IS*              pcis=(PC_IS*)pc->data;
176   PC_BDDC*            pcbddc=(PC_BDDC*)pc->data;
177   PCBDDCDeluxeScaling deluxe_ctx = pcbddc->deluxe_ctx;
178   PetscErrorCode      ierr;
179 
180   PetscFunctionBegin;
181   /* get local boundary part of global vector */
182   ierr = VecScatterBegin(pcis->global_to_B,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
183   ierr = VecScatterEnd(pcis->global_to_B,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
184   if (deluxe_ctx->n_simple) { /* scale deluxe vertices using diagonal scaling */
185     PetscInt          i;
186     PetscScalar       *array_y;
187     const PetscScalar *array_D;
188     ierr = VecGetArray(y,&array_y);CHKERRQ(ierr);
189     ierr = VecGetArrayRead(pcis->D,&array_D);CHKERRQ(ierr);
190     for (i=0;i<deluxe_ctx->n_simple;i++) {
191       array_y[deluxe_ctx->idx_simple_B[i]] *= array_D[deluxe_ctx->idx_simple_B[i]];
192     }
193     ierr = VecRestoreArrayRead(pcis->D,&array_D);CHKERRQ(ierr);
194     ierr = VecRestoreArray(y,&array_y);CHKERRQ(ierr);
195   }
196   /* sequential part : all problems and Schur applications collapsed into a single matrix vector multiplication or a matvec and a solve */
197   if (deluxe_ctx->seq_mat) {
198     PetscInt i;
199     for (i=0;i<deluxe_ctx->seq_n;i++) {
200       if (deluxe_ctx->change) {
201         Mat change;
202 
203         ierr = VecScatterBegin(deluxe_ctx->seq_scctx[i],y,deluxe_ctx->seq_work2[i],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
204         ierr = VecScatterEnd(deluxe_ctx->seq_scctx[i],y,deluxe_ctx->seq_work2[i],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
205         ierr = KSPGetOperators(deluxe_ctx->change[i],&change,NULL);CHKERRQ(ierr);
206         ierr = MatMultTranspose(change,deluxe_ctx->seq_work2[i],deluxe_ctx->seq_work1[i]);CHKERRQ(ierr);
207       } else {
208         ierr = VecScatterBegin(deluxe_ctx->seq_scctx[i],y,deluxe_ctx->seq_work1[i],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
209         ierr = VecScatterEnd(deluxe_ctx->seq_scctx[i],y,deluxe_ctx->seq_work1[i],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
210       }
211       if (deluxe_ctx->seq_mat_inv_sum[i]) {
212         PetscScalar *x;
213 
214         ierr = VecGetArray(deluxe_ctx->seq_work1[i],&x);CHKERRQ(ierr);
215         ierr = VecPlaceArray(deluxe_ctx->seq_work2[i],x);CHKERRQ(ierr);
216         ierr = VecRestoreArray(deluxe_ctx->seq_work1[i],&x);CHKERRQ(ierr);
217         ierr = MatSolve(deluxe_ctx->seq_mat_inv_sum[i],deluxe_ctx->seq_work1[i],deluxe_ctx->seq_work2[i]);CHKERRQ(ierr);
218         ierr = VecResetArray(deluxe_ctx->seq_work2[i]);CHKERRQ(ierr);
219       }
220       ierr = MatMult(deluxe_ctx->seq_mat[i],deluxe_ctx->seq_work1[i],deluxe_ctx->seq_work2[i]);CHKERRQ(ierr);
221       if (deluxe_ctx->change) {
222         if (deluxe_ctx->change_with_qr) {
223           Mat change;
224 
225           ierr = KSPGetOperators(deluxe_ctx->change[i],&change,NULL);CHKERRQ(ierr);
226           ierr = MatMult(change,deluxe_ctx->seq_work2[i],deluxe_ctx->seq_work1[i]);CHKERRQ(ierr);
227         } else {
228           ierr = KSPSolveTranspose(deluxe_ctx->change[i],deluxe_ctx->seq_work2[i],deluxe_ctx->seq_work1[i]);CHKERRQ(ierr);
229         }
230         ierr = VecScatterBegin(deluxe_ctx->seq_scctx[i],deluxe_ctx->seq_work1[i],y,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
231         ierr = VecScatterEnd(deluxe_ctx->seq_scctx[i],deluxe_ctx->seq_work1[i],y,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
232       } else {
233         ierr = VecScatterBegin(deluxe_ctx->seq_scctx[i],deluxe_ctx->seq_work2[i],y,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
234         ierr = VecScatterEnd(deluxe_ctx->seq_scctx[i],deluxe_ctx->seq_work2[i],y,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
235       }
236     }
237   }
238   PetscFunctionReturn(0);
239 }
240 
241 PetscErrorCode PCBDDCScalingRestriction(PC pc, Vec global_vector, Vec local_interface_vector)
242 {
243   PC_BDDC        *pcbddc=(PC_BDDC*)pc->data;
244   PetscErrorCode ierr;
245 
246   PetscFunctionBegin;
247   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
248   PetscValidHeaderSpecific(global_vector,VEC_CLASSID,2);
249   PetscValidHeaderSpecific(local_interface_vector,VEC_CLASSID,3);
250   if (local_interface_vector == pcbddc->work_scaling) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Local vector cannot be pcbddc->work_scaling!\n");
251   ierr = PetscUseMethod(pc,"PCBDDCScalingRestriction_C",(PC,Vec,Vec),(pc,global_vector,local_interface_vector));CHKERRQ(ierr);
252   PetscFunctionReturn(0);
253 }
254 
255 PetscErrorCode PCBDDCScalingSetUp(PC pc)
256 {
257   PC_IS*         pcis=(PC_IS*)pc->data;
258   PC_BDDC*       pcbddc=(PC_BDDC*)pc->data;
259   PetscErrorCode ierr;
260 
261   PetscFunctionBegin;
262   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
263   /* create work vector for the operator */
264   ierr = VecDestroy(&pcbddc->work_scaling);CHKERRQ(ierr);
265   ierr = VecDuplicate(pcis->vec1_B,&pcbddc->work_scaling);CHKERRQ(ierr);
266   /* always rebuild pcis->D */
267   if (pcis->use_stiffness_scaling) {
268     ierr = MatGetDiagonal(pcbddc->local_mat,pcis->vec1_N);CHKERRQ(ierr);
269     ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
270     ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
271   }
272   ierr = VecCopy(pcis->D,pcis->vec1_B);CHKERRQ(ierr);
273   ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
274   ierr = VecScatterBegin(pcis->global_to_B,pcis->vec1_B,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
275   ierr = VecScatterEnd(pcis->global_to_B,pcis->vec1_B,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
276   ierr = VecScatterBegin(pcis->global_to_B,pcis->vec1_global,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
277   ierr = VecScatterEnd(pcis->global_to_B,pcis->vec1_global,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
278   ierr = VecPointwiseDivide(pcis->D,pcis->D,pcis->vec1_B);CHKERRQ(ierr);
279   /* now setup */
280   if (pcbddc->use_deluxe_scaling) {
281     if (!pcbddc->deluxe_ctx) {
282       ierr = PCBDDCScalingCreate_Deluxe(pc);CHKERRQ(ierr);
283     }
284     ierr = PCBDDCScalingSetUp_Deluxe(pc);CHKERRQ(ierr);
285     ierr = PetscObjectComposeFunction((PetscObject)pc,"PCBDDCScalingRestriction_C",PCBDDCScalingRestriction_Deluxe);CHKERRQ(ierr);
286     ierr = PetscObjectComposeFunction((PetscObject)pc,"PCBDDCScalingExtension_C",PCBDDCScalingExtension_Deluxe);CHKERRQ(ierr);
287   } else {
288     ierr = PetscObjectComposeFunction((PetscObject)pc,"PCBDDCScalingRestriction_C",PCBDDCScalingRestriction_Basic);CHKERRQ(ierr);
289     ierr = PetscObjectComposeFunction((PetscObject)pc,"PCBDDCScalingExtension_C",PCBDDCScalingExtension_Basic);CHKERRQ(ierr);
290   }
291 
292   /* test */
293   if (pcbddc->dbg_flag) {
294     Mat         B0_B = NULL;
295     Vec         B0_Bv = NULL, B0_Bv2 = NULL;
296     Vec         vec2_global;
297     PetscViewer viewer = pcbddc->dbg_viewer;
298     PetscReal   error;
299 
300     /* extension -> from local to parallel */
301     ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
302     ierr = VecSetRandom(pcis->vec1_B,NULL);CHKERRQ(ierr);
303     ierr = VecScatterBegin(pcis->global_to_B,pcis->vec1_B,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
304     ierr = VecScatterEnd(pcis->global_to_B,pcis->vec1_B,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
305     ierr = VecDuplicate(pcis->vec1_global,&vec2_global);CHKERRQ(ierr);
306     ierr = VecCopy(pcis->vec1_global,vec2_global);CHKERRQ(ierr);
307     ierr = VecScatterBegin(pcis->global_to_B,pcis->vec1_global,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
308     ierr = VecScatterEnd(pcis->global_to_B,pcis->vec1_global,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
309     if (pcbddc->benign_n) {
310       IS is_dummy;
311 
312       ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr);
313       ierr = MatCreateSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr);
314       ierr = ISDestroy(&is_dummy);CHKERRQ(ierr);
315       ierr = MatCreateVecs(B0_B,NULL,&B0_Bv);CHKERRQ(ierr);
316       ierr = VecDuplicate(B0_Bv,&B0_Bv2);CHKERRQ(ierr);
317       ierr = MatMult(B0_B,pcis->vec1_B,B0_Bv);CHKERRQ(ierr);
318     }
319     ierr = PCBDDCScalingExtension(pc,pcis->vec1_B,pcis->vec1_global);CHKERRQ(ierr);
320     if (pcbddc->benign_saddle_point) {
321       PetscReal errorl = 0.;
322       ierr = VecScatterBegin(pcis->global_to_B,pcis->vec1_global,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
323       ierr = VecScatterEnd(pcis->global_to_B,pcis->vec1_global,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
324       if (pcbddc->benign_n) {
325         ierr = MatMult(B0_B,pcis->vec1_B,B0_Bv2);CHKERRQ(ierr);
326         ierr = VecAXPY(B0_Bv,-1.0,B0_Bv2);CHKERRQ(ierr);
327         ierr = VecNorm(B0_Bv,NORM_INFINITY,&errorl);CHKERRQ(ierr);
328       }
329       ierr = MPI_Allreduce(&errorl,&error,1,MPIU_REAL,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
330       ierr = PetscViewerASCIIPrintf(viewer,"Error benign extension %1.14e\n",error);CHKERRQ(ierr);
331     }
332     ierr = VecAXPY(pcis->vec1_global,-1.0,vec2_global);CHKERRQ(ierr);
333     ierr = VecNorm(pcis->vec1_global,NORM_INFINITY,&error);CHKERRQ(ierr);
334     ierr = PetscViewerASCIIPrintf(viewer,"Error scaling extension %1.14e\n",error);CHKERRQ(ierr);
335     ierr = VecDestroy(&vec2_global);CHKERRQ(ierr);
336 
337     /* restriction -> from parallel to local */
338     ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
339     ierr = VecSetRandom(pcis->vec1_B,NULL);CHKERRQ(ierr);
340     ierr = VecScatterBegin(pcis->global_to_B,pcis->vec1_B,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
341     ierr = VecScatterEnd(pcis->global_to_B,pcis->vec1_B,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
342     ierr = PCBDDCScalingRestriction(pc,pcis->vec1_global,pcis->vec1_B);CHKERRQ(ierr);
343     ierr = VecScale(pcis->vec1_B,-1.0);CHKERRQ(ierr);
344     ierr = VecScatterBegin(pcis->global_to_B,pcis->vec1_B,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
345     ierr = VecScatterEnd(pcis->global_to_B,pcis->vec1_B,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
346     ierr = VecNorm(pcis->vec1_global,NORM_INFINITY,&error);CHKERRQ(ierr);
347     ierr = PetscViewerASCIIPrintf(viewer,"Error scaling restriction %1.14e\n",error);CHKERRQ(ierr);
348     ierr = MatDestroy(&B0_B);CHKERRQ(ierr);
349     ierr = VecDestroy(&B0_Bv);CHKERRQ(ierr);
350     ierr = VecDestroy(&B0_Bv2);CHKERRQ(ierr);
351   }
352   PetscFunctionReturn(0);
353 }
354 
355 PetscErrorCode PCBDDCScalingDestroy(PC pc)
356 {
357   PC_BDDC*       pcbddc=(PC_BDDC*)pc->data;
358   PetscErrorCode ierr;
359 
360   PetscFunctionBegin;
361   if (pcbddc->deluxe_ctx) {
362     ierr = PCBDDCScalingDestroy_Deluxe(pc);CHKERRQ(ierr);
363   }
364   ierr = VecDestroy(&pcbddc->work_scaling);CHKERRQ(ierr);
365   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCBDDCScalingRestriction_C",NULL);CHKERRQ(ierr);
366   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCBDDCScalingExtension_C",NULL);CHKERRQ(ierr);
367   PetscFunctionReturn(0);
368 }
369 
370 static PetscErrorCode PCBDDCScalingCreate_Deluxe(PC pc)
371 {
372   PC_BDDC*            pcbddc=(PC_BDDC*)pc->data;
373   PCBDDCDeluxeScaling deluxe_ctx;
374   PetscErrorCode      ierr;
375 
376   PetscFunctionBegin;
377   ierr = PetscNew(&deluxe_ctx);CHKERRQ(ierr);
378   pcbddc->deluxe_ctx = deluxe_ctx;
379   PetscFunctionReturn(0);
380 }
381 
382 static PetscErrorCode PCBDDCScalingDestroy_Deluxe(PC pc)
383 {
384   PC_BDDC*            pcbddc=(PC_BDDC*)pc->data;
385   PetscErrorCode      ierr;
386 
387   PetscFunctionBegin;
388   ierr = PCBDDCScalingReset_Deluxe_Solvers(pcbddc->deluxe_ctx);CHKERRQ(ierr);
389   ierr = PetscFree(pcbddc->deluxe_ctx);CHKERRQ(ierr);
390   PetscFunctionReturn(0);
391 }
392 
393 static PetscErrorCode PCBDDCScalingReset_Deluxe_Solvers(PCBDDCDeluxeScaling deluxe_ctx)
394 {
395   PetscInt       i;
396   PetscErrorCode ierr;
397 
398   PetscFunctionBegin;
399   ierr = PetscFree(deluxe_ctx->idx_simple_B);CHKERRQ(ierr);
400   deluxe_ctx->n_simple = 0;
401   for (i=0;i<deluxe_ctx->seq_n;i++) {
402     ierr = VecScatterDestroy(&deluxe_ctx->seq_scctx[i]);CHKERRQ(ierr);
403     ierr = VecDestroy(&deluxe_ctx->seq_work1[i]);CHKERRQ(ierr);
404     ierr = VecDestroy(&deluxe_ctx->seq_work2[i]);CHKERRQ(ierr);
405     ierr = MatDestroy(&deluxe_ctx->seq_mat[i]);CHKERRQ(ierr);
406     ierr = MatDestroy(&deluxe_ctx->seq_mat_inv_sum[i]);CHKERRQ(ierr);
407   }
408   ierr = PetscFree5(deluxe_ctx->seq_scctx,deluxe_ctx->seq_work1,deluxe_ctx->seq_work2,deluxe_ctx->seq_mat,deluxe_ctx->seq_mat_inv_sum);CHKERRQ(ierr);
409   ierr = PetscFree(deluxe_ctx->workspace);CHKERRQ(ierr);
410   deluxe_ctx->seq_n = 0;
411   PetscFunctionReturn(0);
412 }
413 
414 static PetscErrorCode PCBDDCScalingSetUp_Deluxe(PC pc)
415 {
416   PC_IS               *pcis=(PC_IS*)pc->data;
417   PC_BDDC             *pcbddc=(PC_BDDC*)pc->data;
418   PCBDDCDeluxeScaling deluxe_ctx=pcbddc->deluxe_ctx;
419   PCBDDCSubSchurs     sub_schurs=pcbddc->sub_schurs;
420   PetscErrorCode      ierr;
421 
422   PetscFunctionBegin;
423   /* reset data structures if the topology has changed */
424   if (pcbddc->recompute_topography) {
425     ierr = PCBDDCScalingReset_Deluxe_Solvers(deluxe_ctx);CHKERRQ(ierr);
426   }
427 
428   /* Compute data structures to solve sequential problems */
429   ierr = PCBDDCScalingSetUp_Deluxe_Private(pc);CHKERRQ(ierr);
430 
431   /* diagonal scaling on interface dofs not contained in cc */
432   if (sub_schurs->is_vertices || sub_schurs->is_dir) {
433     PetscInt n_com,n_dir;
434     n_com = 0;
435     if (sub_schurs->is_vertices) {
436       ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_com);CHKERRQ(ierr);
437     }
438     n_dir = 0;
439     if (sub_schurs->is_dir) {
440       ierr = ISGetLocalSize(sub_schurs->is_dir,&n_dir);CHKERRQ(ierr);
441     }
442     if (!deluxe_ctx->n_simple) {
443       deluxe_ctx->n_simple = n_dir + n_com;
444       ierr = PetscMalloc1(deluxe_ctx->n_simple,&deluxe_ctx->idx_simple_B);CHKERRQ(ierr);
445       if (sub_schurs->is_vertices) {
446         PetscInt       nmap;
447         const PetscInt *idxs;
448 
449         ierr = ISGetIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr);
450         ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_com,idxs,&nmap,deluxe_ctx->idx_simple_B);CHKERRQ(ierr);
451         if (nmap != n_com) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error when mapping simply scaled dofs (is_vertices)! %d != %d",nmap,n_com);
452         ierr = ISRestoreIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr);
453       }
454       if (sub_schurs->is_dir) {
455         PetscInt       nmap;
456         const PetscInt *idxs;
457 
458         ierr = ISGetIndices(sub_schurs->is_dir,&idxs);CHKERRQ(ierr);
459         ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_dir,idxs,&nmap,deluxe_ctx->idx_simple_B+n_com);CHKERRQ(ierr);
460         if (nmap != n_dir) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error when mapping simply scaled dofs (sub_schurs->is_dir)! %d != %d",nmap,n_dir);
461         ierr = ISRestoreIndices(sub_schurs->is_dir,&idxs);CHKERRQ(ierr);
462       }
463       ierr = PetscSortInt(deluxe_ctx->n_simple,deluxe_ctx->idx_simple_B);CHKERRQ(ierr);
464     } else {
465       if (deluxe_ctx->n_simple != n_dir + n_com) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Number of simply scaled dofs %d is different from the previous one computed %d",n_dir + n_com,deluxe_ctx->n_simple);
466     }
467   } else {
468     deluxe_ctx->n_simple = 0;
469     deluxe_ctx->idx_simple_B = 0;
470   }
471   PetscFunctionReturn(0);
472 }
473 
474 static PetscErrorCode PCBDDCScalingSetUp_Deluxe_Private(PC pc)
475 {
476   PC_BDDC                *pcbddc=(PC_BDDC*)pc->data;
477   PCBDDCDeluxeScaling    deluxe_ctx=pcbddc->deluxe_ctx;
478   PCBDDCSubSchurs        sub_schurs = pcbddc->sub_schurs;
479   PetscScalar            *matdata,*matdata2;
480   PetscInt               i,max_subset_size,cum,cum2;
481   const PetscInt         *idxs;
482   PetscBool              newsetup = PETSC_FALSE;
483   PetscErrorCode         ierr;
484 
485   PetscFunctionBegin;
486   if (!sub_schurs->n_subs) {
487     PetscFunctionReturn(0);
488   }
489 
490   /* Allocate arrays for subproblems */
491   if (!deluxe_ctx->seq_n) {
492     deluxe_ctx->seq_n = sub_schurs->n_subs;
493     ierr = PetscCalloc5(deluxe_ctx->seq_n,&deluxe_ctx->seq_scctx,deluxe_ctx->seq_n,&deluxe_ctx->seq_work1,deluxe_ctx->seq_n,&deluxe_ctx->seq_work2,deluxe_ctx->seq_n,&deluxe_ctx->seq_mat,deluxe_ctx->seq_n,&deluxe_ctx->seq_mat_inv_sum);CHKERRQ(ierr);
494     newsetup = PETSC_TRUE;
495   } else if (deluxe_ctx->seq_n != sub_schurs->n_subs) {
496     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Number of deluxe subproblems %d is different from the sub_schurs %d",deluxe_ctx->seq_n,sub_schurs->n_subs);
497   }
498   /* the change of basis is just a reference to sub_schurs->change (if any) */
499   deluxe_ctx->change         = sub_schurs->change;
500   deluxe_ctx->change_with_qr = sub_schurs->change_with_qr;
501 
502   /* Create objects for deluxe */
503   max_subset_size = 0;
504   for (i=0;i<sub_schurs->n_subs;i++) {
505     PetscInt subset_size;
506     ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr);
507     max_subset_size = PetscMax(subset_size,max_subset_size);
508   }
509   if (newsetup) {
510     ierr = PetscMalloc1(2*max_subset_size,&deluxe_ctx->workspace);CHKERRQ(ierr);
511   }
512   cum = cum2 = 0;
513   ierr = ISGetIndices(sub_schurs->is_Ej_all,&idxs);CHKERRQ(ierr);
514   ierr = MatSeqAIJGetArray(sub_schurs->S_Ej_all,&matdata);CHKERRQ(ierr);
515   ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_all,&matdata2);CHKERRQ(ierr);
516   for (i=0;i<deluxe_ctx->seq_n;i++) {
517     PetscInt     subset_size;
518 
519     ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr);
520     if (newsetup) {
521       IS  sub;
522       /* work vectors */
523       ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,subset_size,deluxe_ctx->workspace,&deluxe_ctx->seq_work1[i]);CHKERRQ(ierr);
524       ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,subset_size,deluxe_ctx->workspace+subset_size,&deluxe_ctx->seq_work2[i]);CHKERRQ(ierr);
525 
526       /* scatters */
527       ierr = ISCreateGeneral(PETSC_COMM_SELF,subset_size,idxs+cum,PETSC_COPY_VALUES,&sub);CHKERRQ(ierr);
528       ierr = VecScatterCreate(pcbddc->work_scaling,sub,deluxe_ctx->seq_work1[i],NULL,&deluxe_ctx->seq_scctx[i]);CHKERRQ(ierr);
529       ierr = ISDestroy(&sub);CHKERRQ(ierr);
530     }
531 
532     /* S_E_j */
533     ierr = MatDestroy(&deluxe_ctx->seq_mat[i]);CHKERRQ(ierr);
534     ierr = MatCreateSeqDense(PETSC_COMM_SELF,subset_size,subset_size,matdata+cum2,&deluxe_ctx->seq_mat[i]);CHKERRQ(ierr);
535 
536     /* \sum_k S^k_E_j */
537     ierr = MatDestroy(&deluxe_ctx->seq_mat_inv_sum[i]);CHKERRQ(ierr);
538     ierr = MatCreateSeqDense(PETSC_COMM_SELF,subset_size,subset_size,matdata2+cum2,&deluxe_ctx->seq_mat_inv_sum[i]);CHKERRQ(ierr);
539 
540     if (sub_schurs->is_hermitian && sub_schurs->is_posdef) {
541       ierr = MatCholeskyFactor(deluxe_ctx->seq_mat_inv_sum[i],NULL,NULL);CHKERRQ(ierr);
542     } else {
543       ierr = MatLUFactor(deluxe_ctx->seq_mat_inv_sum[i],NULL,NULL,NULL);CHKERRQ(ierr);
544     }
545     if (pcbddc->deluxe_singlemat) {
546       Mat X,Y;
547 
548       if (!sub_schurs->is_hermitian || !sub_schurs->is_posdef) {
549         ierr = MatTranspose(deluxe_ctx->seq_mat[i],MAT_INITIAL_MATRIX,&X);CHKERRQ(ierr);
550       } else {
551         ierr = PetscObjectReference((PetscObject)deluxe_ctx->seq_mat[i]);CHKERRQ(ierr);
552         X    = deluxe_ctx->seq_mat[i];
553       }
554       ierr = MatDuplicate(X,MAT_DO_NOT_COPY_VALUES,&Y);CHKERRQ(ierr);
555       if (!sub_schurs->is_hermitian || !sub_schurs->is_posdef) {
556         ierr = PCBDDCMatTransposeMatSolve_SeqDense(deluxe_ctx->seq_mat_inv_sum[i],X,Y);CHKERRQ(ierr);
557       } else {
558         ierr = MatMatSolve(deluxe_ctx->seq_mat_inv_sum[i],X,Y);CHKERRQ(ierr);
559       }
560 
561       ierr = MatDestroy(&deluxe_ctx->seq_mat_inv_sum[i]);CHKERRQ(ierr);
562       ierr = MatDestroy(&deluxe_ctx->seq_mat[i]);CHKERRQ(ierr);
563       ierr = MatDestroy(&X);CHKERRQ(ierr);
564       if (deluxe_ctx->change) {
565         Mat C,CY;
566 
567         if (!deluxe_ctx->change_with_qr) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only QR based change of basis");
568         ierr = KSPGetOperators(deluxe_ctx->change[i],&C,NULL);CHKERRQ(ierr);
569         ierr = MatMatMult(C,Y,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&CY);CHKERRQ(ierr);
570         ierr = MatMatTransposeMult(CY,C,MAT_REUSE_MATRIX,PETSC_DEFAULT,&Y);CHKERRQ(ierr);
571         ierr = MatDestroy(&CY);CHKERRQ(ierr);
572       } else {
573         ierr = MatTranspose(Y,MAT_INPLACE_MATRIX,&Y);CHKERRQ(ierr);
574       }
575       deluxe_ctx->seq_mat[i] = Y;
576     }
577     cum += subset_size;
578     cum2 += subset_size*subset_size;
579   }
580   ierr = ISRestoreIndices(sub_schurs->is_Ej_all,&idxs);CHKERRQ(ierr);
581   ierr = MatSeqAIJRestoreArray(sub_schurs->S_Ej_all,&matdata);CHKERRQ(ierr);
582   ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_all,&matdata2);CHKERRQ(ierr);
583   if (pcbddc->deluxe_singlemat) {
584     deluxe_ctx->change         = NULL;
585     deluxe_ctx->change_with_qr = PETSC_FALSE;
586   }
587 
588   if (deluxe_ctx->change && !deluxe_ctx->change_with_qr) {
589     for (i=0;i<deluxe_ctx->seq_n;i++) {
590       if (newsetup) {
591         PC pc;
592 
593         ierr = KSPGetPC(deluxe_ctx->change[i],&pc);CHKERRQ(ierr);
594         ierr = PCSetType(pc,PCLU);CHKERRQ(ierr);
595         ierr = KSPSetFromOptions(deluxe_ctx->change[i]);CHKERRQ(ierr);
596       }
597       ierr = KSPSetUp(deluxe_ctx->change[i]);CHKERRQ(ierr);
598     }
599   }
600   PetscFunctionReturn(0);
601 }
602