xref: /petsc/src/ksp/pc/impls/bddc/bddcscalingbasic.c (revision 9bb4a8ca6688cafd8c6250e0011bc4b45f2808a0)
1 #include <../src/ksp/pc/impls/bddc/bddc.h>
2 #include <../src/ksp/pc/impls/bddc/bddcprivate.h>
3 
4 /* prototypes for deluxe functions */
5 static PetscErrorCode PCBDDCScalingCreate_Deluxe(PC);
6 static PetscErrorCode PCBDDCScalingDestroy_Deluxe(PC);
7 static PetscErrorCode PCBDDCScalingSetUp_Deluxe(PC);
8 static PetscErrorCode PCBDDCScalingSetUp_Deluxe_Par(PC,PetscInt,PetscInt,PetscInt[],PetscInt[]);
9 static PetscErrorCode PCBDDCScalingSetUp_Deluxe_Seq(PC);
10 static PetscErrorCode PCBDDCScalingReset_Deluxe_Solvers(PCBDDCDeluxeScaling);
11 
12 #undef __FUNCT__
13 #define __FUNCT__ "PCBDDCScalingExtension_Basic"
14 static PetscErrorCode PCBDDCScalingExtension_Basic(PC pc, Vec local_interface_vector, Vec global_vector)
15 {
16   PC_IS* pcis = (PC_IS*)pc->data;
17   PC_BDDC* pcbddc = (PC_BDDC*)pc->data;
18   PetscErrorCode ierr;
19 
20   PetscFunctionBegin;
21   /* Apply partition of unity */
22   ierr = VecPointwiseMult(pcbddc->work_scaling,pcis->D,local_interface_vector);CHKERRQ(ierr);
23   ierr = VecSet(global_vector,0.0);CHKERRQ(ierr);
24   ierr = VecScatterBegin(pcis->global_to_B,pcbddc->work_scaling,global_vector,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
25   ierr = VecScatterEnd(pcis->global_to_B,pcbddc->work_scaling,global_vector,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
26   PetscFunctionReturn(0);
27 }
28 
29 #undef __FUNCT__
30 #define __FUNCT__ "PCBDDCScalingExtension_Deluxe"
31 static PetscErrorCode PCBDDCScalingExtension_Deluxe(PC pc, Vec x, Vec y)
32 {
33   PC_IS*              pcis=(PC_IS*)pc->data;
34   PC_BDDC*            pcbddc=(PC_BDDC*)pc->data;
35   PCBDDCDeluxeScaling deluxe_ctx = pcbddc->deluxe_ctx;
36   PCBDDCSubSchurs     sub_schurs = pcbddc->sub_schurs;
37   PetscInt            i;
38   PetscErrorCode      ierr;
39 
40   /* TODO CHECK STUFF RELATED WITH FAKE WORK */
41   PetscFunctionBegin;
42   ierr = VecSet(pcbddc->work_scaling,0.0);CHKERRQ(ierr); /* needed by the fake work below */
43   if (deluxe_ctx->n_simple) {
44     /* scale deluxe vertices using diagonal scaling */
45     PetscScalar *array_x,*array_D,*array;
46     ierr = VecGetArray(x,&array_x);CHKERRQ(ierr);
47     ierr = VecGetArray(pcis->D,&array_D);CHKERRQ(ierr);
48     ierr = VecGetArray(pcbddc->work_scaling,&array);CHKERRQ(ierr);
49     for (i=0;i<deluxe_ctx->n_simple;i++) {
50       array[deluxe_ctx->idx_simple_B[i]] = array_x[deluxe_ctx->idx_simple_B[i]]*array_D[deluxe_ctx->idx_simple_B[i]];
51     }
52     ierr = VecRestoreArray(pcbddc->work_scaling,&array);CHKERRQ(ierr);
53     ierr = VecRestoreArray(pcis->D,&array_D);CHKERRQ(ierr);
54     ierr = VecRestoreArray(x,&array_x);CHKERRQ(ierr);
55   }
56   /* sequential part : all problems and Schur applications collapsed into a single matrix vector multiplication and ksp solution */
57   if (deluxe_ctx->seq_ksp) {
58     ierr = VecScatterBegin(deluxe_ctx->seq_scctx,x,deluxe_ctx->seq_work1,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
59     ierr = VecScatterEnd(deluxe_ctx->seq_scctx,x,deluxe_ctx->seq_work1,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
60     ierr = MatMultTranspose(sub_schurs->S_Ej_all,deluxe_ctx->seq_work1,deluxe_ctx->seq_work2);CHKERRQ(ierr);
61     ierr = KSPSolveTranspose(deluxe_ctx->seq_ksp,deluxe_ctx->seq_work2,deluxe_ctx->seq_work1);CHKERRQ(ierr);
62     /* fake work due to final ADD VALUES and vertices scaling needed? TODO: check it */
63     ierr = VecScatterBegin(deluxe_ctx->seq_scctx,deluxe_ctx->seq_work1,pcbddc->work_scaling,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
64     ierr = VecScatterEnd(deluxe_ctx->seq_scctx,deluxe_ctx->seq_work1,pcbddc->work_scaling,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
65   }
66   /* parallel part */
67   for (i=0;i<deluxe_ctx->par_colors;i++) {
68     if (deluxe_ctx->par_ksp[i]) {
69       PetscMPIInt color_rank;
70       PetscInt    subidx = deluxe_ctx->par_col2sub[i];
71       /* restrict on subset */
72       ierr = VecScatterBegin(deluxe_ctx->par_scctx_s[i],x,deluxe_ctx->par_work1[i],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
73       ierr = VecScatterEnd(deluxe_ctx->par_scctx_s[i],x,deluxe_ctx->par_work1[i],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
74       /* S_Ej */
75       ierr = MatMult(sub_schurs->S_Ej[subidx],deluxe_ctx->par_work1[i],deluxe_ctx->par_work2[i]);CHKERRQ(ierr);
76       /* (\sum_j S_Ej)^-1 */
77       ierr = VecSet(deluxe_ctx->par_vec[i],0.0);CHKERRQ(ierr);
78       ierr = VecScatterBegin(deluxe_ctx->par_scctx_p[i],deluxe_ctx->par_work2[i],deluxe_ctx->par_vec[i],ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
79       ierr = VecScatterEnd(deluxe_ctx->par_scctx_p[i],deluxe_ctx->par_work2[i],deluxe_ctx->par_vec[i],ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
80       ierr = KSPSolve(deluxe_ctx->par_ksp[i],deluxe_ctx->par_vec[i],deluxe_ctx->par_vec[i]);CHKERRQ(ierr);
81       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)deluxe_ctx->par_ksp[i]),&color_rank);CHKERRQ(ierr);
82       /* get back solution on subset */
83       ierr = VecScatterBegin(deluxe_ctx->par_scctx_p[i],deluxe_ctx->par_vec[i],deluxe_ctx->par_work1[i],INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
84       ierr = VecScatterEnd(deluxe_ctx->par_scctx_p[i],deluxe_ctx->par_vec[i],deluxe_ctx->par_work1[i],INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
85       if (!color_rank) { /* only the master process in coloured comm copies the computed values */
86         ierr = VecScatterBegin(deluxe_ctx->par_scctx_s[i],deluxe_ctx->par_work1[i],pcbddc->work_scaling,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
87         ierr = VecScatterEnd(deluxe_ctx->par_scctx_s[i],deluxe_ctx->par_work1[i],pcbddc->work_scaling,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
88       }
89     }
90   }
91   /* put local boundary part in global vector */
92   ierr = VecSet(y,0.0);CHKERRQ(ierr);
93   ierr = VecScatterBegin(pcis->global_to_B,pcbddc->work_scaling,y,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
94   ierr = VecScatterEnd(pcis->global_to_B,pcbddc->work_scaling,y,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
95   PetscFunctionReturn(0);
96 }
97 
98 #undef __FUNCT__
99 #define __FUNCT__ "PCBDDCScalingExtension"
100 PetscErrorCode PCBDDCScalingExtension(PC pc, Vec local_interface_vector, Vec global_vector)
101 {
102   PC_BDDC *pcbddc=(PC_BDDC*)pc->data;
103   PetscErrorCode ierr;
104 
105   PetscFunctionBegin;
106   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
107   PetscValidHeaderSpecific(local_interface_vector,VEC_CLASSID,2);
108   PetscValidHeaderSpecific(global_vector,VEC_CLASSID,3);
109   if (local_interface_vector == pcbddc->work_scaling) {
110     SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"Local vector cannot be pcbddc->work_scaling!\n");
111   }
112   ierr = PetscTryMethod(pc,"PCBDDCScalingExtension_C",(PC,Vec,Vec),(pc,local_interface_vector,global_vector));CHKERRQ(ierr);
113   PetscFunctionReturn(0);
114 }
115 
116 #undef __FUNCT__
117 #define __FUNCT__ "PCBDDCScalingRestriction_Basic"
118 static PetscErrorCode PCBDDCScalingRestriction_Basic(PC pc, Vec global_vector, Vec local_interface_vector)
119 {
120   PetscErrorCode ierr;
121   PC_IS* pcis = (PC_IS*)pc->data;
122 
123   PetscFunctionBegin;
124   ierr = VecScatterBegin(pcis->global_to_B,global_vector,local_interface_vector,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
125   ierr = VecScatterEnd(pcis->global_to_B,global_vector,local_interface_vector,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
126   /* Apply partition of unity */
127   ierr = VecPointwiseMult(local_interface_vector,pcis->D,local_interface_vector);CHKERRQ(ierr);
128   PetscFunctionReturn(0);
129 }
130 
131 #undef __FUNCT__
132 #define __FUNCT__ "PCBDDCScalingRestriction_Deluxe"
133 static PetscErrorCode PCBDDCScalingRestriction_Deluxe(PC pc, Vec x, Vec y)
134 {
135   PC_IS*              pcis=(PC_IS*)pc->data;
136   PC_BDDC*            pcbddc=(PC_BDDC*)pc->data;
137   PCBDDCDeluxeScaling deluxe_ctx = pcbddc->deluxe_ctx;
138   PCBDDCSubSchurs     sub_schurs = pcbddc->sub_schurs;
139   PetscInt            i;
140   PetscErrorCode      ierr;
141 
142   PetscFunctionBegin;
143   /* get local boundary part of global vector */
144   ierr = VecScatterBegin(pcis->global_to_B,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
145   ierr = VecScatterEnd(pcis->global_to_B,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
146   if (deluxe_ctx->n_simple) {
147     /* scale deluxe vertices using diagonal scaling */
148     PetscScalar *array_y,*array_D;
149     ierr = VecGetArray(y,&array_y);CHKERRQ(ierr);
150     ierr = VecGetArray(pcis->D,&array_D);CHKERRQ(ierr);
151     for (i=0;i<deluxe_ctx->n_simple;i++) {
152       array_y[deluxe_ctx->idx_simple_B[i]] *= array_D[deluxe_ctx->idx_simple_B[i]];
153     }
154     ierr = VecRestoreArray(pcis->D,&array_D);CHKERRQ(ierr);
155     ierr = VecRestoreArray(y,&array_y);CHKERRQ(ierr);
156   }
157   /* sequential part : all problems and Schur applications collapsed into a single matrix vector multiplication and ksp solution */
158   if (deluxe_ctx->seq_ksp) {
159     ierr = VecScatterBegin(deluxe_ctx->seq_scctx,y,deluxe_ctx->seq_work1,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
160     ierr = VecScatterEnd(deluxe_ctx->seq_scctx,y,deluxe_ctx->seq_work1,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
161     ierr = KSPSolve(deluxe_ctx->seq_ksp,deluxe_ctx->seq_work1,deluxe_ctx->seq_work2);CHKERRQ(ierr);
162     ierr = MatMult(sub_schurs->S_Ej_all,deluxe_ctx->seq_work2,deluxe_ctx->seq_work1);CHKERRQ(ierr);
163     ierr = VecScatterBegin(deluxe_ctx->seq_scctx,deluxe_ctx->seq_work1,y,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
164     ierr = VecScatterEnd(deluxe_ctx->seq_scctx,deluxe_ctx->seq_work1,y,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
165   }
166   /* parallel part */
167   for (i=0;i<deluxe_ctx->par_colors;i++) {
168     if (deluxe_ctx->par_ksp[i]) {
169       PetscInt subidx = deluxe_ctx->par_col2sub[i];
170       /* restrict on subset */
171       ierr = VecScatterBegin(deluxe_ctx->par_scctx_s[i],y,deluxe_ctx->par_work1[i],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
172       ierr = VecScatterEnd(deluxe_ctx->par_scctx_s[i],y,deluxe_ctx->par_work1[i],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
173       /* (\sum_j S_Ej)^-T */
174       ierr = VecSet(deluxe_ctx->par_vec[i],0.0);CHKERRQ(ierr);
175       ierr = VecScatterBegin(deluxe_ctx->par_scctx_p[i],deluxe_ctx->par_work1[i],deluxe_ctx->par_vec[i],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
176       ierr = VecScatterEnd(deluxe_ctx->par_scctx_p[i],deluxe_ctx->par_work1[i],deluxe_ctx->par_vec[i],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
177       ierr = KSPSolveTranspose(deluxe_ctx->par_ksp[i],deluxe_ctx->par_vec[i],deluxe_ctx->par_vec[i]);CHKERRQ(ierr);
178       ierr = VecScatterBegin(deluxe_ctx->par_scctx_p[i],deluxe_ctx->par_vec[i],deluxe_ctx->par_work1[i],INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
179       ierr = VecScatterEnd(deluxe_ctx->par_scctx_p[i],deluxe_ctx->par_vec[i],deluxe_ctx->par_work1[i],INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
180       /* S_Ej^T */
181       ierr = MatMultTranspose(sub_schurs->S_Ej[subidx],deluxe_ctx->par_work1[i],deluxe_ctx->par_work2[i]);CHKERRQ(ierr);
182       /* extend to boundary */
183       ierr = VecScatterBegin(deluxe_ctx->par_scctx_s[i],deluxe_ctx->par_work2[i],y,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
184       ierr = VecScatterEnd(deluxe_ctx->par_scctx_s[i],deluxe_ctx->par_work2[i],y,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
185     }
186   }
187   PetscFunctionReturn(0);
188 }
189 
190 #undef __FUNCT__
191 #define __FUNCT__ "PCBDDCScalingRestriction"
192 PetscErrorCode PCBDDCScalingRestriction(PC pc, Vec global_vector, Vec local_interface_vector)
193 {
194   PC_BDDC        *pcbddc=(PC_BDDC*)pc->data;
195   PetscErrorCode ierr;
196 
197   PetscFunctionBegin;
198   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
199   PetscValidHeaderSpecific(global_vector,VEC_CLASSID,2);
200   PetscValidHeaderSpecific(local_interface_vector,VEC_CLASSID,3);
201   if (local_interface_vector == pcbddc->work_scaling) {
202     SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"Local vector should cannot be pcbddc->work_scaling!\n");
203   }
204   ierr = PetscTryMethod(pc,"PCBDDCScalingRestriction_C",(PC,Vec,Vec),(pc,global_vector,local_interface_vector));CHKERRQ(ierr);
205   PetscFunctionReturn(0);
206 }
207 
208 #undef __FUNCT__
209 #define __FUNCT__ "PCBDDCScalingSetUp"
210 PetscErrorCode PCBDDCScalingSetUp(PC pc)
211 {
212   PC_IS* pcis=(PC_IS*)pc->data;
213   PC_BDDC* pcbddc=(PC_BDDC*)pc->data;
214   PetscErrorCode ierr;
215 
216   PetscFunctionBegin;
217   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
218   /* create work vector for the operator */
219   ierr = VecDestroy(&pcbddc->work_scaling);CHKERRQ(ierr);
220   ierr = VecDuplicate(pcis->vec1_B,&pcbddc->work_scaling);CHKERRQ(ierr);
221   /* always rebuild pcis->D */
222   if (pcis->use_stiffness_scaling) {
223     ierr = MatGetDiagonal(pcbddc->local_mat,pcis->vec1_N);CHKERRQ(ierr);
224     ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
225     ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
226   }
227   ierr = VecCopy(pcis->D,pcis->vec1_B);CHKERRQ(ierr);
228   ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
229   ierr = VecScatterBegin(pcis->global_to_B,pcis->vec1_B,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
230   ierr = VecScatterEnd(pcis->global_to_B,pcis->vec1_B,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
231   ierr = VecScatterBegin(pcis->global_to_B,pcis->vec1_global,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
232   ierr = VecScatterEnd(pcis->global_to_B,pcis->vec1_global,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
233   ierr = VecPointwiseDivide(pcis->D,pcis->D,pcis->vec1_B);CHKERRQ(ierr);
234   /* now setup */
235   if (pcbddc->use_deluxe_scaling) {
236     if (!pcbddc->deluxe_ctx) {
237       ierr = PCBDDCScalingCreate_Deluxe(pc);CHKERRQ(ierr);
238     }
239     ierr = PCBDDCScalingSetUp_Deluxe(pc);CHKERRQ(ierr);
240     ierr = PetscObjectComposeFunction((PetscObject)pc,"PCBDDCScalingRestriction_C",PCBDDCScalingRestriction_Deluxe);CHKERRQ(ierr);
241     ierr = PetscObjectComposeFunction((PetscObject)pc,"PCBDDCScalingExtension_C",PCBDDCScalingExtension_Deluxe);CHKERRQ(ierr);
242   } else {
243     ierr = PetscObjectComposeFunction((PetscObject)pc,"PCBDDCScalingRestriction_C",PCBDDCScalingRestriction_Basic);CHKERRQ(ierr);
244     ierr = PetscObjectComposeFunction((PetscObject)pc,"PCBDDCScalingExtension_C",PCBDDCScalingExtension_Basic);CHKERRQ(ierr);
245   }
246 
247   /* test */
248   if (pcbddc->dbg_flag) {
249     Vec         vec2_global;
250     PetscViewer viewer=pcbddc->dbg_viewer;
251     PetscReal   error;
252 
253     /* extension -> from local to parallel */
254     ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
255     ierr = VecSetRandom(pcis->vec1_B,NULL);CHKERRQ(ierr);
256     ierr = VecScatterBegin(pcis->global_to_B,pcis->vec1_B,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
257     ierr = VecScatterEnd(pcis->global_to_B,pcis->vec1_B,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
258     ierr = VecDuplicate(pcis->vec1_global,&vec2_global);CHKERRQ(ierr);
259     ierr = VecCopy(pcis->vec1_global,vec2_global);CHKERRQ(ierr);
260 
261     ierr = VecScatterBegin(pcis->global_to_B,pcis->vec1_global,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
262     ierr = VecScatterEnd(pcis->global_to_B,pcis->vec1_global,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
263     ierr = PCBDDCScalingExtension(pc,pcis->vec1_B,pcis->vec1_global);CHKERRQ(ierr);
264     ierr = VecAXPY(pcis->vec1_global,-1.0,vec2_global);CHKERRQ(ierr);
265     ierr = VecNorm(pcis->vec1_global,NORM_INFINITY,&error);CHKERRQ(ierr);
266     ierr = PetscViewerASCIIPrintf(viewer,"Error scaling extension %1.14e\n",error);CHKERRQ(ierr);
267     if (error>1.e-8 && pcbddc->dbg_flag>1) {
268       ierr = VecView(pcis->vec1_global,viewer);CHKERRQ(ierr);
269     }
270     ierr = VecDestroy(&vec2_global);CHKERRQ(ierr);
271 
272     /* restriction -> from parallel to local */
273     ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
274     ierr = VecSetRandom(pcis->vec1_B,NULL);CHKERRQ(ierr);
275     ierr = VecScatterBegin(pcis->global_to_B,pcis->vec1_B,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
276     ierr = VecScatterEnd(pcis->global_to_B,pcis->vec1_B,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
277 
278     ierr = PCBDDCScalingRestriction(pc,pcis->vec1_global,pcis->vec1_B);CHKERRQ(ierr);
279     ierr = VecScale(pcis->vec1_B,-1.0);CHKERRQ(ierr);
280     ierr = VecScatterBegin(pcis->global_to_B,pcis->vec1_B,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
281     ierr = VecScatterEnd(pcis->global_to_B,pcis->vec1_B,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
282     ierr = VecNorm(pcis->vec1_global,NORM_INFINITY,&error);CHKERRQ(ierr);
283     ierr = PetscViewerASCIIPrintf(viewer,"Error scaling restriction %1.14e\n",error);CHKERRQ(ierr);
284     if (error>1.e-8 && pcbddc->dbg_flag>1) {
285       ierr = VecView(pcis->vec1_global,viewer);CHKERRQ(ierr);
286     }
287   }
288   PetscFunctionReturn(0);
289 }
290 
291 #undef __FUNCT__
292 #define __FUNCT__ "PCBDDCScalingDestroy"
293 PetscErrorCode PCBDDCScalingDestroy(PC pc)
294 {
295   PC_BDDC* pcbddc=(PC_BDDC*)pc->data;
296   PetscErrorCode ierr;
297 
298   PetscFunctionBegin;
299   if (pcbddc->deluxe_ctx) {
300     ierr = PCBDDCScalingDestroy_Deluxe(pc);CHKERRQ(ierr);
301   }
302   ierr = VecDestroy(&pcbddc->work_scaling);CHKERRQ(ierr);
303   /* remove functions */
304   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCBDDCScalingRestriction_C",NULL);CHKERRQ(ierr);
305   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCBDDCScalingExtension_C",NULL);CHKERRQ(ierr);
306   PetscFunctionReturn(0);
307 }
308 
309 #undef __FUNCT__
310 #define __FUNCT__ "PCBDDCScalingCreate_Deluxe"
311 static PetscErrorCode PCBDDCScalingCreate_Deluxe(PC pc)
312 {
313   PC_BDDC*            pcbddc=(PC_BDDC*)pc->data;
314   PCBDDCDeluxeScaling deluxe_ctx;
315   PetscErrorCode      ierr;
316 
317   PetscFunctionBegin;
318   ierr = PetscNew(&deluxe_ctx);CHKERRQ(ierr);
319   pcbddc->deluxe_ctx = deluxe_ctx;
320   PetscFunctionReturn(0);
321 }
322 
323 #undef __FUNCT__
324 #define __FUNCT__ "PCBDDCScalingDestroy_Deluxe"
325 static PetscErrorCode PCBDDCScalingDestroy_Deluxe(PC pc)
326 {
327   PC_BDDC*            pcbddc=(PC_BDDC*)pc->data;
328   PetscErrorCode      ierr;
329 
330   PetscFunctionBegin;
331   ierr = PCBDDCScalingReset_Deluxe_Solvers(pcbddc->deluxe_ctx);CHKERRQ(ierr);
332   ierr = PetscFree(pcbddc->deluxe_ctx);CHKERRQ(ierr);
333   PetscFunctionReturn(0);
334 }
335 
336 #undef __FUNCT__
337 #define __FUNCT__ "PCBDDCScalingReset_Deluxe_Solvers"
338 static PetscErrorCode PCBDDCScalingReset_Deluxe_Solvers(PCBDDCDeluxeScaling deluxe_ctx)
339 {
340   PetscErrorCode      ierr;
341 
342   PetscFunctionBegin;
343   ierr = PetscFree(deluxe_ctx->idx_simple_B);CHKERRQ(ierr);
344   deluxe_ctx->n_simple = 0;
345   if (deluxe_ctx->seq_ksp) {
346     ierr = VecScatterDestroy(&deluxe_ctx->seq_scctx);CHKERRQ(ierr);
347     ierr = VecDestroy(&deluxe_ctx->seq_work1);CHKERRQ(ierr);
348     ierr = VecDestroy(&deluxe_ctx->seq_work2);CHKERRQ(ierr);
349     ierr = KSPDestroy(&deluxe_ctx->seq_ksp);CHKERRQ(ierr);
350   }
351   if (deluxe_ctx->par_colors) {
352     PetscInt i;
353     for (i=0;i<deluxe_ctx->par_colors;i++) {
354       ierr = VecScatterDestroy(&deluxe_ctx->par_scctx_s[i]);CHKERRQ(ierr);
355       ierr = VecScatterDestroy(&deluxe_ctx->par_scctx_p[i]);CHKERRQ(ierr);
356       ierr = VecDestroy(&deluxe_ctx->par_vec[i]);CHKERRQ(ierr);
357       ierr = VecDestroy(&deluxe_ctx->par_work1[i]);CHKERRQ(ierr);
358       ierr = VecDestroy(&deluxe_ctx->par_work2[i]);CHKERRQ(ierr);
359       ierr = KSPDestroy(&deluxe_ctx->par_ksp[i]);CHKERRQ(ierr);
360     }
361     ierr = PetscFree7(deluxe_ctx->par_ksp,
362                       deluxe_ctx->par_scctx_s,
363                       deluxe_ctx->par_scctx_p,
364                       deluxe_ctx->par_vec,
365                       deluxe_ctx->par_work1,
366                       deluxe_ctx->par_work2,
367                       deluxe_ctx->par_col2sub);CHKERRQ(ierr);
368   }
369   deluxe_ctx->par_colors = 0;
370   PetscFunctionReturn(0);
371 }
372 
373 #undef __FUNCT__
374 #define __FUNCT__ "PCBDDCScalingSetUp_Deluxe"
375 static PetscErrorCode PCBDDCScalingSetUp_Deluxe(PC pc)
376 {
377   PC_IS               *pcis=(PC_IS*)pc->data;
378   PC_BDDC             *pcbddc=(PC_BDDC*)pc->data;
379   PCBDDCDeluxeScaling deluxe_ctx=pcbddc->deluxe_ctx;
380   PCBDDCSubSchurs     sub_schurs=pcbddc->sub_schurs;
381   PetscErrorCode      ierr;
382 
383   PetscFunctionBegin;
384   /* (TODO: reuse) throw away the solvers */
385   ierr = PCBDDCScalingReset_Deluxe_Solvers(deluxe_ctx);CHKERRQ(ierr);
386 
387   /* Compute data structures to solve parallel problems */
388   ierr = PCBDDCScalingSetUp_Deluxe_Par(pc,sub_schurs->n_subs_par,sub_schurs->n_subs_par_g,
389                                        sub_schurs->auxglobal_parallel,
390                                        sub_schurs->index_parallel);CHKERRQ(ierr);
391 
392   /* Compute data structures to solve sequential problems */
393   ierr = PCBDDCScalingSetUp_Deluxe_Seq(pc);CHKERRQ(ierr);
394 
395   /* diagonal scaling on interface dofs not contained in cc */
396   if (sub_schurs->is_Ej_com) {
397     PetscInt       nmap;
398     const PetscInt *idxs;
399     ierr = ISGetLocalSize(sub_schurs->is_Ej_com,&deluxe_ctx->n_simple);CHKERRQ(ierr);
400     ierr = ISGetIndices(sub_schurs->is_Ej_com,&idxs);CHKERRQ(ierr);
401     ierr = PetscMalloc1(deluxe_ctx->n_simple,&deluxe_ctx->idx_simple_B);CHKERRQ(ierr);
402     ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,deluxe_ctx->n_simple,idxs,&nmap,deluxe_ctx->idx_simple_B);CHKERRQ(ierr);
403     if (nmap != deluxe_ctx->n_simple) {
404       SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error when mapping simply scaled dofs! %d != %d",nmap,deluxe_ctx->n_simple);
405     }
406     ierr = ISRestoreIndices(sub_schurs->is_Ej_com,&idxs);CHKERRQ(ierr);
407   } else {
408     deluxe_ctx->n_simple = 0;
409     deluxe_ctx->idx_simple_B = 0;
410   }
411   PetscFunctionReturn(0);
412 }
413 
414 #undef __FUNCT__
415 #define __FUNCT__ "PCBDDCScalingSetUp_Deluxe_Par"
416 static PetscErrorCode PCBDDCScalingSetUp_Deluxe_Par(PC pc, PetscInt n_local_parallel_problems,PetscInt n_parallel_problems,PetscInt global_parallel[],PetscInt index_parallel[])
417 {
418   PC_BDDC                *pcbddc=(PC_BDDC*)pc->data;
419   PCBDDCDeluxeScaling    deluxe_ctx=pcbddc->deluxe_ctx;
420   /* coloring */
421   Mat                    parallel_problems;
422   MatColoring            coloring_obj;
423   ISColoring             coloring_parallel_problems;
424   IS                     *par_is_colors,*is_colors;
425   /* working stuff */
426   PetscInt               i,j;
427   PetscErrorCode         ierr;
428 
429   PetscFunctionBegin;
430   if (!n_parallel_problems) {
431     PetscFunctionReturn(0);
432   }
433   /* Color parallel subproblems */
434   ierr = MatCreate(PetscObjectComm((PetscObject)pc),&parallel_problems);CHKERRQ(ierr);
435   ierr = MatSetSizes(parallel_problems,PETSC_DECIDE,PETSC_DECIDE,n_parallel_problems,n_parallel_problems);CHKERRQ(ierr);
436   ierr = MatSetType(parallel_problems,MATAIJ);CHKERRQ(ierr);
437   ierr = MatSetUp(parallel_problems);CHKERRQ(ierr);
438   ierr = MatSetOption(parallel_problems,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
439   ierr = MatSetOption(parallel_problems,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
440   for (i=0;i<n_local_parallel_problems;i++) {
441     PetscInt row = global_parallel[i];
442     for (j=0;j<n_local_parallel_problems;j++) {
443       PetscInt col = global_parallel[j];
444       if (row != col) {
445         ierr = MatSetValue(parallel_problems,row,col,1.0,INSERT_VALUES);CHKERRQ(ierr);
446       }
447     }
448   }
449   ierr = MatAssemblyBegin(parallel_problems,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
450   ierr = MatAssemblyEnd(parallel_problems,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
451   if (pcbddc->dbg_flag > 1) {
452     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
453     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Adj matrix for deluxe parallel problems\n");CHKERRQ(ierr);
454     ierr = MatView(parallel_problems,pcbddc->dbg_viewer);CHKERRQ(ierr);
455   }
456   ierr = MatColoringCreate(parallel_problems,&coloring_obj);CHKERRQ(ierr);
457   ierr = MatColoringSetDistance(coloring_obj,1);CHKERRQ(ierr);
458   ierr = MatColoringSetType(coloring_obj,MATCOLORINGJP);CHKERRQ(ierr);
459   ierr = MatColoringApply(coloring_obj,&coloring_parallel_problems);CHKERRQ(ierr);
460   ierr = ISColoringGetIS(coloring_parallel_problems,&deluxe_ctx->par_colors,&par_is_colors);CHKERRQ(ierr);
461   if (pcbddc->dbg_flag) {
462     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
463     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Number of colors %d for parallel part of deluxe\n",deluxe_ctx->par_colors);CHKERRQ(ierr);
464   }
465 
466   /* all procs should know the color distribution */
467   ierr = PetscMalloc1(deluxe_ctx->par_colors,&is_colors);CHKERRQ(ierr);
468   for (i=0;i<deluxe_ctx->par_colors;i++) {
469     if (pcbddc->dbg_flag) {
470       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Global problem indexes for color %d\n",i);CHKERRQ(ierr);
471       ierr = ISView(par_is_colors[i],pcbddc->dbg_viewer);CHKERRQ(ierr);
472       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
473     }
474     ierr = ISAllGather(par_is_colors[i],&is_colors[i]);CHKERRQ(ierr);
475   }
476 
477   /* free unneeded objects */
478   ierr = ISColoringRestoreIS(coloring_parallel_problems,&par_is_colors);CHKERRQ(ierr);
479   ierr = ISColoringDestroy(&coloring_parallel_problems);CHKERRQ(ierr);
480   ierr = MatColoringDestroy(&coloring_obj);CHKERRQ(ierr);
481   ierr = MatDestroy(&parallel_problems);CHKERRQ(ierr);
482 
483   /* allocate deluxe arrays for parallel problems */
484   ierr = PetscCalloc7(deluxe_ctx->par_colors,&deluxe_ctx->par_ksp,
485                       deluxe_ctx->par_colors,&deluxe_ctx->par_scctx_s,
486                       deluxe_ctx->par_colors,&deluxe_ctx->par_scctx_p,
487                       deluxe_ctx->par_colors,&deluxe_ctx->par_vec,
488                       deluxe_ctx->par_colors,&deluxe_ctx->par_work1,
489                       deluxe_ctx->par_colors,&deluxe_ctx->par_work2,
490                       deluxe_ctx->par_colors,&deluxe_ctx->par_col2sub);CHKERRQ(ierr);
491 
492   /* cycle on colors */
493   for (i=0;i<deluxe_ctx->par_colors;i++) {
494     PetscSubcomm    par_subcomm;
495     const PetscInt* idxs_subproblems;
496     PetscInt        color_size;
497     PetscMPIInt     rank,active_color;
498 
499     /* get local index of i-th parallel colored problem */
500     ierr = ISGetLocalSize(is_colors[i],&color_size);CHKERRQ(ierr);
501     ierr = ISGetIndices(is_colors[i],&idxs_subproblems);CHKERRQ(ierr);
502     /* split comm for computing parallel problems for this color */
503     /* Processes not partecipating at this stage will have color = color_size */
504     /* because PetscCommDuplicate does not handle MPI_COMM_NULL */
505     active_color = color_size;
506     deluxe_ctx->par_col2sub[i] = -1;
507     for (j=0;j<n_local_parallel_problems;j++) {
508       PetscInt local_idx;
509       ierr = PetscFindInt(global_parallel[j],color_size,idxs_subproblems,&local_idx);CHKERRQ(ierr);
510       if (local_idx > -1) {
511         ierr = PetscMPIIntCast(local_idx,&active_color);CHKERRQ(ierr);
512         deluxe_ctx->par_col2sub[i] = index_parallel[j];
513         break;
514       }
515     }
516     ierr = ISRestoreIndices(is_colors[i],&idxs_subproblems);CHKERRQ(ierr);
517     ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)pc),&par_subcomm);CHKERRQ(ierr);
518     ierr = PetscSubcommSetNumber(par_subcomm,color_size+1);CHKERRQ(ierr);
519     ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr);
520     ierr = PetscSubcommSetTypeGeneral(par_subcomm,active_color,rank);CHKERRQ(ierr);
521     /* print debug info */
522     if (pcbddc->dbg_flag) {
523       PetscMPIInt crank,csize;
524       ierr = MPI_Comm_rank(par_subcomm->comm,&crank);CHKERRQ(ierr);
525       ierr = MPI_Comm_size(par_subcomm->comm,&csize);CHKERRQ(ierr);
526       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Color %d: size %d, details follows.\n",i,color_size);CHKERRQ(ierr);
527       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
528       ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
529       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"  Subdomain %d: color in subcomm %d (rank %d out of %d) (lidx %d)\n",PetscGlobalRank,par_subcomm->color,crank,csize,deluxe_ctx->par_col2sub[i]);CHKERRQ(ierr);
530       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
531     }
532 
533     if (deluxe_ctx->par_col2sub[i] >= 0) {
534       PC                     pctemp;
535       PC_IS                  *pcis=(PC_IS*)pc->data;
536       Mat                    color_mat,color_mat_is,temp_mat;
537       ISLocalToGlobalMapping WtoNmap,l2gmap_subset;
538       IS                     is_local_numbering,isB_local,isW_local,isW;
539       PCBDDCSubSchurs        sub_schurs = pcbddc->sub_schurs;
540       PetscInt               subidx,n_local_dofs,n_global_dofs;
541       PetscInt               *global_numbering,*local_numbering;
542       char                   ksp_prefix[256];
543       size_t                 len;
544 
545       /* Local index for schur complement on subset */
546       subidx = deluxe_ctx->par_col2sub[i];
547 
548       /* Parallel numbering for dofs in colored subset */
549       ierr = ISSum(sub_schurs->is_I_layer,sub_schurs->is_subs[subidx],&is_local_numbering);CHKERRQ(ierr);
550       ierr = ISGetLocalSize(is_local_numbering,&n_local_dofs);CHKERRQ(ierr);
551       ierr = ISGetIndices(is_local_numbering,(const PetscInt **)&local_numbering);CHKERRQ(ierr);
552       ierr = PCBDDCSubsetNumbering(par_subcomm->comm,pcbddc->mat_graph->l2gmap,n_local_dofs,local_numbering,PETSC_NULL,&n_global_dofs,&global_numbering);CHKERRQ(ierr);
553       ierr = ISRestoreIndices(is_local_numbering,(const PetscInt **)&local_numbering);CHKERRQ(ierr);
554 
555       /* L2Gmap from relevant dofs to local dofs */
556       ierr = ISLocalToGlobalMappingCreateIS(is_local_numbering,&WtoNmap);CHKERRQ(ierr);
557 
558       /* L2Gmap from local to global dofs */
559       ierr = ISLocalToGlobalMappingCreate(par_subcomm->comm,1,n_local_dofs,global_numbering,PETSC_COPY_VALUES,&l2gmap_subset);CHKERRQ(ierr);
560 
561       /* compute parallel matrix (extended dirichlet problem on subset) */
562       ierr = MatCreateIS(par_subcomm->comm,1,PETSC_DECIDE,PETSC_DECIDE,n_global_dofs,n_global_dofs,l2gmap_subset,&color_mat_is);CHKERRQ(ierr);
563       ierr = MatGetSubMatrix(pcbddc->local_mat,is_local_numbering,is_local_numbering,MAT_INITIAL_MATRIX,&temp_mat);CHKERRQ(ierr);
564       ierr = MatISSetLocalMat(color_mat_is,temp_mat);CHKERRQ(ierr);
565       ierr = MatDestroy(&temp_mat);CHKERRQ(ierr);
566       ierr = MatISGetMPIXAIJ(color_mat_is,MAT_INITIAL_MATRIX,&color_mat);CHKERRQ(ierr);
567       ierr = MatDestroy(&color_mat_is);CHKERRQ(ierr);
568 
569       /* work vector for (parallel) extended dirichlet problem */
570       ierr = MatCreateVecs(color_mat,&deluxe_ctx->par_vec[i],NULL);CHKERRQ(ierr);
571 
572       /* compute scatters */
573       /* deluxe_ctx->par_scctx_p[i] extension from local subset to extended dirichlet problem
574          deluxe_ctx->par_scctx_s[i] restriction from local boundary to subset -> simple copy of selected values */
575       ierr = ISGlobalToLocalMappingApplyIS(pcis->BtoNmap,IS_GTOLM_DROP,sub_schurs->is_subs[subidx],&isB_local);CHKERRQ(ierr);
576       ierr = MatCreateVecs(sub_schurs->S_Ej[subidx],&deluxe_ctx->par_work1[i],&deluxe_ctx->par_work2[i]);CHKERRQ(ierr);
577       ierr = VecScatterCreate(pcbddc->work_scaling,isB_local,deluxe_ctx->par_work1[i],NULL,&deluxe_ctx->par_scctx_s[i]);CHKERRQ(ierr);
578       ierr = ISGlobalToLocalMappingApplyIS(WtoNmap,IS_GTOLM_DROP,sub_schurs->is_subs[subidx],&isW_local);CHKERRQ(ierr);
579       ierr = ISLocalToGlobalMappingApplyIS(l2gmap_subset,isW_local,&isW);CHKERRQ(ierr);
580       ierr = VecScatterCreate(deluxe_ctx->par_work1[i],NULL,deluxe_ctx->par_vec[i],isW,&deluxe_ctx->par_scctx_p[i]);CHKERRQ(ierr);
581 
582       /* free objects no longer neeeded */
583       ierr = ISDestroy(&isW);CHKERRQ(ierr);
584       ierr = ISDestroy(&isW_local);CHKERRQ(ierr);
585       ierr = ISDestroy(&isB_local);CHKERRQ(ierr);
586       ierr = ISLocalToGlobalMappingDestroy(&WtoNmap);CHKERRQ(ierr);
587       ierr = ISLocalToGlobalMappingDestroy(&l2gmap_subset);CHKERRQ(ierr);
588       ierr = ISDestroy(&is_local_numbering);CHKERRQ(ierr);
589       ierr = PetscFree(global_numbering);CHKERRQ(ierr);
590 
591       /* KSP for extended dirichlet problem */
592       ierr = KSPCreate(par_subcomm->comm,&deluxe_ctx->par_ksp[i]);CHKERRQ(ierr);
593       ierr = KSPSetOperators(deluxe_ctx->par_ksp[i],color_mat,color_mat);CHKERRQ(ierr);
594       ierr = KSPSetTolerances(deluxe_ctx->par_ksp[i],1.e-12,1.e-12,1.e10,10000);CHKERRQ(ierr);
595       ierr = KSPSetType(deluxe_ctx->par_ksp[i],KSPPREONLY);CHKERRQ(ierr);
596       ierr = KSPGetPC(deluxe_ctx->par_ksp[i],&pctemp);CHKERRQ(ierr);
597       ierr = PCSetType(pctemp,PCREDUNDANT);CHKERRQ(ierr);
598       ierr = PetscStrlen(((PetscObject)(pcbddc->ksp_D))->prefix,&len);CHKERRQ(ierr);
599       len -= 10; /* remove "dirichlet_" */
600       ierr = PetscStrncpy(ksp_prefix,((PetscObject)(pcbddc->ksp_D))->prefix,len+1);CHKERRQ(ierr); /* PetscStrncpy puts a terminating char at the end */
601       ierr = PetscStrcat(ksp_prefix,"deluxe_par_");CHKERRQ(ierr);
602       ierr = KSPSetOptionsPrefix(deluxe_ctx->par_ksp[i],ksp_prefix);CHKERRQ(ierr);
603       ierr = KSPSetFromOptions(deluxe_ctx->par_ksp[i]);CHKERRQ(ierr);
604       ierr = KSPSetUp(deluxe_ctx->par_ksp[i]);CHKERRQ(ierr);
605       ierr = MatDestroy(&color_mat);CHKERRQ(ierr);
606     }
607     ierr = PetscSubcommDestroy(&par_subcomm);CHKERRQ(ierr);
608   }
609   for (i=0;i<deluxe_ctx->par_colors;i++) {
610     ierr = ISDestroy(&is_colors[i]);CHKERRQ(ierr);
611   }
612   ierr = PetscFree(is_colors);CHKERRQ(ierr);
613 
614   if (pcbddc->dbg_flag) {
615     Vec test_vec;
616     PetscReal error;
617     PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
618     /* test partition of unity of coloured schur complements  */
619     for (i=0;i<deluxe_ctx->par_colors;i++) {
620       PetscInt  subidx = deluxe_ctx->par_col2sub[i];
621       PetscBool error_found = PETSC_FALSE;
622       ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
623 
624       if (deluxe_ctx->par_ksp[i]) {
625         /* create random test vec being zero on internal nodes of the extende dirichlet problem */
626         ierr = VecDuplicate(deluxe_ctx->par_vec[i],&test_vec);CHKERRQ(ierr);
627         ierr = VecSetRandom(deluxe_ctx->par_work1[i],PETSC_NULL);CHKERRQ(ierr);
628         ierr = VecSet(test_vec,0.0);CHKERRQ(ierr);
629         ierr = VecScatterBegin(deluxe_ctx->par_scctx_p[i],deluxe_ctx->par_work1[i],test_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
630         ierr = VecScatterEnd(deluxe_ctx->par_scctx_p[i],deluxe_ctx->par_work1[i],test_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
631         /* w_j */
632         ierr = VecScatterBegin(deluxe_ctx->par_scctx_p[i],test_vec,deluxe_ctx->par_work1[i],INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
633         ierr = VecScatterEnd(deluxe_ctx->par_scctx_p[i],test_vec,deluxe_ctx->par_work1[i],INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
634         /* S_j*w_j */
635         ierr = MatMult(sub_schurs->S_Ej[subidx],deluxe_ctx->par_work1[i],deluxe_ctx->par_work2[i]);CHKERRQ(ierr);
636         /* \sum_j S_j*w_j */
637         ierr = VecSet(deluxe_ctx->par_vec[i],0.0);CHKERRQ(ierr);
638         ierr = VecScatterBegin(deluxe_ctx->par_scctx_p[i],deluxe_ctx->par_work2[i],deluxe_ctx->par_vec[i],ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
639         ierr = VecScatterEnd(deluxe_ctx->par_scctx_p[i],deluxe_ctx->par_work2[i],deluxe_ctx->par_vec[i],ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
640         /* (\sum_j S_j)^(-1)(\sum_j S_j*w_j) */
641         ierr = KSPSolve(deluxe_ctx->par_ksp[i],deluxe_ctx->par_vec[i],deluxe_ctx->par_vec[i]);CHKERRQ(ierr);
642         ierr = VecScatterBegin(deluxe_ctx->par_scctx_p[i],deluxe_ctx->par_vec[i],deluxe_ctx->par_work1[i],INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
643         ierr = VecScatterEnd(deluxe_ctx->par_scctx_p[i],deluxe_ctx->par_vec[i],deluxe_ctx->par_work1[i],INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
644         ierr = VecSet(deluxe_ctx->par_vec[i],0.0);CHKERRQ(ierr);
645         ierr = VecScatterBegin(deluxe_ctx->par_scctx_p[i],deluxe_ctx->par_work1[i],deluxe_ctx->par_vec[i],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
646         ierr = VecScatterEnd(deluxe_ctx->par_scctx_p[i],deluxe_ctx->par_work1[i],deluxe_ctx->par_vec[i],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
647         /* test partition of unity */
648         ierr = VecAXPY(test_vec,-1.0,deluxe_ctx->par_vec[i]);CHKERRQ(ierr);
649         ierr = VecNorm(test_vec,NORM_INFINITY,&error);CHKERRQ(ierr);
650         if (PetscAbsReal(error) > 1.e-2) {
651           /* ierr = VecView(test_vec,0);CHKERRQ(ierr); */
652           error_found = PETSC_TRUE;
653         }
654         ierr = VecDestroy(&test_vec);CHKERRQ(ierr);
655       }
656       if (error_found) {
657         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Error testing local schur for color %d and subdomain %d\n",i,PetscGlobalRank);CHKERRQ(ierr);
658       }
659       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
660     }
661   }
662   PetscFunctionReturn(0);
663 }
664 
665 #undef __FUNCT__
666 #define __FUNCT__ "PCBDDCScalingSetUp_Deluxe_Seq"
667 static PetscErrorCode PCBDDCScalingSetUp_Deluxe_Seq(PC pc)
668 {
669   PC_BDDC                *pcbddc=(PC_BDDC*)pc->data;
670   PCBDDCDeluxeScaling    deluxe_ctx=pcbddc->deluxe_ctx;
671   PCBDDCSubSchurs        sub_schurs = pcbddc->sub_schurs;
672   PC                     pc_temp;
673   MatSolverPackage       solver=NULL;
674   char                   ksp_prefix[256];
675   size_t                 len;
676   PetscInt               local_size;
677   PetscErrorCode         ierr;
678 
679   PetscFunctionBegin;
680   if (!sub_schurs->n_subs_seq_g) {
681     PetscFunctionReturn(0);
682   }
683 
684   /* Create work vectors for sequential part of deluxe */
685   ierr = MatCreateVecs(sub_schurs->sum_S_Ej_all,&deluxe_ctx->seq_work1,&deluxe_ctx->seq_work2);CHKERRQ(ierr);
686 
687   /* Compute deluxe sequential scatter */
688   ierr = VecScatterCreate(pcbddc->work_scaling,sub_schurs->is_Ej_all,deluxe_ctx->seq_work1,NULL,&deluxe_ctx->seq_scctx);CHKERRQ(ierr);
689 
690   /* Create KSP object for sequential part of deluxe scaling */
691   ierr = KSPCreate(PETSC_COMM_SELF,&deluxe_ctx->seq_ksp);CHKERRQ(ierr);
692   ierr = KSPSetOperators(deluxe_ctx->seq_ksp,sub_schurs->sum_S_Ej_all,sub_schurs->sum_S_Ej_all);CHKERRQ(ierr);
693   ierr = KSPSetType(deluxe_ctx->seq_ksp,KSPPREONLY);CHKERRQ(ierr);
694   ierr = KSPGetPC(deluxe_ctx->seq_ksp,&pc_temp);CHKERRQ(ierr);
695   ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr);
696   ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr);
697   ierr = PCFactorGetMatSolverPackage(pc_temp,(const MatSolverPackage*)&solver);CHKERRQ(ierr);
698   ierr = MatGetSize(sub_schurs->sum_S_Ej_all,&local_size,NULL);CHKERRQ(ierr);
699   if (solver && local_size) { /* if local_size is null, some external packages will report errors */
700     PC     new_pc;
701     PCType type;
702     ierr = PCGetType(pc_temp,&type);CHKERRQ(ierr);
703     ierr = KSPGetPC(deluxe_ctx->seq_ksp,&new_pc);CHKERRQ(ierr);
704     ierr = PCSetType(new_pc,type);CHKERRQ(ierr);
705     ierr = PCFactorSetMatSolverPackage(new_pc,solver);CHKERRQ(ierr);
706   }
707   ierr = PetscStrlen(((PetscObject)(pcbddc->ksp_D))->prefix,&len);CHKERRQ(ierr);
708   len -= 10; /* remove "dirichlet_" */
709   ierr = PetscStrncpy(ksp_prefix,((PetscObject)(pcbddc->ksp_D))->prefix,len+1);CHKERRQ(ierr);
710   ierr = PetscStrcat(ksp_prefix,"deluxe_seq_");CHKERRQ(ierr);
711   ierr = KSPSetOptionsPrefix(deluxe_ctx->seq_ksp,ksp_prefix);CHKERRQ(ierr);
712   if (local_size) {
713     ierr = KSPSetFromOptions(deluxe_ctx->seq_ksp);CHKERRQ(ierr);
714   }
715   ierr = KSPSetUp(deluxe_ctx->seq_ksp);CHKERRQ(ierr);
716   PetscFunctionReturn(0);
717 }
718