xref: /petsc/src/ksp/pc/impls/bddc/bddcscalingbasic.c (revision 4e7385fac521a32d019ba1b91cac4ccec6d80411)
1 #include <../src/ksp/pc/impls/bddc/bddc.h>
2 #include <../src/ksp/pc/impls/bddc/bddcprivate.h>
3 
4 /* prototypes for deluxe functions */
5 static PetscErrorCode PCBDDCScalingCreate_Deluxe(PC);
6 static PetscErrorCode PCBDDCScalingDestroy_Deluxe(PC);
7 static PetscErrorCode PCBDDCScalingSetUp_Deluxe(PC);
8 static PetscErrorCode PCBDDCScalingSetUp_Deluxe_Par(PC,PetscInt,PetscInt,PetscInt[],PetscInt[]);
9 static PetscErrorCode PCBDDCScalingSetUp_Deluxe_Seq(PC);
10 static PetscErrorCode PCBDDCScalingReset_Deluxe_Solvers(PCBDDCDeluxeScaling);
11 
12 #undef __FUNCT__
13 #define __FUNCT__ "PCBDDCScalingExtension_Basic"
14 static PetscErrorCode PCBDDCScalingExtension_Basic(PC pc, Vec local_interface_vector, Vec global_vector)
15 {
16   PC_IS* pcis = (PC_IS*)pc->data;
17   PC_BDDC* pcbddc = (PC_BDDC*)pc->data;
18   PetscErrorCode ierr;
19 
20   PetscFunctionBegin;
21   /* Apply partition of unity */
22   ierr = VecPointwiseMult(pcbddc->work_scaling,pcis->D,local_interface_vector);CHKERRQ(ierr);
23   ierr = VecSet(global_vector,0.0);CHKERRQ(ierr);
24   ierr = VecScatterBegin(pcis->global_to_B,pcbddc->work_scaling,global_vector,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
25   ierr = VecScatterEnd(pcis->global_to_B,pcbddc->work_scaling,global_vector,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
26   PetscFunctionReturn(0);
27 }
28 
29 #undef __FUNCT__
30 #define __FUNCT__ "PCBDDCScalingExtension_Deluxe"
31 static PetscErrorCode PCBDDCScalingExtension_Deluxe(PC pc, Vec x, Vec y)
32 {
33   PC_IS*              pcis=(PC_IS*)pc->data;
34   PC_BDDC*            pcbddc=(PC_BDDC*)pc->data;
35   PCBDDCDeluxeScaling deluxe_ctx = pcbddc->deluxe_ctx;
36   PCBDDCSubSchurs     sub_schurs = pcbddc->sub_schurs;
37   PetscInt            i;
38   PetscErrorCode      ierr;
39 
40   /* TODO CHECK STUFF RELATED WITH FAKE WORK */
41   PetscFunctionBegin;
42   ierr = VecSet(pcbddc->work_scaling,0.0);CHKERRQ(ierr); /* needed by the fake work below */
43   if (deluxe_ctx->n_simple) {
44     /* scale deluxe vertices using diagonal scaling */
45     const PetscScalar *array_x,*array_D;
46     PetscScalar       *array;
47     ierr = VecGetArrayRead(x,&array_x);CHKERRQ(ierr);
48     ierr = VecGetArrayRead(pcis->D,&array_D);CHKERRQ(ierr);
49     ierr = VecGetArray(pcbddc->work_scaling,&array);CHKERRQ(ierr);
50     for (i=0;i<deluxe_ctx->n_simple;i++) {
51       array[deluxe_ctx->idx_simple_B[i]] = array_x[deluxe_ctx->idx_simple_B[i]]*array_D[deluxe_ctx->idx_simple_B[i]];
52     }
53     ierr = VecRestoreArray(pcbddc->work_scaling,&array);CHKERRQ(ierr);
54     ierr = VecRestoreArrayRead(pcis->D,&array_D);CHKERRQ(ierr);
55     ierr = VecRestoreArrayRead(x,&array_x);CHKERRQ(ierr);
56   }
57   /* sequential part : all problems and Schur applications collapsed into a single matrix vector multiplication and ksp solution */
58   if (deluxe_ctx->seq_ksp) {
59     ierr = VecScatterBegin(deluxe_ctx->seq_scctx,x,deluxe_ctx->seq_work1,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
60     ierr = VecScatterEnd(deluxe_ctx->seq_scctx,x,deluxe_ctx->seq_work1,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
61     ierr = MatMultTranspose(sub_schurs->S_Ej_all,deluxe_ctx->seq_work1,deluxe_ctx->seq_work2);CHKERRQ(ierr);
62     ierr = KSPSolveTranspose(deluxe_ctx->seq_ksp,deluxe_ctx->seq_work2,deluxe_ctx->seq_work1);CHKERRQ(ierr);
63     /* fake work due to final ADD VALUES and vertices scaling needed? TODO: check it */
64     ierr = VecScatterBegin(deluxe_ctx->seq_scctx,deluxe_ctx->seq_work1,pcbddc->work_scaling,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
65     ierr = VecScatterEnd(deluxe_ctx->seq_scctx,deluxe_ctx->seq_work1,pcbddc->work_scaling,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
66   }
67   /* parallel part */
68   for (i=0;i<deluxe_ctx->par_colors;i++) {
69     if (deluxe_ctx->par_ksp[i]) {
70       PetscMPIInt color_rank;
71       PetscInt    subidx = deluxe_ctx->par_col2sub[i];
72       /* restrict on subset */
73       ierr = VecScatterBegin(deluxe_ctx->par_scctx_s[i],x,deluxe_ctx->par_work1[i],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
74       ierr = VecScatterEnd(deluxe_ctx->par_scctx_s[i],x,deluxe_ctx->par_work1[i],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
75       /* S_Ej */
76       ierr = MatMult(sub_schurs->S_Ej[subidx],deluxe_ctx->par_work1[i],deluxe_ctx->par_work2[i]);CHKERRQ(ierr);
77       /* (\sum_j S_Ej)^-1 */
78       ierr = VecSet(deluxe_ctx->par_vec[i],0.0);CHKERRQ(ierr);
79       ierr = VecScatterBegin(deluxe_ctx->par_scctx_p[i],deluxe_ctx->par_work2[i],deluxe_ctx->par_vec[i],ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
80       ierr = VecScatterEnd(deluxe_ctx->par_scctx_p[i],deluxe_ctx->par_work2[i],deluxe_ctx->par_vec[i],ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
81       ierr = KSPSolve(deluxe_ctx->par_ksp[i],deluxe_ctx->par_vec[i],deluxe_ctx->par_vec[i]);CHKERRQ(ierr);
82       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)deluxe_ctx->par_ksp[i]),&color_rank);CHKERRQ(ierr);
83       /* get back solution on subset */
84       ierr = VecScatterBegin(deluxe_ctx->par_scctx_p[i],deluxe_ctx->par_vec[i],deluxe_ctx->par_work1[i],INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
85       ierr = VecScatterEnd(deluxe_ctx->par_scctx_p[i],deluxe_ctx->par_vec[i],deluxe_ctx->par_work1[i],INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
86       if (!color_rank) { /* only the master process in coloured comm copies the computed values */
87         ierr = VecScatterBegin(deluxe_ctx->par_scctx_s[i],deluxe_ctx->par_work1[i],pcbddc->work_scaling,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
88         ierr = VecScatterEnd(deluxe_ctx->par_scctx_s[i],deluxe_ctx->par_work1[i],pcbddc->work_scaling,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
89       }
90     }
91   }
92   /* put local boundary part in global vector */
93   ierr = VecSet(y,0.0);CHKERRQ(ierr);
94   ierr = VecScatterBegin(pcis->global_to_B,pcbddc->work_scaling,y,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
95   ierr = VecScatterEnd(pcis->global_to_B,pcbddc->work_scaling,y,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
96   PetscFunctionReturn(0);
97 }
98 
99 #undef __FUNCT__
100 #define __FUNCT__ "PCBDDCScalingExtension"
101 PetscErrorCode PCBDDCScalingExtension(PC pc, Vec local_interface_vector, Vec global_vector)
102 {
103   PC_BDDC *pcbddc=(PC_BDDC*)pc->data;
104   PetscErrorCode ierr;
105 
106   PetscFunctionBegin;
107   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
108   PetscValidHeaderSpecific(local_interface_vector,VEC_CLASSID,2);
109   PetscValidHeaderSpecific(global_vector,VEC_CLASSID,3);
110   if (local_interface_vector == pcbddc->work_scaling) {
111     SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"Local vector cannot be pcbddc->work_scaling!\n");
112   }
113   ierr = PetscTryMethod(pc,"PCBDDCScalingExtension_C",(PC,Vec,Vec),(pc,local_interface_vector,global_vector));CHKERRQ(ierr);
114   PetscFunctionReturn(0);
115 }
116 
117 #undef __FUNCT__
118 #define __FUNCT__ "PCBDDCScalingRestriction_Basic"
119 static PetscErrorCode PCBDDCScalingRestriction_Basic(PC pc, Vec global_vector, Vec local_interface_vector)
120 {
121   PetscErrorCode ierr;
122   PC_IS* pcis = (PC_IS*)pc->data;
123 
124   PetscFunctionBegin;
125   ierr = VecScatterBegin(pcis->global_to_B,global_vector,local_interface_vector,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
126   ierr = VecScatterEnd(pcis->global_to_B,global_vector,local_interface_vector,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
127   /* Apply partition of unity */
128   ierr = VecPointwiseMult(local_interface_vector,pcis->D,local_interface_vector);CHKERRQ(ierr);
129   PetscFunctionReturn(0);
130 }
131 
132 #undef __FUNCT__
133 #define __FUNCT__ "PCBDDCScalingRestriction_Deluxe"
134 static PetscErrorCode PCBDDCScalingRestriction_Deluxe(PC pc, Vec x, Vec y)
135 {
136   PC_IS*              pcis=(PC_IS*)pc->data;
137   PC_BDDC*            pcbddc=(PC_BDDC*)pc->data;
138   PCBDDCDeluxeScaling deluxe_ctx = pcbddc->deluxe_ctx;
139   PCBDDCSubSchurs     sub_schurs = pcbddc->sub_schurs;
140   PetscInt            i;
141   PetscErrorCode      ierr;
142 
143   PetscFunctionBegin;
144   /* get local boundary part of global vector */
145   ierr = VecScatterBegin(pcis->global_to_B,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
146   ierr = VecScatterEnd(pcis->global_to_B,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
147   if (deluxe_ctx->n_simple) {
148     /* scale deluxe vertices using diagonal scaling */
149     PetscScalar       *array_y;
150     const PetscScalar *array_D;
151     ierr = VecGetArray(y,&array_y);CHKERRQ(ierr);
152     ierr = VecGetArrayRead(pcis->D,&array_D);CHKERRQ(ierr);
153     for (i=0;i<deluxe_ctx->n_simple;i++) {
154       array_y[deluxe_ctx->idx_simple_B[i]] *= array_D[deluxe_ctx->idx_simple_B[i]];
155     }
156     ierr = VecRestoreArrayRead(pcis->D,&array_D);CHKERRQ(ierr);
157     ierr = VecRestoreArray(y,&array_y);CHKERRQ(ierr);
158   }
159   /* sequential part : all problems and Schur applications collapsed into a single matrix vector multiplication and ksp solution */
160   if (deluxe_ctx->seq_ksp) {
161     ierr = VecScatterBegin(deluxe_ctx->seq_scctx,y,deluxe_ctx->seq_work1,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
162     ierr = VecScatterEnd(deluxe_ctx->seq_scctx,y,deluxe_ctx->seq_work1,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
163     ierr = KSPSolve(deluxe_ctx->seq_ksp,deluxe_ctx->seq_work1,deluxe_ctx->seq_work2);CHKERRQ(ierr);
164     ierr = MatMult(sub_schurs->S_Ej_all,deluxe_ctx->seq_work2,deluxe_ctx->seq_work1);CHKERRQ(ierr);
165     ierr = VecScatterBegin(deluxe_ctx->seq_scctx,deluxe_ctx->seq_work1,y,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
166     ierr = VecScatterEnd(deluxe_ctx->seq_scctx,deluxe_ctx->seq_work1,y,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
167   }
168   /* parallel part */
169   for (i=0;i<deluxe_ctx->par_colors;i++) {
170     if (deluxe_ctx->par_ksp[i]) {
171       PetscInt subidx = deluxe_ctx->par_col2sub[i];
172       /* restrict on subset */
173       ierr = VecScatterBegin(deluxe_ctx->par_scctx_s[i],y,deluxe_ctx->par_work1[i],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
174       ierr = VecScatterEnd(deluxe_ctx->par_scctx_s[i],y,deluxe_ctx->par_work1[i],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
175       /* (\sum_j S_Ej)^-T */
176       ierr = VecSet(deluxe_ctx->par_vec[i],0.0);CHKERRQ(ierr);
177       ierr = VecScatterBegin(deluxe_ctx->par_scctx_p[i],deluxe_ctx->par_work1[i],deluxe_ctx->par_vec[i],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
178       ierr = VecScatterEnd(deluxe_ctx->par_scctx_p[i],deluxe_ctx->par_work1[i],deluxe_ctx->par_vec[i],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
179       ierr = KSPSolveTranspose(deluxe_ctx->par_ksp[i],deluxe_ctx->par_vec[i],deluxe_ctx->par_vec[i]);CHKERRQ(ierr);
180       ierr = VecScatterBegin(deluxe_ctx->par_scctx_p[i],deluxe_ctx->par_vec[i],deluxe_ctx->par_work1[i],INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
181       ierr = VecScatterEnd(deluxe_ctx->par_scctx_p[i],deluxe_ctx->par_vec[i],deluxe_ctx->par_work1[i],INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
182       /* S_Ej^T */
183       ierr = MatMultTranspose(sub_schurs->S_Ej[subidx],deluxe_ctx->par_work1[i],deluxe_ctx->par_work2[i]);CHKERRQ(ierr);
184       /* extend to boundary */
185       ierr = VecScatterBegin(deluxe_ctx->par_scctx_s[i],deluxe_ctx->par_work2[i],y,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
186       ierr = VecScatterEnd(deluxe_ctx->par_scctx_s[i],deluxe_ctx->par_work2[i],y,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
187     }
188   }
189   PetscFunctionReturn(0);
190 }
191 
192 #undef __FUNCT__
193 #define __FUNCT__ "PCBDDCScalingRestriction"
194 PetscErrorCode PCBDDCScalingRestriction(PC pc, Vec global_vector, Vec local_interface_vector)
195 {
196   PC_BDDC        *pcbddc=(PC_BDDC*)pc->data;
197   PetscErrorCode ierr;
198 
199   PetscFunctionBegin;
200   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
201   PetscValidHeaderSpecific(global_vector,VEC_CLASSID,2);
202   PetscValidHeaderSpecific(local_interface_vector,VEC_CLASSID,3);
203   if (local_interface_vector == pcbddc->work_scaling) {
204     SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"Local vector should cannot be pcbddc->work_scaling!\n");
205   }
206   ierr = PetscTryMethod(pc,"PCBDDCScalingRestriction_C",(PC,Vec,Vec),(pc,global_vector,local_interface_vector));CHKERRQ(ierr);
207   PetscFunctionReturn(0);
208 }
209 
210 #undef __FUNCT__
211 #define __FUNCT__ "PCBDDCScalingSetUp"
212 PetscErrorCode PCBDDCScalingSetUp(PC pc)
213 {
214   PC_IS* pcis=(PC_IS*)pc->data;
215   PC_BDDC* pcbddc=(PC_BDDC*)pc->data;
216   PetscErrorCode ierr;
217 
218   PetscFunctionBegin;
219   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
220   /* create work vector for the operator */
221   ierr = VecDestroy(&pcbddc->work_scaling);CHKERRQ(ierr);
222   ierr = VecDuplicate(pcis->vec1_B,&pcbddc->work_scaling);CHKERRQ(ierr);
223   /* always rebuild pcis->D */
224   if (pcis->use_stiffness_scaling) {
225     ierr = MatGetDiagonal(pcbddc->local_mat,pcis->vec1_N);CHKERRQ(ierr);
226     ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
227     ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
228   }
229   ierr = VecCopy(pcis->D,pcis->vec1_B);CHKERRQ(ierr);
230   ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
231   ierr = VecScatterBegin(pcis->global_to_B,pcis->vec1_B,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
232   ierr = VecScatterEnd(pcis->global_to_B,pcis->vec1_B,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
233   ierr = VecScatterBegin(pcis->global_to_B,pcis->vec1_global,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
234   ierr = VecScatterEnd(pcis->global_to_B,pcis->vec1_global,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
235   ierr = VecPointwiseDivide(pcis->D,pcis->D,pcis->vec1_B);CHKERRQ(ierr);
236   /* now setup */
237   if (pcbddc->use_deluxe_scaling) {
238     if (!pcbddc->deluxe_ctx) {
239       ierr = PCBDDCScalingCreate_Deluxe(pc);CHKERRQ(ierr);
240     }
241     ierr = PCBDDCScalingSetUp_Deluxe(pc);CHKERRQ(ierr);
242     ierr = PetscObjectComposeFunction((PetscObject)pc,"PCBDDCScalingRestriction_C",PCBDDCScalingRestriction_Deluxe);CHKERRQ(ierr);
243     ierr = PetscObjectComposeFunction((PetscObject)pc,"PCBDDCScalingExtension_C",PCBDDCScalingExtension_Deluxe);CHKERRQ(ierr);
244   } else {
245     ierr = PetscObjectComposeFunction((PetscObject)pc,"PCBDDCScalingRestriction_C",PCBDDCScalingRestriction_Basic);CHKERRQ(ierr);
246     ierr = PetscObjectComposeFunction((PetscObject)pc,"PCBDDCScalingExtension_C",PCBDDCScalingExtension_Basic);CHKERRQ(ierr);
247   }
248 
249   /* test */
250   if (pcbddc->dbg_flag) {
251     Vec         vec2_global;
252     PetscViewer viewer=pcbddc->dbg_viewer;
253     PetscReal   error;
254 
255     /* extension -> from local to parallel */
256     ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
257     ierr = VecSetRandom(pcis->vec1_B,NULL);CHKERRQ(ierr);
258     ierr = VecScatterBegin(pcis->global_to_B,pcis->vec1_B,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
259     ierr = VecScatterEnd(pcis->global_to_B,pcis->vec1_B,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
260     ierr = VecDuplicate(pcis->vec1_global,&vec2_global);CHKERRQ(ierr);
261     ierr = VecCopy(pcis->vec1_global,vec2_global);CHKERRQ(ierr);
262 
263     ierr = VecScatterBegin(pcis->global_to_B,pcis->vec1_global,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
264     ierr = VecScatterEnd(pcis->global_to_B,pcis->vec1_global,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
265     ierr = PCBDDCScalingExtension(pc,pcis->vec1_B,pcis->vec1_global);CHKERRQ(ierr);
266     ierr = VecAXPY(pcis->vec1_global,-1.0,vec2_global);CHKERRQ(ierr);
267     ierr = VecNorm(pcis->vec1_global,NORM_INFINITY,&error);CHKERRQ(ierr);
268     ierr = PetscViewerASCIIPrintf(viewer,"Error scaling extension %1.14e\n",error);CHKERRQ(ierr);
269     if (error>1.e-8 && pcbddc->dbg_flag>1) {
270       ierr = VecView(pcis->vec1_global,viewer);CHKERRQ(ierr);
271     }
272     ierr = VecDestroy(&vec2_global);CHKERRQ(ierr);
273 
274     /* restriction -> from parallel to local */
275     ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
276     ierr = VecSetRandom(pcis->vec1_B,NULL);CHKERRQ(ierr);
277     ierr = VecScatterBegin(pcis->global_to_B,pcis->vec1_B,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
278     ierr = VecScatterEnd(pcis->global_to_B,pcis->vec1_B,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
279 
280     ierr = PCBDDCScalingRestriction(pc,pcis->vec1_global,pcis->vec1_B);CHKERRQ(ierr);
281     ierr = VecScale(pcis->vec1_B,-1.0);CHKERRQ(ierr);
282     ierr = VecScatterBegin(pcis->global_to_B,pcis->vec1_B,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
283     ierr = VecScatterEnd(pcis->global_to_B,pcis->vec1_B,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
284     ierr = VecNorm(pcis->vec1_global,NORM_INFINITY,&error);CHKERRQ(ierr);
285     ierr = PetscViewerASCIIPrintf(viewer,"Error scaling restriction %1.14e\n",error);CHKERRQ(ierr);
286     if (error>1.e-8 && pcbddc->dbg_flag>1) {
287       ierr = VecView(pcis->vec1_global,viewer);CHKERRQ(ierr);
288     }
289   }
290   PetscFunctionReturn(0);
291 }
292 
293 #undef __FUNCT__
294 #define __FUNCT__ "PCBDDCScalingDestroy"
295 PetscErrorCode PCBDDCScalingDestroy(PC pc)
296 {
297   PC_BDDC* pcbddc=(PC_BDDC*)pc->data;
298   PetscErrorCode ierr;
299 
300   PetscFunctionBegin;
301   if (pcbddc->deluxe_ctx) {
302     ierr = PCBDDCScalingDestroy_Deluxe(pc);CHKERRQ(ierr);
303   }
304   ierr = VecDestroy(&pcbddc->work_scaling);CHKERRQ(ierr);
305   /* remove functions */
306   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCBDDCScalingRestriction_C",NULL);CHKERRQ(ierr);
307   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCBDDCScalingExtension_C",NULL);CHKERRQ(ierr);
308   PetscFunctionReturn(0);
309 }
310 
311 #undef __FUNCT__
312 #define __FUNCT__ "PCBDDCScalingCreate_Deluxe"
313 static PetscErrorCode PCBDDCScalingCreate_Deluxe(PC pc)
314 {
315   PC_BDDC*            pcbddc=(PC_BDDC*)pc->data;
316   PCBDDCDeluxeScaling deluxe_ctx;
317   PetscErrorCode      ierr;
318 
319   PetscFunctionBegin;
320   ierr = PetscNew(&deluxe_ctx);CHKERRQ(ierr);
321   pcbddc->deluxe_ctx = deluxe_ctx;
322   PetscFunctionReturn(0);
323 }
324 
325 #undef __FUNCT__
326 #define __FUNCT__ "PCBDDCScalingDestroy_Deluxe"
327 static PetscErrorCode PCBDDCScalingDestroy_Deluxe(PC pc)
328 {
329   PC_BDDC*            pcbddc=(PC_BDDC*)pc->data;
330   PetscErrorCode      ierr;
331 
332   PetscFunctionBegin;
333   ierr = PCBDDCScalingReset_Deluxe_Solvers(pcbddc->deluxe_ctx);CHKERRQ(ierr);
334   ierr = PetscFree(pcbddc->deluxe_ctx);CHKERRQ(ierr);
335   PetscFunctionReturn(0);
336 }
337 
338 #undef __FUNCT__
339 #define __FUNCT__ "PCBDDCScalingReset_Deluxe_Solvers"
340 static PetscErrorCode PCBDDCScalingReset_Deluxe_Solvers(PCBDDCDeluxeScaling deluxe_ctx)
341 {
342   PetscErrorCode      ierr;
343 
344   PetscFunctionBegin;
345   ierr = PetscFree(deluxe_ctx->idx_simple_B);CHKERRQ(ierr);
346   deluxe_ctx->n_simple = 0;
347   if (deluxe_ctx->seq_ksp) {
348     ierr = VecScatterDestroy(&deluxe_ctx->seq_scctx);CHKERRQ(ierr);
349     ierr = VecDestroy(&deluxe_ctx->seq_work1);CHKERRQ(ierr);
350     ierr = VecDestroy(&deluxe_ctx->seq_work2);CHKERRQ(ierr);
351     ierr = KSPDestroy(&deluxe_ctx->seq_ksp);CHKERRQ(ierr);
352   }
353   if (deluxe_ctx->par_colors) {
354     PetscInt i;
355     for (i=0;i<deluxe_ctx->par_colors;i++) {
356       ierr = VecScatterDestroy(&deluxe_ctx->par_scctx_s[i]);CHKERRQ(ierr);
357       ierr = VecScatterDestroy(&deluxe_ctx->par_scctx_p[i]);CHKERRQ(ierr);
358       ierr = VecDestroy(&deluxe_ctx->par_vec[i]);CHKERRQ(ierr);
359       ierr = VecDestroy(&deluxe_ctx->par_work1[i]);CHKERRQ(ierr);
360       ierr = VecDestroy(&deluxe_ctx->par_work2[i]);CHKERRQ(ierr);
361       ierr = KSPDestroy(&deluxe_ctx->par_ksp[i]);CHKERRQ(ierr);
362     }
363     ierr = PetscFree7(deluxe_ctx->par_ksp,
364                       deluxe_ctx->par_scctx_s,
365                       deluxe_ctx->par_scctx_p,
366                       deluxe_ctx->par_vec,
367                       deluxe_ctx->par_work1,
368                       deluxe_ctx->par_work2,
369                       deluxe_ctx->par_col2sub);CHKERRQ(ierr);
370   }
371   deluxe_ctx->par_colors = 0;
372   PetscFunctionReturn(0);
373 }
374 
375 #undef __FUNCT__
376 #define __FUNCT__ "PCBDDCScalingSetUp_Deluxe"
377 static PetscErrorCode PCBDDCScalingSetUp_Deluxe(PC pc)
378 {
379   PC_IS               *pcis=(PC_IS*)pc->data;
380   PC_BDDC             *pcbddc=(PC_BDDC*)pc->data;
381   PCBDDCDeluxeScaling deluxe_ctx=pcbddc->deluxe_ctx;
382   PCBDDCSubSchurs     sub_schurs=pcbddc->sub_schurs;
383   IS                  dirIS;
384   PetscErrorCode      ierr;
385 
386   PetscFunctionBegin;
387   /* (TODO: reuse) throw away the solvers */
388   ierr = PCBDDCScalingReset_Deluxe_Solvers(deluxe_ctx);CHKERRQ(ierr);
389 
390   /* Compute data structures to solve parallel problems */
391   ierr = PCBDDCScalingSetUp_Deluxe_Par(pc,sub_schurs->n_subs_par,sub_schurs->n_subs_par_g,
392                                        sub_schurs->auxglobal_parallel,
393                                        sub_schurs->index_parallel);CHKERRQ(ierr);
394 
395   /* Compute data structures to solve sequential problems */
396   ierr = PCBDDCScalingSetUp_Deluxe_Seq(pc);CHKERRQ(ierr);
397 
398   /* diagonal scaling on interface dofs not contained in cc */
399   dirIS = NULL;
400   if (pcbddc->DirichletBoundariesLocal) {
401     ierr = PCBDDCGraphGetDirichletDofs(pcbddc->mat_graph,&dirIS);CHKERRQ(ierr);
402   }
403   if (sub_schurs->is_Ej_com || dirIS) {
404     PetscInt n_com,n_dir;
405     n_com = 0;
406     if (sub_schurs->is_Ej_com) {
407       ierr = ISGetLocalSize(sub_schurs->is_Ej_com,&n_com);CHKERRQ(ierr);
408     }
409     n_dir = 0;
410     if (dirIS) {
411       ierr = ISGetLocalSize(dirIS,&n_dir);CHKERRQ(ierr);
412     }
413     deluxe_ctx->n_simple = n_dir + n_com;
414     ierr = PetscMalloc1(deluxe_ctx->n_simple,&deluxe_ctx->idx_simple_B);CHKERRQ(ierr);
415     if (sub_schurs->is_Ej_com) {
416       PetscInt       nmap;
417       const PetscInt *idxs;
418 
419       ierr = ISGetIndices(sub_schurs->is_Ej_com,&idxs);CHKERRQ(ierr);
420       ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_com,idxs,&nmap,deluxe_ctx->idx_simple_B);CHKERRQ(ierr);
421       if (nmap != n_com) {
422         SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error when mapping simply scaled dofs (is_Ej_com)! %d != %d",nmap,n_com);
423       }
424       ierr = ISRestoreIndices(sub_schurs->is_Ej_com,&idxs);CHKERRQ(ierr);
425     }
426     if (dirIS) {
427       PetscInt       nmap;
428       const PetscInt *idxs;
429 
430       ierr = ISGetIndices(dirIS,&idxs);CHKERRQ(ierr);
431       ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_dir,idxs,&nmap,deluxe_ctx->idx_simple_B+n_com);CHKERRQ(ierr);
432       ierr = ISRestoreIndices(dirIS,&idxs);CHKERRQ(ierr);
433     }
434     ierr = PetscSortInt(deluxe_ctx->n_simple,deluxe_ctx->idx_simple_B);CHKERRQ(ierr);
435   } else {
436     deluxe_ctx->n_simple = 0;
437     deluxe_ctx->idx_simple_B = 0;
438   }
439   ierr = ISDestroy(&dirIS);CHKERRQ(ierr);
440 
441   PetscFunctionReturn(0);
442 }
443 
444 #undef __FUNCT__
445 #define __FUNCT__ "PCBDDCScalingSetUp_Deluxe_Par"
446 static PetscErrorCode PCBDDCScalingSetUp_Deluxe_Par(PC pc, PetscInt n_local_parallel_problems,PetscInt n_parallel_problems,PetscInt global_parallel[],PetscInt index_parallel[])
447 {
448   PC_BDDC                *pcbddc=(PC_BDDC*)pc->data;
449   PCBDDCDeluxeScaling    deluxe_ctx=pcbddc->deluxe_ctx;
450   /* coloring */
451   Mat                    parallel_problems;
452   MatColoring            coloring_obj;
453   ISColoring             coloring_parallel_problems;
454   IS                     *par_is_colors,*is_colors;
455   /* working stuff */
456   PetscInt               i,j;
457   PetscErrorCode         ierr;
458 
459   PetscFunctionBegin;
460   if (!n_parallel_problems) {
461     PetscFunctionReturn(0);
462   }
463   /* Color parallel subproblems */
464   ierr = MatCreate(PetscObjectComm((PetscObject)pc),&parallel_problems);CHKERRQ(ierr);
465   ierr = MatSetSizes(parallel_problems,PETSC_DECIDE,PETSC_DECIDE,n_parallel_problems,n_parallel_problems);CHKERRQ(ierr);
466   ierr = MatSetType(parallel_problems,MATAIJ);CHKERRQ(ierr);
467   ierr = MatSetUp(parallel_problems);CHKERRQ(ierr);
468   ierr = MatSetOption(parallel_problems,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
469   ierr = MatSetOption(parallel_problems,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
470   for (i=0;i<n_local_parallel_problems;i++) {
471     PetscInt row = global_parallel[i];
472     for (j=0;j<n_local_parallel_problems;j++) {
473       PetscInt col = global_parallel[j];
474       if (row != col) {
475         ierr = MatSetValue(parallel_problems,row,col,1.0,INSERT_VALUES);CHKERRQ(ierr);
476       }
477     }
478   }
479   ierr = MatAssemblyBegin(parallel_problems,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
480   ierr = MatAssemblyEnd(parallel_problems,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
481   if (pcbddc->dbg_flag > 1) {
482     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
483     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Adj matrix for deluxe parallel problems\n");CHKERRQ(ierr);
484     ierr = MatView(parallel_problems,pcbddc->dbg_viewer);CHKERRQ(ierr);
485   }
486   ierr = MatColoringCreate(parallel_problems,&coloring_obj);CHKERRQ(ierr);
487   ierr = MatColoringSetDistance(coloring_obj,1);CHKERRQ(ierr);
488   ierr = MatColoringSetType(coloring_obj,MATCOLORINGJP);CHKERRQ(ierr);
489   ierr = MatColoringApply(coloring_obj,&coloring_parallel_problems);CHKERRQ(ierr);
490   ierr = ISColoringGetIS(coloring_parallel_problems,&deluxe_ctx->par_colors,&par_is_colors);CHKERRQ(ierr);
491   if (pcbddc->dbg_flag) {
492     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
493     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Number of colors %d for parallel part of deluxe\n",deluxe_ctx->par_colors);CHKERRQ(ierr);
494   }
495 
496   /* all procs should know the color distribution */
497   ierr = PetscMalloc1(deluxe_ctx->par_colors,&is_colors);CHKERRQ(ierr);
498   for (i=0;i<deluxe_ctx->par_colors;i++) {
499     if (pcbddc->dbg_flag) {
500       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Global problem indexes for color %d\n",i);CHKERRQ(ierr);
501       ierr = ISView(par_is_colors[i],pcbddc->dbg_viewer);CHKERRQ(ierr);
502       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
503     }
504     ierr = ISAllGather(par_is_colors[i],&is_colors[i]);CHKERRQ(ierr);
505   }
506 
507   /* free unneeded objects */
508   ierr = ISColoringRestoreIS(coloring_parallel_problems,&par_is_colors);CHKERRQ(ierr);
509   ierr = ISColoringDestroy(&coloring_parallel_problems);CHKERRQ(ierr);
510   ierr = MatColoringDestroy(&coloring_obj);CHKERRQ(ierr);
511   ierr = MatDestroy(&parallel_problems);CHKERRQ(ierr);
512 
513   /* allocate deluxe arrays for parallel problems */
514   ierr = PetscCalloc7(deluxe_ctx->par_colors,&deluxe_ctx->par_ksp,
515                       deluxe_ctx->par_colors,&deluxe_ctx->par_scctx_s,
516                       deluxe_ctx->par_colors,&deluxe_ctx->par_scctx_p,
517                       deluxe_ctx->par_colors,&deluxe_ctx->par_vec,
518                       deluxe_ctx->par_colors,&deluxe_ctx->par_work1,
519                       deluxe_ctx->par_colors,&deluxe_ctx->par_work2,
520                       deluxe_ctx->par_colors,&deluxe_ctx->par_col2sub);CHKERRQ(ierr);
521 
522   /* cycle on colors */
523   for (i=0;i<deluxe_ctx->par_colors;i++) {
524     PetscSubcomm    par_subcomm;
525     const PetscInt* idxs_subproblems;
526     PetscInt        color_size;
527     PetscMPIInt     rank,active_color;
528 
529     /* get local index of i-th parallel colored problem */
530     ierr = ISGetLocalSize(is_colors[i],&color_size);CHKERRQ(ierr);
531     ierr = ISGetIndices(is_colors[i],&idxs_subproblems);CHKERRQ(ierr);
532     /* split comm for computing parallel problems for this color */
533     /* Processes not partecipating at this stage will have color = color_size */
534     /* because PetscCommDuplicate does not handle MPI_COMM_NULL */
535     active_color = color_size;
536     deluxe_ctx->par_col2sub[i] = -1;
537     for (j=0;j<n_local_parallel_problems;j++) {
538       PetscInt local_idx;
539       ierr = PetscFindInt(global_parallel[j],color_size,idxs_subproblems,&local_idx);CHKERRQ(ierr);
540       if (local_idx > -1) {
541         ierr = PetscMPIIntCast(local_idx,&active_color);CHKERRQ(ierr);
542         deluxe_ctx->par_col2sub[i] = index_parallel[j];
543         break;
544       }
545     }
546     ierr = ISRestoreIndices(is_colors[i],&idxs_subproblems);CHKERRQ(ierr);
547     ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)pc),&par_subcomm);CHKERRQ(ierr);
548     ierr = PetscSubcommSetNumber(par_subcomm,color_size+1);CHKERRQ(ierr);
549     ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr);
550     ierr = PetscSubcommSetTypeGeneral(par_subcomm,active_color,rank);CHKERRQ(ierr);
551     /* print debug info */
552     if (pcbddc->dbg_flag) {
553       PetscMPIInt crank,csize;
554       ierr = MPI_Comm_rank(PetscSubcommChild(par_subcomm),&crank);CHKERRQ(ierr);
555       ierr = MPI_Comm_size(PetscSubcommChild(par_subcomm),&csize);CHKERRQ(ierr);
556       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Color %d: size %d, details follows.\n",i,color_size);CHKERRQ(ierr);
557       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
558       ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
559       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"  Subdomain %d: color in subcomm %d (rank %d out of %d) (lidx %d)\n",PetscGlobalRank,par_subcomm->color,crank,csize,deluxe_ctx->par_col2sub[i]);CHKERRQ(ierr);
560       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
561     }
562 
563     if (deluxe_ctx->par_col2sub[i] >= 0) {
564       PC                     pctemp;
565       PC_IS                  *pcis=(PC_IS*)pc->data;
566       Mat                    color_mat,color_mat_is,temp_mat;
567       ISLocalToGlobalMapping WtoNmap,l2gmap_subset;
568       IS                     is_local_numbering,isB_local,isW_local,isW;
569       PCBDDCSubSchurs        sub_schurs = pcbddc->sub_schurs;
570       PetscInt               subidx,n_local_dofs,n_global_dofs;
571       PetscInt               *global_numbering,*local_numbering;
572       char                   ksp_prefix[256];
573       size_t                 len;
574 
575       /* Local index for schur complement on subset */
576       subidx = deluxe_ctx->par_col2sub[i];
577 
578       /* Parallel numbering for dofs in colored subset */
579       ierr = ISSum(sub_schurs->is_I_layer,sub_schurs->is_subs[subidx],&is_local_numbering);CHKERRQ(ierr);
580       ierr = ISGetLocalSize(is_local_numbering,&n_local_dofs);CHKERRQ(ierr);
581       ierr = ISGetIndices(is_local_numbering,(const PetscInt **)&local_numbering);CHKERRQ(ierr);
582       ierr = PCBDDCSubsetNumbering(PetscSubcommChild(par_subcomm),pcbddc->mat_graph->l2gmap,n_local_dofs,local_numbering,PETSC_NULL,&n_global_dofs,&global_numbering);CHKERRQ(ierr);
583       ierr = ISRestoreIndices(is_local_numbering,(const PetscInt **)&local_numbering);CHKERRQ(ierr);
584 
585       /* L2Gmap from relevant dofs to local dofs */
586       ierr = ISLocalToGlobalMappingCreateIS(is_local_numbering,&WtoNmap);CHKERRQ(ierr);
587 
588       /* L2Gmap from local to global dofs */
589       ierr = ISLocalToGlobalMappingCreate(PetscSubcommChild(par_subcomm),1,n_local_dofs,global_numbering,PETSC_COPY_VALUES,&l2gmap_subset);CHKERRQ(ierr);
590 
591       /* compute parallel matrix (extended dirichlet problem on subset) */
592       ierr = MatCreateIS(PetscSubcommChild(par_subcomm),1,PETSC_DECIDE,PETSC_DECIDE,n_global_dofs,n_global_dofs,l2gmap_subset,&color_mat_is);CHKERRQ(ierr);
593       ierr = MatGetSubMatrix(pcbddc->local_mat,is_local_numbering,is_local_numbering,MAT_INITIAL_MATRIX,&temp_mat);CHKERRQ(ierr);
594       ierr = MatISSetLocalMat(color_mat_is,temp_mat);CHKERRQ(ierr);
595       ierr = MatDestroy(&temp_mat);CHKERRQ(ierr);
596       ierr = MatISGetMPIXAIJ(color_mat_is,MAT_INITIAL_MATRIX,&color_mat);CHKERRQ(ierr);
597       ierr = MatDestroy(&color_mat_is);CHKERRQ(ierr);
598 
599       /* work vector for (parallel) extended dirichlet problem */
600       ierr = MatCreateVecs(color_mat,&deluxe_ctx->par_vec[i],NULL);CHKERRQ(ierr);
601 
602       /* compute scatters */
603       /* deluxe_ctx->par_scctx_p[i] extension from local subset to extended dirichlet problem
604          deluxe_ctx->par_scctx_s[i] restriction from local boundary to subset -> simple copy of selected values */
605       ierr = ISGlobalToLocalMappingApplyIS(pcis->BtoNmap,IS_GTOLM_DROP,sub_schurs->is_subs[subidx],&isB_local);CHKERRQ(ierr);
606       ierr = MatCreateVecs(sub_schurs->S_Ej[subidx],&deluxe_ctx->par_work1[i],&deluxe_ctx->par_work2[i]);CHKERRQ(ierr);
607       ierr = VecScatterCreate(pcbddc->work_scaling,isB_local,deluxe_ctx->par_work1[i],NULL,&deluxe_ctx->par_scctx_s[i]);CHKERRQ(ierr);
608       ierr = ISGlobalToLocalMappingApplyIS(WtoNmap,IS_GTOLM_DROP,sub_schurs->is_subs[subidx],&isW_local);CHKERRQ(ierr);
609       ierr = ISLocalToGlobalMappingApplyIS(l2gmap_subset,isW_local,&isW);CHKERRQ(ierr);
610       ierr = VecScatterCreate(deluxe_ctx->par_work1[i],NULL,deluxe_ctx->par_vec[i],isW,&deluxe_ctx->par_scctx_p[i]);CHKERRQ(ierr);
611 
612       /* free objects no longer neeeded */
613       ierr = ISDestroy(&isW);CHKERRQ(ierr);
614       ierr = ISDestroy(&isW_local);CHKERRQ(ierr);
615       ierr = ISDestroy(&isB_local);CHKERRQ(ierr);
616       ierr = ISLocalToGlobalMappingDestroy(&WtoNmap);CHKERRQ(ierr);
617       ierr = ISLocalToGlobalMappingDestroy(&l2gmap_subset);CHKERRQ(ierr);
618       ierr = ISDestroy(&is_local_numbering);CHKERRQ(ierr);
619       ierr = PetscFree(global_numbering);CHKERRQ(ierr);
620 
621       /* KSP for extended dirichlet problem */
622       ierr = KSPCreate(PetscSubcommChild(par_subcomm),&deluxe_ctx->par_ksp[i]);CHKERRQ(ierr);
623       ierr = KSPSetOperators(deluxe_ctx->par_ksp[i],color_mat,color_mat);CHKERRQ(ierr);
624       ierr = KSPSetTolerances(deluxe_ctx->par_ksp[i],1.e-12,1.e-12,1.e10,10000);CHKERRQ(ierr);
625       ierr = KSPSetType(deluxe_ctx->par_ksp[i],KSPPREONLY);CHKERRQ(ierr);
626       ierr = KSPGetPC(deluxe_ctx->par_ksp[i],&pctemp);CHKERRQ(ierr);
627       ierr = PCSetType(pctemp,PCREDUNDANT);CHKERRQ(ierr);
628       ierr = PetscStrlen(((PetscObject)(pcbddc->ksp_D))->prefix,&len);CHKERRQ(ierr);
629       len -= 10; /* remove "dirichlet_" */
630       ierr = PetscStrncpy(ksp_prefix,((PetscObject)(pcbddc->ksp_D))->prefix,len+1);CHKERRQ(ierr); /* PetscStrncpy puts a terminating char at the end */
631       ierr = PetscStrcat(ksp_prefix,"deluxe_par_");CHKERRQ(ierr);
632       ierr = KSPSetOptionsPrefix(deluxe_ctx->par_ksp[i],ksp_prefix);CHKERRQ(ierr);
633       ierr = KSPSetFromOptions(deluxe_ctx->par_ksp[i]);CHKERRQ(ierr);
634       ierr = KSPSetUp(deluxe_ctx->par_ksp[i]);CHKERRQ(ierr);
635       ierr = MatDestroy(&color_mat);CHKERRQ(ierr);
636     }
637     ierr = PetscSubcommDestroy(&par_subcomm);CHKERRQ(ierr);
638   }
639   for (i=0;i<deluxe_ctx->par_colors;i++) {
640     ierr = ISDestroy(&is_colors[i]);CHKERRQ(ierr);
641   }
642   ierr = PetscFree(is_colors);CHKERRQ(ierr);
643 
644   if (pcbddc->dbg_flag) {
645     Vec test_vec;
646     PetscReal error;
647     PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
648     /* test partition of unity of coloured schur complements  */
649     for (i=0;i<deluxe_ctx->par_colors;i++) {
650       PetscInt  subidx = deluxe_ctx->par_col2sub[i];
651       PetscBool error_found = PETSC_FALSE;
652       ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
653 
654       if (deluxe_ctx->par_ksp[i]) {
655         /* create random test vec being zero on internal nodes of the extende dirichlet problem */
656         ierr = VecDuplicate(deluxe_ctx->par_vec[i],&test_vec);CHKERRQ(ierr);
657         ierr = VecSetRandom(deluxe_ctx->par_work1[i],PETSC_NULL);CHKERRQ(ierr);
658         ierr = VecSet(test_vec,0.0);CHKERRQ(ierr);
659         ierr = VecScatterBegin(deluxe_ctx->par_scctx_p[i],deluxe_ctx->par_work1[i],test_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
660         ierr = VecScatterEnd(deluxe_ctx->par_scctx_p[i],deluxe_ctx->par_work1[i],test_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
661         /* w_j */
662         ierr = VecScatterBegin(deluxe_ctx->par_scctx_p[i],test_vec,deluxe_ctx->par_work1[i],INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
663         ierr = VecScatterEnd(deluxe_ctx->par_scctx_p[i],test_vec,deluxe_ctx->par_work1[i],INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
664         /* S_j*w_j */
665         ierr = MatMult(sub_schurs->S_Ej[subidx],deluxe_ctx->par_work1[i],deluxe_ctx->par_work2[i]);CHKERRQ(ierr);
666         /* \sum_j S_j*w_j */
667         ierr = VecSet(deluxe_ctx->par_vec[i],0.0);CHKERRQ(ierr);
668         ierr = VecScatterBegin(deluxe_ctx->par_scctx_p[i],deluxe_ctx->par_work2[i],deluxe_ctx->par_vec[i],ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
669         ierr = VecScatterEnd(deluxe_ctx->par_scctx_p[i],deluxe_ctx->par_work2[i],deluxe_ctx->par_vec[i],ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
670         /* (\sum_j S_j)^(-1)(\sum_j S_j*w_j) */
671         ierr = KSPSolve(deluxe_ctx->par_ksp[i],deluxe_ctx->par_vec[i],deluxe_ctx->par_vec[i]);CHKERRQ(ierr);
672         ierr = VecScatterBegin(deluxe_ctx->par_scctx_p[i],deluxe_ctx->par_vec[i],deluxe_ctx->par_work1[i],INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
673         ierr = VecScatterEnd(deluxe_ctx->par_scctx_p[i],deluxe_ctx->par_vec[i],deluxe_ctx->par_work1[i],INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
674         ierr = VecSet(deluxe_ctx->par_vec[i],0.0);CHKERRQ(ierr);
675         ierr = VecScatterBegin(deluxe_ctx->par_scctx_p[i],deluxe_ctx->par_work1[i],deluxe_ctx->par_vec[i],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
676         ierr = VecScatterEnd(deluxe_ctx->par_scctx_p[i],deluxe_ctx->par_work1[i],deluxe_ctx->par_vec[i],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
677         /* test partition of unity */
678         ierr = VecAXPY(test_vec,-1.0,deluxe_ctx->par_vec[i]);CHKERRQ(ierr);
679         ierr = VecNorm(test_vec,NORM_INFINITY,&error);CHKERRQ(ierr);
680         if (PetscAbsReal(error) > 1.e-2) {
681           /* ierr = VecView(test_vec,0);CHKERRQ(ierr); */
682           error_found = PETSC_TRUE;
683         }
684         ierr = VecDestroy(&test_vec);CHKERRQ(ierr);
685       }
686       if (error_found) {
687         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Error testing local schur for color %d and subdomain %d\n",i,PetscGlobalRank);CHKERRQ(ierr);
688       }
689       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
690     }
691   }
692   PetscFunctionReturn(0);
693 }
694 
695 #undef __FUNCT__
696 #define __FUNCT__ "PCBDDCScalingSetUp_Deluxe_Seq"
697 static PetscErrorCode PCBDDCScalingSetUp_Deluxe_Seq(PC pc)
698 {
699   PC_BDDC                *pcbddc=(PC_BDDC*)pc->data;
700   PCBDDCDeluxeScaling    deluxe_ctx=pcbddc->deluxe_ctx;
701   PCBDDCSubSchurs        sub_schurs = pcbddc->sub_schurs;
702   PC                     pc_temp;
703   MatSolverPackage       solver=NULL;
704   char                   ksp_prefix[256];
705   size_t                 len;
706   PetscInt               local_size;
707   PetscErrorCode         ierr;
708 
709   PetscFunctionBegin;
710   if (!sub_schurs->n_subs_seq_g) {
711     PetscFunctionReturn(0);
712   }
713 
714   /* Create work vectors for sequential part of deluxe */
715   ierr = MatCreateVecs(sub_schurs->sum_S_Ej_all,&deluxe_ctx->seq_work1,&deluxe_ctx->seq_work2);CHKERRQ(ierr);
716 
717   /* Compute deluxe sequential scatter */
718   ierr = VecScatterCreate(pcbddc->work_scaling,sub_schurs->is_Ej_all,deluxe_ctx->seq_work1,NULL,&deluxe_ctx->seq_scctx);CHKERRQ(ierr);
719 
720   /* Create KSP object for sequential part of deluxe scaling */
721   ierr = KSPCreate(PETSC_COMM_SELF,&deluxe_ctx->seq_ksp);CHKERRQ(ierr);
722   ierr = KSPSetOperators(deluxe_ctx->seq_ksp,sub_schurs->sum_S_Ej_all,sub_schurs->sum_S_Ej_all);CHKERRQ(ierr);
723   ierr = KSPSetType(deluxe_ctx->seq_ksp,KSPPREONLY);CHKERRQ(ierr);
724   ierr = KSPGetPC(deluxe_ctx->seq_ksp,&pc_temp);CHKERRQ(ierr);
725   ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr);
726   ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr);
727   ierr = PCFactorGetMatSolverPackage(pc_temp,(const MatSolverPackage*)&solver);CHKERRQ(ierr);
728   ierr = MatGetSize(sub_schurs->sum_S_Ej_all,&local_size,NULL);CHKERRQ(ierr);
729   if (solver && local_size) { /* if local_size is null, some external packages will report errors */
730     PC     new_pc;
731     PCType type;
732     ierr = PCGetType(pc_temp,&type);CHKERRQ(ierr);
733     ierr = KSPGetPC(deluxe_ctx->seq_ksp,&new_pc);CHKERRQ(ierr);
734     ierr = PCSetType(new_pc,type);CHKERRQ(ierr);
735     ierr = PCFactorSetMatSolverPackage(new_pc,solver);CHKERRQ(ierr);
736   }
737   ierr = PetscStrlen(((PetscObject)(pcbddc->ksp_D))->prefix,&len);CHKERRQ(ierr);
738   len -= 10; /* remove "dirichlet_" */
739   ierr = PetscStrncpy(ksp_prefix,((PetscObject)(pcbddc->ksp_D))->prefix,len+1);CHKERRQ(ierr);
740   ierr = PetscStrcat(ksp_prefix,"deluxe_seq_");CHKERRQ(ierr);
741   ierr = KSPSetOptionsPrefix(deluxe_ctx->seq_ksp,ksp_prefix);CHKERRQ(ierr);
742   if (local_size) {
743     ierr = KSPSetFromOptions(deluxe_ctx->seq_ksp);CHKERRQ(ierr);
744   }
745   ierr = KSPSetUp(deluxe_ctx->seq_ksp);CHKERRQ(ierr);
746   PetscFunctionReturn(0);
747 }
748