xref: /petsc/src/ksp/pc/impls/bddc/bddcscalingbasic.c (revision 5db18549ef4e876dcd315358182a0a733898e86f)
1 #include <../src/ksp/pc/impls/bddc/bddc.h>
2 #include <../src/ksp/pc/impls/bddc/bddcprivate.h>
3 
4 /* prototypes for deluxe functions */
5 static PetscErrorCode PCBDDCScalingCreate_Deluxe(PC);
6 static PetscErrorCode PCBDDCScalingDestroy_Deluxe(PC);
7 static PetscErrorCode PCBDDCScalingSetUp_Deluxe(PC);
8 static PetscErrorCode PCBDDCScalingSetUp_Deluxe_Par(PC,PetscInt,PetscInt,PetscInt[],PetscInt[]);
9 static PetscErrorCode PCBDDCScalingSetUp_Deluxe_Seq(PC,PetscInt,PetscInt,PetscInt[],PetscInt[]);
10 static PetscErrorCode PCBDDCScalingSetUp_Deluxe_Seq_New(PC);
11 static PetscErrorCode PCBDDCScalingReset_Deluxe_Solvers(PCBDDCDeluxeScaling);
12 
13 #undef __FUNCT__
14 #define __FUNCT__ "PCBDDCScalingExtension_Basic"
15 static PetscErrorCode PCBDDCScalingExtension_Basic(PC pc, Vec local_interface_vector, Vec global_vector)
16 {
17   PC_IS* pcis = (PC_IS*)pc->data;
18   PC_BDDC* pcbddc = (PC_BDDC*)pc->data;
19   PetscErrorCode ierr;
20 
21   PetscFunctionBegin;
22   /* Apply partition of unity */
23   ierr = VecPointwiseMult(pcbddc->work_scaling,pcis->D,local_interface_vector);CHKERRQ(ierr);
24   ierr = VecSet(global_vector,0.0);CHKERRQ(ierr);
25   ierr = VecScatterBegin(pcis->global_to_B,pcbddc->work_scaling,global_vector,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
26   ierr = VecScatterEnd(pcis->global_to_B,pcbddc->work_scaling,global_vector,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
27   PetscFunctionReturn(0);
28 }
29 
30 #undef __FUNCT__
31 #define __FUNCT__ "PCBDDCScalingExtension_Deluxe"
32 static PetscErrorCode PCBDDCScalingExtension_Deluxe(PC pc, Vec x, Vec y)
33 {
34   PC_IS*              pcis=(PC_IS*)pc->data;
35   PC_BDDC*            pcbddc=(PC_BDDC*)pc->data;
36   PCBDDCDeluxeScaling deluxe_ctx = pcbddc->deluxe_ctx;
37   PCBDDCSubSchurs     sub_schurs = pcbddc->sub_schurs[1];
38   PetscInt            i;
39   PetscErrorCode      ierr;
40 
41   /* TODO CHECK STUFF RELATED WITH FAKE WORK */
42   PetscFunctionBegin;
43   ierr = VecSet(pcbddc->work_scaling,0.0);CHKERRQ(ierr); /* needed by the fake work below */
44   if (deluxe_ctx->n_simple) {
45     /* scale deluxe vertices using diagonal scaling */
46     PetscScalar *array_x,*array_D,*array;
47     ierr = VecGetArray(x,&array_x);CHKERRQ(ierr);
48     ierr = VecGetArray(pcis->D,&array_D);CHKERRQ(ierr);
49     ierr = VecGetArray(pcbddc->work_scaling,&array);CHKERRQ(ierr);
50     for (i=0;i<deluxe_ctx->n_simple;i++) {
51       array[deluxe_ctx->idx_simple_B[i]] = array_x[deluxe_ctx->idx_simple_B[i]]*array_D[deluxe_ctx->idx_simple_B[i]];
52     }
53     ierr = VecRestoreArray(pcbddc->work_scaling,&array);CHKERRQ(ierr);
54     ierr = VecRestoreArray(pcis->D,&array_D);CHKERRQ(ierr);
55     ierr = VecRestoreArray(x,&array_x);CHKERRQ(ierr);
56   }
57   /* sequential part : all problems and Schur applications collapsed into a single matrix vector multiplication and ksp solution */
58   if (deluxe_ctx->seq_ksp) {
59     ierr = VecScatterBegin(deluxe_ctx->seq_scctx,x,deluxe_ctx->seq_work1,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
60     ierr = VecScatterEnd(deluxe_ctx->seq_scctx,x,deluxe_ctx->seq_work1,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
61     ierr = MatMult(sub_schurs->S_Ej_all,deluxe_ctx->seq_work1,deluxe_ctx->seq_work2);CHKERRQ(ierr);
62     ierr = KSPSolve(deluxe_ctx->seq_ksp,deluxe_ctx->seq_work2,deluxe_ctx->seq_work1);CHKERRQ(ierr);
63     /* fake work due to final ADD VALUES and vertices scaling needed? TODO: check it */
64     ierr = VecScatterBegin(deluxe_ctx->seq_scctx,deluxe_ctx->seq_work1,pcbddc->work_scaling,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
65     ierr = VecScatterEnd(deluxe_ctx->seq_scctx,deluxe_ctx->seq_work1,pcbddc->work_scaling,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
66   }
67   /* parallel part */
68   for (i=0;i<deluxe_ctx->par_colors;i++) {
69     if (deluxe_ctx->par_ksp[i]) {
70       PetscMPIInt color_rank;
71       PetscInt    subidx = deluxe_ctx->par_col2sub[i];
72       /* restrict on subset */
73       ierr = VecScatterBegin(deluxe_ctx->par_scctx_s[i],x,sub_schurs->work1[subidx],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
74       ierr = VecScatterEnd(deluxe_ctx->par_scctx_s[i],x,sub_schurs->work1[subidx],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
75       /* S_Ej */
76       ierr = MatMult(sub_schurs->S_Ej[subidx],sub_schurs->work1[subidx],sub_schurs->work2[subidx]);CHKERRQ(ierr);
77       /* (\sum_j S_Ej)^-1 */
78       ierr = VecSet(deluxe_ctx->par_vec[i],0.0);CHKERRQ(ierr);
79       ierr = VecScatterBegin(deluxe_ctx->par_scctx_p[i],sub_schurs->work2[subidx],deluxe_ctx->par_vec[i],ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
80       ierr = VecScatterEnd(deluxe_ctx->par_scctx_p[i],sub_schurs->work2[subidx],deluxe_ctx->par_vec[i],ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
81       ierr = KSPSolve(deluxe_ctx->par_ksp[i],deluxe_ctx->par_vec[i],deluxe_ctx->par_vec[i]);CHKERRQ(ierr);
82       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)deluxe_ctx->par_ksp[i]),&color_rank);CHKERRQ(ierr);
83       /* get back solution on subset */
84       ierr = VecScatterBegin(deluxe_ctx->par_scctx_p[i],deluxe_ctx->par_vec[i],sub_schurs->work1[subidx],INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
85       ierr = VecScatterEnd(deluxe_ctx->par_scctx_p[i],deluxe_ctx->par_vec[i],sub_schurs->work1[subidx],INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
86       if (!color_rank) { /* only the master process in coloured comm copies the computed values */
87         ierr = VecScatterBegin(deluxe_ctx->par_scctx_s[i],sub_schurs->work1[subidx],pcbddc->work_scaling,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
88         ierr = VecScatterEnd(deluxe_ctx->par_scctx_s[i],sub_schurs->work1[subidx],pcbddc->work_scaling,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
89       }
90     }
91   }
92   /* put local boundary part in global vector */
93   ierr = VecSet(y,0.0);CHKERRQ(ierr);
94   ierr = VecScatterBegin(pcis->global_to_B,pcbddc->work_scaling,y,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
95   ierr = VecScatterEnd(pcis->global_to_B,pcbddc->work_scaling,y,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
96   PetscFunctionReturn(0);
97 }
98 
99 #undef __FUNCT__
100 #define __FUNCT__ "PCBDDCScalingExtension"
101 PetscErrorCode PCBDDCScalingExtension(PC pc, Vec local_interface_vector, Vec global_vector)
102 {
103   PC_BDDC *pcbddc=(PC_BDDC*)pc->data;
104   PetscErrorCode ierr;
105 
106   PetscFunctionBegin;
107   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
108   PetscValidHeaderSpecific(local_interface_vector,VEC_CLASSID,2);
109   PetscValidHeaderSpecific(global_vector,VEC_CLASSID,3);
110   if (local_interface_vector == pcbddc->work_scaling) {
111     SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"Local vector cannot be pcbddc->work_scaling!\n");
112   }
113   ierr = PetscTryMethod(pc,"PCBDDCScalingExtension_C",(PC,Vec,Vec),(pc,local_interface_vector,global_vector));CHKERRQ(ierr);
114   PetscFunctionReturn(0);
115 }
116 
117 #undef __FUNCT__
118 #define __FUNCT__ "PCBDDCScalingRestriction_Basic"
119 static PetscErrorCode PCBDDCScalingRestriction_Basic(PC pc, Vec global_vector, Vec local_interface_vector)
120 {
121   PetscErrorCode ierr;
122   PC_IS* pcis = (PC_IS*)pc->data;
123 
124   PetscFunctionBegin;
125   ierr = VecScatterBegin(pcis->global_to_B,global_vector,local_interface_vector,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
126   ierr = VecScatterEnd(pcis->global_to_B,global_vector,local_interface_vector,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
127   /* Apply partition of unity */
128   ierr = VecPointwiseMult(local_interface_vector,pcis->D,local_interface_vector);CHKERRQ(ierr);
129   PetscFunctionReturn(0);
130 }
131 
132 #undef __FUNCT__
133 #define __FUNCT__ "PCBDDCScalingRestriction_Deluxe"
134 static PetscErrorCode PCBDDCScalingRestriction_Deluxe(PC pc, Vec x, Vec y)
135 {
136   PC_IS*              pcis=(PC_IS*)pc->data;
137   PC_BDDC*            pcbddc=(PC_BDDC*)pc->data;
138   PCBDDCDeluxeScaling deluxe_ctx = pcbddc->deluxe_ctx;
139   PCBDDCSubSchurs     sub_schurs = pcbddc->sub_schurs[1];
140   PetscInt            i;
141   PetscErrorCode      ierr;
142 
143   PetscFunctionBegin;
144   /* get local boundary part of global vector */
145   ierr = VecScatterBegin(pcis->global_to_B,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
146   ierr = VecScatterEnd(pcis->global_to_B,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
147   if (deluxe_ctx->n_simple) {
148     /* scale deluxe vertices using diagonal scaling */
149     PetscScalar *array_y,*array_D;
150     ierr = VecGetArray(y,&array_y);CHKERRQ(ierr);
151     ierr = VecGetArray(pcis->D,&array_D);CHKERRQ(ierr);
152     for (i=0;i<deluxe_ctx->n_simple;i++) {
153       array_y[deluxe_ctx->idx_simple_B[i]] *= array_D[deluxe_ctx->idx_simple_B[i]];
154     }
155     ierr = VecRestoreArray(pcis->D,&array_D);CHKERRQ(ierr);
156     ierr = VecRestoreArray(y,&array_y);CHKERRQ(ierr);
157   }
158   /* sequential part : all problems and Schur applications collapsed into a single matrix vector multiplication and ksp solution */
159   if (deluxe_ctx->seq_ksp) {
160     ierr = VecScatterBegin(deluxe_ctx->seq_scctx,y,deluxe_ctx->seq_work1,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
161     ierr = VecScatterEnd(deluxe_ctx->seq_scctx,y,deluxe_ctx->seq_work1,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
162     ierr = KSPSolveTranspose(deluxe_ctx->seq_ksp,deluxe_ctx->seq_work1,deluxe_ctx->seq_work2);CHKERRQ(ierr);
163     ierr = MatMultTranspose(sub_schurs->S_Ej_all,deluxe_ctx->seq_work2,deluxe_ctx->seq_work1);CHKERRQ(ierr);
164     ierr = VecScatterBegin(deluxe_ctx->seq_scctx,deluxe_ctx->seq_work1,y,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
165     ierr = VecScatterEnd(deluxe_ctx->seq_scctx,deluxe_ctx->seq_work1,y,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
166   }
167   /* parallel part */
168   for (i=0;i<deluxe_ctx->par_colors;i++) {
169     if (deluxe_ctx->par_ksp[i]) {
170       PetscInt subidx = deluxe_ctx->par_col2sub[i];
171       /* restrict on subset */
172       ierr = VecScatterBegin(deluxe_ctx->par_scctx_s[i],y,sub_schurs->work1[subidx],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
173       ierr = VecScatterEnd(deluxe_ctx->par_scctx_s[i],y,sub_schurs->work1[subidx],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
174       /* (\sum_j S_Ej)^-T */
175       ierr = VecSet(deluxe_ctx->par_vec[i],0.0);CHKERRQ(ierr);
176       ierr = VecScatterBegin(deluxe_ctx->par_scctx_p[i],sub_schurs->work1[subidx],deluxe_ctx->par_vec[i],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
177       ierr = VecScatterEnd(deluxe_ctx->par_scctx_p[i],sub_schurs->work1[subidx],deluxe_ctx->par_vec[i],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
178       ierr = KSPSolveTranspose(deluxe_ctx->par_ksp[i],deluxe_ctx->par_vec[i],deluxe_ctx->par_vec[i]);CHKERRQ(ierr);
179       ierr = VecScatterBegin(deluxe_ctx->par_scctx_p[i],deluxe_ctx->par_vec[i],sub_schurs->work1[subidx],INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
180       ierr = VecScatterEnd(deluxe_ctx->par_scctx_p[i],deluxe_ctx->par_vec[i],sub_schurs->work1[subidx],INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
181       /* S_Ej^T */
182       ierr = MatMultTranspose(sub_schurs->S_Ej[subidx],sub_schurs->work1[subidx],sub_schurs->work2[subidx]);CHKERRQ(ierr);
183       /* extend to boundary */
184       ierr = VecScatterBegin(deluxe_ctx->par_scctx_s[i],sub_schurs->work2[subidx],y,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
185       ierr = VecScatterEnd(deluxe_ctx->par_scctx_s[i],sub_schurs->work2[subidx],y,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
186     }
187   }
188   PetscFunctionReturn(0);
189 }
190 
191 #undef __FUNCT__
192 #define __FUNCT__ "PCBDDCScalingRestriction"
193 PetscErrorCode PCBDDCScalingRestriction(PC pc, Vec global_vector, Vec local_interface_vector)
194 {
195   PC_BDDC        *pcbddc=(PC_BDDC*)pc->data;
196   PetscErrorCode ierr;
197 
198   PetscFunctionBegin;
199   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
200   PetscValidHeaderSpecific(global_vector,VEC_CLASSID,2);
201   PetscValidHeaderSpecific(local_interface_vector,VEC_CLASSID,3);
202   if (local_interface_vector == pcbddc->work_scaling) {
203     SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"Local vector should cannot be pcbddc->work_scaling!\n");
204   }
205   ierr = PetscTryMethod(pc,"PCBDDCScalingRestriction_C",(PC,Vec,Vec),(pc,global_vector,local_interface_vector));CHKERRQ(ierr);
206   PetscFunctionReturn(0);
207 }
208 
209 #undef __FUNCT__
210 #define __FUNCT__ "PCBDDCScalingSetUp"
211 PetscErrorCode PCBDDCScalingSetUp(PC pc)
212 {
213   PC_IS* pcis=(PC_IS*)pc->data;
214   PC_BDDC* pcbddc=(PC_BDDC*)pc->data;
215   PetscErrorCode ierr;
216 
217   PetscFunctionBegin;
218   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
219   /* create work vector for the operator */
220   ierr = VecDestroy(&pcbddc->work_scaling);CHKERRQ(ierr);
221   ierr = VecDuplicate(pcis->vec1_B,&pcbddc->work_scaling);CHKERRQ(ierr);
222   /* always rebuild pcis->D */
223   if (pcis->use_stiffness_scaling) {
224     ierr = MatGetDiagonal(pcbddc->local_mat,pcis->vec1_N);CHKERRQ(ierr);
225     ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
226     ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
227   }
228   ierr = VecCopy(pcis->D,pcis->vec1_B);CHKERRQ(ierr);
229   ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
230   ierr = VecScatterBegin(pcis->global_to_B,pcis->vec1_B,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
231   ierr = VecScatterEnd(pcis->global_to_B,pcis->vec1_B,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
232   ierr = VecScatterBegin(pcis->global_to_B,pcis->vec1_global,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
233   ierr = VecScatterEnd(pcis->global_to_B,pcis->vec1_global,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
234   ierr = VecPointwiseDivide(pcis->D,pcis->D,pcis->vec1_B);CHKERRQ(ierr);
235   /* now setup */
236   if (pcbddc->use_deluxe_scaling) {
237     if (!pcbddc->deluxe_ctx) {
238       ierr = PCBDDCScalingCreate_Deluxe(pc);CHKERRQ(ierr);
239     }
240     ierr = PCBDDCScalingSetUp_Deluxe(pc);CHKERRQ(ierr);
241     ierr = PetscObjectComposeFunction((PetscObject)pc,"PCBDDCScalingRestriction_C",PCBDDCScalingRestriction_Deluxe);CHKERRQ(ierr);
242     ierr = PetscObjectComposeFunction((PetscObject)pc,"PCBDDCScalingExtension_C",PCBDDCScalingExtension_Deluxe);CHKERRQ(ierr);
243   } else {
244     ierr = PetscObjectComposeFunction((PetscObject)pc,"PCBDDCScalingRestriction_C",PCBDDCScalingRestriction_Basic);CHKERRQ(ierr);
245     ierr = PetscObjectComposeFunction((PetscObject)pc,"PCBDDCScalingExtension_C",PCBDDCScalingExtension_Basic);CHKERRQ(ierr);
246   }
247 
248   /* test */
249   if (pcbddc->dbg_flag) {
250     Vec         vec2_global;
251     PetscViewer viewer=pcbddc->dbg_viewer;
252     PetscReal   error;
253 
254     /* extension -> from local to parallel */
255     ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
256     ierr = VecSetRandom(pcis->vec1_B,NULL);CHKERRQ(ierr);
257     ierr = VecScatterBegin(pcis->global_to_B,pcis->vec1_B,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
258     ierr = VecScatterEnd(pcis->global_to_B,pcis->vec1_B,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
259     ierr = VecDuplicate(pcis->vec1_global,&vec2_global);CHKERRQ(ierr);
260     ierr = VecCopy(pcis->vec1_global,vec2_global);CHKERRQ(ierr);
261 
262     ierr = VecScatterBegin(pcis->global_to_B,pcis->vec1_global,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
263     ierr = VecScatterEnd(pcis->global_to_B,pcis->vec1_global,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
264     ierr = PCBDDCScalingExtension(pc,pcis->vec1_B,pcis->vec1_global);CHKERRQ(ierr);
265     ierr = VecAXPY(pcis->vec1_global,-1.0,vec2_global);CHKERRQ(ierr);
266     ierr = VecNorm(pcis->vec1_global,NORM_INFINITY,&error);CHKERRQ(ierr);
267     ierr = PetscViewerASCIIPrintf(viewer,"Error scaling extension %1.14e\n",error);CHKERRQ(ierr);
268     if (error>1.e-8 && pcbddc->dbg_flag>1) {
269       ierr = VecView(pcis->vec1_global,viewer);CHKERRQ(ierr);
270     }
271     ierr = VecDestroy(&vec2_global);CHKERRQ(ierr);
272 
273     /* restriction -> from parallel to local */
274     ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
275     ierr = VecSetRandom(pcis->vec1_B,NULL);CHKERRQ(ierr);
276     ierr = VecScatterBegin(pcis->global_to_B,pcis->vec1_B,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
277     ierr = VecScatterEnd(pcis->global_to_B,pcis->vec1_B,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
278 
279     ierr = PCBDDCScalingRestriction(pc,pcis->vec1_global,pcis->vec1_B);CHKERRQ(ierr);
280     ierr = VecScale(pcis->vec1_B,-1.0);CHKERRQ(ierr);
281     ierr = VecScatterBegin(pcis->global_to_B,pcis->vec1_B,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
282     ierr = VecScatterEnd(pcis->global_to_B,pcis->vec1_B,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
283     ierr = VecNorm(pcis->vec1_global,NORM_INFINITY,&error);CHKERRQ(ierr);
284     ierr = PetscViewerASCIIPrintf(viewer,"Error scaling restriction %1.14e\n",error);CHKERRQ(ierr);
285     if (error>1.e-8 && pcbddc->dbg_flag>1) {
286       ierr = VecView(pcis->vec1_global,viewer);CHKERRQ(ierr);
287     }
288   }
289   PetscFunctionReturn(0);
290 }
291 
292 #undef __FUNCT__
293 #define __FUNCT__ "PCBDDCScalingDestroy"
294 PetscErrorCode PCBDDCScalingDestroy(PC pc)
295 {
296   PC_BDDC* pcbddc=(PC_BDDC*)pc->data;
297   PetscErrorCode ierr;
298 
299   PetscFunctionBegin;
300   if (pcbddc->deluxe_ctx) {
301     ierr = PCBDDCScalingDestroy_Deluxe(pc);CHKERRQ(ierr);
302   }
303   ierr = VecDestroy(&pcbddc->work_scaling);CHKERRQ(ierr);
304   /* remove functions */
305   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCBDDCScalingRestriction_C",NULL);CHKERRQ(ierr);
306   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCBDDCScalingExtension_C",NULL);CHKERRQ(ierr);
307   PetscFunctionReturn(0);
308 }
309 
310 #undef __FUNCT__
311 #define __FUNCT__ "PCBDDCScalingCreate_Deluxe"
312 static PetscErrorCode PCBDDCScalingCreate_Deluxe(PC pc)
313 {
314   PC_BDDC*            pcbddc=(PC_BDDC*)pc->data;
315   PCBDDCDeluxeScaling deluxe_ctx;
316   PetscErrorCode      ierr;
317 
318   PetscFunctionBegin;
319   ierr = PetscNew(&deluxe_ctx);CHKERRQ(ierr);
320   pcbddc->deluxe_ctx = deluxe_ctx;
321   PetscFunctionReturn(0);
322 }
323 
324 #undef __FUNCT__
325 #define __FUNCT__ "PCBDDCScalingDestroy_Deluxe"
326 static PetscErrorCode PCBDDCScalingDestroy_Deluxe(PC pc)
327 {
328   PC_BDDC*            pcbddc=(PC_BDDC*)pc->data;
329   PetscErrorCode      ierr;
330 
331   PetscFunctionBegin;
332   ierr = PCBDDCScalingReset_Deluxe_Solvers(pcbddc->deluxe_ctx);CHKERRQ(ierr);
333   ierr = PetscFree(pcbddc->deluxe_ctx);CHKERRQ(ierr);
334   PetscFunctionReturn(0);
335 }
336 
337 #undef __FUNCT__
338 #define __FUNCT__ "PCBDDCScalingReset_Deluxe_Solvers"
339 static PetscErrorCode PCBDDCScalingReset_Deluxe_Solvers(PCBDDCDeluxeScaling deluxe_ctx)
340 {
341   PetscErrorCode      ierr;
342 
343   PetscFunctionBegin;
344   ierr = PetscFree(deluxe_ctx->idx_simple_B);CHKERRQ(ierr);
345   deluxe_ctx->n_simple = 0;
346   if (deluxe_ctx->seq_ksp) {
347     ierr = VecScatterDestroy(&deluxe_ctx->seq_scctx);CHKERRQ(ierr);
348     ierr = VecDestroy(&deluxe_ctx->seq_work1);CHKERRQ(ierr);
349     ierr = VecDestroy(&deluxe_ctx->seq_work2);CHKERRQ(ierr);
350     ierr = KSPDestroy(&deluxe_ctx->seq_ksp);CHKERRQ(ierr);
351   }
352   if (deluxe_ctx->par_colors) {
353     PetscInt i;
354     for (i=0;i<deluxe_ctx->par_colors;i++) {
355       ierr = VecScatterDestroy(&deluxe_ctx->par_scctx_s[i]);CHKERRQ(ierr);
356       ierr = VecScatterDestroy(&deluxe_ctx->par_scctx_p[i]);CHKERRQ(ierr);
357       ierr = VecDestroy(&deluxe_ctx->par_vec[i]);CHKERRQ(ierr);
358       ierr = KSPDestroy(&deluxe_ctx->par_ksp[i]);CHKERRQ(ierr);
359     }
360     ierr = PetscFree5(deluxe_ctx->par_ksp,
361                       deluxe_ctx->par_scctx_s,
362                       deluxe_ctx->par_scctx_p,
363                       deluxe_ctx->par_vec,
364                       deluxe_ctx->par_col2sub);CHKERRQ(ierr);
365   }
366   deluxe_ctx->par_colors = 0;
367   PetscFunctionReturn(0);
368 }
369 
370 #define OLD_CODE 0
371 #undef __FUNCT__
372 #define __FUNCT__ "PCBDDCScalingSetUp_Deluxe"
373 static PetscErrorCode PCBDDCScalingSetUp_Deluxe(PC pc)
374 {
375   PC_IS               *pcis=(PC_IS*)pc->data;
376   PC_BDDC             *pcbddc=(PC_BDDC*)pc->data;
377   PCBDDCDeluxeScaling deluxe_ctx=pcbddc->deluxe_ctx;
378   PCBDDCSubSchurs     sub_schurs=pcbddc->sub_schurs[1];
379   PCBDDCGraph         graph;
380   PetscBT             bitmask;
381 #if OLD_CODE
382   IS                  *faces,*edges,*all_cc;
383   PetscInt            *index_sequential,*index_parallel;
384   PetscInt            *auxlocal_sequential,*auxlocal_parallel;
385   PetscInt            *auxglobal_sequential,*auxglobal_parallel;
386   PetscInt            *auxmapping,*idxs;
387   PetscInt            i,max_subset_size;
388   PetscInt            n_sequential_problems,n_local_sequential_problems,n_parallel_problems,n_local_parallel_problems;
389   PetscInt            n_faces,n_edges,n_all_cc;
390 #else
391   PetscInt i;
392   const PetscInt* idxs;
393   Mat       S_j;
394   PetscBool free_used_adj;
395   PetscInt  *used_xadj,*used_adjncy;
396 #endif
397   PetscErrorCode      ierr;
398 
399   PetscFunctionBegin;
400   /* throw away the solvers */
401   ierr = PCBDDCScalingReset_Deluxe_Solvers(deluxe_ctx);CHKERRQ(ierr);
402 
403   /* attach interface graph for determining subsets */
404   if (pcbddc->deluxe_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */
405     PetscInt *idx_V_N;
406     IS       verticesIS;
407     ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&i,&idx_V_N);CHKERRQ(ierr);
408     ierr = ISCreateGeneral(PETSC_COMM_SELF,i,idx_V_N,PETSC_OWN_POINTER,&verticesIS);CHKERRQ(ierr);
409     ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr);
410     ierr = PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap);CHKERRQ(ierr);
411     ierr = PCBDDCGraphSetUp(graph,0,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticesIS);CHKERRQ(ierr);
412     ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr);
413     ierr = ISDestroy(&verticesIS);CHKERRQ(ierr);
414 /*
415     if (pcbddc->dbg_flag) {
416       ierr = PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr);
417     }
418 */
419   } else {
420     graph = pcbddc->mat_graph;
421   }
422 
423 #if OLD_CODE
424   /* get index sets for faces and edges */
425   ierr = PCBDDCGraphGetCandidatesIS(graph,&n_faces,&faces,&n_edges,&edges,NULL);CHKERRQ(ierr);
426   n_all_cc = n_faces+n_edges;
427   ierr = PetscMalloc1(n_all_cc,&all_cc);CHKERRQ(ierr);
428   for (i=0;i<n_faces;i++) {
429     all_cc[i] = faces[i];
430   }
431   for (i=0;i<n_edges;i++) {
432     all_cc[n_faces+i] = edges[i];
433   }
434   ierr = PetscFree(faces);CHKERRQ(ierr);
435   ierr = PetscFree(edges);CHKERRQ(ierr);
436 
437   /* map interface's subsets */
438   max_subset_size = 0;
439   for (i=0;i<n_all_cc;i++) {
440     PetscInt subset_size;
441     ierr = ISGetLocalSize(all_cc[i],&subset_size);CHKERRQ(ierr);
442     max_subset_size = PetscMax(max_subset_size,subset_size);
443   }
444   ierr = PetscMalloc5(max_subset_size,&auxmapping,
445                       graph->ncc,&auxlocal_sequential,
446                       graph->ncc,&auxlocal_parallel,
447                       graph->ncc,&index_sequential,
448                       graph->ncc,&index_parallel);CHKERRQ(ierr);
449 
450   /* if threshold is negative, uses all sequential problems */
451   if (pcbddc->deluxe_threshold < 0) pcbddc->deluxe_threshold = max_subset_size;
452 
453   /* workspace */
454   ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr);
455   ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&idxs);CHKERRQ(ierr);
456   for (i=0;i<pcis->n-pcis->n_B;i++) {
457     ierr = PetscBTSet(bitmask,idxs[i]);CHKERRQ(ierr);
458   }
459   ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&idxs);CHKERRQ(ierr);
460 
461   /* determine which problem has to be solved in parallel or sequentially */
462   n_local_sequential_problems = 0;
463   n_local_parallel_problems = 0;
464   for (i=0;i<n_all_cc;i++) {
465     PetscInt subset_size,j,min_loc = 0;
466 
467     ierr = ISGetLocalSize(all_cc[i],&subset_size);CHKERRQ(ierr);
468     ierr = ISGetIndices(all_cc[i],(const PetscInt**)&idxs);CHKERRQ(ierr);
469     for (j=0;j<subset_size;j++) {
470       ierr = PetscBTSet(bitmask,idxs[j]);CHKERRQ(ierr);
471     }
472     ierr = ISLocalToGlobalMappingApply(graph->l2gmap,subset_size,idxs,auxmapping);CHKERRQ(ierr);
473     for (j=1;j<subset_size;j++) {
474       if (auxmapping[j]<auxmapping[min_loc]) {
475         min_loc = j;
476       }
477     }
478     if (subset_size > pcbddc->deluxe_threshold) {
479       index_parallel[n_local_parallel_problems] = i;
480       auxlocal_parallel[n_local_parallel_problems] = idxs[min_loc];
481       n_local_parallel_problems++;
482     } else {
483       index_sequential[n_local_sequential_problems] = i;
484       auxlocal_sequential[n_local_sequential_problems] = idxs[min_loc];
485       n_local_sequential_problems++;
486     }
487     ierr = ISRestoreIndices(all_cc[i],(const PetscInt**)&idxs);CHKERRQ(ierr);
488   }
489 
490   /* diagonal scaling on interface dofs not contained in cc */
491   deluxe_ctx->n_simple = 0;
492   for (i=0;i<pcis->n;i++) {
493     if (!PetscBTLookup(bitmask,i)) {
494       deluxe_ctx->n_simple++;
495     }
496   }
497   ierr = PetscMalloc1(deluxe_ctx->n_simple,&deluxe_ctx->idx_simple_B);CHKERRQ(ierr);
498   deluxe_ctx->n_simple = 0;
499   for (i=0;i<pcis->n;i++) {
500     if (!PetscBTLookup(bitmask,i)) {
501       deluxe_ctx->idx_simple_B[deluxe_ctx->n_simple++] = i;
502     }
503   }
504   ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,deluxe_ctx->n_simple,deluxe_ctx->idx_simple_B,&i,deluxe_ctx->idx_simple_B);CHKERRQ(ierr);
505   if (i != deluxe_ctx->n_simple) {
506     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error when mapping simple scaling dofs! %d != %d",i,deluxe_ctx->n_simple);
507   }
508   ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr);
509 
510   /* SetUp local schur complements on subsets TODO better reuse procedure */
511   if (!sub_schurs->n_subs) {
512     Mat       S_j;
513     PetscBool free_used_adj;
514     PetscInt  *used_xadj,*used_adjncy;
515 
516     /* decide the adjacency to be used for determining internal problems for local schur on subsets */
517     free_used_adj = PETSC_FALSE;
518     if (pcbddc->deluxe_layers == -1) {
519       used_xadj = NULL;
520       used_adjncy = NULL;
521     } else {
522       if ((pcbddc->deluxe_use_useradj && pcbddc->mat_graph->xadj) || !pcbddc->deluxe_compute_rowadj) {
523         used_xadj = pcbddc->mat_graph->xadj;
524         used_adjncy = pcbddc->mat_graph->adjncy;
525       } else {
526         Mat            mat_adj;
527         PetscBool      flg_row=PETSC_TRUE;
528         const PetscInt *xadj,*adjncy;
529         PetscInt       nvtxs;
530 
531         ierr = MatConvert(pcbddc->local_mat,MATMPIADJ,MAT_INITIAL_MATRIX,&mat_adj);CHKERRQ(ierr);
532         ierr = MatGetRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr);
533         if (!flg_row) {
534           SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatGetRowIJ called in %s\n",__FUNCT__);
535         }
536         ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr);
537         ierr = PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));CHKERRQ(ierr);
538         ierr = PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));CHKERRQ(ierr);
539         ierr = MatRestoreRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr);
540         if (!flg_row) {
541           SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatRestoreRowIJ called in %s\n",__FUNCT__);
542         }
543         ierr = MatDestroy(&mat_adj);CHKERRQ(ierr);
544         free_used_adj = PETSC_TRUE;
545       }
546     }
547 
548     /* Create Schur complement matrix */
549     ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr);
550     ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr);
551 
552     /* setup Schur complements on subsets */
553     ierr = PCBDDCSubSchursSetUp(sub_schurs,S_j,pcis->is_I_local,pcis->is_B_local,n_all_cc,all_cc,used_xadj,used_adjncy,pcbddc->deluxe_layers);CHKERRQ(ierr);
554     ierr = MatDestroy(&S_j);CHKERRQ(ierr);
555     /* free adjacency */
556     if (free_used_adj) {
557       ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr);
558     }
559   }
560   for (i=0;i<n_all_cc;i++) {
561     ierr = ISDestroy(&all_cc[i]);CHKERRQ(ierr);
562   }
563   ierr = PetscFree(all_cc);CHKERRQ(ierr);
564 
565   /* Number parallel problems */
566   auxglobal_parallel = 0;
567   ierr = PCBDDCSubsetNumbering(PetscObjectComm((PetscObject)pc),graph->l2gmap,n_local_parallel_problems,auxlocal_parallel,PETSC_NULL,&n_parallel_problems,&auxglobal_parallel);CHKERRQ(ierr);
568   if (pcbddc->dbg_flag) {
569     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Deluxe global number of parallel subproblems: %d\n",n_parallel_problems);
570   }
571 
572   /* Compute data structures to solve parallel problems */
573   ierr = PCBDDCScalingSetUp_Deluxe_Par(pc,n_local_parallel_problems,n_parallel_problems,auxglobal_parallel,index_parallel);CHKERRQ(ierr);
574   ierr = PetscFree(auxglobal_parallel);CHKERRQ(ierr);
575 
576 
577   /* Number sequential problems */
578   auxglobal_sequential = 0;
579   ierr = PCBDDCSubsetNumbering(PetscObjectComm((PetscObject)pc),graph->l2gmap,n_local_sequential_problems,auxlocal_sequential,PETSC_NULL,&n_sequential_problems,&auxglobal_sequential);CHKERRQ(ierr);
580   if (pcbddc->dbg_flag) {
581     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Deluxe global number of sequential subproblems: %d\n",n_sequential_problems);
582   }
583 
584   /* Compute data structures to solve sequential problems */
585   ierr = PCBDDCScalingSetUp_Deluxe_Seq(pc,n_local_sequential_problems,n_sequential_problems,auxglobal_sequential,index_sequential);CHKERRQ(ierr);
586   ierr = PetscFree(auxglobal_sequential);CHKERRQ(ierr);
587 
588   /* free workspace */
589   ierr = PetscFree5(auxmapping,auxlocal_sequential,auxlocal_parallel,index_sequential,index_parallel);CHKERRQ(ierr);
590 #else
591 
592   /* decide the adjacency to be used for determining internal problems for local schur on subsets */
593   free_used_adj = PETSC_FALSE;
594   if (pcbddc->deluxe_layers == -1) {
595     used_xadj = NULL;
596     used_adjncy = NULL;
597   } else {
598     if ((pcbddc->deluxe_use_useradj && pcbddc->mat_graph->xadj) || !pcbddc->deluxe_compute_rowadj) {
599       used_xadj = pcbddc->mat_graph->xadj;
600       used_adjncy = pcbddc->mat_graph->adjncy;
601     } else {
602       Mat            mat_adj;
603       PetscBool      flg_row=PETSC_TRUE;
604       const PetscInt *xadj,*adjncy;
605       PetscInt       nvtxs;
606 
607       ierr = MatConvert(pcbddc->local_mat,MATMPIADJ,MAT_INITIAL_MATRIX,&mat_adj);CHKERRQ(ierr);
608       ierr = MatGetRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr);
609       if (!flg_row) {
610         SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatGetRowIJ called in %s\n",__FUNCT__);
611       }
612       ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr);
613       ierr = PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));CHKERRQ(ierr);
614       ierr = PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));CHKERRQ(ierr);
615       ierr = MatRestoreRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr);
616       if (!flg_row) {
617         SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatRestoreRowIJ called in %s\n",__FUNCT__);
618       }
619       ierr = MatDestroy(&mat_adj);CHKERRQ(ierr);
620       free_used_adj = PETSC_TRUE;
621     }
622   }
623 
624 
625   /* Create Schur complement matrix */
626   ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr);
627   ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr);
628 
629   /* sub_schurs init */ /* TODO reuse adaptive one if valid (i.e. pcbddc->local_mat == matis->A and same graph info (HOW?) ) */
630   ierr = PCBDDCSubSchursInit(sub_schurs,pcbddc->local_mat,S_j,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap,pcbddc->deluxe_threshold);CHKERRQ(ierr);
631   ierr = MatDestroy(&S_j);CHKERRQ(ierr);
632   ierr = PCBDDCSubSchursSetUpNew(sub_schurs,used_xadj,used_adjncy,pcbddc->deluxe_layers);CHKERRQ(ierr);
633 
634   /* Compute data structures to solve parallel problems */
635   ierr = PCBDDCScalingSetUp_Deluxe_Par(pc,sub_schurs->n_subs_par,sub_schurs->n_subs_par_g,
636                                        sub_schurs->auxglobal_parallel,
637                                        sub_schurs->index_parallel);CHKERRQ(ierr);
638   /* Compute data structures to solve sequential problems */
639   ierr = PCBDDCScalingSetUp_Deluxe_Seq_New(pc);CHKERRQ(ierr);
640 
641   /* free adjacency */
642   if (free_used_adj) {
643     ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr);
644   }
645 
646   /* diagonal scaling on interface dofs not contained in cc */
647   ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr);
648   ierr = ISGetIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr);
649   for (i=0;i<pcis->n-pcis->n_B;i++) {
650     ierr = PetscBTSet(bitmask,idxs[i]);CHKERRQ(ierr);
651   }
652   ierr = ISRestoreIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr);
653 
654   for (i=0;i<sub_schurs->n_subs;i++) {
655     PetscInt subset_size,j;
656 
657     ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr);
658     ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr);
659     for (j=0;j<subset_size;j++) {
660       ierr = PetscBTSet(bitmask,idxs[j]);CHKERRQ(ierr);
661     }
662     ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr);
663   }
664 
665   deluxe_ctx->n_simple = 0;
666   for (i=0;i<pcis->n;i++) {
667     if (!PetscBTLookup(bitmask,i)) {
668       deluxe_ctx->n_simple++;
669     }
670   }
671   ierr = PetscMalloc1(deluxe_ctx->n_simple,&deluxe_ctx->idx_simple_B);CHKERRQ(ierr);
672   deluxe_ctx->n_simple = 0;
673   for (i=0;i<pcis->n;i++) {
674     if (!PetscBTLookup(bitmask,i)) {
675       deluxe_ctx->idx_simple_B[deluxe_ctx->n_simple++] = i;
676     }
677   }
678   ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,deluxe_ctx->n_simple,deluxe_ctx->idx_simple_B,&i,deluxe_ctx->idx_simple_B);CHKERRQ(ierr);
679   if (i != deluxe_ctx->n_simple) {
680     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error when mapping simple scaling dofs! %d != %d",i,deluxe_ctx->n_simple);
681   }
682   ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr);
683 
684 #endif
685   /* free graph struct */
686   if (pcbddc->deluxe_rebuild) {
687     ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr);
688   }
689   PetscFunctionReturn(0);
690 }
691 
692 #undef __FUNCT__
693 #define __FUNCT__ "PCBDDCScalingSetUp_Deluxe_Par"
694 static PetscErrorCode PCBDDCScalingSetUp_Deluxe_Par(PC pc, PetscInt n_local_parallel_problems,PetscInt n_parallel_problems,PetscInt global_parallel[],PetscInt index_parallel[])
695 {
696   PC_BDDC                *pcbddc=(PC_BDDC*)pc->data;
697   PCBDDCDeluxeScaling    deluxe_ctx=pcbddc->deluxe_ctx;
698   /* coloring */
699   Mat                    parallel_problems;
700   MatColoring            coloring_obj;
701   ISColoring             coloring_parallel_problems;
702   IS                     *par_is_colors,*is_colors;
703   /* working stuff */
704   PetscInt               i,j;
705   PetscErrorCode         ierr;
706 
707   PetscFunctionBegin;
708   if (!n_parallel_problems) {
709     PetscFunctionReturn(0);
710   }
711   /* Color parallel subproblems */
712   ierr = MatCreate(PetscObjectComm((PetscObject)pc),&parallel_problems);CHKERRQ(ierr);
713   ierr = MatSetSizes(parallel_problems,PETSC_DECIDE,PETSC_DECIDE,n_parallel_problems,n_parallel_problems);CHKERRQ(ierr);
714   ierr = MatSetType(parallel_problems,MATAIJ);CHKERRQ(ierr);
715   ierr = MatSetUp(parallel_problems);CHKERRQ(ierr);
716   ierr = MatSetOption(parallel_problems,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
717   ierr = MatSetOption(parallel_problems,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
718   for (i=0;i<n_local_parallel_problems;i++) {
719     PetscInt row = global_parallel[i];
720     for (j=0;j<n_local_parallel_problems;j++) {
721       PetscInt col = global_parallel[j];
722       if (row != col) {
723         ierr = MatSetValue(parallel_problems,row,col,1.0,INSERT_VALUES);CHKERRQ(ierr);
724       }
725     }
726   }
727   ierr = MatAssemblyBegin(parallel_problems,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
728   ierr = MatAssemblyEnd(parallel_problems,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
729   if (pcbddc->dbg_flag > 1) {
730     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
731     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Adj matrix for deluxe parallel problems\n");CHKERRQ(ierr);
732     ierr = MatView(parallel_problems,pcbddc->dbg_viewer);CHKERRQ(ierr);
733   }
734   ierr = MatColoringCreate(parallel_problems,&coloring_obj);CHKERRQ(ierr);
735   ierr = MatColoringSetDistance(coloring_obj,1);CHKERRQ(ierr);
736   ierr = MatColoringSetType(coloring_obj,MATCOLORINGJP);CHKERRQ(ierr);
737   ierr = MatColoringApply(coloring_obj,&coloring_parallel_problems);CHKERRQ(ierr);
738   ierr = ISColoringGetIS(coloring_parallel_problems,&deluxe_ctx->par_colors,&par_is_colors);CHKERRQ(ierr);
739   if (pcbddc->dbg_flag) {
740     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
741     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Number of colors %d for parallel part of deluxe\n",deluxe_ctx->par_colors);CHKERRQ(ierr);
742   }
743 
744   /* all procs should know the color distribution */
745   ierr = PetscMalloc1(deluxe_ctx->par_colors,&is_colors);CHKERRQ(ierr);
746   for (i=0;i<deluxe_ctx->par_colors;i++) {
747     if (pcbddc->dbg_flag) {
748       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Global problem indexes for color %d\n",i);CHKERRQ(ierr);
749       ierr = ISView(par_is_colors[i],pcbddc->dbg_viewer);CHKERRQ(ierr);
750       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
751     }
752     ierr = ISAllGather(par_is_colors[i],&is_colors[i]);CHKERRQ(ierr);
753   }
754 
755   /* free unneeded objects */
756   ierr = ISColoringRestoreIS(coloring_parallel_problems,&par_is_colors);CHKERRQ(ierr);
757   ierr = ISColoringDestroy(&coloring_parallel_problems);CHKERRQ(ierr);
758   ierr = MatColoringDestroy(&coloring_obj);CHKERRQ(ierr);
759   ierr = MatDestroy(&parallel_problems);CHKERRQ(ierr);
760 
761   /* allocate deluxe arrays for parallel problems */
762   ierr = PetscMalloc5(deluxe_ctx->par_colors,&deluxe_ctx->par_ksp,
763                       deluxe_ctx->par_colors,&deluxe_ctx->par_scctx_s,
764                       deluxe_ctx->par_colors,&deluxe_ctx->par_scctx_p,
765                       deluxe_ctx->par_colors,&deluxe_ctx->par_vec,
766                       deluxe_ctx->par_colors,&deluxe_ctx->par_col2sub);CHKERRQ(ierr);
767 
768   /* cycle on colors */
769   for (i=0;i<deluxe_ctx->par_colors;i++) {
770     PetscSubcomm    par_subcomm;
771     const PetscInt* idxs_subproblems;
772     PetscInt        color_size;
773     PetscMPIInt     rank,active_color;
774 
775     /* get local index of i-th parallel colored problem */
776     ierr = ISGetLocalSize(is_colors[i],&color_size);CHKERRQ(ierr);
777     ierr = ISGetIndices(is_colors[i],&idxs_subproblems);CHKERRQ(ierr);
778     /* split comm for computing parallel problems for this color */
779     /* Processes not partecipating at this stage will have color = color_size */
780     /* because PetscCommDuplicate does not handle MPI_COMM_NULL */
781     active_color = color_size;
782     deluxe_ctx->par_col2sub[i] = -1;
783     for (j=0;j<n_local_parallel_problems;j++) {
784       PetscInt local_idx;
785       ierr = PetscFindInt(global_parallel[j],color_size,idxs_subproblems,&local_idx);CHKERRQ(ierr);
786       if (local_idx > -1) {
787         ierr = PetscMPIIntCast(local_idx,&active_color);CHKERRQ(ierr);
788         deluxe_ctx->par_col2sub[i] = index_parallel[j];
789         break;
790       }
791     }
792     ierr = ISRestoreIndices(is_colors[i],&idxs_subproblems);CHKERRQ(ierr);
793     ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)pc),&par_subcomm);CHKERRQ(ierr);
794     ierr = PetscSubcommSetNumber(par_subcomm,color_size+1);CHKERRQ(ierr);
795     ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr);
796     ierr = PetscSubcommSetTypeGeneral(par_subcomm,active_color,rank);CHKERRQ(ierr);
797     /* print debug info */
798     if (pcbddc->dbg_flag) {
799       PetscMPIInt crank,csize;
800       ierr = MPI_Comm_rank(par_subcomm->comm,&crank);CHKERRQ(ierr);
801       ierr = MPI_Comm_size(par_subcomm->comm,&csize);CHKERRQ(ierr);
802       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Color %d: size %d, details follows.\n",i,color_size);CHKERRQ(ierr);
803       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
804       ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
805       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"  Subdomain %d: color in subcomm %d (rank %d out of %d) (lidx %d)\n",PetscGlobalRank,par_subcomm->color,crank,csize,deluxe_ctx->par_col2sub[i]);CHKERRQ(ierr);
806       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
807     }
808 
809     if (deluxe_ctx->par_col2sub[i] >= 0) {
810       PC                     pctemp;
811       PC_IS                  *pcis=(PC_IS*)pc->data;
812       Mat                    color_mat,color_mat_is,temp_mat;
813       ISLocalToGlobalMapping WtoNmap,l2gmap_subset;
814       IS                     is_local_numbering,isB_local,isW_local,isW;
815       PCBDDCSubSchurs        sub_schurs = pcbddc->sub_schurs[1];
816       PetscInt               subidx,n_local_dofs,n_global_dofs;
817       PetscInt               *global_numbering,*local_numbering;
818       char                   ksp_prefix[256];
819       size_t                 len;
820 
821       /* Local index for schur complement on subset */
822       subidx = deluxe_ctx->par_col2sub[i];
823 
824       /* Parallel numbering for dofs in colored subset */
825       ierr = ISSum(sub_schurs->is_AEj_I[subidx],sub_schurs->is_AEj_B[subidx],&is_local_numbering);CHKERRQ(ierr);
826       ierr = ISGetLocalSize(is_local_numbering,&n_local_dofs);CHKERRQ(ierr);
827       ierr = ISGetIndices(is_local_numbering,(const PetscInt **)&local_numbering);CHKERRQ(ierr);
828       ierr = PCBDDCSubsetNumbering(par_subcomm->comm,pcbddc->mat_graph->l2gmap,n_local_dofs,local_numbering,PETSC_NULL,&n_global_dofs,&global_numbering);CHKERRQ(ierr);
829       ierr = ISRestoreIndices(is_local_numbering,(const PetscInt **)&local_numbering);CHKERRQ(ierr);
830 
831       /* L2Gmap from relevant dofs to local dofs */
832       ierr = ISLocalToGlobalMappingCreateIS(is_local_numbering,&WtoNmap);CHKERRQ(ierr);
833 
834       /* L2Gmap from local to global dofs */
835       ierr = ISLocalToGlobalMappingCreate(par_subcomm->comm,1,n_local_dofs,global_numbering,PETSC_COPY_VALUES,&l2gmap_subset);CHKERRQ(ierr);
836 
837       /* compute parallel matrix (extended dirichlet problem on subset) */
838       ierr = MatCreateIS(par_subcomm->comm,1,PETSC_DECIDE,PETSC_DECIDE,n_global_dofs,n_global_dofs,l2gmap_subset,&color_mat_is);CHKERRQ(ierr);
839       ierr = MatGetSubMatrix(pcbddc->local_mat,is_local_numbering,is_local_numbering,MAT_INITIAL_MATRIX,&temp_mat);CHKERRQ(ierr);
840       ierr = MatISSetLocalMat(color_mat_is,temp_mat);CHKERRQ(ierr);
841       ierr = MatDestroy(&temp_mat);CHKERRQ(ierr);
842       ierr = MatISGetMPIXAIJ(color_mat_is,MAT_INITIAL_MATRIX,&color_mat);CHKERRQ(ierr);
843       ierr = MatDestroy(&color_mat_is);CHKERRQ(ierr);
844 
845       /* work vector for (parallel) extended dirichlet problem */
846       ierr = MatCreateVecs(color_mat,&deluxe_ctx->par_vec[i],NULL);CHKERRQ(ierr);
847 
848       /* compute scatters */
849       /* deluxe_ctx->par_scctx_p[i] extension from local subset to extended dirichlet problem
850          deluxe_ctx->par_scctx_s[i] restriction from local boundary to subset -> simple copy of selected values */
851       ierr = ISGlobalToLocalMappingApplyIS(pcis->BtoNmap,IS_GTOLM_DROP,sub_schurs->is_AEj_B[subidx],&isB_local);CHKERRQ(ierr);
852       ierr = VecScatterCreate(pcbddc->work_scaling,isB_local,sub_schurs->work1[subidx],NULL,&deluxe_ctx->par_scctx_s[i]);CHKERRQ(ierr);
853       ierr = ISGlobalToLocalMappingApplyIS(WtoNmap,IS_GTOLM_DROP,sub_schurs->is_AEj_B[subidx],&isW_local);CHKERRQ(ierr);
854       ierr = ISLocalToGlobalMappingApplyIS(l2gmap_subset,isW_local,&isW);CHKERRQ(ierr);
855       ierr = VecScatterCreate(sub_schurs->work1[subidx],NULL,deluxe_ctx->par_vec[i],isW,&deluxe_ctx->par_scctx_p[i]);CHKERRQ(ierr);
856 
857       /* free objects no longer neeeded */
858       ierr = ISDestroy(&isW);CHKERRQ(ierr);
859       ierr = ISDestroy(&isW_local);CHKERRQ(ierr);
860       ierr = ISDestroy(&isB_local);CHKERRQ(ierr);
861       ierr = ISLocalToGlobalMappingDestroy(&WtoNmap);CHKERRQ(ierr);
862       ierr = ISLocalToGlobalMappingDestroy(&l2gmap_subset);CHKERRQ(ierr);
863       ierr = ISDestroy(&is_local_numbering);CHKERRQ(ierr);
864       ierr = PetscFree(global_numbering);CHKERRQ(ierr);
865 
866       /* KSP for extended dirichlet problem */
867       ierr = KSPCreate(par_subcomm->comm,&deluxe_ctx->par_ksp[i]);CHKERRQ(ierr);
868       ierr = KSPSetOperators(deluxe_ctx->par_ksp[i],color_mat,color_mat);CHKERRQ(ierr);
869       ierr = KSPSetTolerances(deluxe_ctx->par_ksp[i],1.e-12,1.e-12,1.e10,10000);CHKERRQ(ierr);
870       ierr = KSPSetType(deluxe_ctx->par_ksp[i],KSPPREONLY);CHKERRQ(ierr);
871       ierr = KSPGetPC(deluxe_ctx->par_ksp[i],&pctemp);CHKERRQ(ierr);
872       ierr = PCSetType(pctemp,PCREDUNDANT);CHKERRQ(ierr);
873       ierr = PetscStrlen(((PetscObject)(pcbddc->ksp_D))->prefix,&len);CHKERRQ(ierr);
874       len -= 10; /* remove "dirichlet_" */
875       ierr = PetscStrncpy(ksp_prefix,((PetscObject)(pcbddc->ksp_D))->prefix,len+1);CHKERRQ(ierr); /* PetscStrncpy puts a terminating char at the end */
876       ierr = PetscStrcat(ksp_prefix,"deluxe_par_");CHKERRQ(ierr);
877       ierr = KSPSetOptionsPrefix(deluxe_ctx->par_ksp[i],ksp_prefix);CHKERRQ(ierr);
878       ierr = KSPSetFromOptions(deluxe_ctx->par_ksp[i]);CHKERRQ(ierr);
879       ierr = KSPSetUp(deluxe_ctx->par_ksp[i]);CHKERRQ(ierr);
880       ierr = MatDestroy(&color_mat);CHKERRQ(ierr);
881     } else { /* not partecipating in color */
882       deluxe_ctx->par_ksp[i] = 0;
883       deluxe_ctx->par_vec[i] = 0;
884       deluxe_ctx->par_scctx_p[i] = 0;
885       deluxe_ctx->par_scctx_s[i] = 0;
886     }
887     ierr = PetscSubcommDestroy(&par_subcomm);CHKERRQ(ierr);
888   }
889   for (i=0;i<deluxe_ctx->par_colors;i++) {
890     ierr = ISDestroy(&is_colors[i]);CHKERRQ(ierr);
891   }
892   ierr = PetscFree(is_colors);CHKERRQ(ierr);
893 
894   if (pcbddc->dbg_flag) {
895     Vec test_vec;
896     PetscReal error;
897     PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs[1];
898     /* test partition of unity of coloured schur complements  */
899     for (i=0;i<deluxe_ctx->par_colors;i++) {
900       PetscInt  subidx = deluxe_ctx->par_col2sub[i];
901       PetscBool error_found = PETSC_FALSE;
902       ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
903 
904       if (deluxe_ctx->par_ksp[i]) {
905         /* create random test vec being zero on internal nodes of the extende dirichlet problem */
906         ierr = VecDuplicate(deluxe_ctx->par_vec[i],&test_vec);CHKERRQ(ierr);
907         ierr = VecSetRandom(sub_schurs->work1[subidx],PETSC_NULL);CHKERRQ(ierr);
908         ierr = VecSet(test_vec,0.0);CHKERRQ(ierr);
909         ierr = VecScatterBegin(deluxe_ctx->par_scctx_p[i],sub_schurs->work1[subidx],test_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
910         ierr = VecScatterEnd(deluxe_ctx->par_scctx_p[i],sub_schurs->work1[subidx],test_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
911         /* w_j */
912         ierr = VecScatterBegin(deluxe_ctx->par_scctx_p[i],test_vec,sub_schurs->work1[subidx],INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
913         ierr = VecScatterEnd(deluxe_ctx->par_scctx_p[i],test_vec,sub_schurs->work1[subidx],INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
914         /* S_j*w_j */
915         ierr = MatMult(sub_schurs->S_Ej[subidx],sub_schurs->work1[subidx],sub_schurs->work2[subidx]);CHKERRQ(ierr);
916         /* \sum_j S_j*w_j */
917         ierr = VecSet(deluxe_ctx->par_vec[i],0.0);CHKERRQ(ierr);
918         ierr = VecScatterBegin(deluxe_ctx->par_scctx_p[i],sub_schurs->work2[subidx],deluxe_ctx->par_vec[i],ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
919         ierr = VecScatterEnd(deluxe_ctx->par_scctx_p[i],sub_schurs->work2[subidx],deluxe_ctx->par_vec[i],ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
920         /* (\sum_j S_j)^(-1)(\sum_j S_j*w_j) */
921         ierr = KSPSolve(deluxe_ctx->par_ksp[i],deluxe_ctx->par_vec[i],deluxe_ctx->par_vec[i]);CHKERRQ(ierr);
922         ierr = VecScatterBegin(deluxe_ctx->par_scctx_p[i],deluxe_ctx->par_vec[i],sub_schurs->work1[subidx],INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
923         ierr = VecScatterEnd(deluxe_ctx->par_scctx_p[i],deluxe_ctx->par_vec[i],sub_schurs->work1[subidx],INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
924         ierr = VecSet(deluxe_ctx->par_vec[i],0.0);CHKERRQ(ierr);
925         ierr = VecScatterBegin(deluxe_ctx->par_scctx_p[i],sub_schurs->work1[subidx],deluxe_ctx->par_vec[i],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
926         ierr = VecScatterEnd(deluxe_ctx->par_scctx_p[i],sub_schurs->work1[subidx],deluxe_ctx->par_vec[i],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
927         /* test partition of unity */
928         ierr = VecAXPY(test_vec,-1.0,deluxe_ctx->par_vec[i]);CHKERRQ(ierr);
929         ierr = VecNorm(test_vec,NORM_INFINITY,&error);CHKERRQ(ierr);
930         if (PetscAbsReal(error) > 1.e-2) {
931           /* ierr = VecView(test_vec,0);CHKERRQ(ierr); */
932           error_found = PETSC_TRUE;
933         }
934         ierr = VecDestroy(&test_vec);CHKERRQ(ierr);
935       }
936       if (error_found) {
937         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Error testing local schur for color %d and subdomain %d\n",i,PetscGlobalRank);CHKERRQ(ierr);
938       }
939       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
940     }
941   }
942   PetscFunctionReturn(0);
943 }
944 
945 #undef __FUNCT__
946 #define __FUNCT__ "PCBDDCScalingSetUp_Deluxe_Seq_New"
947 static PetscErrorCode PCBDDCScalingSetUp_Deluxe_Seq_New(PC pc)
948 {
949   PC_BDDC                *pcbddc=(PC_BDDC*)pc->data;
950   PCBDDCDeluxeScaling    deluxe_ctx=pcbddc->deluxe_ctx;
951   PCBDDCSubSchurs        sub_schurs = pcbddc->sub_schurs[1];
952   PC                     pc_temp;
953   MatSolverPackage       solver=NULL;
954   char                   ksp_prefix[256];
955   size_t                 len;
956   PetscInt               local_size;
957   PetscErrorCode         ierr;
958 
959   PetscFunctionBegin;
960   /* Create work vectors for sequential part of deluxe */
961   ierr = MatCreateVecs(sub_schurs->S_Ej_all,&deluxe_ctx->seq_work1,&deluxe_ctx->seq_work2);CHKERRQ(ierr);
962 
963   /* Compute deluxe sequential scatter */
964   ierr = VecScatterCreate(pcbddc->work_scaling,sub_schurs->is_Ej_all,deluxe_ctx->seq_work1,NULL,&deluxe_ctx->seq_scctx);CHKERRQ(ierr);
965 
966   /* Create KSP object for sequential part of deluxe scaling */
967   ierr = KSPCreate(PETSC_COMM_SELF,&deluxe_ctx->seq_ksp);CHKERRQ(ierr);
968   ierr = KSPSetOperators(deluxe_ctx->seq_ksp,sub_schurs->sum_S_Ej_all,sub_schurs->sum_S_Ej_all);CHKERRQ(ierr);
969   ierr = KSPSetType(deluxe_ctx->seq_ksp,KSPPREONLY);CHKERRQ(ierr);
970   ierr = KSPGetPC(deluxe_ctx->seq_ksp,&pc_temp);CHKERRQ(ierr);
971   ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr);
972   ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr);
973   ierr = PCFactorGetMatSolverPackage(pc_temp,(const MatSolverPackage*)&solver);CHKERRQ(ierr);
974   ierr = MatGetSize(sub_schurs->sum_S_Ej_all,&local_size,NULL);CHKERRQ(ierr);
975   if (solver && local_size) { /* if local_size is null, some external packages will report errors */
976     PC     new_pc;
977     PCType type;
978     ierr = PCGetType(pc_temp,&type);CHKERRQ(ierr);
979     ierr = KSPGetPC(deluxe_ctx->seq_ksp,&new_pc);CHKERRQ(ierr);
980     ierr = PCSetType(new_pc,type);CHKERRQ(ierr);
981     ierr = PCFactorSetMatSolverPackage(new_pc,solver);CHKERRQ(ierr);
982   }
983   ierr = PetscStrlen(((PetscObject)(pcbddc->ksp_D))->prefix,&len);CHKERRQ(ierr);
984   len -= 10; /* remove "dirichlet_" */
985   ierr = PetscStrncpy(ksp_prefix,((PetscObject)(pcbddc->ksp_D))->prefix,len+1);CHKERRQ(ierr);
986   ierr = PetscStrcat(ksp_prefix,"deluxe_seq_");CHKERRQ(ierr);
987   ierr = KSPSetOptionsPrefix(deluxe_ctx->seq_ksp,ksp_prefix);CHKERRQ(ierr);
988   if (local_size) {
989     ierr = KSPSetFromOptions(deluxe_ctx->seq_ksp);CHKERRQ(ierr);
990   }
991   ierr = KSPSetUp(deluxe_ctx->seq_ksp);CHKERRQ(ierr);
992   PetscFunctionReturn(0);
993 }
994 
995 #undef __FUNCT__
996 #define __FUNCT__ "PCBDDCScalingSetUp_Deluxe_Seq"
997 static PetscErrorCode PCBDDCScalingSetUp_Deluxe_Seq(PC pc,PetscInt n_local_sequential_problems,PetscInt n_sequential_problems,PetscInt global_sequential[],PetscInt local_sequential[])
998 {
999   PC_BDDC                *pcbddc=(PC_BDDC*)pc->data;
1000   PCBDDCDeluxeScaling    deluxe_ctx=pcbddc->deluxe_ctx;
1001   PCBDDCSubSchurs        sub_schurs = pcbddc->sub_schurs[1];
1002   ISLocalToGlobalMapping l2gmap_subsets;
1003   Mat                    global_schur_subsets,*submat_global_schur_subsets,work_mat;
1004   IS                     is_to,is_from;
1005   PetscScalar            *fill_vals;
1006   PetscInt               *nnz,*all_local_idx_G,*all_local_idx_B,*all_local_idx_N,*all_permutation_G,*dummy_idx;
1007   PetscInt               i,j,local_problem_index;
1008   PetscInt               subset_size,max_subset_size;
1009   PetscInt               local_size,global_size;
1010   PC                     pc_temp;
1011   MatSolverPackage       solver=NULL;
1012   char                   ksp_prefix[256];
1013   size_t                 len;
1014   PetscErrorCode         ierr;
1015 
1016   PetscFunctionBegin;
1017   if (!n_sequential_problems) {
1018     PetscFunctionReturn(0);
1019   }
1020 
1021   /* Get info on subset sizes and sum of all subsets sizes */
1022   max_subset_size = 0;
1023   local_size = 0;
1024   for (i=0;i<n_local_sequential_problems;i++) {
1025     local_problem_index = local_sequential[i];
1026     ierr = ISGetLocalSize(sub_schurs->is_AEj_B[local_problem_index],&subset_size);CHKERRQ(ierr);
1027     max_subset_size = PetscMax(subset_size,max_subset_size);
1028     local_size += subset_size;
1029   }
1030 
1031   /* Work arrays for local indices */
1032   ierr = PetscMalloc1(local_size,&all_local_idx_B);CHKERRQ(ierr);
1033   ierr = PetscMalloc1(local_size,&all_local_idx_N);CHKERRQ(ierr);
1034   ierr = PetscMalloc1(local_size,&nnz);CHKERRQ(ierr);
1035 
1036   /* Get local indices in local whole numbering and local boundary numbering */
1037   local_size = 0;
1038   for (i=0;i<n_local_sequential_problems;i++) {
1039     PC_IS    *pcis=(PC_IS*)pc->data;
1040     PetscInt *idxs;
1041     /* get info on local problem */
1042     local_problem_index = local_sequential[i];
1043     ierr = ISGetLocalSize(sub_schurs->is_AEj_B[local_problem_index],&subset_size);CHKERRQ(ierr);
1044     ierr = ISGetIndices(sub_schurs->is_AEj_B[local_problem_index],(const PetscInt**)&idxs);CHKERRQ(ierr);
1045     /* subset indices in local numbering */
1046     ierr = PetscMemcpy(all_local_idx_N+local_size,idxs,subset_size*sizeof(PetscInt));CHKERRQ(ierr);
1047     /* subset indices in local boundary numbering */
1048     ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,subset_size,idxs,&j,&all_local_idx_B[local_size]);CHKERRQ(ierr);
1049     ierr = ISRestoreIndices(sub_schurs->is_AEj_B[local_problem_index],(const PetscInt**)&idxs);CHKERRQ(ierr);
1050     if (j != subset_size) {
1051       SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in BDDC deluxe serial %d (BtoNmap)! %d != %d\n",local_problem_index,subset_size,j);
1052     }
1053     for (j=0;j<subset_size;j++) nnz[local_size+j] = subset_size;
1054     local_size += subset_size;
1055   }
1056 
1057   /* Number dofs on all subsets (parallel) and sort numbering */
1058   ierr = PCBDDCSubsetNumbering(PetscObjectComm((PetscObject)pc),pcbddc->mat_graph->l2gmap,local_size,all_local_idx_N,PETSC_NULL,&global_size,&all_local_idx_G);CHKERRQ(ierr);
1059   ierr = PetscMalloc1(local_size,&all_permutation_G);CHKERRQ(ierr);
1060   for (i=0;i<local_size;i++) {
1061     all_permutation_G[i]=i;
1062   }
1063   ierr = PetscSortIntWithPermutation(local_size,all_local_idx_G,all_permutation_G);CHKERRQ(ierr);
1064 
1065   /* Local matrix of all local Schur on subsets */
1066   ierr = MatCreate(PETSC_COMM_SELF,&sub_schurs->S_Ej_all);CHKERRQ(ierr);
1067   ierr = MatSetSizes(sub_schurs->S_Ej_all,PETSC_DECIDE,PETSC_DECIDE,local_size,local_size);CHKERRQ(ierr);
1068   ierr = MatSetType(sub_schurs->S_Ej_all,MATAIJ);CHKERRQ(ierr);
1069   ierr = MatSetOption(sub_schurs->S_Ej_all,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
1070   ierr = MatSeqAIJSetPreallocation(sub_schurs->S_Ej_all,0,nnz);CHKERRQ(ierr);
1071   ierr = PetscFree(nnz);CHKERRQ(ierr);
1072 
1073   /* Work arrays */
1074   ierr = PetscMalloc2(max_subset_size,&dummy_idx,max_subset_size*max_subset_size,&fill_vals);CHKERRQ(ierr);
1075 
1076   /* Loop on local problems to compute Schur complements explicitly */
1077   local_size = 0;
1078   for (i=0;i<n_local_sequential_problems;i++) {
1079     /* get info on local problem */
1080     local_problem_index = local_sequential[i];
1081     ierr = ISGetLocalSize(sub_schurs->is_AEj_B[local_problem_index],&subset_size);CHKERRQ(ierr);
1082     /* local Schur */
1083     for (j=0;j<subset_size;j++) {
1084       ierr = VecSet(sub_schurs->work1[local_problem_index],0.0);CHKERRQ(ierr);
1085       ierr = VecSetValue(sub_schurs->work1[local_problem_index],j,1.0,INSERT_VALUES);CHKERRQ(ierr);
1086       ierr = VecPlaceArray(sub_schurs->work2[local_problem_index],&fill_vals[j*subset_size]);CHKERRQ(ierr);
1087       ierr = MatMult(sub_schurs->S_Ej[local_problem_index],sub_schurs->work1[local_problem_index],sub_schurs->work2[local_problem_index]);CHKERRQ(ierr);
1088       ierr = VecResetArray(sub_schurs->work2[local_problem_index]);CHKERRQ(ierr);
1089     }
1090     for (j=0;j<subset_size;j++) {
1091       dummy_idx[j]=local_size+j;
1092     }
1093     ierr = MatSetValues(sub_schurs->S_Ej_all,subset_size,dummy_idx,subset_size,dummy_idx,fill_vals,INSERT_VALUES);CHKERRQ(ierr);
1094     local_size += subset_size;
1095   }
1096   ierr = PetscFree2(dummy_idx,fill_vals);CHKERRQ(ierr);
1097   ierr = MatAssemblyBegin(sub_schurs->S_Ej_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1098   ierr = MatAssemblyEnd(sub_schurs->S_Ej_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1099 
1100   /* Global matrix of all assembled Schur on subsets */
1101   ierr = ISLocalToGlobalMappingCreate(PetscObjectComm((PetscObject)pc),1,local_size,all_local_idx_G,PETSC_COPY_VALUES,&l2gmap_subsets);CHKERRQ(ierr);
1102   ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,global_size,global_size,l2gmap_subsets,&work_mat);CHKERRQ(ierr);
1103   ierr = ISLocalToGlobalMappingDestroy(&l2gmap_subsets);CHKERRQ(ierr);
1104   ierr = MatISSetLocalMat(work_mat,sub_schurs->S_Ej_all);CHKERRQ(ierr);
1105   ierr = MatISGetMPIXAIJ(work_mat,MAT_INITIAL_MATRIX,&global_schur_subsets);CHKERRQ(ierr);
1106   ierr = MatDestroy(&work_mat);CHKERRQ(ierr);
1107   /* Create work vectors for sequential part of deluxe */
1108   ierr = MatCreateVecs(sub_schurs->S_Ej_all,&deluxe_ctx->seq_work1,&deluxe_ctx->seq_work2);CHKERRQ(ierr);
1109 
1110   /* Compute deluxe sequential scatter */
1111   ierr = ISCreateGeneral(PETSC_COMM_SELF,local_size,all_local_idx_B,PETSC_OWN_POINTER,&is_from);CHKERRQ(ierr);
1112   ierr = VecScatterCreate(pcbddc->work_scaling,is_from,deluxe_ctx->seq_work1,NULL,&deluxe_ctx->seq_scctx);CHKERRQ(ierr);
1113   ierr = ISDestroy(&is_from);CHKERRQ(ierr);
1114 
1115   /* Get local part of (\sum_j S_Ej) */
1116   for (i=0;i<local_size;i++) {
1117     all_local_idx_N[i] = all_local_idx_G[all_permutation_G[i]];
1118   }
1119   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),local_size,all_local_idx_N,PETSC_OWN_POINTER,&is_to);CHKERRQ(ierr);
1120   ierr = MatGetSubMatrices(global_schur_subsets,1,&is_to,&is_to,MAT_INITIAL_MATRIX,&submat_global_schur_subsets);CHKERRQ(ierr);
1121   ierr = MatDestroy(&global_schur_subsets);CHKERRQ(ierr);
1122   ierr = ISDestroy(&is_to);CHKERRQ(ierr);
1123   for (i=0;i<local_size;i++) {
1124     all_local_idx_G[all_permutation_G[i]] = i;
1125   }
1126   ierr = ISCreateGeneral(PETSC_COMM_SELF,local_size,all_local_idx_G,PETSC_OWN_POINTER,&is_from);CHKERRQ(ierr);
1127   ierr = ISSetPermutation(is_from);CHKERRQ(ierr);
1128   ierr = MatPermute(submat_global_schur_subsets[0],is_from,is_from,&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr);
1129   ierr = MatDestroyMatrices(1,&submat_global_schur_subsets);CHKERRQ(ierr);
1130   ierr = ISDestroy(&is_from);CHKERRQ(ierr);
1131   ierr = PetscFree(all_permutation_G);CHKERRQ(ierr);
1132 
1133   /* Create KSP object for sequential part of deluxe scaling */
1134   ierr = KSPCreate(PETSC_COMM_SELF,&deluxe_ctx->seq_ksp);CHKERRQ(ierr);
1135   ierr = KSPSetOperators(deluxe_ctx->seq_ksp,sub_schurs->sum_S_Ej_all,sub_schurs->sum_S_Ej_all);CHKERRQ(ierr);
1136   ierr = KSPSetType(deluxe_ctx->seq_ksp,KSPPREONLY);CHKERRQ(ierr);
1137   ierr = KSPGetPC(deluxe_ctx->seq_ksp,&pc_temp);CHKERRQ(ierr);
1138   ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr);
1139   ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr);
1140   ierr = PCFactorGetMatSolverPackage(pc_temp,(const MatSolverPackage*)&solver);CHKERRQ(ierr);
1141   if (solver && local_size) { /* if local_size is null, some external packages will report errors */
1142     PC     new_pc;
1143     PCType type;
1144     ierr = PCGetType(pc_temp,&type);CHKERRQ(ierr);
1145     ierr = KSPGetPC(deluxe_ctx->seq_ksp,&new_pc);CHKERRQ(ierr);
1146     ierr = PCSetType(new_pc,type);CHKERRQ(ierr);
1147     ierr = PCFactorSetMatSolverPackage(new_pc,solver);CHKERRQ(ierr);
1148   }
1149   ierr = PetscStrlen(((PetscObject)(pcbddc->ksp_D))->prefix,&len);CHKERRQ(ierr);
1150   len -= 10; /* remove "dirichlet_" */
1151   ierr = PetscStrncpy(ksp_prefix,((PetscObject)(pcbddc->ksp_D))->prefix,len+1);CHKERRQ(ierr);
1152   ierr = PetscStrcat(ksp_prefix,"deluxe_seq_");CHKERRQ(ierr);
1153   ierr = KSPSetOptionsPrefix(deluxe_ctx->seq_ksp,ksp_prefix);CHKERRQ(ierr);
1154   if (local_size) {
1155     ierr = KSPSetFromOptions(deluxe_ctx->seq_ksp);CHKERRQ(ierr);
1156   }
1157   ierr = KSPSetUp(deluxe_ctx->seq_ksp);CHKERRQ(ierr);
1158   PetscFunctionReturn(0);
1159 }
1160