xref: /petsc/src/ksp/pc/impls/bddc/bddcprivate.c (revision df6f6c19b095f1abc1ac27fcbb9c2092a4a2a65e)
1 #include <../src/ksp/pc/impls/bddc/bddc.h>
2 #include <../src/ksp/pc/impls/bddc/bddcprivate.h>
3 #include <petscblaslapack.h>
4 
5 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y);
6 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y);
7 
8 #undef __FUNCT__
9 #define __FUNCT__ "PCBDDCSetUpSolvers"
10 PetscErrorCode PCBDDCSetUpSolvers(PC pc)
11 {
12   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
13   PetscScalar    *coarse_submat_vals;
14   PetscErrorCode ierr;
15 
16   PetscFunctionBegin;
17   /* Compute matrix after change of basis and extract local submatrices */
18   ierr = PCBDDCSetUpLocalMatrices(pc);CHKERRQ(ierr);
19 
20   /* Setup local scatters R_to_B and (optionally) R_to_D */
21   /* PCBDDCSetUpLocalWorkVectors and PCBDDCSetUpLocalMatrices should be called first! */
22   ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr);
23 
24   /* Setup local solvers ksp_D and ksp_R */
25   /* PCBDDCSetUpLocalScatters should be called first! */
26   ierr = PCBDDCSetUpLocalSolvers(pc);CHKERRQ(ierr);
27 
28   /* Change global null space passed in by the user if change of basis has been requested */
29   if (pcbddc->NullSpace && pcbddc->ChangeOfBasisMatrix) {
30     ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr);
31   }
32 
33   /*
34      Setup local correction and local part of coarse basis.
35      Gives back the dense local part of the coarse matrix in column major ordering
36   */
37   ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr);
38 
39   /* Compute total number of coarse nodes and setup coarse solver */
40   ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr);
41 
42   /* free */
43   ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr);
44   PetscFunctionReturn(0);
45 }
46 
47 #undef __FUNCT__
48 #define __FUNCT__ "PCBDDCResetCustomization"
49 PetscErrorCode PCBDDCResetCustomization(PC pc)
50 {
51   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
52   PetscErrorCode ierr;
53 
54   PetscFunctionBegin;
55   ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr);
56   ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr);
57   ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr);
58   ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr);
59   ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr);
60   ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr);
61   ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr);
62   ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr);
63   ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr);
64   ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr);
65   ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr);
66   PetscFunctionReturn(0);
67 }
68 
69 #undef __FUNCT__
70 #define __FUNCT__ "PCBDDCResetTopography"
71 PetscErrorCode PCBDDCResetTopography(PC pc)
72 {
73   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
74   PetscErrorCode ierr;
75 
76   PetscFunctionBegin;
77   ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr);
78   ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
79   ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
80   ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr);
81   PetscFunctionReturn(0);
82 }
83 
84 #undef __FUNCT__
85 #define __FUNCT__ "PCBDDCResetSolvers"
86 PetscErrorCode PCBDDCResetSolvers(PC pc)
87 {
88   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
89   PetscErrorCode ierr;
90 
91   PetscFunctionBegin;
92   ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr);
93   ierr = VecDestroy(&pcbddc->coarse_rhs);CHKERRQ(ierr);
94   ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr);
95   ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr);
96   ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr);
97   ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr);
98   ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr);
99   ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr);
100   ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr);
101   ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr);
102   ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr);
103   ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr);
104   ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr);
105   ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr);
106   ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr);
107   ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
108   ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr);
109   ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr);
110   ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr);
111   ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
112   ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
113   ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr);
114   ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr);
115   ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
116   PetscFunctionReturn(0);
117 }
118 
119 #undef __FUNCT__
120 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors"
121 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc)
122 {
123   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
124   PC_IS          *pcis = (PC_IS*)pc->data;
125   VecType        impVecType;
126   PetscInt       n_constraints,n_R,old_size;
127   PetscErrorCode ierr;
128 
129   PetscFunctionBegin;
130   if (!pcbddc->ConstraintMatrix) {
131     SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created");
132   }
133   /* get sizes */
134   n_constraints = pcbddc->local_primal_size - pcbddc->n_actual_vertices;
135   n_R = pcis->n-pcbddc->n_actual_vertices;
136   ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr);
137   /* local work vectors (try to avoid unneeded work)*/
138   /* R nodes */
139   old_size = -1;
140   if (pcbddc->vec1_R) {
141     ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr);
142   }
143   if (n_R != old_size) {
144     ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr);
145     ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr);
146     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr);
147     ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr);
148     ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr);
149     ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr);
150   }
151   /* local primal dofs */
152   old_size = -1;
153   if (pcbddc->vec1_P) {
154     ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr);
155   }
156   if (pcbddc->local_primal_size != old_size) {
157     ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr);
158     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr);
159     ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr);
160     ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr);
161   }
162   /* local explicit constraints */
163   old_size = -1;
164   if (pcbddc->vec1_C) {
165     ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr);
166   }
167   if (n_constraints && n_constraints != old_size) {
168     ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr);
169     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr);
170     ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr);
171     ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr);
172   }
173   PetscFunctionReturn(0);
174 }
175 
176 #undef __FUNCT__
177 #define __FUNCT__ "PCBDDCSetUpCorrection"
178 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n)
179 {
180   PetscErrorCode         ierr;
181   /* pointers to pcis and pcbddc */
182   PC_IS*                 pcis = (PC_IS*)pc->data;
183   PC_BDDC*               pcbddc = (PC_BDDC*)pc->data;
184   /* submatrices of local problem */
185   Mat                    A_RV,A_VR,A_VV;
186   /* working matrices */
187   Mat                    M1,M2,M3,C_CR;
188   /* working vectors */
189   Vec                    vec1_C,vec2_C,vec1_V,vec2_V;
190   /* additional working stuff */
191   IS                     is_aux;
192   PetscScalar            *coarse_submat_vals; /* TODO: use a PETSc matrix */
193   const PetscScalar      *array,*row_cmat_values;
194   const PetscInt         *row_cmat_indices,*idx_R_local;
195   PetscInt               *idx_V_B,*auxindices;
196   PetscInt               n_vertices,n_constraints,size_of_constraint;
197   PetscInt               i,j,n_R,n_D,n_B;
198   PetscBool              unsymmetric_check;
199   /* matrix type (vector type propagated downstream from vec1_C and local matrix type) */
200   MatType                impMatType;
201   /* some shortcuts to scalars */
202   PetscScalar            zero=0.0,one=1.0,m_one=-1.0;
203   /* for debugging purposes */
204   PetscReal              *coarsefunctions_errors,*constraints_errors;
205 
206   PetscFunctionBegin;
207   /* get number of vertices (corners plus constraints with change of basis)
208      pcbddc->n_actual_vertices stores the actual number of vertices, pcbddc->n_vertices the number of corners computed */
209   n_vertices = pcbddc->n_actual_vertices;
210   n_constraints = pcbddc->local_primal_size-n_vertices;
211   /* Set Non-overlapping dimensions */
212   n_B = pcis->n_B; n_D = pcis->n - n_B;
213   n_R = pcis->n-n_vertices;
214 
215   /* Set types for local objects needed by BDDC precondtioner */
216   impMatType = MATSEQDENSE;
217 
218   /* Allocating some extra storage just to be safe */
219   ierr = PetscMalloc (pcis->n*sizeof(PetscInt),&auxindices);CHKERRQ(ierr);
220   for (i=0;i<pcis->n;i++) auxindices[i]=i;
221 
222   /* vertices in boundary numbering */
223   ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr);
224   ierr = ISGlobalToLocalMappingApply(pcbddc->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->primal_indices_local_idxs,&i,idx_V_B);CHKERRQ(ierr);
225   if (i != n_vertices) {
226     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i);
227   }
228 
229   /* Precompute stuffs needed for preprocessing and application of BDDC*/
230   if (n_constraints) {
231     /* see if we can save some allocations */
232     if (pcbddc->local_auxmat2) {
233       PetscInt on_R,on_constraints;
234       ierr = MatGetSize(pcbddc->local_auxmat2,&on_R,&on_constraints);CHKERRQ(ierr);
235       if (on_R != n_R || on_constraints != n_constraints) {
236         ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr);
237         ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr);
238       }
239     }
240     /* work vectors */
241     ierr = VecDuplicate(pcbddc->vec1_C,&vec1_C);CHKERRQ(ierr);
242     ierr = VecDuplicate(pcbddc->vec1_C,&vec2_C);CHKERRQ(ierr);
243     /* auxiliary matrices */
244     if (!pcbddc->local_auxmat2) {
245       ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->local_auxmat2);CHKERRQ(ierr);
246       ierr = MatSetSizes(pcbddc->local_auxmat2,n_R,n_constraints,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
247       ierr = MatSetType(pcbddc->local_auxmat2,impMatType);CHKERRQ(ierr);
248       ierr = MatSetUp(pcbddc->local_auxmat2);CHKERRQ(ierr);
249     }
250 
251     /* Extract constraints on R nodes: C_{CR}  */
252     ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr);
253     ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr);
254     ierr = ISDestroy(&is_aux);CHKERRQ(ierr);
255 
256     /* Assemble local_auxmat2 = - A_{RR}^{-1} C^T_{CR} needed by BDDC application */
257     for (i=0;i<n_constraints;i++) {
258       ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr);
259       /* Get row of constraint matrix in R numbering */
260       ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr);
261       ierr = VecSetValues(pcbddc->vec1_R,size_of_constraint,row_cmat_indices,row_cmat_values,INSERT_VALUES);CHKERRQ(ierr);
262       ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr);
263       ierr = VecAssemblyBegin(pcbddc->vec1_R);CHKERRQ(ierr);
264       ierr = VecAssemblyEnd(pcbddc->vec1_R);CHKERRQ(ierr);
265       /* Solve for row of constraint matrix in R numbering */
266       ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
267       /* Set values in local_auxmat2 */
268       ierr = VecGetArrayRead(pcbddc->vec2_R,&array);CHKERRQ(ierr);
269       ierr = MatSetValues(pcbddc->local_auxmat2,n_R,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
270       ierr = VecRestoreArrayRead(pcbddc->vec2_R,&array);CHKERRQ(ierr);
271     }
272     ierr = MatAssemblyBegin(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
273     ierr = MatAssemblyEnd(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
274     ierr = MatScale(pcbddc->local_auxmat2,m_one);CHKERRQ(ierr);
275 
276     /* Assemble explicitly M1 = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} needed in preproc  */
277     ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr);
278     ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr);
279     ierr = MatCreate(PETSC_COMM_SELF,&M1);CHKERRQ(ierr);
280     ierr = MatSetSizes(M1,n_constraints,n_constraints,n_constraints,n_constraints);CHKERRQ(ierr);
281     ierr = MatSetType(M1,impMatType);CHKERRQ(ierr);
282     ierr = MatSetUp(M1);CHKERRQ(ierr);
283     ierr = MatDuplicate(M1,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr);
284     ierr = MatZeroEntries(M2);CHKERRQ(ierr);
285     ierr = VecSet(vec1_C,m_one);CHKERRQ(ierr);
286     ierr = MatDiagonalSet(M2,vec1_C,INSERT_VALUES);CHKERRQ(ierr);
287     ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr);
288     ierr = MatDestroy(&M2);CHKERRQ(ierr);
289     ierr = MatDestroy(&M3);CHKERRQ(ierr);
290     /* Assemble local_auxmat1 = M1*C_{CR} needed by BDDC application in KSP and in preproc */
291     if (!pcbddc->local_auxmat1) {
292       ierr = MatMatMult(M1,C_CR,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr);
293     } else {
294       ierr = MatMatMult(M1,C_CR,MAT_REUSE_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr);
295     }
296   }
297 
298   /* Get submatrices from subdomain matrix */
299   if (n_vertices) {
300     PetscInt ibs,mbs;
301     PetscBool issbaij;
302     Mat newmat;
303 
304     ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr);
305     ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr);
306     ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr);
307     if (ibs != mbs) { /* need to convert to SEQAIJ */
308       ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
309       ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
310       ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
311       ierr = MatGetSubMatrix(newmat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr);
312       ierr = MatDestroy(&newmat);CHKERRQ(ierr);
313     } else {
314       /* this is safe */
315       ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr);
316       ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
317       if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */
318         ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
319         /* which of the two approaches is faster? */
320         /* ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
321         ierr = MatCreateTranspose(A_RV,&A_VR);CHKERRQ(ierr);*/
322         ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
323         ierr = MatCreateTranspose(A_VR,&A_RV);CHKERRQ(ierr);
324         ierr = MatDestroy(&newmat);CHKERRQ(ierr);
325       } else {
326         ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
327         ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
328       }
329     }
330     ierr = MatCreateVecs(A_RV,&vec1_V,NULL);CHKERRQ(ierr);
331     ierr = VecDuplicate(vec1_V,&vec2_V);CHKERRQ(ierr);
332     ierr = ISDestroy(&is_aux);CHKERRQ(ierr);
333   }
334 
335   /* Matrix of coarse basis functions (local) */
336   if (pcbddc->coarse_phi_B) {
337     PetscInt on_B,on_primal;
338     ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr);
339     if (on_B != n_B || on_primal != pcbddc->local_primal_size) {
340       ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr);
341       ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr);
342     }
343   }
344   if (pcbddc->coarse_phi_D) {
345     PetscInt on_D,on_primal;
346     ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,&on_primal);CHKERRQ(ierr);
347     if (on_D != n_D || on_primal != pcbddc->local_primal_size) {
348       ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr);
349       ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr);
350     }
351   }
352   if (!pcbddc->coarse_phi_B) {
353     ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_B);CHKERRQ(ierr);
354     ierr = MatSetSizes(pcbddc->coarse_phi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr);
355     ierr = MatSetType(pcbddc->coarse_phi_B,impMatType);CHKERRQ(ierr);
356     ierr = MatSetUp(pcbddc->coarse_phi_B);CHKERRQ(ierr);
357   }
358   if ( (pcbddc->switch_static || pcbddc->dbg_flag) && !pcbddc->coarse_phi_D ) {
359     ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_D);CHKERRQ(ierr);
360     ierr = MatSetSizes(pcbddc->coarse_phi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr);
361     ierr = MatSetType(pcbddc->coarse_phi_D,impMatType);CHKERRQ(ierr);
362     ierr = MatSetUp(pcbddc->coarse_phi_D);CHKERRQ(ierr);
363   }
364 
365   if (pcbddc->dbg_flag) {
366     ierr = ISGetIndices(pcbddc->is_R_local,&idx_R_local);CHKERRQ(ierr);
367     ierr = PetscMalloc1(2*pcbddc->local_primal_size,&coarsefunctions_errors);CHKERRQ(ierr);
368     ierr = PetscMalloc1(2*pcbddc->local_primal_size,&constraints_errors);CHKERRQ(ierr);
369   }
370   /* Subdomain contribution (Non-overlapping) to coarse matrix  */
371   ierr = PetscMalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr);
372 
373   /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */
374 
375   /* vertices */
376   for (i=0;i<n_vertices;i++) {
377     /* this should not be needed, but MatMult_BAIJ is broken when using compressed row routines */
378     ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); /* TODO: REMOVE IT */
379     ierr = VecSet(vec1_V,zero);CHKERRQ(ierr);
380     ierr = VecSetValue(vec1_V,i,one,INSERT_VALUES);CHKERRQ(ierr);
381     ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr);
382     ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr);
383     /* simplified solution of saddle point problem with null rhs on constraints multipliers */
384     ierr = MatMult(A_RV,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr);
385     ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
386     ierr = VecScale(pcbddc->vec1_R,m_one);CHKERRQ(ierr);
387     if (n_constraints) {
388       ierr = MatMult(pcbddc->local_auxmat1,pcbddc->vec1_R,vec1_C);CHKERRQ(ierr);
389       ierr = MatMultAdd(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
390       ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr);
391     }
392     ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr);
393     ierr = MatMultAdd(A_VV,vec1_V,vec2_V,vec2_V);CHKERRQ(ierr);
394 
395     /* Set values in coarse basis function and subdomain part of coarse_mat */
396     /* coarse basis functions */
397     ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
398     ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
399     ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
400     ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
401     ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
402     ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
403     ierr = MatSetValue(pcbddc->coarse_phi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr);
404     if (pcbddc->switch_static || pcbddc->dbg_flag) {
405       ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
406       ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
407       ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
408       ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
409       ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
410     }
411     /* subdomain contribution to coarse matrix. WARNING -> column major ordering */
412     ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr);
413     ierr = PetscMemcpy(&coarse_submat_vals[i*pcbddc->local_primal_size],array,n_vertices*sizeof(PetscScalar));CHKERRQ(ierr);
414     ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr);
415     if (n_constraints) {
416       ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr);
417       ierr = PetscMemcpy(&coarse_submat_vals[i*pcbddc->local_primal_size+n_vertices],array,n_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
418       ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr);
419     }
420 
421     /* check */
422     if (pcbddc->dbg_flag) {
423       /* assemble subdomain vector on local nodes */
424       ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr);
425       ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
426       if (n_R) {
427         ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr);
428       }
429       ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
430       ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],one,INSERT_VALUES);CHKERRQ(ierr);
431       ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
432       ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
433       /* assemble subdomain vector of lagrange multipliers (i.e. primal nodes) */
434       ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr);
435       ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr);
436       ierr = VecSetValues(pcbddc->vec1_P,n_vertices,auxindices,array,INSERT_VALUES);CHKERRQ(ierr);
437       ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr);
438       if (n_constraints) {
439         ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr);
440         ierr = VecSetValues(pcbddc->vec1_P,n_constraints,&auxindices[n_vertices],array,INSERT_VALUES);CHKERRQ(ierr);
441         ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr);
442       }
443       ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
444       ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
445       ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr);
446       /* check saddle point solution */
447       ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr);
448       ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr);
449       ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i]);CHKERRQ(ierr);
450       ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr);
451       /* shift by the identity matrix */
452       ierr = VecSetValue(pcbddc->vec1_P,i,m_one,ADD_VALUES);CHKERRQ(ierr);
453       ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
454       ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
455       ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i]);CHKERRQ(ierr);
456     }
457   }
458 
459   /* constraints */
460   for (i=0;i<n_constraints;i++) {
461     ierr = VecSet(vec2_C,zero);CHKERRQ(ierr);
462     ierr = VecSetValue(vec2_C,i,m_one,INSERT_VALUES);CHKERRQ(ierr);
463     ierr = VecAssemblyBegin(vec2_C);CHKERRQ(ierr);
464     ierr = VecAssemblyEnd(vec2_C);CHKERRQ(ierr);
465     /* simplified solution of saddle point problem with null rhs on vertices multipliers */
466     ierr = MatMult(M1,vec2_C,vec1_C);CHKERRQ(ierr);
467     ierr = MatMult(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr);
468     ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr);
469     if (n_vertices) {
470       ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr);
471     }
472     /* Set values in coarse basis function and subdomain part of coarse_mat */
473     /* coarse basis functions */
474     j = i+n_vertices; /* don't touch this! */
475     ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
476     ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
477     ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
478     ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
479     ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&j,array,INSERT_VALUES);CHKERRQ(ierr);
480     ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
481     if (pcbddc->switch_static || pcbddc->dbg_flag) {
482       ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
483       ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
484       ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
485       ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&j,array,INSERT_VALUES);CHKERRQ(ierr);
486       ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
487     }
488     /* subdomain contribution to coarse matrix. WARNING -> column major ordering */
489     if (n_vertices) {
490       ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr);
491       ierr = PetscMemcpy(&coarse_submat_vals[j*pcbddc->local_primal_size],array,n_vertices*sizeof(PetscScalar));CHKERRQ(ierr);
492       ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr);
493     }
494     ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr);
495     ierr = PetscMemcpy(&coarse_submat_vals[j*pcbddc->local_primal_size+n_vertices],array,n_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
496     ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr);
497 
498     if (pcbddc->dbg_flag) {
499       /* assemble subdomain vector on nodes */
500       ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr);
501       ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
502       if (n_R) {
503         ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr);
504       }
505       ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
506       ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
507       ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
508       /* assemble subdomain vector of lagrange multipliers */
509       ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr);
510       if (n_vertices) {
511         ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr);
512         ierr = VecSetValues(pcbddc->vec1_P,n_vertices,auxindices,array,INSERT_VALUES);CHKERRQ(ierr);
513         ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr);
514       }
515       ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr);
516       ierr = VecSetValues(pcbddc->vec1_P,n_constraints,&auxindices[n_vertices],array,INSERT_VALUES);CHKERRQ(ierr);
517       ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr);
518       ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
519       ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
520       ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr);
521       /* check saddle point solution */
522       ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr);
523       ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr);
524       ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[j]);CHKERRQ(ierr);
525       ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr);
526       /* shift by the identity matrix */
527       ierr = VecSetValue(pcbddc->vec1_P,j,m_one,ADD_VALUES);CHKERRQ(ierr);
528       ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
529       ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
530       ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[j]);CHKERRQ(ierr);
531     }
532   }
533   /* call assembling routines for local coarse basis */
534   ierr = MatAssemblyBegin(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
535   ierr = MatAssemblyEnd(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
536   if (pcbddc->switch_static || pcbddc->dbg_flag) {
537     ierr = MatAssemblyBegin(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
538     ierr = MatAssemblyEnd(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
539   }
540 
541   /* compute other basis functions for non-symmetric problems */
542   /*ierr = MatIsSymmetric(pc->pmat,0.,&pcbddc->issym);CHKERRQ(ierr);*/
543   { /* this is a temporary workaround since seqbaij matrices does not have support for symmetry checking */
544     PetscBool setsym;
545     ierr = MatIsSymmetricKnown(pc->pmat,&setsym,&pcbddc->issym);CHKERRQ(ierr);
546     if (!setsym) pcbddc->issym = PETSC_FALSE;
547   }
548 
549   if (!pcbddc->issym) {
550     if (!pcbddc->coarse_psi_B) {
551       ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_psi_B);CHKERRQ(ierr);
552       ierr = MatSetSizes(pcbddc->coarse_psi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr);
553       ierr = MatSetType(pcbddc->coarse_psi_B,impMatType);CHKERRQ(ierr);
554       ierr = MatSetUp(pcbddc->coarse_psi_B);CHKERRQ(ierr);
555     }
556     if ( (pcbddc->switch_static || pcbddc->dbg_flag) && !pcbddc->coarse_psi_D) {
557       ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_psi_D);CHKERRQ(ierr);
558       ierr = MatSetSizes(pcbddc->coarse_psi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr);
559       ierr = MatSetType(pcbddc->coarse_psi_D,impMatType);CHKERRQ(ierr);
560       ierr = MatSetUp(pcbddc->coarse_psi_D);CHKERRQ(ierr);
561     }
562     for (i=0;i<pcbddc->local_primal_size;i++) {
563       if (n_constraints) {
564         ierr = VecSet(vec1_C,zero);CHKERRQ(ierr);
565         for (j=0;j<n_constraints;j++) {
566           ierr = VecSetValue(vec1_C,j,coarse_submat_vals[(j+n_vertices)*pcbddc->local_primal_size+i],INSERT_VALUES);CHKERRQ(ierr);
567         }
568         ierr = VecAssemblyBegin(vec1_C);CHKERRQ(ierr);
569         ierr = VecAssemblyEnd(vec1_C);CHKERRQ(ierr);
570       }
571       if (i<n_vertices) {
572         ierr = VecSet(vec1_V,zero);CHKERRQ(ierr);
573         ierr = VecSetValue(vec1_V,i,m_one,INSERT_VALUES);CHKERRQ(ierr);
574         ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr);
575         ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr);
576         ierr = MatMultTranspose(A_VR,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr);
577         if (n_constraints) {
578           ierr = MatMultTransposeAdd(C_CR,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
579         }
580       } else {
581         ierr = MatMultTranspose(C_CR,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr);
582       }
583       ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
584       ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
585       ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
586       ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
587       ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
588       ierr = MatSetValues(pcbddc->coarse_psi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
589       ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
590       if (i<n_vertices) {
591         ierr = MatSetValue(pcbddc->coarse_psi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr);
592       }
593       if (pcbddc->switch_static || pcbddc->dbg_flag) {
594         ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
595         ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
596         ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
597         ierr = MatSetValues(pcbddc->coarse_psi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
598         ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
599       }
600 
601       if (pcbddc->dbg_flag) {
602         /* assemble subdomain vector on nodes */
603         ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr);
604         ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
605         if (n_R) {
606           ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr);
607         }
608         ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
609         if (i<n_vertices) {
610           ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],one,INSERT_VALUES);CHKERRQ(ierr);
611         }
612         ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
613         ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
614         /* assemble subdomain vector of lagrange multipliers */
615         for (j=0;j<pcbddc->local_primal_size;j++) {
616           ierr = VecSetValue(pcbddc->vec1_P,j,-coarse_submat_vals[j*pcbddc->local_primal_size+i],INSERT_VALUES);CHKERRQ(ierr);
617         }
618         ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
619         ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
620         /* check saddle point solution */
621         ierr = MatMultTranspose(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr);
622         ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr);
623         ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i+pcbddc->local_primal_size]);CHKERRQ(ierr);
624         ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr);
625         /* shift by the identity matrix */
626         ierr = VecSetValue(pcbddc->vec1_P,i,m_one,ADD_VALUES);CHKERRQ(ierr);
627         ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
628         ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
629         ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i+pcbddc->local_primal_size]);CHKERRQ(ierr);
630       }
631     }
632     ierr = MatAssemblyBegin(pcbddc->coarse_psi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
633     ierr = MatAssemblyEnd(pcbddc->coarse_psi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
634     if (pcbddc->switch_static || pcbddc->dbg_flag) {
635       ierr = MatAssemblyBegin(pcbddc->coarse_psi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
636       ierr = MatAssemblyEnd(pcbddc->coarse_psi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
637     }
638     unsymmetric_check = PETSC_TRUE;
639   } else { /* take references to already computed coarse basis */
640     unsymmetric_check = PETSC_FALSE;
641     ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr);
642     pcbddc->coarse_psi_B = pcbddc->coarse_phi_B;
643     if (pcbddc->coarse_phi_D) {
644       ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr);
645       pcbddc->coarse_psi_D = pcbddc->coarse_phi_D;
646     }
647   }
648   ierr = PetscFree(idx_V_B);CHKERRQ(ierr);
649   /* Checking coarse_sub_mat and coarse basis functios */
650   /* Symmetric case     : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
651   /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
652   if (pcbddc->dbg_flag) {
653     Mat         coarse_sub_mat;
654     Mat         AUXMAT,TM1,TM2,TM3,TM4;
655     Mat         coarse_phi_D,coarse_phi_B;
656     Mat         coarse_psi_D,coarse_psi_B;
657     Mat         A_II,A_BB,A_IB,A_BI;
658     MatType     checkmattype=MATSEQAIJ;
659     PetscReal   real_value;
660 
661     ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr);
662     ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr);
663     ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr);
664     ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr);
665     ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr);
666     ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr);
667     if (unsymmetric_check) {
668       ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr);
669       ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr);
670     }
671     ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr);
672 
673     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
674     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat and local basis functions\n");CHKERRQ(ierr);
675     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
676     if (unsymmetric_check) {
677       ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
678       ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr);
679       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
680       ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
681       ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr);
682       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
683       ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
684       ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr);
685       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
686       ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
687       ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr);
688       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
689     } else {
690       ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr);
691       ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr);
692       ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
693       ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr);
694       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
695       ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
696       ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr);
697       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
698     }
699     ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
700     ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
701     ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
702     ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr);
703     ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
704     ierr = MatNorm(TM1,NORM_INFINITY,&real_value);CHKERRQ(ierr);
705     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
706     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"----------------------------------\n");CHKERRQ(ierr);
707     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d \n",PetscGlobalRank);CHKERRQ(ierr);
708     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"matrix error = % 1.14e\n",real_value);CHKERRQ(ierr);
709     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"coarse functions (phi) errors\n");CHKERRQ(ierr);
710     for (i=0;i<pcbddc->local_primal_size;i++) {
711       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i,coarsefunctions_errors[i]);CHKERRQ(ierr);
712     }
713     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"constraints (phi) errors\n");CHKERRQ(ierr);
714     for (i=0;i<pcbddc->local_primal_size;i++) {
715       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i,constraints_errors[i]);CHKERRQ(ierr);
716     }
717     if (unsymmetric_check) {
718       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"coarse functions (psi) errors\n");CHKERRQ(ierr);
719       for (i=pcbddc->local_primal_size;i<2*pcbddc->local_primal_size;i++) {
720         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i-pcbddc->local_primal_size,coarsefunctions_errors[i]);CHKERRQ(ierr);
721       }
722       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"constraints (psi) errors\n");CHKERRQ(ierr);
723       for (i=pcbddc->local_primal_size;i<2*pcbddc->local_primal_size;i++) {
724         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i-pcbddc->local_primal_size,constraints_errors[i]);CHKERRQ(ierr);
725       }
726     }
727     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
728     ierr = MatDestroy(&A_II);CHKERRQ(ierr);
729     ierr = MatDestroy(&A_BB);CHKERRQ(ierr);
730     ierr = MatDestroy(&A_IB);CHKERRQ(ierr);
731     ierr = MatDestroy(&A_BI);CHKERRQ(ierr);
732     ierr = MatDestroy(&TM1);CHKERRQ(ierr);
733     ierr = MatDestroy(&TM2);CHKERRQ(ierr);
734     ierr = MatDestroy(&TM3);CHKERRQ(ierr);
735     ierr = MatDestroy(&TM4);CHKERRQ(ierr);
736     ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr);
737     ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr);
738     if (unsymmetric_check) {
739       ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr);
740       ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr);
741     }
742     ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr);
743     ierr = ISRestoreIndices(pcbddc->is_R_local,&idx_R_local);CHKERRQ(ierr);
744     ierr = PetscFree(coarsefunctions_errors);CHKERRQ(ierr);
745     ierr = PetscFree(constraints_errors);CHKERRQ(ierr);
746   }
747   /* free memory */
748   if (n_vertices) {
749     ierr = VecDestroy(&vec1_V);CHKERRQ(ierr);
750     ierr = VecDestroy(&vec2_V);CHKERRQ(ierr);
751     ierr = MatDestroy(&A_RV);CHKERRQ(ierr);
752     ierr = MatDestroy(&A_VR);CHKERRQ(ierr);
753     ierr = MatDestroy(&A_VV);CHKERRQ(ierr);
754   }
755   if (n_constraints) {
756     ierr = VecDestroy(&vec1_C);CHKERRQ(ierr);
757     ierr = VecDestroy(&vec2_C);CHKERRQ(ierr);
758     ierr = MatDestroy(&M1);CHKERRQ(ierr);
759     ierr = MatDestroy(&C_CR);CHKERRQ(ierr);
760   }
761   ierr = PetscFree(auxindices);CHKERRQ(ierr);
762   /* get back data */
763   *coarse_submat_vals_n = coarse_submat_vals;
764   PetscFunctionReturn(0);
765 }
766 
767 #undef __FUNCT__
768 #define __FUNCT__ "PCBDDCSetUpLocalMatrices"
769 PetscErrorCode PCBDDCSetUpLocalMatrices(PC pc)
770 {
771   PC_IS*            pcis = (PC_IS*)(pc->data);
772   PC_BDDC*          pcbddc = (PC_BDDC*)pc->data;
773   Mat_IS*           matis = (Mat_IS*)pc->pmat->data;
774   PetscBool         issbaij,isseqaij;
775   /* manage repeated solves */
776   MatReuse          reuse;
777   PetscErrorCode    ierr;
778 
779   PetscFunctionBegin;
780   if ( (pcbddc->use_change_of_basis && !pcbddc->ChangeOfBasisMatrix) || (pcbddc->user_ChangeOfBasisMatrix && !pcbddc->ChangeOfBasisMatrix) ) {
781     SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Change of basis matrix has not been created");
782   }
783   /* get mat flags */
784   reuse = MAT_INITIAL_MATRIX;
785   if (pc->setupcalled) {
786     if (pc->flag == SAME_NONZERO_PATTERN) {
787       reuse = MAT_REUSE_MATRIX;
788     } else {
789       reuse = MAT_INITIAL_MATRIX;
790     }
791   }
792   if (reuse == MAT_INITIAL_MATRIX) {
793     ierr = MatDestroy(&pcis->A_II);CHKERRQ(ierr);
794     ierr = MatDestroy(&pcis->A_IB);CHKERRQ(ierr);
795     ierr = MatDestroy(&pcis->A_BI);CHKERRQ(ierr);
796     ierr = MatDestroy(&pcis->A_BB);CHKERRQ(ierr);
797     ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
798   }
799 
800   /* transform local matrices if needed */
801   if (pcbddc->ChangeOfBasisMatrix) {
802     Mat       *change_mat_all;
803     IS        is_local,is_global;
804     PetscBool sorted;
805     PetscInt  *idxs_perm;
806 
807     ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n,0,1,&is_local);CHKERRQ(ierr);
808     ierr = ISLocalToGlobalMappingApplyIS(matis->mapping,is_local,&is_global);CHKERRQ(ierr);
809     ierr = ISDestroy(&is_local);CHKERRQ(ierr);
810     ierr = ISSorted(is_global,&sorted);CHKERRQ(ierr);
811     if (!sorted) {
812       const PetscInt *idxs;
813       PetscInt *idxs_sorted,i;
814 
815       ierr = PetscMalloc1(pcis->n,&idxs_perm);CHKERRQ(ierr);
816       ierr = PetscMalloc1(pcis->n,&idxs_sorted);CHKERRQ(ierr);
817       for (i=0;i<pcis->n;i++) {
818         idxs_perm[i] = i;
819       }
820       ierr = ISGetIndices(is_global,&idxs);CHKERRQ(ierr);
821       ierr = PetscSortIntWithPermutation(pcis->n,idxs,idxs_perm);CHKERRQ(ierr);
822       for (i=0;i<pcis->n;i++) {
823         idxs_sorted[i] = idxs[idxs_perm[i]];
824       }
825       ierr = ISRestoreIndices(is_global,&idxs);CHKERRQ(ierr);
826       ierr = ISDestroy(&is_global);CHKERRQ(ierr);
827       ierr = ISCreateGeneral(PETSC_COMM_SELF,pcis->n,idxs_sorted,PETSC_OWN_POINTER,&is_global);CHKERRQ(ierr);
828     }
829 
830     /* get change of basis on the whole set of local dofs */
831     ierr = MatGetSubMatrices(pcbddc->ChangeOfBasisMatrix,1,&is_global,&is_global,MAT_INITIAL_MATRIX,&change_mat_all);CHKERRQ(ierr);
832 
833     if (!sorted) {
834       Mat      new_mat;
835       IS       is_perm;
836       PetscInt *idxs,i;
837 
838       ierr = PetscMalloc1(pcis->n,&idxs);CHKERRQ(ierr);
839       for (i=0;i<pcis->n;i++) {
840         idxs[idxs_perm[i]] = i;
841       }
842       ierr = PetscFree(idxs_perm);CHKERRQ(ierr);
843       ierr = ISCreateGeneral(PETSC_COMM_SELF,pcis->n,idxs,PETSC_OWN_POINTER,&is_perm);CHKERRQ(ierr);
844       ierr = ISSetPermutation(is_perm);CHKERRQ(ierr);
845       ierr = MatPermute(change_mat_all[0],is_perm,is_perm,&new_mat);CHKERRQ(ierr);
846       ierr = MatDestroy(&change_mat_all[0]);CHKERRQ(ierr);
847       change_mat_all[0] = new_mat;
848       ierr = ISDestroy(&is_perm);CHKERRQ(ierr);
849     }
850 
851     /* check */
852     if (pcbddc->dbg_flag) {
853       Vec       x,x_change;
854       PetscReal error;
855 
856       ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr);
857       ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr);
858       ierr = VecSetRandom(x,NULL);CHKERRQ(ierr);
859       ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr);
860       ierr = VecScatterBegin(matis->ctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
861       ierr = VecScatterEnd(matis->ctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
862       ierr = MatMult(change_mat_all[0],pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr);
863       ierr = VecScatterBegin(matis->ctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
864       ierr = VecScatterEnd(matis->ctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
865       ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr);
866       ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr);
867       ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr);
868       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
869       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on N: %1.6e\n",error);CHKERRQ(ierr);
870       ierr = VecDestroy(&x);CHKERRQ(ierr);
871       ierr = VecDestroy(&x_change);CHKERRQ(ierr);
872     }
873 
874     /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */
875     ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr);
876     if (isseqaij) {
877       ierr = MatPtAP(matis->A,change_mat_all[0],reuse,2.0,&pcbddc->local_mat);CHKERRQ(ierr);
878     } else {
879       Mat work_mat;
880       ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr);
881       ierr = MatPtAP(work_mat,change_mat_all[0],reuse,2.0,&pcbddc->local_mat);CHKERRQ(ierr);
882       ierr = MatDestroy(&work_mat);CHKERRQ(ierr);
883     }
884     /*
885     ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
886     ierr = MatView(change_mat_all,(PetscViewer)0);CHKERRQ(ierr);
887     */
888     ierr = MatDestroyMatrices(1,&change_mat_all);CHKERRQ(ierr);
889     ierr = ISDestroy(&is_global);CHKERRQ(ierr);
890   } else {
891     /* without change of basis, the local matrix is unchanged */
892     if (!pcbddc->local_mat) {
893       ierr = PetscObjectReference((PetscObject)matis->A);CHKERRQ(ierr);
894       pcbddc->local_mat = matis->A;
895     }
896   }
897 
898   /* get submatrices */
899   ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_I_local,pcis->is_I_local,reuse,&pcis->A_II);CHKERRQ(ierr);
900   ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_B_local,reuse,&pcis->A_BB);CHKERRQ(ierr);
901   ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
902   if (!issbaij) {
903     ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_I_local,pcis->is_B_local,reuse,&pcis->A_IB);CHKERRQ(ierr);
904     ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_I_local,reuse,&pcis->A_BI);CHKERRQ(ierr);
905   } else {
906     Mat newmat;
907     ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
908     ierr = MatGetSubMatrix(newmat,pcis->is_I_local,pcis->is_B_local,reuse,&pcis->A_IB);CHKERRQ(ierr);
909     ierr = MatGetSubMatrix(newmat,pcis->is_B_local,pcis->is_I_local,reuse,&pcis->A_BI);CHKERRQ(ierr);
910     ierr = MatDestroy(&newmat);CHKERRQ(ierr);
911   }
912   PetscFunctionReturn(0);
913 }
914 
915 #undef __FUNCT__
916 #define __FUNCT__ "PCBDDCSetUpLocalScatters"
917 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc)
918 {
919   PC_IS*         pcis = (PC_IS*)(pc->data);
920   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
921   IS             is_aux1,is_aux2;
922   PetscInt       *aux_array1,*aux_array2,*is_indices,*idx_R_local;
923   PetscInt       n_vertices,i,j,n_R,n_D,n_B;
924   PetscInt       vbs,bs;
925   PetscBT        bitmask;
926   PetscErrorCode ierr;
927 
928   PetscFunctionBegin;
929   /*
930     No need to setup local scatters if
931       - primal space is unchanged
932         AND
933       - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains)
934         AND
935       - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine
936   */
937   if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) {
938     PetscFunctionReturn(0);
939   }
940   /* destroy old objects */
941   ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr);
942   ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr);
943   ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr);
944   /* Set Non-overlapping dimensions */
945   n_B = pcis->n_B; n_D = pcis->n - n_B;
946   n_vertices = pcbddc->n_actual_vertices;
947   /* create auxiliary bitmask */
948   ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr);
949   for (i=0;i<n_vertices;i++) {
950     ierr = PetscBTSet(bitmask,pcbddc->primal_indices_local_idxs[i]);CHKERRQ(ierr);
951   }
952 
953   /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */
954   ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr);
955   for (i=0, n_R=0; i<pcis->n; i++) {
956     if (!PetscBTLookup(bitmask,i)) {
957       idx_R_local[n_R] = i;
958       n_R++;
959     }
960   }
961 
962   /* Block code */
963   vbs = 1;
964   ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr);
965   if (bs>1 && !(n_vertices%bs)) {
966     PetscBool is_blocked = PETSC_TRUE;
967     PetscInt  *vary;
968     /* Verify if the vertex indices correspond to each element in a block (code taken from sbaij2.c) */
969     ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr);
970     ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr);
971     for (i=0; i<n_vertices; i++) vary[pcbddc->primal_indices_local_idxs[i]/bs]++;
972     for (i=0; i<n_vertices; i++) {
973       if (vary[i]!=0 && vary[i]!=bs) {
974         is_blocked = PETSC_FALSE;
975         break;
976       }
977     }
978     if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */
979       vbs = bs;
980       for (i=0;i<n_R/vbs;i++) {
981         idx_R_local[i] = idx_R_local[vbs*i]/vbs;
982       }
983     }
984     ierr = PetscFree(vary);CHKERRQ(ierr);
985   }
986   ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr);
987   ierr = PetscFree(idx_R_local);CHKERRQ(ierr);
988 
989   /* print some info if requested */
990   if (pcbddc->dbg_flag) {
991     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
992     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
993     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
994     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr);
995     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr);
996     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices,pcbddc->local_primal_size);CHKERRQ(ierr);
997     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"pcbddc->n_vertices = %d, pcbddc->n_constraints = %d\n",pcbddc->n_vertices,pcbddc->n_constraints);CHKERRQ(ierr);
998     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
999   }
1000 
1001   /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */
1002   ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr);
1003   ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr);
1004   ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr);
1005   ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1006   for (i=0; i<n_D; i++) {
1007     ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr);
1008   }
1009   ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1010   for (i=0, j=0; i<n_R; i++) {
1011     if (!PetscBTLookup(bitmask,idx_R_local[i])) {
1012       aux_array1[j++] = i;
1013     }
1014   }
1015   ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr);
1016   ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1017   for (i=0, j=0; i<n_B; i++) {
1018     if (!PetscBTLookup(bitmask,is_indices[i])) {
1019       aux_array2[j++] = i;
1020     }
1021   }
1022   ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1023   ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr);
1024   ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr);
1025   ierr = ISDestroy(&is_aux1);CHKERRQ(ierr);
1026   ierr = ISDestroy(&is_aux2);CHKERRQ(ierr);
1027 
1028   if (pcbddc->switch_static || pcbddc->dbg_flag) {
1029     ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr);
1030     for (i=0, j=0; i<n_R; i++) {
1031       if (PetscBTLookup(bitmask,idx_R_local[i])) {
1032         aux_array1[j++] = i;
1033       }
1034     }
1035     ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr);
1036     ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr);
1037     ierr = ISDestroy(&is_aux1);CHKERRQ(ierr);
1038   }
1039   ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr);
1040   ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr);
1041   PetscFunctionReturn(0);
1042 }
1043 
1044 
1045 #undef __FUNCT__
1046 #define __FUNCT__ "PCBDDCSetUpLocalSolvers"
1047 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc)
1048 {
1049   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
1050   PC_IS          *pcis = (PC_IS*)pc->data;
1051   PC             pc_temp;
1052   Mat            A_RR;
1053   MatReuse       reuse;
1054   PetscScalar    m_one = -1.0;
1055   PetscReal      value;
1056   PetscInt       n_D,n_R,ibs,mbs;
1057   PetscBool      use_exact,use_exact_reduced,issbaij;
1058   PetscErrorCode ierr;
1059   /* prefixes stuff */
1060   char           dir_prefix[256],neu_prefix[256],str_level[16];
1061   size_t         len;
1062 
1063   PetscFunctionBegin;
1064 
1065   /* compute prefixes */
1066   ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr);
1067   ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr);
1068   if (!pcbddc->current_level) {
1069     ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
1070     ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
1071     ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr);
1072     ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr);
1073   } else {
1074     ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr);
1075     sprintf(str_level,"l%d_",(int)(pcbddc->current_level));
1076     ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr);
1077     len -= 15; /* remove "pc_bddc_coarse_" */
1078     if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */
1079     if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */
1080     ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr);
1081     ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr);
1082     ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr);
1083     ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr);
1084     ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr);
1085     ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr);
1086   }
1087 
1088   /* DIRICHLET PROBLEM */
1089   /* Matrix for Dirichlet problem is pcis->A_II */
1090   ierr = ISGetSize(pcis->is_I_local,&n_D);CHKERRQ(ierr);
1091   if (!pcbddc->ksp_D) { /* create object if not yet build */
1092     ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr);
1093     ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr);
1094     /* default */
1095     ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr);
1096     ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr);
1097     ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
1098     ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr);
1099     if (issbaij) {
1100       ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr);
1101     } else {
1102       ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr);
1103     }
1104     /* Allow user's customization */
1105     ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr);
1106     ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
1107   }
1108   ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr);
1109   /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
1110   if (!n_D) {
1111     ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr);
1112     ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr);
1113   }
1114   /* Set Up KSP for Dirichlet problem of BDDC */
1115   ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr);
1116   /* set ksp_D into pcis data */
1117   ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr);
1118   ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr);
1119   pcis->ksp_D = pcbddc->ksp_D;
1120 
1121   /* NEUMANN PROBLEM */
1122   /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */
1123   ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr);
1124   if (pcbddc->ksp_R) { /* already created ksp */
1125     PetscInt nn_R;
1126     ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr);
1127     ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr);
1128     ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr);
1129     if (nn_R != n_R) { /* old ksp is not reusable, so reset it */
1130       ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr);
1131       ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1132       reuse = MAT_INITIAL_MATRIX;
1133     } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */
1134       if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */
1135         ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1136         reuse = MAT_INITIAL_MATRIX;
1137       } else { /* safe to reuse the matrix */
1138         reuse = MAT_REUSE_MATRIX;
1139       }
1140     }
1141     /* last check */
1142     if (pc->flag == DIFFERENT_NONZERO_PATTERN) {
1143       ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1144       reuse = MAT_INITIAL_MATRIX;
1145     }
1146   } else { /* first time, so we need to create the matrix */
1147     reuse = MAT_INITIAL_MATRIX;
1148   }
1149   /* extract A_RR */
1150   ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr);
1151   ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr);
1152   if (ibs != mbs) {
1153     Mat newmat;
1154     ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
1155     ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr);
1156     ierr = MatDestroy(&newmat);CHKERRQ(ierr);
1157   } else {
1158     ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr);
1159   }
1160   if (!pcbddc->ksp_R) { /* create object if not present */
1161     ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr);
1162     ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr);
1163     /* default */
1164     ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr);
1165     ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr);
1166     ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr);
1167     ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
1168     if (issbaij) {
1169       ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr);
1170     } else {
1171       ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr);
1172     }
1173     /* Allow user's customization */
1174     ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr);
1175     ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
1176   }
1177   ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr);
1178   /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
1179   if (!n_R) {
1180     ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr);
1181     ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr);
1182   }
1183   /* Set Up KSP for Neumann problem of BDDC */
1184   ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr);
1185 
1186   /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */
1187   if (pcbddc->NullSpace || pcbddc->dbg_flag) {
1188     /* Dirichlet */
1189     ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr);
1190     ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr);
1191     ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr);
1192     ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr);
1193     ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr);
1194     /* need to be adapted? */
1195     use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE);
1196     ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
1197     ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr);
1198     /* print info */
1199     if (pcbddc->dbg_flag) {
1200       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1201       ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
1202       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
1203       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Checking solution of Dirichlet and Neumann problems\n");CHKERRQ(ierr);
1204       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr);
1205       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1206     }
1207     if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) {
1208       ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcis->is_I_local);CHKERRQ(ierr);
1209     }
1210 
1211     /* Neumann */
1212     ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr);
1213     ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
1214     ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr);
1215     ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr);
1216     ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr);
1217     /* need to be adapted? */
1218     use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE);
1219     ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
1220     /* print info */
1221     if (pcbddc->dbg_flag) {
1222       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr);
1223       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1224     }
1225     if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */
1226       ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcbddc->is_R_local);CHKERRQ(ierr);
1227     }
1228   }
1229   /* free Neumann problem's matrix */
1230   ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1231   PetscFunctionReturn(0);
1232 }
1233 
1234 #undef __FUNCT__
1235 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection"
1236 static PetscErrorCode  PCBDDCSolveSubstructureCorrection(PC pc, Vec rhs, Vec sol, Vec work, PetscBool applytranspose)
1237 {
1238   PetscErrorCode ierr;
1239   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1240 
1241   PetscFunctionBegin;
1242   if (applytranspose) {
1243     if (pcbddc->local_auxmat1) {
1244       ierr = MatMultTranspose(pcbddc->local_auxmat2,rhs,work);CHKERRQ(ierr);
1245       ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,work,rhs,rhs);CHKERRQ(ierr);
1246     }
1247     ierr = KSPSolveTranspose(pcbddc->ksp_R,rhs,sol);CHKERRQ(ierr);
1248   } else {
1249     ierr = KSPSolve(pcbddc->ksp_R,rhs,sol);CHKERRQ(ierr);
1250     if (pcbddc->local_auxmat1) {
1251       ierr = MatMult(pcbddc->local_auxmat1,sol,work);CHKERRQ(ierr);
1252       ierr = MatMultAdd(pcbddc->local_auxmat2,work,sol,sol);CHKERRQ(ierr);
1253     }
1254   }
1255   PetscFunctionReturn(0);
1256 }
1257 
1258 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */
1259 #undef __FUNCT__
1260 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner"
1261 PetscErrorCode  PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose)
1262 {
1263   PetscErrorCode ierr;
1264   PC_BDDC*        pcbddc = (PC_BDDC*)(pc->data);
1265   PC_IS*            pcis = (PC_IS*)  (pc->data);
1266   const PetscScalar zero = 0.0;
1267 
1268   PetscFunctionBegin;
1269   /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */
1270   if (applytranspose) {
1271     ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr);
1272     if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); }
1273   } else {
1274     ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr);
1275     if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); }
1276   }
1277   /* start communications from local primal nodes to rhs of coarse solver */
1278   ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr);
1279   ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1280   ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1281 
1282   /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */
1283   /* TODO remove null space when doing multilevel */
1284   if (pcbddc->coarse_ksp) {
1285     if (applytranspose) {
1286       ierr = KSPSolveTranspose(pcbddc->coarse_ksp,NULL,NULL);CHKERRQ(ierr);
1287     } else {
1288       ierr = KSPSolve(pcbddc->coarse_ksp,NULL,NULL);CHKERRQ(ierr);
1289     }
1290   }
1291 
1292   /* Local solution on R nodes */
1293   if (pcis->n) {
1294     ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr);
1295     ierr = VecScatterBegin(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1296     ierr = VecScatterEnd(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1297     if (pcbddc->switch_static) {
1298       ierr = VecScatterBegin(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1299       ierr = VecScatterEnd(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1300     }
1301     ierr = PCBDDCSolveSubstructureCorrection(pc,pcbddc->vec1_R,pcbddc->vec2_R,pcbddc->vec1_C,applytranspose);CHKERRQ(ierr);
1302     ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
1303     ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1304     ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1305     if (pcbddc->switch_static) {
1306       ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1307       ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1308     }
1309   }
1310 
1311   /* communications from coarse sol to local primal nodes */
1312   ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1313   ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1314 
1315   /* Sum contributions from two levels */
1316   if (applytranspose) {
1317     ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr);
1318     if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); }
1319   } else {
1320     ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr);
1321     if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); }
1322   }
1323   PetscFunctionReturn(0);
1324 }
1325 
1326 /* TODO: the following two function can be optimized using VecPlaceArray whenever possible and using overlap flag */
1327 #undef __FUNCT__
1328 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin"
1329 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode)
1330 {
1331   PetscErrorCode ierr;
1332   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1333   PetscScalar    *array,*array2;
1334   Vec            from,to;
1335 
1336   PetscFunctionBegin;
1337   if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */
1338     from = pcbddc->coarse_vec;
1339     to = pcbddc->vec1_P;
1340     if (pcbddc->coarse_ksp) { /* get array from coarse processes */
1341       Vec tvec;
1342       PetscInt lsize;
1343       ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr);
1344       ierr = VecGetLocalSize(tvec,&lsize);CHKERRQ(ierr);
1345       ierr = VecGetArrayRead(tvec,(const PetscScalar**)&array);CHKERRQ(ierr);
1346       ierr = VecGetArray(from,&array2);CHKERRQ(ierr);
1347       ierr = PetscMemcpy(array2,array,lsize*sizeof(PetscScalar));CHKERRQ(ierr);
1348       ierr = VecRestoreArrayRead(tvec,(const PetscScalar**)&array);CHKERRQ(ierr);
1349       ierr = VecRestoreArray(from,&array2);CHKERRQ(ierr);
1350     }
1351   } else { /* from local to global -> put data in coarse right hand side */
1352     from = pcbddc->vec1_P;
1353     to = pcbddc->coarse_vec;
1354   }
1355   ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr);
1356   PetscFunctionReturn(0);
1357 }
1358 
1359 #undef __FUNCT__
1360 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd"
1361 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode)
1362 {
1363   PetscErrorCode ierr;
1364   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1365   PetscScalar    *array,*array2;
1366   Vec            from,to;
1367 
1368   PetscFunctionBegin;
1369   if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */
1370     from = pcbddc->coarse_vec;
1371     to = pcbddc->vec1_P;
1372   } else { /* from local to global -> put data in coarse right hand side */
1373     from = pcbddc->vec1_P;
1374     to = pcbddc->coarse_vec;
1375   }
1376   ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr);
1377   if (smode == SCATTER_FORWARD) {
1378     if (pcbddc->coarse_ksp) { /* get array from coarse processes */
1379       Vec tvec;
1380       PetscInt lsize;
1381       ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr);
1382       ierr = VecGetLocalSize(tvec,&lsize);CHKERRQ(ierr);
1383       ierr = VecGetArrayRead(to,(const PetscScalar**)&array);CHKERRQ(ierr);
1384       ierr = VecGetArray(tvec,&array2);CHKERRQ(ierr);
1385       ierr = PetscMemcpy(array2,array,lsize*sizeof(PetscScalar));CHKERRQ(ierr);
1386       ierr = VecRestoreArrayRead(to,(const PetscScalar**)&array);CHKERRQ(ierr);
1387       ierr = VecRestoreArray(tvec,&array2);CHKERRQ(ierr);
1388     }
1389   }
1390   PetscFunctionReturn(0);
1391 }
1392 
1393 /* uncomment for testing purposes */
1394 /* #define PETSC_MISSING_LAPACK_GESVD 1 */
1395 #undef __FUNCT__
1396 #define __FUNCT__ "PCBDDCConstraintsSetUp"
1397 PetscErrorCode PCBDDCConstraintsSetUp(PC pc)
1398 {
1399   PetscErrorCode    ierr;
1400   PC_IS*            pcis = (PC_IS*)(pc->data);
1401   PC_BDDC*          pcbddc = (PC_BDDC*)pc->data;
1402   Mat_IS*           matis = (Mat_IS*)pc->pmat->data;
1403   /* constraint and (optionally) change of basis matrix implemented as SeqAIJ */
1404   MatType           impMatType=MATSEQAIJ;
1405   /* one and zero */
1406   PetscScalar       one=1.0,zero=0.0;
1407   /* space to store constraints and their local indices */
1408   PetscScalar       *temp_quadrature_constraint;
1409   PetscInt          *temp_indices,*temp_indices_to_constraint,*temp_indices_to_constraint_B;
1410   /* iterators */
1411   PetscInt          i,j,k,total_counts,temp_start_ptr;
1412   /* stuff to store connected components stored in pcbddc->mat_graph */
1413   IS                ISForVertices,*ISForFaces,*ISForEdges,*used_IS;
1414   PetscInt          n_ISForFaces,n_ISForEdges;
1415   /* near null space stuff */
1416   MatNullSpace      nearnullsp;
1417   const Vec         *nearnullvecs;
1418   Vec               *localnearnullsp;
1419   PetscBool         nnsp_has_cnst;
1420   PetscInt          nnsp_size;
1421   PetscScalar       *array;
1422   /* BLAS integers */
1423   PetscBLASInt      lwork,lierr;
1424   PetscBLASInt      Blas_N,Blas_M,Blas_K,Blas_one=1;
1425   PetscBLASInt      Blas_LDA,Blas_LDB,Blas_LDC;
1426   /* LAPACK working arrays for SVD or POD */
1427   PetscBool         skip_lapack;
1428   PetscScalar       *work;
1429   PetscReal         *singular_vals;
1430 #if defined(PETSC_USE_COMPLEX)
1431   PetscReal         *rwork;
1432 #endif
1433 #if defined(PETSC_MISSING_LAPACK_GESVD)
1434   PetscBLASInt      Blas_one_2=1;
1435   PetscScalar       *temp_basis,*correlation_mat;
1436 #else
1437   PetscBLASInt      dummy_int_1=1,dummy_int_2=1;
1438   PetscScalar       dummy_scalar_1=0.0,dummy_scalar_2=0.0;
1439 #endif
1440   /* reuse */
1441   PetscInt          olocal_primal_size;
1442   PetscInt          *oprimal_indices_local_idxs;
1443   /* change of basis */
1444   PetscInt          *aux_primal_numbering,*aux_primal_minloc,*global_indices;
1445   PetscBool         boolforchange,qr_needed;
1446   PetscBT           touched,change_basis,qr_needed_idx;
1447   /* auxiliary stuff */
1448   PetscInt          *nnz,*is_indices,*aux_primal_numbering_B;
1449   PetscInt          ncc,*gidxs,*permutation,*temp_indices_to_constraint_work;
1450   PetscScalar       *temp_quadrature_constraint_work;
1451   /* some quantities */
1452   PetscInt          n_vertices,total_primal_vertices,valid_constraints;
1453   PetscInt          size_of_constraint,max_size_of_constraint,max_constraints,temp_constraints;
1454 
1455 
1456   PetscFunctionBegin;
1457   /* Destroy Mat objects computed previously */
1458   ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
1459   ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
1460   /* Get index sets for faces, edges and vertices from graph */
1461   ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr);
1462   /* free unneeded index sets */
1463   if (!pcbddc->use_vertices) {
1464     ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr);
1465   }
1466   if (!pcbddc->use_edges) {
1467     for (i=0;i<n_ISForEdges;i++) {
1468       ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr);
1469     }
1470     ierr = PetscFree(ISForEdges);CHKERRQ(ierr);
1471     n_ISForEdges = 0;
1472   }
1473   if (!pcbddc->use_faces) {
1474     for (i=0;i<n_ISForFaces;i++) {
1475       ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr);
1476     }
1477     ierr = PetscFree(ISForFaces);CHKERRQ(ierr);
1478     n_ISForFaces = 0;
1479   }
1480   /* HACKS (the following two blocks of code) */
1481   if (!ISForVertices && pcbddc->NullSpace && !pcbddc->user_ChangeOfBasisMatrix) {
1482     pcbddc->use_change_of_basis = PETSC_TRUE;
1483     if (!ISForEdges) {
1484       pcbddc->use_change_on_faces = PETSC_TRUE;
1485     }
1486   }
1487   if (pcbddc->NullSpace) {
1488     /* use_change_of_basis should be consistent among processors */
1489     PetscBool tbool[2],gbool[2];
1490     tbool [0] = pcbddc->use_change_of_basis;
1491     tbool [1] = pcbddc->use_change_on_faces;
1492     ierr = MPI_Allreduce(tbool,gbool,2,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
1493     pcbddc->use_change_of_basis = gbool[0];
1494     pcbddc->use_change_on_faces = gbool[1];
1495   }
1496   /* print some info */
1497   if (pcbddc->dbg_flag) {
1498     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
1499     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr);
1500     i = 0;
1501     if (ISForVertices) {
1502       ierr = ISGetSize(ISForVertices,&i);CHKERRQ(ierr);
1503     }
1504     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices\n",PetscGlobalRank,i);CHKERRQ(ierr);
1505     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges\n",PetscGlobalRank,n_ISForEdges);CHKERRQ(ierr);
1506     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces\n",PetscGlobalRank,n_ISForFaces);CHKERRQ(ierr);
1507     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1508   }
1509   /* check if near null space is attached to global mat */
1510   ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr);
1511   if (nearnullsp) {
1512     ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr);
1513     /* remove any stored info */
1514     ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr);
1515     ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr);
1516     /* store information for BDDC solver reuse */
1517     ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr);
1518     pcbddc->onearnullspace = nearnullsp;
1519     ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr);
1520     for (i=0;i<nnsp_size;i++) {
1521       ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr);
1522     }
1523   } else { /* if near null space is not provided BDDC uses constants by default */
1524     nnsp_size = 0;
1525     nnsp_has_cnst = PETSC_TRUE;
1526   }
1527   /* get max number of constraints on a single cc */
1528   max_constraints = nnsp_size;
1529   if (nnsp_has_cnst) max_constraints++;
1530 
1531   /*
1532        Evaluate maximum storage size needed by the procedure
1533        - temp_indices will contain start index of each constraint stored as follows
1534        - temp_indices_to_constraint  [temp_indices[i],...,temp_indices[i+1]-1] will contain the indices (in local numbering) on which the constraint acts
1535        - temp_indices_to_constraint_B[temp_indices[i],...,temp_indices[i+1]-1] will contain the indices (in boundary numbering) on which the constraint acts
1536        - temp_quadrature_constraint  [temp_indices[i],...,temp_indices[i+1]-1] will contain the scalars representing the constraint itself
1537                                                                                                                                                          */
1538   total_counts = n_ISForFaces+n_ISForEdges;
1539   total_counts *= max_constraints;
1540   n_vertices = 0;
1541   if (ISForVertices) {
1542     ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr);
1543   }
1544   total_counts += n_vertices;
1545   ierr = PetscMalloc1(total_counts+1,&temp_indices);CHKERRQ(ierr);
1546   ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr);
1547   total_counts = 0;
1548   max_size_of_constraint = 0;
1549   for (i=0;i<n_ISForEdges+n_ISForFaces;i++) {
1550     if (i<n_ISForEdges) {
1551       used_IS = &ISForEdges[i];
1552     } else {
1553       used_IS = &ISForFaces[i-n_ISForEdges];
1554     }
1555     ierr = ISGetSize(*used_IS,&j);CHKERRQ(ierr);
1556     total_counts += j;
1557     max_size_of_constraint = PetscMax(j,max_size_of_constraint);
1558   }
1559   total_counts *= max_constraints;
1560   total_counts += n_vertices;
1561   ierr = PetscMalloc3(total_counts,&temp_quadrature_constraint,total_counts,&temp_indices_to_constraint,total_counts,&temp_indices_to_constraint_B);CHKERRQ(ierr);
1562   /* get local part of global near null space vectors */
1563   ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr);
1564   for (k=0;k<nnsp_size;k++) {
1565     ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr);
1566     ierr = VecScatterBegin(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1567     ierr = VecScatterEnd(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1568   }
1569 
1570   /* whether or not to skip lapack calls */
1571   skip_lapack = PETSC_TRUE;
1572   if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE;
1573 
1574   /* allocate some auxiliary stuff */
1575   if (!skip_lapack || pcbddc->use_qr_single) {
1576     ierr = PetscMalloc4(max_size_of_constraint,&gidxs,max_size_of_constraint,&permutation,max_size_of_constraint,&temp_indices_to_constraint_work,max_size_of_constraint,&temp_quadrature_constraint_work);CHKERRQ(ierr);
1577   }
1578 
1579   /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */
1580   if (!skip_lapack) {
1581     PetscScalar temp_work;
1582 
1583 #if defined(PETSC_MISSING_LAPACK_GESVD)
1584     /* Proper Orthogonal Decomposition (POD) using the snapshot method */
1585     ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr);
1586     ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr);
1587     ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr);
1588 #if defined(PETSC_USE_COMPLEX)
1589     ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr);
1590 #endif
1591     /* now we evaluate the optimal workspace using query with lwork=-1 */
1592     ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr);
1593     ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr);
1594     lwork = -1;
1595     ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1596 #if !defined(PETSC_USE_COMPLEX)
1597     PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr));
1598 #else
1599     PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr));
1600 #endif
1601     ierr = PetscFPTrapPop();CHKERRQ(ierr);
1602     if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr);
1603 #else /* on missing GESVD */
1604     /* SVD */
1605     PetscInt max_n,min_n;
1606     max_n = max_size_of_constraint;
1607     min_n = max_constraints;
1608     if (max_size_of_constraint < max_constraints) {
1609       min_n = max_size_of_constraint;
1610       max_n = max_constraints;
1611     }
1612     ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr);
1613 #if defined(PETSC_USE_COMPLEX)
1614     ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr);
1615 #endif
1616     /* now we evaluate the optimal workspace using query with lwork=-1 */
1617     lwork = -1;
1618     ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr);
1619     ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr);
1620     ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr);
1621     ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1622 #if !defined(PETSC_USE_COMPLEX)
1623     PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,&lierr));
1624 #else
1625     PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,rwork,&lierr));
1626 #endif
1627     ierr = PetscFPTrapPop();CHKERRQ(ierr);
1628     if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr);
1629 #endif /* on missing GESVD */
1630     /* Allocate optimal workspace */
1631     ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr);
1632     ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr);
1633   }
1634   /* Now we can loop on constraining sets */
1635   total_counts = 0;
1636   temp_indices[0] = 0;
1637   /* vertices */
1638   if (ISForVertices) {
1639     ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1640     if (nnsp_has_cnst) { /* consider all vertices */
1641       ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr);
1642       for (i=0;i<n_vertices;i++) {
1643         temp_quadrature_constraint[temp_indices[total_counts]]=1.0;
1644         temp_indices[total_counts+1]=temp_indices[total_counts]+1;
1645         total_counts++;
1646       }
1647     } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */
1648       PetscBool used_vertex;
1649       for (i=0;i<n_vertices;i++) {
1650         used_vertex = PETSC_FALSE;
1651         k = 0;
1652         while (!used_vertex && k<nnsp_size) {
1653           ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1654           if (PetscAbsScalar(array[is_indices[i]])>0.0) {
1655             temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i];
1656             temp_quadrature_constraint[temp_indices[total_counts]]=1.0;
1657             temp_indices[total_counts+1]=temp_indices[total_counts]+1;
1658             total_counts++;
1659             used_vertex = PETSC_TRUE;
1660           }
1661           ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1662           k++;
1663         }
1664       }
1665     }
1666     ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1667     n_vertices = total_counts;
1668   }
1669 
1670   /* edges and faces */
1671   for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) {
1672     if (ncc<n_ISForEdges) {
1673       used_IS = &ISForEdges[ncc];
1674       boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */
1675     } else {
1676       used_IS = &ISForFaces[ncc-n_ISForEdges];
1677       boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */
1678     }
1679     temp_constraints = 0;          /* zero the number of constraints I have on this conn comp */
1680     temp_start_ptr = total_counts; /* need to know the starting index of constraints stored */
1681     ierr = ISGetSize(*used_IS,&size_of_constraint);CHKERRQ(ierr);
1682     ierr = ISGetIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1683     /* change of basis should not be performed on local periodic nodes */
1684     if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE;
1685     if (nnsp_has_cnst) {
1686       PetscScalar quad_value;
1687       temp_constraints++;
1688       if (!pcbddc->use_nnsp_true) {
1689         quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint));
1690       } else {
1691         quad_value = 1.0;
1692       }
1693       ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr);
1694       for (j=0;j<size_of_constraint;j++) {
1695         temp_quadrature_constraint[temp_indices[total_counts]+j]=quad_value;
1696       }
1697       /* sort by global ordering if using lapack subroutines */
1698       if (!skip_lapack || pcbddc->use_qr_single) {
1699         ierr = ISLocalToGlobalMappingApply(matis->mapping,size_of_constraint,temp_indices_to_constraint+temp_indices[total_counts],gidxs);CHKERRQ(ierr);
1700         for (j=0;j<size_of_constraint;j++) {
1701           permutation[j]=j;
1702         }
1703         ierr = PetscSortIntWithPermutation(size_of_constraint,gidxs,permutation);CHKERRQ(ierr);
1704         for (j=0;j<size_of_constraint;j++) {
1705           temp_indices_to_constraint_work[j] = temp_indices_to_constraint[temp_indices[total_counts]+permutation[j]];
1706           temp_quadrature_constraint_work[j] = temp_quadrature_constraint[temp_indices[total_counts]+permutation[j]];
1707         }
1708         ierr = PetscMemcpy(temp_indices_to_constraint+temp_indices[total_counts],temp_indices_to_constraint_work,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr);
1709         ierr = PetscMemcpy(temp_quadrature_constraint+temp_indices[total_counts],temp_quadrature_constraint_work,size_of_constraint*sizeof(PetscScalar));CHKERRQ(ierr);
1710       }
1711       temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint;  /* store new starting point */
1712       total_counts++;
1713     }
1714     for (k=0;k<nnsp_size;k++) {
1715       PetscReal real_value;
1716       ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1717       ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr);
1718       for (j=0;j<size_of_constraint;j++) {
1719         temp_quadrature_constraint[temp_indices[total_counts]+j]=array[is_indices[j]];
1720       }
1721       ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1722       /* check if array is null on the connected component */
1723       ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
1724       PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_one));
1725       if (real_value > 0.0) { /* keep indices and values */
1726         /* sort by global ordering if using lapack subroutines */
1727         if (!skip_lapack || pcbddc->use_qr_single) {
1728           ierr = ISLocalToGlobalMappingApply(matis->mapping,size_of_constraint,temp_indices_to_constraint+temp_indices[total_counts],gidxs);CHKERRQ(ierr);
1729           for (j=0;j<size_of_constraint;j++) {
1730             permutation[j]=j;
1731           }
1732           ierr = PetscSortIntWithPermutation(size_of_constraint,gidxs,permutation);CHKERRQ(ierr);
1733           for (j=0;j<size_of_constraint;j++) {
1734             temp_indices_to_constraint_work[j] = temp_indices_to_constraint[temp_indices[total_counts]+permutation[j]];
1735             temp_quadrature_constraint_work[j] = temp_quadrature_constraint[temp_indices[total_counts]+permutation[j]];
1736           }
1737           ierr = PetscMemcpy(temp_indices_to_constraint+temp_indices[total_counts],temp_indices_to_constraint_work,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr);
1738           ierr = PetscMemcpy(temp_quadrature_constraint+temp_indices[total_counts],temp_quadrature_constraint_work,size_of_constraint*sizeof(PetscScalar));CHKERRQ(ierr);
1739         }
1740         temp_constraints++;
1741         temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint;  /* store new starting point */
1742         total_counts++;
1743       }
1744     }
1745     ierr = ISRestoreIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1746     valid_constraints = temp_constraints;
1747     if (!pcbddc->use_nnsp_true && temp_constraints) {
1748       if (temp_constraints == 1) { /* just normalize the constraint */
1749         PetscScalar norm;
1750         ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
1751         PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,temp_quadrature_constraint+temp_indices[temp_start_ptr],&Blas_one,temp_quadrature_constraint+temp_indices[temp_start_ptr],&Blas_one));
1752         norm = 1.0/PetscSqrtReal(PetscRealPart(norm));
1753         PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,temp_quadrature_constraint+temp_indices[temp_start_ptr],&Blas_one));
1754       } else { /* perform SVD */
1755         PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */
1756 
1757 #if defined(PETSC_MISSING_LAPACK_GESVD)
1758         /* SVD: Y = U*S*V^H                -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag
1759            POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2)
1760            -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined
1761               the constraints basis will differ (by a complex factor with absolute value equal to 1)
1762               from that computed using LAPACKgesvd
1763            -> This is due to a different computation of eigenvectors in LAPACKheev
1764            -> The quality of the POD-computed basis will be the same */
1765         ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
1766         /* Store upper triangular part of correlation matrix */
1767         ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
1768         ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1769         for (j=0;j<temp_constraints;j++) {
1770           for (k=0;k<j+1;k++) {
1771             PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k]=BLASdot_(&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr+k]],&Blas_one,&temp_quadrature_constraint[temp_indices[temp_start_ptr+j]],&Blas_one_2));
1772           }
1773         }
1774         /* compute eigenvalues and eigenvectors of correlation matrix */
1775         ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
1776         ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr);
1777 #if !defined(PETSC_USE_COMPLEX)
1778         PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr));
1779 #else
1780         PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr));
1781 #endif
1782         ierr = PetscFPTrapPop();CHKERRQ(ierr);
1783         if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr);
1784         /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */
1785         j = 0;
1786         while (j < temp_constraints && singular_vals[j] < tol) j++;
1787         total_counts = total_counts-j;
1788         valid_constraints = temp_constraints-j;
1789         /* scale and copy POD basis into used quadrature memory */
1790         ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
1791         ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
1792         ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr);
1793         ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1794         ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr);
1795         ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr);
1796         if (j<temp_constraints) {
1797           PetscInt ii;
1798           for (k=j;k<temp_constraints;k++) singular_vals[k]=1.0/PetscSqrtReal(singular_vals[k]);
1799           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1800           PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC));
1801           ierr = PetscFPTrapPop();CHKERRQ(ierr);
1802           for (k=0;k<temp_constraints-j;k++) {
1803             for (ii=0;ii<size_of_constraint;ii++) {
1804               temp_quadrature_constraint[temp_indices[temp_start_ptr+k]+ii]=singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii];
1805             }
1806           }
1807         }
1808 #else  /* on missing GESVD */
1809         ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
1810         ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
1811         ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1812         ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1813 #if !defined(PETSC_USE_COMPLEX)
1814         PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,&lierr));
1815 #else
1816         PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,rwork,&lierr));
1817 #endif
1818         if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr);
1819         ierr = PetscFPTrapPop();CHKERRQ(ierr);
1820         /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */
1821         k = temp_constraints;
1822         if (k > size_of_constraint) k = size_of_constraint;
1823         j = 0;
1824         while (j < k && singular_vals[k-j-1] < tol) j++;
1825         valid_constraints = k-j;
1826         total_counts = total_counts-temp_constraints+valid_constraints;
1827 #endif /* on missing GESVD */
1828       }
1829     }
1830     /* setting change_of_basis flag is safe now */
1831     if (boolforchange) {
1832       for (j=0;j<valid_constraints;j++) {
1833         PetscBTSet(change_basis,total_counts-j-1);
1834       }
1835     }
1836   }
1837   /* free index sets of faces, edges and vertices */
1838   for (i=0;i<n_ISForFaces;i++) {
1839     ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr);
1840   }
1841   if (n_ISForFaces) {
1842     ierr = PetscFree(ISForFaces);CHKERRQ(ierr);
1843   }
1844   for (i=0;i<n_ISForEdges;i++) {
1845     ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr);
1846   }
1847   if (n_ISForEdges) {
1848     ierr = PetscFree(ISForEdges);CHKERRQ(ierr);
1849   }
1850   ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr);
1851   /* map temp_indices_to_constraint in boundary numbering */
1852   ierr = ISGlobalToLocalMappingApply(pcbddc->BtoNmap,IS_GTOLM_DROP,temp_indices[total_counts],temp_indices_to_constraint,&i,temp_indices_to_constraint_B);CHKERRQ(ierr);
1853   if (i != temp_indices[total_counts]) {
1854     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %d != %d\n",temp_indices[total_counts],i);
1855   }
1856 
1857   /* free workspace */
1858   if (!skip_lapack || pcbddc->use_qr_single) {
1859     ierr = PetscFree4(gidxs,permutation,temp_indices_to_constraint_work,temp_quadrature_constraint_work);CHKERRQ(ierr);
1860   }
1861   if (!skip_lapack) {
1862     ierr = PetscFree(work);CHKERRQ(ierr);
1863 #if defined(PETSC_USE_COMPLEX)
1864     ierr = PetscFree(rwork);CHKERRQ(ierr);
1865 #endif
1866     ierr = PetscFree(singular_vals);CHKERRQ(ierr);
1867 #if defined(PETSC_MISSING_LAPACK_GESVD)
1868     ierr = PetscFree(correlation_mat);CHKERRQ(ierr);
1869     ierr = PetscFree(temp_basis);CHKERRQ(ierr);
1870 #endif
1871   }
1872   for (k=0;k<nnsp_size;k++) {
1873     ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr);
1874   }
1875   ierr = PetscFree(localnearnullsp);CHKERRQ(ierr);
1876 
1877   /* set quantities in pcbddc data structure and store previous primal size */
1878   /* n_vertices defines the number of subdomain corners in the primal space */
1879   /* n_constraints defines the number of averages (they can be point primal dofs if change of basis is requested) */
1880   olocal_primal_size = pcbddc->local_primal_size;
1881   pcbddc->local_primal_size = total_counts;
1882   pcbddc->n_vertices = n_vertices;
1883   pcbddc->n_constraints = pcbddc->local_primal_size-pcbddc->n_vertices;
1884 
1885   /* Create constraint matrix */
1886   /* The constraint matrix is used to compute the l2g map of primal dofs */
1887   /* so we need to set it up properly either with or without change of basis */
1888   ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
1889   ierr = MatSetType(pcbddc->ConstraintMatrix,impMatType);CHKERRQ(ierr);
1890   ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr);
1891   /* array to compute a local numbering of constraints : vertices first then constraints */
1892   ierr = PetscMalloc1(pcbddc->local_primal_size,&aux_primal_numbering);CHKERRQ(ierr);
1893   /* array to select the proper local node (of minimum index with respect to global ordering) when changing the basis */
1894   /* note: it should not be needed since IS for faces and edges are already sorted by global ordering when analyzing the graph but... just in case */
1895   ierr = PetscMalloc1(pcbddc->local_primal_size,&aux_primal_minloc);CHKERRQ(ierr);
1896   /* auxiliary stuff for basis change */
1897   ierr = PetscMalloc1(max_size_of_constraint,&global_indices);CHKERRQ(ierr);
1898   ierr = PetscBTCreate(pcis->n_B,&touched);CHKERRQ(ierr);
1899 
1900   /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */
1901   total_primal_vertices=0;
1902   for (i=0;i<pcbddc->local_primal_size;i++) {
1903     size_of_constraint=temp_indices[i+1]-temp_indices[i];
1904     if (size_of_constraint == 1) {
1905       ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]]);CHKERRQ(ierr);
1906       aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]];
1907       aux_primal_minloc[total_primal_vertices]=0;
1908       total_primal_vertices++;
1909     } else if (PetscBTLookup(change_basis,i)) { /* Same procedure used in PCBDDCGetPrimalConstraintsLocalIdx */
1910       PetscInt min_loc,min_index;
1911       ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],global_indices);CHKERRQ(ierr);
1912       /* find first untouched local node */
1913       k = 0;
1914       while (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) k++;
1915       min_index = global_indices[k];
1916       min_loc = k;
1917       /* search the minimum among global nodes already untouched on the cc */
1918       for (k=1;k<size_of_constraint;k++) {
1919         /* there can be more than one constraint on a single connected component */
1920         if (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k]) && min_index > global_indices[k]) {
1921           min_index = global_indices[k];
1922           min_loc = k;
1923         }
1924       }
1925       ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]+min_loc]);CHKERRQ(ierr);
1926       aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]+min_loc];
1927       aux_primal_minloc[total_primal_vertices]=min_loc;
1928       total_primal_vertices++;
1929     }
1930   }
1931   /* determine if a QR strategy is needed for change of basis */
1932   qr_needed = PETSC_FALSE;
1933   ierr = PetscBTCreate(pcbddc->local_primal_size,&qr_needed_idx);CHKERRQ(ierr);
1934   for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
1935     if (PetscBTLookup(change_basis,i)) {
1936       if (!pcbddc->use_qr_single) {
1937         size_of_constraint = temp_indices[i+1]-temp_indices[i];
1938         j = 0;
1939         for (k=0;k<size_of_constraint;k++) {
1940           if (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) {
1941             j++;
1942           }
1943         }
1944         /* found more than one primal dof on the cc */
1945         if (j > 1) {
1946           PetscBTSet(qr_needed_idx,i);
1947           qr_needed = PETSC_TRUE;
1948         }
1949       } else {
1950         PetscBTSet(qr_needed_idx,i);
1951         qr_needed = PETSC_TRUE;
1952       }
1953     }
1954   }
1955   /* free workspace */
1956   ierr = PetscFree(global_indices);CHKERRQ(ierr);
1957 
1958   /* permute indices in order to have a sorted set of vertices */
1959   ierr = PetscSortInt(total_primal_vertices,aux_primal_numbering);CHKERRQ(ierr);
1960 
1961   /* nonzero structure of constraint matrix */
1962   ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr);
1963   for (i=0;i<total_primal_vertices;i++) nnz[i]=1;
1964   j=total_primal_vertices;
1965   for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
1966     if (!PetscBTLookup(change_basis,i)) {
1967       nnz[j]=temp_indices[i+1]-temp_indices[i];
1968       j++;
1969     }
1970   }
1971   ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr);
1972   ierr = PetscFree(nnz);CHKERRQ(ierr);
1973   /* set values in constraint matrix */
1974   for (i=0;i<total_primal_vertices;i++) {
1975     ierr = MatSetValue(pcbddc->ConstraintMatrix,i,aux_primal_numbering[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
1976   }
1977   total_counts = total_primal_vertices;
1978   for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
1979     if (!PetscBTLookup(change_basis,i)) {
1980       size_of_constraint=temp_indices[i+1]-temp_indices[i];
1981       ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&total_counts,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],&temp_quadrature_constraint[temp_indices[i]],INSERT_VALUES);CHKERRQ(ierr);
1982       total_counts++;
1983     }
1984   }
1985   /* assembling */
1986   ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1987   ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1988   /*
1989   ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
1990   ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr);
1991   */
1992   /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */
1993   if (pcbddc->use_change_of_basis) {
1994     /* dual and primal dofs on a single cc */
1995     PetscInt     dual_dofs,primal_dofs;
1996     /* iterator on aux_primal_minloc (ordered as read from nearnullspace: vertices, edges and then constraints) */
1997     PetscInt     primal_counter;
1998     /* working stuff for GEQRF */
1999     PetscScalar  *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t;
2000     PetscBLASInt lqr_work;
2001     /* working stuff for UNGQR */
2002     PetscScalar  *gqr_work,lgqr_work_t;
2003     PetscBLASInt lgqr_work;
2004     /* working stuff for TRTRS */
2005     PetscScalar  *trs_rhs;
2006     PetscBLASInt Blas_NRHS;
2007     /* pointers for values insertion into change of basis matrix */
2008     PetscInt     *start_rows,*start_cols;
2009     PetscScalar  *start_vals;
2010     /* working stuff for values insertion */
2011     PetscBT      is_primal;
2012     /* matrix sizes */
2013     PetscInt     global_size,local_size;
2014     /* work array for nonzeros */
2015     PetscScalar  *nnz_array;
2016     /* temporary change of basis */
2017     Mat          localChangeOfBasisMatrix;
2018     /* auxiliary work for global change of basis */
2019     Vec          nnz_vec;
2020     PetscInt     *idxs_I,*idxs_B,*idxs_all,*d_nnz,*o_nnz;
2021     PetscInt     nvtxs,*xadj,*adjncy,*idxs_mapped;
2022     PetscScalar  *vals;
2023     PetscBool    done;
2024 
2025     /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */
2026     ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr);
2027     ierr = MatSetType(localChangeOfBasisMatrix,impMatType);CHKERRQ(ierr);
2028     ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n_B,pcis->n_B,pcis->n_B,pcis->n_B);CHKERRQ(ierr);
2029 
2030     /* nonzeros for local mat */
2031     ierr = PetscMalloc1(pcis->n_B,&nnz);CHKERRQ(ierr);
2032     for (i=0;i<pcis->n_B;i++) nnz[i]=1;
2033     for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
2034       if (PetscBTLookup(change_basis,i)) {
2035         size_of_constraint = temp_indices[i+1]-temp_indices[i];
2036         if (PetscBTLookup(qr_needed_idx,i)) {
2037           for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = size_of_constraint;
2038         } else {
2039           for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = 2;
2040           /* get local primal index on the cc */
2041           j = 0;
2042           while (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+j])) j++;
2043           nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = size_of_constraint;
2044         }
2045       }
2046     }
2047     ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr);
2048     /* Set initial identity in the matrix */
2049     for (i=0;i<pcis->n_B;i++) {
2050       ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr);
2051     }
2052 
2053     if (pcbddc->dbg_flag) {
2054       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr);
2055       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr);
2056     }
2057 
2058 
2059     /* Now we loop on the constraints which need a change of basis */
2060     /*
2061        Change of basis matrix is evaluated similarly to the FIRST APPROACH in
2062        Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1)
2063 
2064        Basic blocks of change of basis matrix T computed by
2065 
2066           - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified)
2067 
2068             | 1        0   ...        0         s_1/S |
2069             | 0        1   ...        0         s_2/S |
2070             |              ...                        |
2071             | 0        ...            1     s_{n-1}/S |
2072             | -s_1/s_n ...    -s_{n-1}/s_n      s_n/S |
2073 
2074             with S = \sum_{i=1}^n s_i^2
2075             NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering
2076                   in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering
2077 
2078           - QR decomposition of constraints otherwise
2079     */
2080     if (qr_needed) {
2081       /* space to store Q */
2082       ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr);
2083       /* first we issue queries for optimal work */
2084       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr);
2085       ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr);
2086       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2087       lqr_work = -1;
2088       PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr));
2089       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr);
2090       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr);
2091       ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr);
2092       lgqr_work = -1;
2093       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr);
2094       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr);
2095       ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr);
2096       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2097       if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */
2098       PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr));
2099       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr);
2100       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr);
2101       ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr);
2102       /* array to store scaling factors for reflectors */
2103       ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr);
2104       /* array to store rhs and solution of triangular solver */
2105       ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr);
2106       /* allocating workspace for check */
2107       if (pcbddc->dbg_flag) {
2108         ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&work);CHKERRQ(ierr);
2109       }
2110     }
2111     /* array to store whether a node is primal or not */
2112     ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr);
2113     ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr);
2114     ierr = ISGlobalToLocalMappingApply(pcbddc->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,aux_primal_numbering,&i,aux_primal_numbering_B);CHKERRQ(ierr);
2115     if (i != total_primal_vertices) {
2116       SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",total_primal_vertices,i);
2117     }
2118     for (i=0;i<total_primal_vertices;i++) {
2119       ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr);
2120     }
2121     ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr);
2122 
2123     /* loop on constraints and see whether or not they need a change of basis and compute it */
2124     /* -> using implicit ordering contained in temp_indices data */
2125     total_counts = pcbddc->n_vertices;
2126     primal_counter = total_counts;
2127     while (total_counts<pcbddc->local_primal_size) {
2128       primal_dofs = 1;
2129       if (PetscBTLookup(change_basis,total_counts)) {
2130         /* get all constraints with same support: if more then one constraint is present on the cc then surely indices are stored contiguosly */
2131         while (total_counts+primal_dofs < pcbddc->local_primal_size && temp_indices_to_constraint_B[temp_indices[total_counts]] == temp_indices_to_constraint_B[temp_indices[total_counts+primal_dofs]]) {
2132           primal_dofs++;
2133         }
2134         /* get constraint info */
2135         size_of_constraint = temp_indices[total_counts+1]-temp_indices[total_counts];
2136         dual_dofs = size_of_constraint-primal_dofs;
2137 
2138         if (pcbddc->dbg_flag) {
2139           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d to %d (incl) need a change of basis (size %d)\n",total_counts,total_counts+primal_dofs-1,size_of_constraint);CHKERRQ(ierr);
2140         }
2141 
2142         if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */
2143 
2144           /* copy quadrature constraints for change of basis check */
2145           if (pcbddc->dbg_flag) {
2146             ierr = PetscMemcpy(work,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
2147           }
2148           /* copy temporary constraints into larger work vector (in order to store all columns of Q) */
2149           ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
2150 
2151           /* compute QR decomposition of constraints */
2152           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2153           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
2154           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2155           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2156           PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr));
2157           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr);
2158           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2159 
2160           /* explictly compute R^-T */
2161           ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr);
2162           for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0;
2163           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
2164           ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr);
2165           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2166           ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr);
2167           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2168           PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr));
2169           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr);
2170           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2171 
2172           /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */
2173           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2174           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2175           ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr);
2176           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2177           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2178           PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr));
2179           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr);
2180           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2181 
2182           /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints
2183              i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below)
2184              where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */
2185           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2186           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
2187           ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr);
2188           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2189           ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr);
2190           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr);
2191           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2192           PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_LDC));
2193           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2194           ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
2195 
2196           /* insert values in change of basis matrix respecting global ordering of new primal dofs */
2197           start_rows = &temp_indices_to_constraint_B[temp_indices[total_counts]];
2198           /* insert cols for primal dofs */
2199           for (j=0;j<primal_dofs;j++) {
2200             start_vals = &qr_basis[j*size_of_constraint];
2201             start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+aux_primal_minloc[primal_counter+j]];
2202             ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr);
2203           }
2204           /* insert cols for dual dofs */
2205           for (j=0,k=0;j<dual_dofs;k++) {
2206             if (!PetscBTLookup(is_primal,temp_indices_to_constraint_B[temp_indices[total_counts]+k])) {
2207               start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint];
2208               start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+k];
2209               ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr);
2210               j++;
2211             }
2212           }
2213 
2214           /* check change of basis */
2215           if (pcbddc->dbg_flag) {
2216             PetscInt   ii,jj;
2217             PetscBool valid_qr=PETSC_TRUE;
2218             ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr);
2219             ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2220             ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr);
2221             ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2222             ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr);
2223             ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr);
2224             ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2225             PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&work[size_of_constraint*primal_dofs],&Blas_LDC));
2226             ierr = PetscFPTrapPop();CHKERRQ(ierr);
2227             for (jj=0;jj<size_of_constraint;jj++) {
2228               for (ii=0;ii<primal_dofs;ii++) {
2229                 if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE;
2230                 if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE;
2231               }
2232             }
2233             if (!valid_qr) {
2234               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr);
2235               for (jj=0;jj<size_of_constraint;jj++) {
2236                 for (ii=0;ii<primal_dofs;ii++) {
2237                   if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) {
2238                     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
2239                   }
2240                   if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) {
2241                     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
2242                   }
2243                 }
2244               }
2245             } else {
2246               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr);
2247             }
2248           }
2249         } else { /* simple transformation block */
2250           PetscInt    row,col;
2251           PetscScalar val,norm;
2252 
2253           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2254           PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,temp_quadrature_constraint+temp_indices[total_counts],&Blas_one,temp_quadrature_constraint+temp_indices[total_counts],&Blas_one));
2255           for (j=0;j<size_of_constraint;j++) {
2256             row = temp_indices_to_constraint_B[temp_indices[total_counts]+j];
2257             if (!PetscBTLookup(is_primal,row)) {
2258               col = temp_indices_to_constraint_B[temp_indices[total_counts]+aux_primal_minloc[primal_counter]];
2259               ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr);
2260               ierr = MatSetValue(localChangeOfBasisMatrix,row,col,temp_quadrature_constraint[temp_indices[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr);
2261             } else {
2262               for (k=0;k<size_of_constraint;k++) {
2263                 col = temp_indices_to_constraint_B[temp_indices[total_counts]+k];
2264                 if (row != col) {
2265                   val = -temp_quadrature_constraint[temp_indices[total_counts]+k]/temp_quadrature_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter]];
2266                 } else {
2267                   val = temp_quadrature_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter]]/norm;
2268                 }
2269                 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr);
2270               }
2271             }
2272           }
2273           if (pcbddc->dbg_flag) {
2274             ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr);
2275           }
2276         }
2277         /* increment primal counter */
2278         primal_counter += primal_dofs;
2279       } else {
2280         if (pcbddc->dbg_flag) {
2281           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,temp_indices[total_counts+1]-temp_indices[total_counts]);CHKERRQ(ierr);
2282         }
2283       }
2284       /* increment constraint counter total_counts */
2285       total_counts += primal_dofs;
2286     }
2287 
2288     /* free workspace */
2289     if (qr_needed) {
2290       if (pcbddc->dbg_flag) {
2291         ierr = PetscFree(work);CHKERRQ(ierr);
2292       }
2293       ierr = PetscFree(trs_rhs);CHKERRQ(ierr);
2294       ierr = PetscFree(qr_tau);CHKERRQ(ierr);
2295       ierr = PetscFree(qr_work);CHKERRQ(ierr);
2296       ierr = PetscFree(gqr_work);CHKERRQ(ierr);
2297       ierr = PetscFree(qr_basis);CHKERRQ(ierr);
2298     }
2299     ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr);
2300     ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2301     ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2302 
2303     /* assembling of global change of variable */
2304     ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
2305     ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr);
2306     ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr);
2307     ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr);
2308     ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr);
2309     ierr = MatSetLocalToGlobalMapping(pcbddc->ChangeOfBasisMatrix,matis->mapping,matis->mapping);CHKERRQ(ierr);
2310 
2311     /* nonzeros (overestimated) */
2312     ierr = VecDuplicate(pcis->vec1_global,&nnz_vec);CHKERRQ(ierr);
2313     ierr = VecSetLocalToGlobalMapping(nnz_vec,matis->mapping);CHKERRQ(ierr);
2314     ierr = PetscMalloc2(pcis->n,&nnz_array,pcis->n,&idxs_all);CHKERRQ(ierr);
2315     for (i=0;i<pcis->n;i++) {
2316       nnz_array[i] = 1.0;
2317       idxs_all[i] = i;
2318     }
2319     ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&idxs_B);CHKERRQ(ierr);
2320     for (i=0;i<pcis->n_B;i++) {
2321       nnz_array[idxs_B[i]] = nnz[i];
2322     }
2323     if (pcis->n) {
2324       ierr = VecSetValuesLocal(nnz_vec,pcis->n,idxs_all,nnz_array,INSERT_VALUES);CHKERRQ(ierr);
2325     }
2326     ierr = VecAssemblyBegin(nnz_vec);CHKERRQ(ierr);
2327     ierr = VecAssemblyEnd(nnz_vec);CHKERRQ(ierr);
2328     ierr = PetscFree(nnz);CHKERRQ(ierr);
2329     ierr = PetscFree2(nnz_array,idxs_all);CHKERRQ(ierr);
2330     ierr = PetscMalloc2(local_size,&d_nnz,local_size,&o_nnz);CHKERRQ(ierr);
2331     ierr = VecGetArray(nnz_vec,&nnz_array);CHKERRQ(ierr);
2332     for (i=0;i<local_size;i++) {
2333       d_nnz[i] = PetscMin((PetscInt)(PetscRealPart(nnz_array[i])),local_size);
2334       o_nnz[i] = PetscMin((PetscInt)(PetscRealPart(nnz_array[i])),global_size-local_size);
2335     }
2336     ierr = VecRestoreArray(nnz_vec,&nnz_array);CHKERRQ(ierr);
2337     ierr = VecDestroy(&nnz_vec);CHKERRQ(ierr);
2338     ierr = MatMPIAIJSetPreallocation(pcbddc->ChangeOfBasisMatrix,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2339     ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
2340 
2341     /* Set identity on dirichlet dofs */
2342     ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&idxs_I);CHKERRQ(ierr);
2343     for (i=0;i<pcis->n-pcis->n_B;i++) {
2344       PetscScalar one=1.0;
2345       ierr = MatSetValuesLocal(pcbddc->ChangeOfBasisMatrix,1,idxs_I+i,1,idxs_I+i,&one,INSERT_VALUES);CHKERRQ(ierr);
2346     }
2347     ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&idxs_I);CHKERRQ(ierr);
2348 
2349     /* Set values at interface dofs */
2350     done = PETSC_TRUE;
2351     ierr = MatGetRowIJ(localChangeOfBasisMatrix,0,PETSC_FALSE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&done);CHKERRQ(ierr);
2352     if (!done) {
2353       SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Error in MatGetRowIJ called in %s\n",__FUNCT__);
2354     }
2355     ierr = MatSeqAIJGetArray(localChangeOfBasisMatrix,&vals);CHKERRQ(ierr);
2356     ierr = PetscMalloc1(xadj[nvtxs],&idxs_mapped);CHKERRQ(ierr);
2357     ierr = ISLocalToGlobalMappingApply(pcbddc->BtoNmap,xadj[nvtxs],adjncy,idxs_mapped);CHKERRQ(ierr);
2358     for (i=0;i<nvtxs;i++) {
2359       PetscInt    row,*cols,ncols;
2360       PetscScalar *mat_vals;
2361 
2362       row = idxs_B[i];
2363       ncols = xadj[i+1]-xadj[i];
2364       cols = idxs_mapped+xadj[i];
2365       mat_vals = vals+xadj[i];
2366       ierr = MatSetValuesLocal(pcbddc->ChangeOfBasisMatrix,1,&row,ncols,cols,mat_vals,INSERT_VALUES);CHKERRQ(ierr);
2367     }
2368     ierr = MatRestoreRowIJ(localChangeOfBasisMatrix,0,PETSC_FALSE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&done);CHKERRQ(ierr);
2369     if (!done) {
2370       SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Error in MatRestoreRowIJ called in %s\n",__FUNCT__);
2371     }
2372     ierr = MatSeqAIJRestoreArray(localChangeOfBasisMatrix,&vals);CHKERRQ(ierr);
2373     ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&idxs_B);CHKERRQ(ierr);
2374     ierr = PetscFree(idxs_mapped);CHKERRQ(ierr);
2375     ierr = MatAssemblyBegin(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2376     ierr = MatAssemblyEnd(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2377 
2378     /* check */
2379     if (pcbddc->dbg_flag) {
2380       PetscReal error;
2381       Vec       x,x_change;
2382 
2383       ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr);
2384       ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr);
2385       ierr = VecSetRandom(x,NULL);CHKERRQ(ierr);
2386       ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr);
2387       ierr = VecScatterBegin(pcis->global_to_B,x,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2388       ierr = VecScatterEnd(pcis->global_to_B,x,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2389       ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_B,pcis->vec2_B);CHKERRQ(ierr);
2390       ierr = VecScatterBegin(pcis->global_to_B,pcis->vec2_B,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2391       ierr = VecScatterEnd(pcis->global_to_B,pcis->vec2_B,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2392       ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr);
2393       ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr);
2394       ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr);
2395       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2396       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on B: %1.6e\n",error);CHKERRQ(ierr);
2397       ierr = VecDestroy(&x);CHKERRQ(ierr);
2398       ierr = VecDestroy(&x_change);CHKERRQ(ierr);
2399     }
2400     ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr);
2401   } else if (pcbddc->user_ChangeOfBasisMatrix) {
2402     ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr);
2403     pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix;
2404   }
2405 
2406   /* set up change of basis context */
2407   if (pcbddc->ChangeOfBasisMatrix) {
2408     PCBDDCChange_ctx change_ctx;
2409 
2410     if (!pcbddc->new_global_mat) {
2411       PetscInt global_size,local_size;
2412 
2413       ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr);
2414       ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr);
2415       ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->new_global_mat);CHKERRQ(ierr);
2416       ierr = MatSetSizes(pcbddc->new_global_mat,local_size,local_size,global_size,global_size);CHKERRQ(ierr);
2417       ierr = MatSetType(pcbddc->new_global_mat,MATSHELL);CHKERRQ(ierr);
2418       ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT,(void (*)(void))PCBDDCMatMult_Private);CHKERRQ(ierr);
2419       ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCMatMultTranspose_Private);CHKERRQ(ierr);
2420       ierr = PetscNew(&change_ctx);CHKERRQ(ierr);
2421       ierr = MatShellSetContext(pcbddc->new_global_mat,change_ctx);CHKERRQ(ierr);
2422     } else {
2423       ierr = MatShellGetContext(pcbddc->new_global_mat,&change_ctx);CHKERRQ(ierr);
2424       ierr = MatDestroy(&change_ctx->global_change);CHKERRQ(ierr);
2425       ierr = VecDestroyVecs(2,&change_ctx->work);CHKERRQ(ierr);
2426     }
2427     if (!pcbddc->user_ChangeOfBasisMatrix) {
2428       ierr = PetscObjectReference((PetscObject)pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
2429       change_ctx->global_change = pcbddc->ChangeOfBasisMatrix;
2430     } else {
2431       ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr);
2432       change_ctx->global_change = pcbddc->user_ChangeOfBasisMatrix;
2433     }
2434     ierr = VecDuplicateVecs(pcis->vec1_global,2,&change_ctx->work);CHKERRQ(ierr);
2435     ierr = MatSetUp(pcbddc->new_global_mat);CHKERRQ(ierr);
2436     ierr = MatAssemblyBegin(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2437     ierr = MatAssemblyEnd(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2438   }
2439 
2440   /* get indices in local ordering for vertices and constraints */
2441   if (olocal_primal_size == pcbddc->local_primal_size) { /* if this is true, I need to check if a new primal space has been introduced */
2442     ierr = PetscMalloc1(olocal_primal_size,&oprimal_indices_local_idxs);CHKERRQ(ierr);
2443     ierr = PetscMemcpy(oprimal_indices_local_idxs,pcbddc->primal_indices_local_idxs,olocal_primal_size*sizeof(PetscInt));CHKERRQ(ierr);
2444   }
2445   ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr);
2446   ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
2447   ierr = PetscMalloc1(pcbddc->local_primal_size,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
2448   ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&i,&aux_primal_numbering);CHKERRQ(ierr);
2449   ierr = PetscMemcpy(pcbddc->primal_indices_local_idxs,aux_primal_numbering,i*sizeof(PetscInt));CHKERRQ(ierr);
2450   ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr);
2451   ierr = PCBDDCGetPrimalConstraintsLocalIdx(pc,&j,&aux_primal_numbering);CHKERRQ(ierr);
2452   ierr = PetscMemcpy(&pcbddc->primal_indices_local_idxs[i],aux_primal_numbering,j*sizeof(PetscInt));CHKERRQ(ierr);
2453   ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr);
2454   /* set quantities in PCBDDC data struct */
2455   pcbddc->n_actual_vertices = i;
2456   /* check if a new primal space has been introduced */
2457   pcbddc->new_primal_space_local = PETSC_TRUE;
2458   if (olocal_primal_size == pcbddc->local_primal_size) {
2459     ierr = PetscMemcmp(pcbddc->primal_indices_local_idxs,oprimal_indices_local_idxs,olocal_primal_size,&pcbddc->new_primal_space_local);CHKERRQ(ierr);
2460     pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local);
2461     ierr = PetscFree(oprimal_indices_local_idxs);CHKERRQ(ierr);
2462   }
2463   /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */
2464   ierr = MPI_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
2465 
2466   /* flush dbg viewer */
2467   if (pcbddc->dbg_flag) {
2468     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2469   }
2470 
2471   /* free workspace */
2472   ierr = PetscBTDestroy(&touched);CHKERRQ(ierr);
2473   ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr);
2474   ierr = PetscFree(aux_primal_minloc);CHKERRQ(ierr);
2475   ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr);
2476   ierr = PetscFree(temp_indices);CHKERRQ(ierr);
2477   ierr = PetscFree3(temp_quadrature_constraint,temp_indices_to_constraint,temp_indices_to_constraint_B);CHKERRQ(ierr);
2478   PetscFunctionReturn(0);
2479 }
2480 
2481 #undef __FUNCT__
2482 #define __FUNCT__ "PCBDDCAnalyzeInterface"
2483 PetscErrorCode PCBDDCAnalyzeInterface(PC pc)
2484 {
2485   PC_BDDC     *pcbddc = (PC_BDDC*)pc->data;
2486   PC_IS       *pcis = (PC_IS*)pc->data;
2487   Mat_IS      *matis  = (Mat_IS*)pc->pmat->data;
2488   PetscInt    ierr,i,vertex_size;
2489   PetscViewer viewer=pcbddc->dbg_viewer;
2490 
2491   PetscFunctionBegin;
2492   /* Reset previously computed graph */
2493   ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr);
2494   /* Init local Graph struct */
2495   ierr = PCBDDCGraphInit(pcbddc->mat_graph,matis->mapping);CHKERRQ(ierr);
2496 
2497   /* Check validity of the csr graph passed in by the user */
2498   if (pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) {
2499     ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr);
2500   }
2501 
2502   /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */
2503   if (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) {
2504     Mat       mat_adj;
2505     PetscInt  *xadj,*adjncy;
2506     PetscInt  nvtxs;
2507     PetscBool flg_row=PETSC_TRUE;
2508 
2509     ierr = MatConvert(matis->A,MATMPIADJ,MAT_INITIAL_MATRIX,&mat_adj);CHKERRQ(ierr);
2510     ierr = MatGetRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr);
2511     if (!flg_row) {
2512       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatGetRowIJ called in %s\n",__FUNCT__);
2513     }
2514     if (pcbddc->use_local_adj) {
2515       ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr);
2516       pcbddc->deluxe_compute_rowadj = PETSC_FALSE;
2517     } else { /* just compute subdomain's connected components */
2518       IS                     is_dummy;
2519       ISLocalToGlobalMapping l2gmap_dummy;
2520       PetscInt               j,sum;
2521       PetscInt               *cxadj,*cadjncy;
2522       const PetscInt         *idxs;
2523       PCBDDCGraph            graph;
2524       PetscBT                is_on_boundary;
2525 
2526       ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n,0,1,&is_dummy);CHKERRQ(ierr);
2527       ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr);
2528       ierr = ISDestroy(&is_dummy);CHKERRQ(ierr);
2529       ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr);
2530       ierr = PCBDDCGraphInit(graph,l2gmap_dummy);CHKERRQ(ierr);
2531       ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr);
2532       graph->xadj = xadj;
2533       graph->adjncy = adjncy;
2534       ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr);
2535       ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr);
2536 
2537       if (pcbddc->dbg_flag) {
2538         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] Found %d subdomains\n",PetscGlobalRank,graph->ncc);CHKERRQ(ierr);
2539         for (i=0;i<graph->ncc;i++) {
2540           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] %d cc size %d\n",PetscGlobalRank,i,graph->cptr[i+1]-graph->cptr[i]);CHKERRQ(ierr);
2541         }
2542         ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2543       }
2544 
2545       ierr = PetscBTCreate(nvtxs,&is_on_boundary);CHKERRQ(ierr);
2546       ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr);
2547       for (i=0;i<pcis->n_B;i++) {
2548         ierr = PetscBTSet(is_on_boundary,idxs[i]);CHKERRQ(ierr);
2549       }
2550       ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr);
2551 
2552       ierr = PetscCalloc1(nvtxs+1,&cxadj);CHKERRQ(ierr);
2553       sum = 0;
2554       for (i=0;i<graph->ncc;i++) {
2555         PetscInt sizecc = 0;
2556         for (j=graph->cptr[i];j<graph->cptr[i+1];j++) {
2557           if (PetscBTLookup(is_on_boundary,graph->queue[j])) {
2558             sizecc++;
2559           }
2560         }
2561         for (j=graph->cptr[i];j<graph->cptr[i+1];j++) {
2562           if (PetscBTLookup(is_on_boundary,graph->queue[j])) {
2563             cxadj[graph->queue[j]] = sizecc;
2564           }
2565         }
2566         sum += sizecc*sizecc;
2567       }
2568       ierr = PetscMalloc1(sum,&cadjncy);CHKERRQ(ierr);
2569       sum = 0;
2570       for (i=0;i<nvtxs;i++) {
2571         PetscInt temp = cxadj[i];
2572         cxadj[i] = sum;
2573         sum += temp;
2574       }
2575       cxadj[nvtxs] = sum;
2576       for (i=0;i<graph->ncc;i++) {
2577         for (j=graph->cptr[i];j<graph->cptr[i+1];j++) {
2578           if (PetscBTLookup(is_on_boundary,graph->queue[j])) {
2579             PetscInt k,sizecc = 0;
2580             for (k=graph->cptr[i];k<graph->cptr[i+1];k++) {
2581               if (PetscBTLookup(is_on_boundary,graph->queue[k])) {
2582                 cadjncy[cxadj[graph->queue[j]]+sizecc]=graph->queue[k];
2583                 sizecc++;
2584               }
2585             }
2586           }
2587         }
2588       }
2589       if (nvtxs) {
2590         ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,cxadj,cadjncy,PETSC_OWN_POINTER);CHKERRQ(ierr);
2591       } else {
2592         ierr = PetscFree(cxadj);CHKERRQ(ierr);
2593         ierr = PetscFree(cadjncy);CHKERRQ(ierr);
2594       }
2595       graph->xadj = 0;
2596       graph->adjncy = 0;
2597       ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr);
2598       ierr = PetscBTDestroy(&is_on_boundary);CHKERRQ(ierr);
2599     }
2600     ierr = MatRestoreRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr);
2601     if (!flg_row) {
2602       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatRestoreRowIJ called in %s\n",__FUNCT__);
2603     }
2604     ierr = MatDestroy(&mat_adj);CHKERRQ(ierr);
2605   }
2606 
2607   /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting or PCBDDCSetDofsSplittingLocal */
2608   vertex_size = 1;
2609   if (pcbddc->user_provided_isfordofs) {
2610     if (pcbddc->n_ISForDofs) { /* need to convert from global to local and remove references to global dofs splitting */
2611       ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr);
2612       for (i=0;i<pcbddc->n_ISForDofs;i++) {
2613         ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr);
2614         ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr);
2615       }
2616       pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs;
2617       pcbddc->n_ISForDofs = 0;
2618       ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr);
2619     }
2620     /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */
2621     ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr);
2622   } else {
2623     if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering */
2624       ierr = MatGetBlockSize(pc->pmat,&pcbddc->n_ISForDofsLocal);CHKERRQ(ierr);
2625       ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr);
2626       for (i=0;i<pcbddc->n_ISForDofsLocal;i++) {
2627         ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),pcis->n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr);
2628       }
2629     }
2630   }
2631 
2632   /* Setup of Graph */
2633   if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { /* need to convert from global to local */
2634     ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr);
2635   }
2636   if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { /* need to convert from global to local */
2637     ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr);
2638   }
2639   ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices);
2640 
2641   /* Graph's connected components analysis */
2642   ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr);
2643 
2644   /* print some info to stdout */
2645   if (pcbddc->dbg_flag) {
2646     ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer);
2647   }
2648 
2649   /* mark topography has done */
2650   pcbddc->recompute_topography = PETSC_FALSE;
2651   PetscFunctionReturn(0);
2652 }
2653 
2654 #undef __FUNCT__
2655 #define __FUNCT__ "PCBDDCGetPrimalVerticesLocalIdx"
2656 PetscErrorCode  PCBDDCGetPrimalVerticesLocalIdx(PC pc, PetscInt *n_vertices, PetscInt **vertices_idx)
2657 {
2658   PC_BDDC        *pcbddc = (PC_BDDC*)(pc->data);
2659   PetscInt       *vertices,*row_cmat_indices,n,i,size_of_constraint,local_primal_size;
2660   PetscErrorCode ierr;
2661 
2662   PetscFunctionBegin;
2663   n = 0;
2664   vertices = 0;
2665   if (pcbddc->ConstraintMatrix) {
2666     ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&i);CHKERRQ(ierr);
2667     for (i=0;i<local_primal_size;i++) {
2668       ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
2669       if (size_of_constraint == 1) n++;
2670       ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
2671     }
2672     if (vertices_idx) {
2673       ierr = PetscMalloc1(n,&vertices);CHKERRQ(ierr);
2674       n = 0;
2675       for (i=0;i<local_primal_size;i++) {
2676         ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
2677         if (size_of_constraint == 1) {
2678           vertices[n++]=row_cmat_indices[0];
2679         }
2680         ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
2681       }
2682     }
2683   }
2684   *n_vertices = n;
2685   if (vertices_idx) *vertices_idx = vertices;
2686   PetscFunctionReturn(0);
2687 }
2688 
2689 #undef __FUNCT__
2690 #define __FUNCT__ "PCBDDCGetPrimalConstraintsLocalIdx"
2691 PetscErrorCode  PCBDDCGetPrimalConstraintsLocalIdx(PC pc, PetscInt *n_constraints, PetscInt **constraints_idx)
2692 {
2693   PC_BDDC        *pcbddc = (PC_BDDC*)(pc->data);
2694   PetscInt       *constraints_index,*row_cmat_indices,*row_cmat_global_indices;
2695   PetscInt       n,i,j,size_of_constraint,local_primal_size,local_size,max_size_of_constraint,min_index,min_loc;
2696   PetscBT        touched;
2697   PetscErrorCode ierr;
2698 
2699     /* This function assumes that the number of local constraints per connected component
2700        is not greater than the number of nodes defined for the connected component
2701        (otherwise we will surely have linear dependence between constraints and thus a singular coarse problem) */
2702   PetscFunctionBegin;
2703   n = 0;
2704   constraints_index = 0;
2705   if (pcbddc->ConstraintMatrix) {
2706     ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&local_size);CHKERRQ(ierr);
2707     max_size_of_constraint = 0;
2708     for (i=0;i<local_primal_size;i++) {
2709       ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
2710       if (size_of_constraint > 1) {
2711         n++;
2712       }
2713       max_size_of_constraint = PetscMax(size_of_constraint,max_size_of_constraint);
2714       ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
2715     }
2716     if (constraints_idx) {
2717       ierr = PetscMalloc1(n,&constraints_index);CHKERRQ(ierr);
2718       ierr = PetscMalloc1(max_size_of_constraint,&row_cmat_global_indices);CHKERRQ(ierr);
2719       ierr = PetscBTCreate(local_size,&touched);CHKERRQ(ierr);
2720       n = 0;
2721       for (i=0;i<local_primal_size;i++) {
2722         ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
2723         if (size_of_constraint > 1) {
2724           ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,row_cmat_indices,row_cmat_global_indices);CHKERRQ(ierr);
2725           /* find first untouched local node */
2726           j = 0;
2727           while (PetscBTLookup(touched,row_cmat_indices[j])) j++;
2728           min_index = row_cmat_global_indices[j];
2729           min_loc = j;
2730           /* search the minimum among nodes not yet touched on the connected component
2731              since there can be more than one constraint on a single cc */
2732           for (j=1;j<size_of_constraint;j++) {
2733             if (!PetscBTLookup(touched,row_cmat_indices[j]) && min_index > row_cmat_global_indices[j]) {
2734               min_index = row_cmat_global_indices[j];
2735               min_loc = j;
2736             }
2737           }
2738           ierr = PetscBTSet(touched,row_cmat_indices[min_loc]);CHKERRQ(ierr);
2739           constraints_index[n++] = row_cmat_indices[min_loc];
2740         }
2741         ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
2742       }
2743       ierr = PetscBTDestroy(&touched);CHKERRQ(ierr);
2744       ierr = PetscFree(row_cmat_global_indices);CHKERRQ(ierr);
2745     }
2746   }
2747   *n_constraints = n;
2748   if (constraints_idx) *constraints_idx = constraints_index;
2749   PetscFunctionReturn(0);
2750 }
2751 
2752 #undef __FUNCT__
2753 #define __FUNCT__ "PCBDDCSubsetNumbering"
2754 PetscErrorCode PCBDDCSubsetNumbering(MPI_Comm comm,ISLocalToGlobalMapping l2gmap, PetscInt n_local_dofs, PetscInt local_dofs[], PetscInt local_dofs_mult[], PetscInt* n_global_subset, PetscInt* global_numbering_subset[])
2755 {
2756   Vec            local_vec,global_vec;
2757   IS             seqis,paris;
2758   VecScatter     scatter_ctx;
2759   PetscScalar    *array;
2760   PetscInt       *temp_global_dofs;
2761   PetscScalar    globalsum;
2762   PetscInt       i,j,s;
2763   PetscInt       nlocals,first_index,old_index,max_local;
2764   PetscMPIInt    rank_prec_comm,size_prec_comm,max_global;
2765   PetscMPIInt    *dof_sizes,*dof_displs;
2766   PetscBool      first_found;
2767   PetscErrorCode ierr;
2768 
2769   PetscFunctionBegin;
2770   /* mpi buffers */
2771   ierr = MPI_Comm_size(comm,&size_prec_comm);CHKERRQ(ierr);
2772   ierr = MPI_Comm_rank(comm,&rank_prec_comm);CHKERRQ(ierr);
2773   j = ( !rank_prec_comm ? size_prec_comm : 0);
2774   ierr = PetscMalloc1(j,&dof_sizes);CHKERRQ(ierr);
2775   ierr = PetscMalloc1(j,&dof_displs);CHKERRQ(ierr);
2776   /* get maximum size of subset */
2777   ierr = PetscMalloc1(n_local_dofs,&temp_global_dofs);CHKERRQ(ierr);
2778   ierr = ISLocalToGlobalMappingApply(l2gmap,n_local_dofs,local_dofs,temp_global_dofs);CHKERRQ(ierr);
2779   max_local = 0;
2780   for (i=0;i<n_local_dofs;i++) {
2781     if (max_local < temp_global_dofs[i] ) {
2782       max_local = temp_global_dofs[i];
2783     }
2784   }
2785   ierr = MPI_Allreduce(&max_local,&max_global,1,MPIU_INT,MPI_MAX,comm);CHKERRQ(ierr);
2786   max_global++;
2787   max_local = 0;
2788   for (i=0;i<n_local_dofs;i++) {
2789     if (max_local < local_dofs[i] ) {
2790       max_local = local_dofs[i];
2791     }
2792   }
2793   max_local++;
2794   /* allocate workspace */
2795   ierr = VecCreate(PETSC_COMM_SELF,&local_vec);CHKERRQ(ierr);
2796   ierr = VecSetSizes(local_vec,PETSC_DECIDE,max_local);CHKERRQ(ierr);
2797   ierr = VecSetType(local_vec,VECSEQ);CHKERRQ(ierr);
2798   ierr = VecCreate(comm,&global_vec);CHKERRQ(ierr);
2799   ierr = VecSetSizes(global_vec,PETSC_DECIDE,max_global);CHKERRQ(ierr);
2800   ierr = VecSetType(global_vec,VECMPI);CHKERRQ(ierr);
2801   /* create scatter */
2802   ierr = ISCreateGeneral(PETSC_COMM_SELF,n_local_dofs,local_dofs,PETSC_COPY_VALUES,&seqis);CHKERRQ(ierr);
2803   ierr = ISCreateGeneral(comm,n_local_dofs,temp_global_dofs,PETSC_COPY_VALUES,&paris);CHKERRQ(ierr);
2804   ierr = VecScatterCreate(local_vec,seqis,global_vec,paris,&scatter_ctx);CHKERRQ(ierr);
2805   ierr = ISDestroy(&seqis);CHKERRQ(ierr);
2806   ierr = ISDestroy(&paris);CHKERRQ(ierr);
2807   /* init array */
2808   ierr = VecSet(global_vec,0.0);CHKERRQ(ierr);
2809   ierr = VecSet(local_vec,0.0);CHKERRQ(ierr);
2810   ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr);
2811   if (local_dofs_mult) {
2812     for (i=0;i<n_local_dofs;i++) {
2813       array[local_dofs[i]]=(PetscScalar)local_dofs_mult[i];
2814     }
2815   } else {
2816     for (i=0;i<n_local_dofs;i++) {
2817       array[local_dofs[i]]=1.0;
2818     }
2819   }
2820   ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr);
2821   /* scatter into global vec and get total number of global dofs */
2822   ierr = VecScatterBegin(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2823   ierr = VecScatterEnd(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2824   ierr = VecSum(global_vec,&globalsum);CHKERRQ(ierr);
2825   *n_global_subset = (PetscInt)PetscRealPart(globalsum);
2826   /* Fill global_vec with cumulative function for global numbering */
2827   ierr = VecGetArray(global_vec,&array);CHKERRQ(ierr);
2828   ierr = VecGetLocalSize(global_vec,&s);CHKERRQ(ierr);
2829   nlocals = 0;
2830   first_index = -1;
2831   first_found = PETSC_FALSE;
2832   for (i=0;i<s;i++) {
2833     if (!first_found && PetscRealPart(array[i]) > 0.1) {
2834       first_found = PETSC_TRUE;
2835       first_index = i;
2836     }
2837     nlocals += (PetscInt)PetscRealPart(array[i]);
2838   }
2839   ierr = MPI_Gather(&nlocals,1,MPIU_INT,dof_sizes,1,MPIU_INT,0,comm);CHKERRQ(ierr);
2840   if (!rank_prec_comm) {
2841     dof_displs[0]=0;
2842     for (i=1;i<size_prec_comm;i++) {
2843       dof_displs[i] = dof_displs[i-1]+dof_sizes[i-1];
2844     }
2845   }
2846   ierr = MPI_Scatter(dof_displs,1,MPIU_INT,&nlocals,1,MPIU_INT,0,comm);CHKERRQ(ierr);
2847   if (first_found) {
2848     array[first_index] += (PetscScalar)nlocals;
2849     old_index = first_index;
2850     for (i=first_index+1;i<s;i++) {
2851       if (PetscRealPart(array[i]) > 0.1) {
2852         array[i] += array[old_index];
2853         old_index = i;
2854       }
2855     }
2856   }
2857   ierr = VecRestoreArray(global_vec,&array);CHKERRQ(ierr);
2858   ierr = VecSet(local_vec,0.0);CHKERRQ(ierr);
2859   ierr = VecScatterBegin(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2860   ierr = VecScatterEnd(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2861   /* get global ordering of local dofs */
2862   ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr);
2863   if (local_dofs_mult) {
2864     for (i=0;i<n_local_dofs;i++) {
2865       temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-local_dofs_mult[i];
2866     }
2867   } else {
2868     for (i=0;i<n_local_dofs;i++) {
2869       temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-1;
2870     }
2871   }
2872   ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr);
2873   /* free workspace */
2874   ierr = VecScatterDestroy(&scatter_ctx);CHKERRQ(ierr);
2875   ierr = VecDestroy(&local_vec);CHKERRQ(ierr);
2876   ierr = VecDestroy(&global_vec);CHKERRQ(ierr);
2877   ierr = PetscFree(dof_sizes);CHKERRQ(ierr);
2878   ierr = PetscFree(dof_displs);CHKERRQ(ierr);
2879   /* return pointer to global ordering of local dofs */
2880   *global_numbering_subset = temp_global_dofs;
2881   PetscFunctionReturn(0);
2882 }
2883 
2884 #undef __FUNCT__
2885 #define __FUNCT__ "PCBDDCOrthonormalizeVecs"
2886 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[])
2887 {
2888   PetscInt       i,j;
2889   PetscScalar    *alphas;
2890   PetscErrorCode ierr;
2891 
2892   PetscFunctionBegin;
2893   /* this implements stabilized Gram-Schmidt */
2894   ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr);
2895   for (i=0;i<n;i++) {
2896     ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr);
2897     if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); }
2898     for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); }
2899   }
2900   ierr = PetscFree(alphas);CHKERRQ(ierr);
2901   PetscFunctionReturn(0);
2902 }
2903 
2904 #undef __FUNCT__
2905 #define __FUNCT__ "MatISGetSubassemblingPattern"
2906 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt n_subdomains, PetscBool contiguous, IS* is_sends)
2907 {
2908   Mat             subdomain_adj;
2909   IS              new_ranks,ranks_send_to;
2910   MatPartitioning partitioner;
2911   Mat_IS          *matis;
2912   PetscInt        n_neighs,*neighs,*n_shared,**shared;
2913   PetscInt        prank;
2914   PetscMPIInt     size,rank,color;
2915   PetscInt        *xadj,*adjncy,*oldranks;
2916   PetscInt        *adjncy_wgt,*v_wgt,*is_indices,*ranks_send_to_idx;
2917   PetscInt        i,local_size,threshold=0;
2918   PetscErrorCode  ierr;
2919   PetscBool       use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE;
2920   PetscSubcomm    subcomm;
2921 
2922   PetscFunctionBegin;
2923   ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr);
2924   ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr);
2925   ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr);
2926 
2927   /* Get info on mapping */
2928   matis = (Mat_IS*)(mat->data);
2929   ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&local_size);CHKERRQ(ierr);
2930   ierr = ISLocalToGlobalMappingGetInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr);
2931 
2932   /* build local CSR graph of subdomains' connectivity */
2933   ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr);
2934   xadj[0] = 0;
2935   xadj[1] = PetscMax(n_neighs-1,0);
2936   ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr);
2937   ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr);
2938 
2939   if (threshold) {
2940     PetscInt xadj_count = 0;
2941     for (i=1;i<n_neighs;i++) {
2942       if (n_shared[i] > threshold) {
2943         adjncy[xadj_count] = neighs[i];
2944         adjncy_wgt[xadj_count] = n_shared[i];
2945         xadj_count++;
2946       }
2947     }
2948     xadj[1] = xadj_count;
2949   } else {
2950     if (xadj[1]) {
2951       ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr);
2952       ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr);
2953     }
2954   }
2955   ierr = ISLocalToGlobalMappingRestoreInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr);
2956   if (use_square) {
2957     for (i=0;i<xadj[1];i++) {
2958       adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i];
2959     }
2960   }
2961   ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr);
2962 
2963   ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr);
2964 
2965   /*
2966     Restrict work on active processes only.
2967   */
2968   ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr);
2969   ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */
2970   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
2971   ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr);
2972   ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr);
2973   if (color) {
2974     ierr = PetscFree(xadj);CHKERRQ(ierr);
2975     ierr = PetscFree(adjncy);CHKERRQ(ierr);
2976     ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr);
2977   } else {
2978     PetscInt coarsening_ratio;
2979     ierr = MPI_Comm_size(subcomm->comm,&size);CHKERRQ(ierr);
2980     ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr);
2981     prank = rank;
2982     ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,subcomm->comm);CHKERRQ(ierr);
2983     /*
2984     for (i=0;i<size;i++) {
2985       PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]);
2986     }
2987     */
2988     for (i=0;i<xadj[1];i++) {
2989       ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr);
2990     }
2991     ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr);
2992     ierr = MatCreateMPIAdj(subcomm->comm,1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr);
2993     /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */
2994 
2995     /* Partition */
2996     ierr = MatPartitioningCreate(subcomm->comm,&partitioner);CHKERRQ(ierr);
2997     ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr);
2998     if (use_vwgt) {
2999       ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr);
3000       v_wgt[0] = local_size;
3001       ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr);
3002     }
3003     n_subdomains = PetscMin((PetscInt)size,n_subdomains);
3004     coarsening_ratio = size/n_subdomains;
3005     ierr = MatPartitioningSetNParts(partitioner,n_subdomains);CHKERRQ(ierr);
3006     ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr);
3007     ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr);
3008     /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */
3009 
3010     ierr = ISGetIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr);
3011     if (contiguous) {
3012       ranks_send_to_idx[0] = oldranks[is_indices[0]]; /* contiguos set of processes */
3013     } else {
3014       ranks_send_to_idx[0] = coarsening_ratio*oldranks[is_indices[0]]; /* scattered set of processes */
3015     }
3016     ierr = ISRestoreIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr);
3017     /* clean up */
3018     ierr = PetscFree(oldranks);CHKERRQ(ierr);
3019     ierr = ISDestroy(&new_ranks);CHKERRQ(ierr);
3020     ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr);
3021     ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr);
3022   }
3023   ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr);
3024 
3025   /* assemble parallel IS for sends */
3026   i = 1;
3027   if (color) i=0;
3028   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr);
3029 
3030   /* get back IS */
3031   *is_sends = ranks_send_to;
3032   PetscFunctionReturn(0);
3033 }
3034 
3035 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate;
3036 
3037 #undef __FUNCT__
3038 #define __FUNCT__ "MatISSubassemble"
3039 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, MatReuse reuse, Mat *mat_n, PetscInt nis, IS isarray[])
3040 {
3041   Mat                    local_mat;
3042   Mat_IS                 *matis;
3043   IS                     is_sends_internal;
3044   PetscInt               rows,cols,new_local_rows;
3045   PetscInt               i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals;
3046   PetscBool              ismatis,isdense,newisdense,destroy_mat;
3047   ISLocalToGlobalMapping l2gmap;
3048   PetscInt*              l2gmap_indices;
3049   const PetscInt*        is_indices;
3050   MatType                new_local_type;
3051   /* buffers */
3052   PetscInt               *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs;
3053   PetscInt               *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is;
3054   PetscInt               *recv_buffer_idxs_local;
3055   PetscScalar            *ptr_vals,*send_buffer_vals,*recv_buffer_vals;
3056   /* MPI */
3057   MPI_Comm               comm,comm_n;
3058   PetscSubcomm           subcomm;
3059   PetscMPIInt            n_sends,n_recvs,commsize;
3060   PetscMPIInt            *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is;
3061   PetscMPIInt            *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals;
3062   PetscMPIInt            len,tag_idxs,tag_idxs_is,tag_vals,source_dest;
3063   MPI_Request            *send_req_idxs,*send_req_idxs_is,*send_req_vals;
3064   MPI_Request            *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals;
3065   PetscErrorCode         ierr;
3066 
3067   PetscFunctionBegin;
3068   /* TODO: add missing checks */
3069   PetscValidLogicalCollectiveInt(mat,n_subdomains,3);
3070   PetscValidLogicalCollectiveBool(mat,restrict_comm,4);
3071   PetscValidLogicalCollectiveEnum(mat,reuse,5);
3072   PetscValidLogicalCollectiveInt(mat,nis,7);
3073   ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr);
3074   if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__);
3075   ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr);
3076   ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr);
3077   if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE");
3078   ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr);
3079   if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square");
3080   if (reuse == MAT_REUSE_MATRIX && *mat_n) {
3081     PetscInt mrows,mcols,mnrows,mncols;
3082     ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr);
3083     if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS");
3084     ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr);
3085     ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr);
3086     if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows);
3087     if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols);
3088   }
3089   ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr);
3090   PetscValidLogicalCollectiveInt(mat,bs,0);
3091   /* prepare IS for sending if not provided */
3092   if (!is_sends) {
3093     PetscBool pcontig = PETSC_TRUE;
3094     if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains");
3095     ierr = MatISGetSubassemblingPattern(mat,n_subdomains,pcontig,&is_sends_internal);CHKERRQ(ierr);
3096   } else {
3097     ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr);
3098     is_sends_internal = is_sends;
3099   }
3100 
3101   /* get pointer of MATIS data */
3102   matis = (Mat_IS*)mat->data;
3103 
3104   /* get comm */
3105   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3106 
3107   /* compute number of sends */
3108   ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr);
3109   ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr);
3110 
3111   /* compute number of receives */
3112   ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr);
3113   ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr);
3114   ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr);
3115   ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr);
3116   for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1;
3117   ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr);
3118   ierr = PetscFree(iflags);CHKERRQ(ierr);
3119 
3120   /* restrict comm if requested */
3121   subcomm = 0;
3122   destroy_mat = PETSC_FALSE;
3123   if (restrict_comm) {
3124     PetscMPIInt color,rank,subcommsize;
3125     ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3126     color = 0;
3127     if (n_sends && !n_recvs) color = 1; /* sending only processes will not partecipate in new comm */
3128     ierr = MPI_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
3129     subcommsize = commsize - subcommsize;
3130     /* check if reuse has been requested */
3131     if (reuse == MAT_REUSE_MATRIX) {
3132       if (*mat_n) {
3133         PetscMPIInt subcommsize2;
3134         ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr);
3135         if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2);
3136         comm_n = PetscObjectComm((PetscObject)*mat_n);
3137       } else {
3138         comm_n = PETSC_COMM_SELF;
3139       }
3140     } else { /* MAT_INITIAL_MATRIX */
3141       ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr);
3142       ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr);
3143       ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr);
3144       comm_n = subcomm->comm;
3145     }
3146     /* flag to destroy *mat_n if not significative */
3147     if (color) destroy_mat = PETSC_TRUE;
3148   } else {
3149     comm_n = comm;
3150   }
3151 
3152   /* prepare send/receive buffers */
3153   ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr);
3154   ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr);
3155   ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr);
3156   ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr);
3157   if (nis) {
3158     ierr = PetscCalloc1(commsize,&ilengths_idxs_is);CHKERRQ(ierr);
3159   }
3160 
3161   /* Get data from local matrices */
3162   if (!isdense) {
3163     SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented");
3164     /* TODO: See below some guidelines on how to prepare the local buffers */
3165     /*
3166        send_buffer_vals should contain the raw values of the local matrix
3167        send_buffer_idxs should contain:
3168        - MatType_PRIVATE type
3169        - PetscInt        size_of_l2gmap
3170        - PetscInt        global_row_indices[size_of_l2gmap]
3171        - PetscInt        all_other_info_which_is_needed_to_compute_preallocation_and_set_values
3172     */
3173   } else {
3174     ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr);
3175     ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&i);CHKERRQ(ierr);
3176     ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr);
3177     send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE;
3178     send_buffer_idxs[1] = i;
3179     ierr = ISLocalToGlobalMappingGetIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr);
3180     ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr);
3181     ierr = ISLocalToGlobalMappingRestoreIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr);
3182     ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr);
3183     for (i=0;i<n_sends;i++) {
3184       ilengths_vals[is_indices[i]] = len*len;
3185       ilengths_idxs[is_indices[i]] = len+2;
3186     }
3187   }
3188   ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr);
3189   /* additional is (if any) */
3190   if (nis) {
3191     PetscMPIInt psum;
3192     PetscInt j;
3193     for (j=0,psum=0;j<nis;j++) {
3194       PetscInt plen;
3195       ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr);
3196       ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr);
3197       psum += len+1; /* indices + lenght */
3198     }
3199     ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr);
3200     for (j=0,psum=0;j<nis;j++) {
3201       PetscInt plen;
3202       const PetscInt *is_array_idxs;
3203       ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr);
3204       send_buffer_idxs_is[psum] = plen;
3205       ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr);
3206       ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr);
3207       ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr);
3208       psum += plen+1; /* indices + lenght */
3209     }
3210     for (i=0;i<n_sends;i++) {
3211       ilengths_idxs_is[is_indices[i]] = psum;
3212     }
3213     ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr);
3214   }
3215 
3216   buf_size_idxs = 0;
3217   buf_size_vals = 0;
3218   buf_size_idxs_is = 0;
3219   for (i=0;i<n_recvs;i++) {
3220     buf_size_idxs += (PetscInt)olengths_idxs[i];
3221     buf_size_vals += (PetscInt)olengths_vals[i];
3222     if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i];
3223   }
3224   ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr);
3225   ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr);
3226   ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr);
3227 
3228   /* get new tags for clean communications */
3229   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr);
3230   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr);
3231   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr);
3232 
3233   /* allocate for requests */
3234   ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr);
3235   ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr);
3236   ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr);
3237   ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr);
3238   ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr);
3239   ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr);
3240 
3241   /* communications */
3242   ptr_idxs = recv_buffer_idxs;
3243   ptr_vals = recv_buffer_vals;
3244   ptr_idxs_is = recv_buffer_idxs_is;
3245   for (i=0;i<n_recvs;i++) {
3246     source_dest = onodes[i];
3247     ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr);
3248     ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr);
3249     ptr_idxs += olengths_idxs[i];
3250     ptr_vals += olengths_vals[i];
3251     if (nis) {
3252       ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr);
3253       ptr_idxs_is += olengths_idxs_is[i];
3254     }
3255   }
3256   for (i=0;i<n_sends;i++) {
3257     ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr);
3258     ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr);
3259     ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr);
3260     if (nis) {
3261       ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr);
3262     }
3263   }
3264   ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr);
3265   ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr);
3266 
3267   /* assemble new l2g map */
3268   ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3269   ptr_idxs = recv_buffer_idxs;
3270   new_local_rows = 0;
3271   for (i=0;i<n_recvs;i++) {
3272     new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
3273     ptr_idxs += olengths_idxs[i];
3274   }
3275   ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr);
3276   ptr_idxs = recv_buffer_idxs;
3277   new_local_rows = 0;
3278   for (i=0;i<n_recvs;i++) {
3279     ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr);
3280     new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
3281     ptr_idxs += olengths_idxs[i];
3282   }
3283   ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr);
3284   ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr);
3285   ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr);
3286 
3287   /* infer new local matrix type from received local matrices type */
3288   /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */
3289   /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */
3290   if (n_recvs) {
3291     MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0];
3292     ptr_idxs = recv_buffer_idxs;
3293     for (i=0;i<n_recvs;i++) {
3294       if ((PetscInt)new_local_type_private != *ptr_idxs) {
3295         new_local_type_private = MATAIJ_PRIVATE;
3296         break;
3297       }
3298       ptr_idxs += olengths_idxs[i];
3299     }
3300     switch (new_local_type_private) {
3301       case MATDENSE_PRIVATE:
3302         if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */
3303           new_local_type = MATSEQAIJ;
3304           bs = 1;
3305         } else { /* if I receive only 1 dense matrix */
3306           new_local_type = MATSEQDENSE;
3307           bs = 1;
3308         }
3309         break;
3310       case MATAIJ_PRIVATE:
3311         new_local_type = MATSEQAIJ;
3312         bs = 1;
3313         break;
3314       case MATBAIJ_PRIVATE:
3315         new_local_type = MATSEQBAIJ;
3316         break;
3317       case MATSBAIJ_PRIVATE:
3318         new_local_type = MATSEQSBAIJ;
3319         break;
3320       default:
3321         SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,__FUNCT__);
3322         break;
3323     }
3324   } else { /* by default, new_local_type is seqdense */
3325     new_local_type = MATSEQDENSE;
3326     bs = 1;
3327   }
3328 
3329   /* create MATIS object if needed */
3330   if (reuse == MAT_INITIAL_MATRIX) {
3331     ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr);
3332     ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,mat_n);CHKERRQ(ierr);
3333   } else {
3334     /* it also destroys the local matrices */
3335     ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr);
3336   }
3337   ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr);
3338   ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr);
3339 
3340   ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3341 
3342   /* Global to local map of received indices */
3343   ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */
3344   ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr);
3345   ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr);
3346 
3347   /* restore attributes -> type of incoming data and its size */
3348   buf_size_idxs = 0;
3349   for (i=0;i<n_recvs;i++) {
3350     recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs];
3351     recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1];
3352     buf_size_idxs += (PetscInt)olengths_idxs[i];
3353   }
3354   ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr);
3355 
3356   /* set preallocation */
3357   ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr);
3358   if (!newisdense) {
3359     PetscInt *new_local_nnz=0;
3360 
3361     ptr_vals = recv_buffer_vals;
3362     ptr_idxs = recv_buffer_idxs_local;
3363     if (n_recvs) {
3364       ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr);
3365     }
3366     for (i=0;i<n_recvs;i++) {
3367       PetscInt j;
3368       if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */
3369         for (j=0;j<*(ptr_idxs+1);j++) {
3370           new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1);
3371         }
3372       } else {
3373         /* TODO */
3374       }
3375       ptr_idxs += olengths_idxs[i];
3376     }
3377     if (new_local_nnz) {
3378       for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows);
3379       ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr);
3380       for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs;
3381       ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr);
3382       for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0);
3383       ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr);
3384     } else {
3385       ierr = MatSetUp(local_mat);CHKERRQ(ierr);
3386     }
3387     ierr = PetscFree(new_local_nnz);CHKERRQ(ierr);
3388   } else {
3389     ierr = MatSetUp(local_mat);CHKERRQ(ierr);
3390   }
3391 
3392   /* set values */
3393   ptr_vals = recv_buffer_vals;
3394   ptr_idxs = recv_buffer_idxs_local;
3395   for (i=0;i<n_recvs;i++) {
3396     if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */
3397       ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
3398       ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr);
3399       ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
3400       ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
3401       ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr);
3402     } else {
3403       /* TODO */
3404     }
3405     ptr_idxs += olengths_idxs[i];
3406     ptr_vals += olengths_vals[i];
3407   }
3408   ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3409   ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3410   ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3411   ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3412   ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr);
3413   ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr);
3414 
3415 #if 0
3416   if (!restrict_comm) { /* check */
3417     Vec       lvec,rvec;
3418     PetscReal infty_error;
3419 
3420     ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr);
3421     ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr);
3422     ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr);
3423     ierr = VecScale(lvec,-1.0);CHKERRQ(ierr);
3424     ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr);
3425     ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr);
3426     ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error);
3427     ierr = VecDestroy(&rvec);CHKERRQ(ierr);
3428     ierr = VecDestroy(&lvec);CHKERRQ(ierr);
3429   }
3430 #endif
3431 
3432   /* assemble new additional is (if any) */
3433   if (nis) {
3434     PetscInt **temp_idxs,*count_is,j,psum;
3435 
3436     ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3437     ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr);
3438     ptr_idxs = recv_buffer_idxs_is;
3439     psum = 0;
3440     for (i=0;i<n_recvs;i++) {
3441       for (j=0;j<nis;j++) {
3442         PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */
3443         count_is[j] += plen; /* increment counting of buffer for j-th IS */
3444         psum += plen;
3445         ptr_idxs += plen+1; /* shift pointer to received data */
3446       }
3447     }
3448     ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr);
3449     ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr);
3450     for (i=1;i<nis;i++) {
3451       temp_idxs[i] = temp_idxs[i-1]+count_is[i-1];
3452     }
3453     ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr);
3454     ptr_idxs = recv_buffer_idxs_is;
3455     for (i=0;i<n_recvs;i++) {
3456       for (j=0;j<nis;j++) {
3457         PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */
3458         ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr);
3459         count_is[j] += plen; /* increment starting point of buffer for j-th IS */
3460         ptr_idxs += plen+1; /* shift pointer to received data */
3461       }
3462     }
3463     for (i=0;i<nis;i++) {
3464       ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr);
3465       ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr);
3466       ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr);
3467     }
3468     ierr = PetscFree(count_is);CHKERRQ(ierr);
3469     ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr);
3470     ierr = PetscFree(temp_idxs);CHKERRQ(ierr);
3471   }
3472   /* free workspace */
3473   ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr);
3474   ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3475   ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr);
3476   ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3477   if (isdense) {
3478     ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr);
3479     ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr);
3480   } else {
3481     /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */
3482   }
3483   if (nis) {
3484     ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3485     ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr);
3486   }
3487   ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr);
3488   ierr = PetscFree(recv_req_vals);CHKERRQ(ierr);
3489   ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr);
3490   ierr = PetscFree(send_req_idxs);CHKERRQ(ierr);
3491   ierr = PetscFree(send_req_vals);CHKERRQ(ierr);
3492   ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr);
3493   ierr = PetscFree(ilengths_vals);CHKERRQ(ierr);
3494   ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr);
3495   ierr = PetscFree(olengths_vals);CHKERRQ(ierr);
3496   ierr = PetscFree(olengths_idxs);CHKERRQ(ierr);
3497   ierr = PetscFree(onodes);CHKERRQ(ierr);
3498   if (nis) {
3499     ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr);
3500     ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr);
3501     ierr = PetscFree(onodes_is);CHKERRQ(ierr);
3502   }
3503   ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr);
3504   if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */
3505     ierr = MatDestroy(mat_n);CHKERRQ(ierr);
3506     for (i=0;i<nis;i++) {
3507       ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr);
3508     }
3509   }
3510   PetscFunctionReturn(0);
3511 }
3512 
3513 /* temporary hack into ksp private data structure */
3514 #include <petsc-private/kspimpl.h>
3515 
3516 #undef __FUNCT__
3517 #define __FUNCT__ "PCBDDCSetUpCoarseSolver"
3518 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals)
3519 {
3520   PC_BDDC                *pcbddc = (PC_BDDC*)pc->data;
3521   PC_IS                  *pcis = (PC_IS*)pc->data;
3522   Mat                    coarse_mat,coarse_mat_is,coarse_submat_dense;
3523   MatNullSpace           CoarseNullSpace=NULL;
3524   ISLocalToGlobalMapping coarse_islg;
3525   IS                     coarse_is,*isarray;
3526   PetscInt               i,im_active=-1,active_procs=-1;
3527   PetscInt               nis,nisdofs,nisneu;
3528   PC                     pc_temp;
3529   PCType                 coarse_pc_type;
3530   KSPType                coarse_ksp_type;
3531   PetscBool              multilevel_requested,multilevel_allowed;
3532   PetscBool              isredundant,isbddc,isnn,coarse_reuse;
3533   Mat                    t_coarse_mat_is;
3534   PetscInt               void_procs,ncoarse_ml,ncoarse_ds,ncoarse;
3535   PetscMPIInt            all_procs;
3536   PetscBool              csin_ml,csin_ds,csin,csin_type_simple,redist;
3537   PetscBool              compute_vecs = PETSC_FALSE;
3538   PetscScalar            *array;
3539   PetscErrorCode         ierr;
3540 
3541   PetscFunctionBegin;
3542   /* Assign global numbering to coarse dofs */
3543   if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */
3544     compute_vecs = PETSC_TRUE;
3545     PetscInt ocoarse_size;
3546     ocoarse_size = pcbddc->coarse_size;
3547     ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr);
3548     ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr);
3549     /* see if we can avoid some work */
3550     if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */
3551       if (ocoarse_size != pcbddc->coarse_size) { /* ...but with different size, so reset it and set reuse flag to false */
3552         ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr);
3553         coarse_reuse = PETSC_FALSE;
3554       } else { /* we can safely reuse already computed coarse matrix */
3555         coarse_reuse = PETSC_TRUE;
3556       }
3557     } else { /* there's no coarse ksp, so we need to create the coarse matrix too */
3558       coarse_reuse = PETSC_FALSE;
3559     }
3560     /* reset any subassembling information */
3561     ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr);
3562     ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
3563   } else { /* primal space is unchanged, so we can reuse coarse matrix */
3564     coarse_reuse = PETSC_TRUE;
3565   }
3566 
3567   /* count "active" (i.e. with positive local size) and "void" processes */
3568   im_active = !!(pcis->n);
3569   ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
3570   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&all_procs);CHKERRQ(ierr);
3571   void_procs = all_procs-active_procs;
3572   csin_type_simple = PETSC_TRUE;
3573   redist = PETSC_FALSE;
3574   if (pcbddc->current_level && void_procs) {
3575     csin_ml = PETSC_TRUE;
3576     ncoarse_ml = void_procs;
3577     csin_ds = PETSC_TRUE;
3578     ncoarse_ds = void_procs;
3579   } else {
3580     csin_ml = PETSC_FALSE;
3581     ncoarse_ml = all_procs;
3582     if (void_procs) {
3583       csin_ds = PETSC_TRUE;
3584       ncoarse_ds = void_procs;
3585       csin_type_simple = PETSC_FALSE;
3586     } else {
3587       if (pcbddc->redistribute_coarse && pcbddc->redistribute_coarse < all_procs) {
3588         csin_ds = PETSC_TRUE;
3589         ncoarse_ds = pcbddc->redistribute_coarse;
3590         redist = PETSC_TRUE;
3591       } else {
3592         csin_ds = PETSC_FALSE;
3593         ncoarse_ds = all_procs;
3594       }
3595     }
3596   }
3597 
3598   /*
3599     test if we can go multilevel: three conditions must be satisfied:
3600     - we have not exceeded the number of levels requested
3601     - we can actually subassemble the active processes
3602     - we can find a suitable number of MPI processes where we can place the subassembled problem
3603   */
3604   multilevel_allowed = PETSC_FALSE;
3605   multilevel_requested = PETSC_FALSE;
3606   if (pcbddc->current_level < pcbddc->max_levels) {
3607     multilevel_requested = PETSC_TRUE;
3608     if (active_procs/pcbddc->coarsening_ratio < 2 || ncoarse_ml/pcbddc->coarsening_ratio < 2) {
3609       multilevel_allowed = PETSC_FALSE;
3610     } else {
3611       multilevel_allowed = PETSC_TRUE;
3612     }
3613   }
3614   /* determine number of process partecipating to coarse solver */
3615   if (multilevel_allowed) {
3616     ncoarse = ncoarse_ml;
3617     csin = csin_ml;
3618   } else {
3619     ncoarse = ncoarse_ds;
3620     csin = csin_ds;
3621   }
3622 
3623   /* creates temporary l2gmap and IS for coarse indexes */
3624   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr);
3625   ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr);
3626 
3627   /* creates temporary MATIS object for coarse matrix */
3628   ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr);
3629   ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr);
3630   ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr);
3631   ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr);
3632 #if 0
3633   {
3634     PetscViewer viewer;
3635     char filename[256];
3636     sprintf(filename,"local_coarse_mat%d.m",PetscGlobalRank);
3637     ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr);
3638     ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
3639     ierr = MatView(coarse_submat_dense,viewer);CHKERRQ(ierr);
3640     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
3641   }
3642 #endif
3643   ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,&t_coarse_mat_is);CHKERRQ(ierr);
3644   ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr);
3645   ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3646   ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3647   ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr);
3648 
3649   /* compute dofs splitting and neumann boundaries for coarse dofs */
3650   if (multilevel_allowed && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal) ) { /* protects from unneded computations */
3651     PetscInt               *tidxs,*tidxs2,nout,tsize,i;
3652     const PetscInt         *idxs;
3653     ISLocalToGlobalMapping tmap;
3654 
3655     /* create map between primal indices (in local representative ordering) and local primal numbering */
3656     ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr);
3657     /* allocate space for temporary storage */
3658     ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr);
3659     ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr);
3660     /* allocate for IS array */
3661     nisdofs = pcbddc->n_ISForDofsLocal;
3662     nisneu = !!pcbddc->NeumannBoundariesLocal;
3663     nis = nisdofs + nisneu;
3664     ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr);
3665     /* dofs splitting */
3666     for (i=0;i<nisdofs;i++) {
3667       /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */
3668       ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr);
3669       ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr);
3670       ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr);
3671       ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr);
3672       ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr);
3673       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->ISForDofsLocal[i]),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr);
3674       /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */
3675     }
3676     /* neumann boundaries */
3677     if (pcbddc->NeumannBoundariesLocal) {
3678       /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */
3679       ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr);
3680       ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr);
3681       ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr);
3682       ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr);
3683       ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr);
3684       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->NeumannBoundariesLocal),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr);
3685       /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */
3686     }
3687     /* free memory */
3688     ierr = PetscFree(tidxs);CHKERRQ(ierr);
3689     ierr = PetscFree(tidxs2);CHKERRQ(ierr);
3690     ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr);
3691   } else {
3692     nis = 0;
3693     nisdofs = 0;
3694     nisneu = 0;
3695     isarray = NULL;
3696   }
3697   /* destroy no longer needed map */
3698   ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr);
3699 
3700   /* restrict on coarse candidates (if needed) */
3701   coarse_mat_is = NULL;
3702   if (csin) {
3703     if (!pcbddc->coarse_subassembling_init ) { /* creates subassembling init pattern if not present */
3704       if (redist) {
3705         PetscMPIInt rank;
3706         PetscInt spc,n_spc_p1,dest[1];
3707 
3708         ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr);
3709         spc = all_procs/pcbddc->redistribute_coarse;
3710         n_spc_p1 = all_procs%pcbddc->redistribute_coarse;
3711         if (rank > n_spc_p1*(spc+1)-1) {
3712           dest[0] = n_spc_p1+(rank-(n_spc_p1*(spc+1)))/spc;
3713         } else {
3714           dest[0] = rank/(spc+1);
3715         }
3716         ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),1,dest,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
3717       } else {
3718         PetscInt j,tissize,*nisindices;
3719         PetscInt *coarse_candidates;
3720         const PetscInt* tisindices;
3721         /* get coarse candidates' ranks in pc communicator */
3722         ierr = PetscMalloc1(all_procs,&coarse_candidates);CHKERRQ(ierr);
3723         ierr = MPI_Allgather(&im_active,1,MPIU_INT,coarse_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
3724         for (i=0,j=0;i<all_procs;i++) {
3725           if (!coarse_candidates[i]) {
3726             coarse_candidates[j]=i;
3727             j++;
3728           }
3729         }
3730         if (j < ncoarse) SETERRQ2(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen! %d < %d",j,ncoarse);
3731         /* get a suitable subassembling pattern */
3732         if (csin_type_simple) {
3733           PetscMPIInt rank;
3734           PetscInt    issize,isidx;
3735           ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr);
3736           if (im_active) {
3737             issize = 1;
3738             isidx = (PetscInt)rank;
3739           } else {
3740             issize = 0;
3741             isidx = -1;
3742           }
3743           ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),issize,&isidx,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
3744         } else {
3745           ierr = MatISGetSubassemblingPattern(t_coarse_mat_is,ncoarse,PETSC_TRUE,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
3746         }
3747         if (pcbddc->dbg_flag) {
3748           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
3749           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init (before shift)\n");CHKERRQ(ierr);
3750           ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr);
3751           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse candidates\n");CHKERRQ(ierr);
3752           for (i=0;i<j;i++) {
3753             ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"%d ",coarse_candidates[i]);CHKERRQ(ierr);
3754           }
3755           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"\n");CHKERRQ(ierr);
3756           ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3757         }
3758         /* shift the pattern on coarse candidates */
3759         ierr = ISGetLocalSize(pcbddc->coarse_subassembling_init,&tissize);CHKERRQ(ierr);
3760         ierr = ISGetIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr);
3761         ierr = PetscMalloc1(tissize,&nisindices);CHKERRQ(ierr);
3762         for (i=0;i<tissize;i++) nisindices[i] = coarse_candidates[tisindices[i]];
3763         ierr = ISRestoreIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr);
3764         ierr = ISGeneralSetIndices(pcbddc->coarse_subassembling_init,tissize,nisindices,PETSC_OWN_POINTER);CHKERRQ(ierr);
3765         ierr = PetscFree(coarse_candidates);CHKERRQ(ierr);
3766       }
3767     }
3768     if (pcbddc->dbg_flag) {
3769       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
3770       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init\n");CHKERRQ(ierr);
3771       ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr);
3772       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3773     }
3774     /* get temporary coarse mat in IS format restricted on coarse procs (plus additional index sets of isarray) */
3775     ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr);
3776   } else {
3777     if (pcbddc->dbg_flag) {
3778       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
3779       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init not needed\n");CHKERRQ(ierr);
3780       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3781     }
3782     ierr = PetscObjectReference((PetscObject)t_coarse_mat_is);CHKERRQ(ierr);
3783     coarse_mat_is = t_coarse_mat_is;
3784   }
3785 
3786   /* create local to global scatters for coarse problem */
3787   if (compute_vecs) {
3788     PetscInt lrows;
3789     ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr);
3790     if (coarse_mat_is) {
3791       ierr = MatGetLocalSize(coarse_mat_is,&lrows,NULL);CHKERRQ(ierr);
3792     } else {
3793       lrows = 0;
3794     }
3795     ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr);
3796     ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr);
3797     ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr);
3798     ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
3799     ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
3800   }
3801   ierr = ISDestroy(&coarse_is);CHKERRQ(ierr);
3802   ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr);
3803 
3804   /* set defaults for coarse KSP and PC */
3805   if (multilevel_allowed) {
3806     coarse_ksp_type = KSPRICHARDSON;
3807     coarse_pc_type = PCBDDC;
3808   } else {
3809     coarse_ksp_type = KSPPREONLY;
3810     coarse_pc_type = PCREDUNDANT;
3811   }
3812 
3813   /* print some info if requested */
3814   if (pcbddc->dbg_flag) {
3815     if (!multilevel_allowed) {
3816       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
3817       if (multilevel_requested) {
3818         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr);
3819       } else if (pcbddc->max_levels) {
3820         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr);
3821       }
3822       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3823     }
3824   }
3825 
3826   /* create the coarse KSP object only once with defaults */
3827   if (coarse_mat_is) {
3828     MatReuse coarse_mat_reuse;
3829     PetscViewer dbg_viewer = NULL;
3830     if (pcbddc->dbg_flag) {
3831       dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat_is));
3832       ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr);
3833     }
3834     if (!pcbddc->coarse_ksp) {
3835       char prefix[256],str_level[16];
3836       size_t len;
3837       ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat_is),&pcbddc->coarse_ksp);CHKERRQ(ierr);
3838       ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr);
3839       ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr);
3840       ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat_is,coarse_mat_is);CHKERRQ(ierr);
3841       ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr);
3842       ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr);
3843       ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr);
3844       ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr);
3845       /* prefix */
3846       ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr);
3847       ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr);
3848       if (!pcbddc->current_level) {
3849         ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
3850         ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr);
3851       } else {
3852         ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr);
3853         if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */
3854         if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */
3855         ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr);
3856         sprintf(str_level,"l%d_",(int)(pcbddc->current_level));
3857         ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr);
3858       }
3859       ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr);
3860       /* allow user customization */
3861       ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr);
3862       ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
3863     }
3864 
3865     /* get some info after set from options */
3866     ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr);
3867     ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr);
3868     ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr);
3869     ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr);
3870     if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */
3871       ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr);
3872       isbddc = PETSC_FALSE;
3873     }
3874     if (isredundant) {
3875       KSP inner_ksp;
3876       PC inner_pc;
3877       ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr);
3878       ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr);
3879       ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr);
3880     }
3881 
3882     /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */
3883     ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr);
3884     ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr);
3885     ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr);
3886     if (nisdofs) {
3887       ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr);
3888       for (i=0;i<nisdofs;i++) {
3889         ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr);
3890       }
3891     }
3892     if (nisneu) {
3893       ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr);
3894       ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr);
3895     }
3896 
3897     /* assemble coarse matrix */
3898     if (coarse_reuse) {
3899       ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr);
3900       ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr);
3901       coarse_mat_reuse = MAT_REUSE_MATRIX;
3902     } else {
3903       coarse_mat_reuse = MAT_INITIAL_MATRIX;
3904     }
3905     if (isbddc || isnn) {
3906       if (pcbddc->coarsening_ratio > 1) {
3907         if (!pcbddc->coarse_subassembling) { /* subassembling info is not present */
3908           ierr = MatISGetSubassemblingPattern(coarse_mat_is,active_procs/pcbddc->coarsening_ratio,PETSC_TRUE,&pcbddc->coarse_subassembling);CHKERRQ(ierr);
3909           if (pcbddc->dbg_flag) {
3910             ierr = PetscViewerASCIIPrintf(dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
3911             ierr = PetscViewerASCIIPrintf(dbg_viewer,"Subassembling pattern\n");CHKERRQ(ierr);
3912             ierr = ISView(pcbddc->coarse_subassembling,dbg_viewer);CHKERRQ(ierr);
3913             ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr);
3914           }
3915         }
3916         ierr = MatISSubassemble(coarse_mat_is,pcbddc->coarse_subassembling,0,PETSC_FALSE,coarse_mat_reuse,&coarse_mat,0,NULL);CHKERRQ(ierr);
3917       } else {
3918         ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr);
3919         coarse_mat = coarse_mat_is;
3920       }
3921     } else {
3922       ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr);
3923     }
3924     ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr);
3925 
3926     /* propagate symmetry info to coarse matrix */
3927     ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pcbddc->issym);CHKERRQ(ierr);
3928     ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
3929 
3930     /* set operators */
3931     ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr);
3932     if (pcbddc->dbg_flag) {
3933       ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr);
3934     }
3935   } else { /* processes non partecipating to coarse solver (if any) */
3936     coarse_mat = 0;
3937   }
3938   ierr = PetscFree(isarray);CHKERRQ(ierr);
3939 #if 0
3940   {
3941     PetscViewer viewer;
3942     char filename[256];
3943     sprintf(filename,"coarse_mat.m");
3944     ierr = PetscViewerASCIIOpen(PETSC_COMM_WORLD,filename,&viewer);CHKERRQ(ierr);
3945     ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
3946     ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr);
3947     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
3948   }
3949 #endif
3950 
3951   /* Compute coarse null space (special handling by BDDC only) */
3952   if (pcbddc->NullSpace) {
3953     ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr);
3954   }
3955 
3956   if (pcbddc->coarse_ksp) {
3957     Vec crhs,csol;
3958     PetscBool ispreonly;
3959     if (CoarseNullSpace) {
3960       if (isbddc) {
3961         ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr);
3962       } else {
3963         ierr = KSPSetNullSpace(pcbddc->coarse_ksp,CoarseNullSpace);CHKERRQ(ierr);
3964       }
3965     }
3966     /* setup coarse ksp */
3967     ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr);
3968     ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr);
3969     ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr);
3970     /* hack */
3971     if (!csol) {
3972       ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr);
3973     }
3974     if (!crhs) {
3975       ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr);
3976     }
3977     /* Check coarse problem if in debug mode or if solving with an iterative method */
3978     ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr);
3979     if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) {
3980       KSP       check_ksp;
3981       KSPType   check_ksp_type;
3982       PC        check_pc;
3983       Vec       check_vec,coarse_vec;
3984       PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0;
3985       PetscInt  its;
3986       PetscBool compute_eigs;
3987       PetscReal *eigs_r,*eigs_c;
3988       PetscInt  neigs;
3989       const char *prefix;
3990 
3991       /* Create ksp object suitable for estimation of extreme eigenvalues */
3992       ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr);
3993       ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr);
3994       ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr);
3995       if (ispreonly) {
3996         check_ksp_type = KSPPREONLY;
3997         compute_eigs = PETSC_FALSE;
3998       } else {
3999         check_ksp_type = KSPGMRES;
4000         compute_eigs = PETSC_TRUE;
4001       }
4002       ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr);
4003       ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr);
4004       ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr);
4005       ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr);
4006       ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr);
4007       ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr);
4008       ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr);
4009       ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr);
4010       ierr = KSPSetUp(check_ksp);CHKERRQ(ierr);
4011       ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr);
4012       ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr);
4013       /* create random vec */
4014       ierr = KSPGetSolution(pcbddc->coarse_ksp,&coarse_vec);CHKERRQ(ierr);
4015       ierr = VecDuplicate(coarse_vec,&check_vec);CHKERRQ(ierr);
4016       ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr);
4017       if (CoarseNullSpace) {
4018         ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr);
4019       }
4020       ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr);
4021       /* solve coarse problem */
4022       ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr);
4023       if (CoarseNullSpace) {
4024         ierr = MatNullSpaceRemove(CoarseNullSpace,coarse_vec);CHKERRQ(ierr);
4025       }
4026       /* set eigenvalue estimation if preonly has not been requested */
4027       if (compute_eigs) {
4028         ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr);
4029         ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr);
4030         ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr);
4031         lambda_max = eigs_r[neigs-1];
4032         lambda_min = eigs_r[0];
4033         if (pcbddc->use_coarse_estimates) {
4034           if (lambda_max>lambda_min) {
4035             ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max,lambda_min);CHKERRQ(ierr);
4036             ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr);
4037           }
4038         }
4039       }
4040 
4041       /* check coarse problem residual error */
4042       if (pcbddc->dbg_flag) {
4043         PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp));
4044         ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr);
4045         ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr);
4046         ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr);
4047         ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr);
4048         ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr);
4049         ierr = VecDestroy(&check_vec);CHKERRQ(ierr);
4050         ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (%d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr);
4051         ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr);
4052         ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr);
4053         ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error   : %1.6e\n",infty_error);CHKERRQ(ierr);
4054         ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr);
4055         if (compute_eigs) {
4056           PetscReal lambda_max_s,lambda_min_s;
4057           ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr);
4058           ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr);
4059           ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr);
4060           ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr);
4061           for (i=0;i<neigs;i++) {
4062             ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr);
4063           }
4064         }
4065         ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr);
4066         ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr);
4067       }
4068       ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr);
4069       if (compute_eigs) {
4070         ierr = PetscFree(eigs_r);CHKERRQ(ierr);
4071         ierr = PetscFree(eigs_c);CHKERRQ(ierr);
4072       }
4073     }
4074   }
4075   /* print additional info */
4076   if (pcbddc->dbg_flag) {
4077     /* waits until all processes reaches this point */
4078     ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr);
4079     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr);
4080     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4081   }
4082 
4083   /* free memory */
4084   ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr);
4085   ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr);
4086   PetscFunctionReturn(0);
4087 }
4088 
4089 #undef __FUNCT__
4090 #define __FUNCT__ "PCBDDCComputePrimalNumbering"
4091 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n)
4092 {
4093   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
4094   PC_IS*         pcis = (PC_IS*)pc->data;
4095   Mat_IS*        matis = (Mat_IS*)pc->pmat->data;
4096   PetscInt       i,coarse_size;
4097   PetscInt       *local_primal_indices;
4098   PetscErrorCode ierr;
4099 
4100   PetscFunctionBegin;
4101   /* Compute global number of coarse dofs */
4102   if (!pcbddc->primal_indices_local_idxs && pcbddc->local_primal_size) {
4103     SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Local primal indices have not been created");
4104   }
4105   ierr = PCBDDCSubsetNumbering(PetscObjectComm((PetscObject)(pc->pmat)),matis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,NULL,&coarse_size,&local_primal_indices);CHKERRQ(ierr);
4106 
4107   /* check numbering */
4108   if (pcbddc->dbg_flag) {
4109     PetscScalar coarsesum,*array;
4110     PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE;
4111 
4112     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4113     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4114     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr);
4115     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
4116     ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr);
4117     for (i=0;i<pcbddc->local_primal_size;i++) {
4118       ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
4119     }
4120     ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
4121     ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
4122     ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
4123     ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
4124     ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
4125     ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
4126     ierr = VecScatterEnd(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
4127     ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr);
4128     for (i=0;i<pcis->n;i++) {
4129       if (array[i] == 1.0) {
4130         set_error = PETSC_TRUE;
4131         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a single process!\n",PetscGlobalRank,i);CHKERRQ(ierr);
4132       }
4133     }
4134     ierr = MPI_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
4135     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4136     for (i=0;i<pcis->n;i++) {
4137       if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]);
4138     }
4139     ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr);
4140     ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
4141     ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
4142     ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
4143     ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr);
4144     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr);
4145     if (pcbddc->dbg_flag > 1 || set_error_reduced) {
4146       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr);
4147       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4148       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr);
4149       for (i=0;i<pcbddc->local_primal_size;i++) {
4150         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i]);
4151       }
4152       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4153     }
4154     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4155     if (set_error_reduced) {
4156       SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed");
4157     }
4158   }
4159   /* get back data */
4160   *coarse_size_n = coarse_size;
4161   *local_primal_indices_n = local_primal_indices;
4162   PetscFunctionReturn(0);
4163 }
4164 
4165 #undef __FUNCT__
4166 #define __FUNCT__ "PCBDDCGlobalToLocal"
4167 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis)
4168 {
4169   IS             localis_t;
4170   PetscInt       i,lsize,*idxs,n;
4171   PetscScalar    *vals;
4172   PetscErrorCode ierr;
4173 
4174   PetscFunctionBegin;
4175   /* get indices in local ordering exploiting local to global map */
4176   ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr);
4177   ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr);
4178   for (i=0;i<lsize;i++) vals[i] = 1.0;
4179   ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr);
4180   ierr = VecSet(gwork,0.0);CHKERRQ(ierr);
4181   ierr = VecSet(lwork,0.0);CHKERRQ(ierr);
4182   if (idxs) { /* multilevel guard */
4183     ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr);
4184   }
4185   ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr);
4186   ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr);
4187   ierr = PetscFree(vals);CHKERRQ(ierr);
4188   ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr);
4189   /* now compute set in local ordering */
4190   ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
4191   ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
4192   ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr);
4193   ierr = VecGetSize(lwork,&n);CHKERRQ(ierr);
4194   for (i=0,lsize=0;i<n;i++) {
4195     if (PetscRealPart(vals[i]) > 0.5) {
4196       lsize++;
4197     }
4198   }
4199   ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr);
4200   for (i=0,lsize=0;i<n;i++) {
4201     if (PetscRealPart(vals[i]) > 0.5) {
4202       idxs[lsize++] = i;
4203     }
4204   }
4205   ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr);
4206   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr);
4207   *localis = localis_t;
4208   PetscFunctionReturn(0);
4209 }
4210 
4211 /* the next two functions will be called in KSPMatMult if a change of basis has been requested */
4212 #undef __FUNCT__
4213 #define __FUNCT__ "PCBDDCMatMult_Private"
4214 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y)
4215 {
4216   PCBDDCChange_ctx change_ctx;
4217   PetscErrorCode   ierr;
4218 
4219   PetscFunctionBegin;
4220   ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr);
4221   ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr);
4222   ierr = MatMult(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr);
4223   ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr);
4224   PetscFunctionReturn(0);
4225 }
4226 
4227 #undef __FUNCT__
4228 #define __FUNCT__ "PCBDDCMatMultTranspose_Private"
4229 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y)
4230 {
4231   PCBDDCChange_ctx change_ctx;
4232   PetscErrorCode   ierr;
4233 
4234   PetscFunctionBegin;
4235   ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr);
4236   ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr);
4237   ierr = MatMultTranspose(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr);
4238   ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr);
4239   PetscFunctionReturn(0);
4240 }
4241