xref: /petsc/src/ksp/pc/impls/bddc/bddcprivate.c (revision f1e55ea32162cf9a365715cdae2babe91dbef974)
1 #include <../src/ksp/pc/impls/bddc/bddc.h>
2 #include <../src/ksp/pc/impls/bddc/bddcprivate.h>
3 #include <petscblaslapack.h>
4 
5 #undef __FUNCT__
6 #define __FUNCT__ "PCBDDCSetUpSolvers"
7 PetscErrorCode PCBDDCSetUpSolvers(PC pc)
8 {
9   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
10   PetscScalar    *coarse_submat_vals;
11   PetscErrorCode ierr;
12 
13   PetscFunctionBegin;
14   /* Compute matrix after change of basis and extract local submatrices */
15   ierr = PCBDDCSetUpLocalMatrices(pc);CHKERRQ(ierr);
16 
17   /* Setup local scatters R_to_B and (optionally) R_to_D */
18   /* PCBDDCSetUpLocalWorkVectors and PCBDDCSetUpLocalMatrices should be called first! */
19   ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr);
20 
21   /* Setup local solvers ksp_D and ksp_R */
22   /* PCBDDCSetUpLocalScatters should be called first! */
23   ierr = PCBDDCSetUpLocalSolvers(pc);CHKERRQ(ierr);
24 
25   /* Change global null space passed in by the user if change of basis has been requested */
26   if (pcbddc->NullSpace && pcbddc->ChangeOfBasisMatrix) {
27     ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr);
28   }
29 
30   /*
31      Setup local correction and local part of coarse basis.
32      Gives back the dense local part of the coarse matrix in column major ordering
33   */
34   ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr);
35 
36   /* Compute total number of coarse nodes and setup coarse solver */
37   ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr);
38 
39   /* free */
40   ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr);
41   PetscFunctionReturn(0);
42 }
43 
44 #undef __FUNCT__
45 #define __FUNCT__ "PCBDDCResetCustomization"
46 PetscErrorCode PCBDDCResetCustomization(PC pc)
47 {
48   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
49   PetscErrorCode ierr;
50 
51   PetscFunctionBegin;
52   ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr);
53   ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr);
54   ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr);
55   ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr);
56   ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr);
57   ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr);
58   ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr);
59   ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr);
60   ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr);
61   ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr);
62   ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr);
63   PetscFunctionReturn(0);
64 }
65 
66 #undef __FUNCT__
67 #define __FUNCT__ "PCBDDCResetTopography"
68 PetscErrorCode PCBDDCResetTopography(PC pc)
69 {
70   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
71   PetscErrorCode ierr;
72 
73   PetscFunctionBegin;
74   ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr);
75   ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
76   ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
77   ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr);
78   PetscFunctionReturn(0);
79 }
80 
81 #undef __FUNCT__
82 #define __FUNCT__ "PCBDDCResetSolvers"
83 PetscErrorCode PCBDDCResetSolvers(PC pc)
84 {
85   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
86   PetscErrorCode ierr;
87 
88   PetscFunctionBegin;
89   ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr);
90   ierr = VecDestroy(&pcbddc->coarse_rhs);CHKERRQ(ierr);
91   ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr);
92   ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr);
93   ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr);
94   ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr);
95   ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr);
96   ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr);
97   ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr);
98   ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr);
99   ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr);
100   ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr);
101   ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr);
102   ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr);
103   ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr);
104   ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
105   ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr);
106   ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr);
107   ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr);
108   ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
109   ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
110   ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr);
111   ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr);
112   ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
113   PetscFunctionReturn(0);
114 }
115 
116 #undef __FUNCT__
117 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors"
118 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc)
119 {
120   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
121   PC_IS          *pcis = (PC_IS*)pc->data;
122   VecType        impVecType;
123   PetscInt       n_constraints,n_R,old_size;
124   PetscErrorCode ierr;
125 
126   PetscFunctionBegin;
127   if (!pcbddc->ConstraintMatrix) {
128     SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created");
129   }
130   /* get sizes */
131   n_constraints = pcbddc->local_primal_size - pcbddc->n_actual_vertices;
132   n_R = pcis->n-pcbddc->n_actual_vertices;
133   ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr);
134   /* local work vectors (try to avoid unneeded work)*/
135   /* R nodes */
136   old_size = -1;
137   if (pcbddc->vec1_R) {
138     ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr);
139   }
140   if (n_R != old_size) {
141     ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr);
142     ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr);
143     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr);
144     ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr);
145     ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr);
146     ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr);
147   }
148   /* local primal dofs */
149   old_size = -1;
150   if (pcbddc->vec1_P) {
151     ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr);
152   }
153   if (pcbddc->local_primal_size != old_size) {
154     ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr);
155     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr);
156     ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr);
157     ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr);
158   }
159   /* local explicit constraints */
160   old_size = -1;
161   if (pcbddc->vec1_C) {
162     ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr);
163   }
164   if (n_constraints && n_constraints != old_size) {
165     ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr);
166     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr);
167     ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr);
168     ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr);
169   }
170   PetscFunctionReturn(0);
171 }
172 
173 #undef __FUNCT__
174 #define __FUNCT__ "PCBDDCSetUpCorrection"
175 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n)
176 {
177   PetscErrorCode         ierr;
178   /* pointers to pcis and pcbddc */
179   PC_IS*                 pcis = (PC_IS*)pc->data;
180   PC_BDDC*               pcbddc = (PC_BDDC*)pc->data;
181   /* submatrices of local problem */
182   Mat                    A_RV,A_VR,A_VV;
183   /* working matrices */
184   Mat                    M1,M2,M3,C_CR;
185   /* working vectors */
186   Vec                    vec1_C,vec2_C,vec1_V,vec2_V;
187   /* additional working stuff */
188   IS                     is_aux;
189   PetscScalar            *coarse_submat_vals; /* TODO: use a PETSc matrix */
190   const PetscScalar      *array,*row_cmat_values;
191   const PetscInt         *row_cmat_indices,*idx_R_local;
192   PetscInt               *idx_V_B,*auxindices;
193   PetscInt               n_vertices,n_constraints,size_of_constraint;
194   PetscInt               i,j,n_R,n_D,n_B;
195   PetscBool              unsymmetric_check;
196   /* matrix type (vector type propagated downstream from vec1_C and local matrix type) */
197   MatType                impMatType;
198   /* some shortcuts to scalars */
199   PetscScalar            zero=0.0,one=1.0,m_one=-1.0;
200   /* for debugging purposes */
201   PetscReal              *coarsefunctions_errors,*constraints_errors;
202 
203   PetscFunctionBegin;
204   /* get number of vertices (corners plus constraints with change of basis)
205      pcbddc->n_actual_vertices stores the actual number of vertices, pcbddc->n_vertices the number of corners computed */
206   n_vertices = pcbddc->n_actual_vertices;
207   n_constraints = pcbddc->local_primal_size-n_vertices;
208   /* Set Non-overlapping dimensions */
209   n_B = pcis->n_B; n_D = pcis->n - n_B;
210   n_R = pcis->n-n_vertices;
211 
212   /* Set types for local objects needed by BDDC precondtioner */
213   impMatType = MATSEQDENSE;
214 
215   /* Allocating some extra storage just to be safe */
216   ierr = PetscMalloc (pcis->n*sizeof(PetscInt),&auxindices);CHKERRQ(ierr);
217   for (i=0;i<pcis->n;i++) auxindices[i]=i;
218 
219   /* vertices in boundary numbering */
220   ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr);
221   ierr = ISGlobalToLocalMappingApply(pcbddc->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->primal_indices_local_idxs,&i,idx_V_B);CHKERRQ(ierr);
222   if (i != n_vertices) {
223     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i);
224   }
225 
226   /* Precompute stuffs needed for preprocessing and application of BDDC*/
227   if (n_constraints) {
228     /* see if we can save some allocations */
229     if (pcbddc->local_auxmat2) {
230       PetscInt on_R,on_constraints;
231       ierr = MatGetSize(pcbddc->local_auxmat2,&on_R,&on_constraints);CHKERRQ(ierr);
232       if (on_R != n_R || on_constraints != n_constraints) {
233         ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr);
234         ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr);
235       }
236     }
237     /* work vectors */
238     ierr = VecDuplicate(pcbddc->vec1_C,&vec1_C);CHKERRQ(ierr);
239     ierr = VecDuplicate(pcbddc->vec1_C,&vec2_C);CHKERRQ(ierr);
240     /* auxiliary matrices */
241     if (!pcbddc->local_auxmat2) {
242       ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->local_auxmat2);CHKERRQ(ierr);
243       ierr = MatSetSizes(pcbddc->local_auxmat2,n_R,n_constraints,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
244       ierr = MatSetType(pcbddc->local_auxmat2,impMatType);CHKERRQ(ierr);
245       ierr = MatSetUp(pcbddc->local_auxmat2);CHKERRQ(ierr);
246     }
247 
248     /* Extract constraints on R nodes: C_{CR}  */
249     ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr);
250     ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr);
251     ierr = ISDestroy(&is_aux);CHKERRQ(ierr);
252 
253     /* Assemble local_auxmat2 = - A_{RR}^{-1} C^T_{CR} needed by BDDC application */
254     for (i=0;i<n_constraints;i++) {
255       ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr);
256       /* Get row of constraint matrix in R numbering */
257       ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr);
258       ierr = VecSetValues(pcbddc->vec1_R,size_of_constraint,row_cmat_indices,row_cmat_values,INSERT_VALUES);CHKERRQ(ierr);
259       ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr);
260       ierr = VecAssemblyBegin(pcbddc->vec1_R);CHKERRQ(ierr);
261       ierr = VecAssemblyEnd(pcbddc->vec1_R);CHKERRQ(ierr);
262       /* Solve for row of constraint matrix in R numbering */
263       ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
264       /* Set values in local_auxmat2 */
265       ierr = VecGetArrayRead(pcbddc->vec2_R,&array);CHKERRQ(ierr);
266       ierr = MatSetValues(pcbddc->local_auxmat2,n_R,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
267       ierr = VecRestoreArrayRead(pcbddc->vec2_R,&array);CHKERRQ(ierr);
268     }
269     ierr = MatAssemblyBegin(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
270     ierr = MatAssemblyEnd(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
271     ierr = MatScale(pcbddc->local_auxmat2,m_one);CHKERRQ(ierr);
272 
273     /* Assemble explicitly M1 = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} needed in preproc  */
274     ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr);
275     ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr);
276     ierr = MatCreate(PETSC_COMM_SELF,&M1);CHKERRQ(ierr);
277     ierr = MatSetSizes(M1,n_constraints,n_constraints,n_constraints,n_constraints);CHKERRQ(ierr);
278     ierr = MatSetType(M1,impMatType);CHKERRQ(ierr);
279     ierr = MatSetUp(M1);CHKERRQ(ierr);
280     ierr = MatDuplicate(M1,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr);
281     ierr = MatZeroEntries(M2);CHKERRQ(ierr);
282     ierr = VecSet(vec1_C,m_one);CHKERRQ(ierr);
283     ierr = MatDiagonalSet(M2,vec1_C,INSERT_VALUES);CHKERRQ(ierr);
284     ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr);
285     ierr = MatDestroy(&M2);CHKERRQ(ierr);
286     ierr = MatDestroy(&M3);CHKERRQ(ierr);
287     /* Assemble local_auxmat1 = M1*C_{CR} needed by BDDC application in KSP and in preproc */
288     if (!pcbddc->local_auxmat1) {
289       ierr = MatMatMult(M1,C_CR,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr);
290     } else {
291       ierr = MatMatMult(M1,C_CR,MAT_REUSE_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr);
292     }
293   }
294 
295   /* Get submatrices from subdomain matrix */
296   if (n_vertices) {
297     PetscInt ibs,mbs;
298     PetscBool issbaij;
299     Mat newmat;
300 
301     ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr);
302     ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr);
303     ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr);
304     if (ibs != mbs) { /* need to convert to SEQAIJ */
305       ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
306       ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
307       ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
308       ierr = MatGetSubMatrix(newmat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr);
309       ierr = MatDestroy(&newmat);CHKERRQ(ierr);
310     } else {
311       /* this is safe */
312       ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr);
313       ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
314       if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */
315         ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
316         /* which of the two approaches is faster? */
317         /* ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
318         ierr = MatCreateTranspose(A_RV,&A_VR);CHKERRQ(ierr);*/
319         ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
320         ierr = MatCreateTranspose(A_VR,&A_RV);CHKERRQ(ierr);
321         ierr = MatDestroy(&newmat);CHKERRQ(ierr);
322       } else {
323         ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
324         ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
325       }
326     }
327     ierr = MatCreateVecs(A_RV,&vec1_V,NULL);CHKERRQ(ierr);
328     ierr = VecDuplicate(vec1_V,&vec2_V);CHKERRQ(ierr);
329     ierr = ISDestroy(&is_aux);CHKERRQ(ierr);
330   }
331 
332   /* Matrix of coarse basis functions (local) */
333   if (pcbddc->coarse_phi_B) {
334     PetscInt on_B,on_primal;
335     ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr);
336     if (on_B != n_B || on_primal != pcbddc->local_primal_size) {
337       ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr);
338       ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr);
339     }
340   }
341   if (pcbddc->coarse_phi_D) {
342     PetscInt on_D,on_primal;
343     ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,&on_primal);CHKERRQ(ierr);
344     if (on_D != n_D || on_primal != pcbddc->local_primal_size) {
345       ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr);
346       ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr);
347     }
348   }
349   if (!pcbddc->coarse_phi_B) {
350     ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_B);CHKERRQ(ierr);
351     ierr = MatSetSizes(pcbddc->coarse_phi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr);
352     ierr = MatSetType(pcbddc->coarse_phi_B,impMatType);CHKERRQ(ierr);
353     ierr = MatSetUp(pcbddc->coarse_phi_B);CHKERRQ(ierr);
354   }
355   if ( (pcbddc->switch_static || pcbddc->dbg_flag) && !pcbddc->coarse_phi_D ) {
356     ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_D);CHKERRQ(ierr);
357     ierr = MatSetSizes(pcbddc->coarse_phi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr);
358     ierr = MatSetType(pcbddc->coarse_phi_D,impMatType);CHKERRQ(ierr);
359     ierr = MatSetUp(pcbddc->coarse_phi_D);CHKERRQ(ierr);
360   }
361 
362   if (pcbddc->dbg_flag) {
363     ierr = ISGetIndices(pcbddc->is_R_local,&idx_R_local);CHKERRQ(ierr);
364     ierr = PetscMalloc1(2*pcbddc->local_primal_size,&coarsefunctions_errors);CHKERRQ(ierr);
365     ierr = PetscMalloc1(2*pcbddc->local_primal_size,&constraints_errors);CHKERRQ(ierr);
366   }
367   /* Subdomain contribution (Non-overlapping) to coarse matrix  */
368   ierr = PetscMalloc1((pcbddc->local_primal_size)*(pcbddc->local_primal_size),&coarse_submat_vals);CHKERRQ(ierr);
369 
370   /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */
371 
372   /* vertices */
373   for (i=0;i<n_vertices;i++) {
374     /* this should not be needed, but MatMult_BAIJ is broken when using compressed row routines */
375     ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); /* TODO: REMOVE IT */
376     ierr = VecSet(vec1_V,zero);CHKERRQ(ierr);
377     ierr = VecSetValue(vec1_V,i,one,INSERT_VALUES);CHKERRQ(ierr);
378     ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr);
379     ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr);
380     /* simplified solution of saddle point problem with null rhs on constraints multipliers */
381     ierr = MatMult(A_RV,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr);
382     ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
383     ierr = VecScale(pcbddc->vec1_R,m_one);CHKERRQ(ierr);
384     if (n_constraints) {
385       ierr = MatMult(pcbddc->local_auxmat1,pcbddc->vec1_R,vec1_C);CHKERRQ(ierr);
386       ierr = MatMultAdd(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
387       ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr);
388     }
389     ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr);
390     ierr = MatMultAdd(A_VV,vec1_V,vec2_V,vec2_V);CHKERRQ(ierr);
391 
392     /* Set values in coarse basis function and subdomain part of coarse_mat */
393     /* coarse basis functions */
394     ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
395     ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
396     ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
397     ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
398     ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
399     ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
400     ierr = MatSetValue(pcbddc->coarse_phi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr);
401     if (pcbddc->switch_static || pcbddc->dbg_flag) {
402       ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
403       ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
404       ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
405       ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
406       ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
407     }
408     /* subdomain contribution to coarse matrix. WARNING -> column major ordering */
409     ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr);
410     ierr = PetscMemcpy(&coarse_submat_vals[i*pcbddc->local_primal_size],array,n_vertices*sizeof(PetscScalar));CHKERRQ(ierr);
411     ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr);
412     if (n_constraints) {
413       ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr);
414       ierr = PetscMemcpy(&coarse_submat_vals[i*pcbddc->local_primal_size+n_vertices],array,n_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
415       ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr);
416     }
417 
418     /* check */
419     if (pcbddc->dbg_flag) {
420       /* assemble subdomain vector on local nodes */
421       ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr);
422       ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
423       ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr);
424       ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
425       ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],one,INSERT_VALUES);CHKERRQ(ierr);
426       ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
427       ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
428       /* assemble subdomain vector of lagrange multipliers (i.e. primal nodes) */
429       ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr);
430       ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr);
431       ierr = VecSetValues(pcbddc->vec1_P,n_vertices,auxindices,array,INSERT_VALUES);CHKERRQ(ierr);
432       ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr);
433       if (n_constraints) {
434         ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr);
435         ierr = VecSetValues(pcbddc->vec1_P,n_constraints,&auxindices[n_vertices],array,INSERT_VALUES);CHKERRQ(ierr);
436         ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr);
437       }
438       ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
439       ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
440       ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr);
441       /* check saddle point solution */
442       ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr);
443       ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr);
444       ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i]);CHKERRQ(ierr);
445       ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr);
446       /* shift by the identity matrix */
447       ierr = VecSetValue(pcbddc->vec1_P,i,m_one,ADD_VALUES);CHKERRQ(ierr);
448       ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
449       ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
450       ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i]);CHKERRQ(ierr);
451     }
452   }
453 
454   /* constraints */
455   for (i=0;i<n_constraints;i++) {
456     ierr = VecSet(vec2_C,zero);CHKERRQ(ierr);
457     ierr = VecSetValue(vec2_C,i,m_one,INSERT_VALUES);CHKERRQ(ierr);
458     ierr = VecAssemblyBegin(vec2_C);CHKERRQ(ierr);
459     ierr = VecAssemblyEnd(vec2_C);CHKERRQ(ierr);
460     /* simplified solution of saddle point problem with null rhs on vertices multipliers */
461     ierr = MatMult(M1,vec2_C,vec1_C);CHKERRQ(ierr);
462     ierr = MatMult(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr);
463     ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr);
464     if (n_vertices) {
465       ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr);
466     }
467     /* Set values in coarse basis function and subdomain part of coarse_mat */
468     /* coarse basis functions */
469     j = i+n_vertices; /* don't touch this! */
470     ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
471     ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
472     ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
473     ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
474     ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&j,array,INSERT_VALUES);CHKERRQ(ierr);
475     ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
476     if (pcbddc->switch_static || pcbddc->dbg_flag) {
477       ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
478       ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
479       ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
480       ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&j,array,INSERT_VALUES);CHKERRQ(ierr);
481       ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
482     }
483     /* subdomain contribution to coarse matrix. WARNING -> column major ordering */
484     if (n_vertices) {
485       ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr);
486       ierr = PetscMemcpy(&coarse_submat_vals[j*pcbddc->local_primal_size],array,n_vertices*sizeof(PetscScalar));CHKERRQ(ierr);
487       ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr);
488     }
489     ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr);
490     ierr = PetscMemcpy(&coarse_submat_vals[j*pcbddc->local_primal_size+n_vertices],array,n_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
491     ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr);
492 
493     if (pcbddc->dbg_flag) {
494       /* assemble subdomain vector on nodes */
495       ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr);
496       ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
497       ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr);
498       ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
499       ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
500       ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
501       /* assemble subdomain vector of lagrange multipliers */
502       ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr);
503       if (n_vertices) {
504         ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr);
505         ierr = VecSetValues(pcbddc->vec1_P,n_vertices,auxindices,array,INSERT_VALUES);CHKERRQ(ierr);
506         ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr);
507       }
508       ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr);
509       ierr = VecSetValues(pcbddc->vec1_P,n_constraints,&auxindices[n_vertices],array,INSERT_VALUES);CHKERRQ(ierr);
510       ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr);
511       ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
512       ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
513       ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr);
514       /* check saddle point solution */
515       ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr);
516       ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr);
517       ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[j]);CHKERRQ(ierr);
518       ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr);
519       /* shift by the identity matrix */
520       ierr = VecSetValue(pcbddc->vec1_P,j,m_one,ADD_VALUES);CHKERRQ(ierr);
521       ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
522       ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
523       ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[j]);CHKERRQ(ierr);
524     }
525   }
526   /* call assembling routines for local coarse basis */
527   ierr = MatAssemblyBegin(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
528   ierr = MatAssemblyEnd(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
529   if (pcbddc->switch_static || pcbddc->dbg_flag) {
530     ierr = MatAssemblyBegin(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
531     ierr = MatAssemblyEnd(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
532   }
533 
534   /* compute other basis functions for non-symmetric problems */
535   /*ierr = MatIsSymmetric(pc->pmat,0.,&pcbddc->issym);CHKERRQ(ierr);*/
536   { /* this is a temporary workaround since seqbaij matrices does not have support for symmetry checking */
537     PetscBool setsym;
538     ierr = MatIsSymmetricKnown(pc->pmat,&setsym,&pcbddc->issym);CHKERRQ(ierr);
539     if (!setsym) pcbddc->issym = PETSC_FALSE;
540   }
541 
542   if (!pcbddc->issym) {
543     if (!pcbddc->coarse_psi_B) {
544       ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_psi_B);CHKERRQ(ierr);
545       ierr = MatSetSizes(pcbddc->coarse_psi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr);
546       ierr = MatSetType(pcbddc->coarse_psi_B,impMatType);CHKERRQ(ierr);
547       ierr = MatSetUp(pcbddc->coarse_psi_B);CHKERRQ(ierr);
548     }
549     if ( (pcbddc->switch_static || pcbddc->dbg_flag) && !pcbddc->coarse_psi_D) {
550       ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_psi_D);CHKERRQ(ierr);
551       ierr = MatSetSizes(pcbddc->coarse_psi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr);
552       ierr = MatSetType(pcbddc->coarse_psi_D,impMatType);CHKERRQ(ierr);
553       ierr = MatSetUp(pcbddc->coarse_psi_D);CHKERRQ(ierr);
554     }
555     for (i=0;i<pcbddc->local_primal_size;i++) {
556       if (n_constraints) {
557         ierr = VecSet(vec1_C,zero);CHKERRQ(ierr);
558         for (j=0;j<n_constraints;j++) {
559           ierr = VecSetValue(vec1_C,j,coarse_submat_vals[(j+n_vertices)*pcbddc->local_primal_size+i],INSERT_VALUES);CHKERRQ(ierr);
560         }
561         ierr = VecAssemblyBegin(vec1_C);CHKERRQ(ierr);
562         ierr = VecAssemblyEnd(vec1_C);CHKERRQ(ierr);
563       }
564       if (i<n_vertices) {
565         ierr = VecSet(vec1_V,zero);CHKERRQ(ierr);
566         ierr = VecSetValue(vec1_V,i,m_one,INSERT_VALUES);CHKERRQ(ierr);
567         ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr);
568         ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr);
569         ierr = MatMultTranspose(A_VR,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr);
570         if (n_constraints) {
571           ierr = MatMultTransposeAdd(C_CR,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
572         }
573       } else {
574         ierr = MatMultTranspose(C_CR,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr);
575       }
576       ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
577       ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
578       ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
579       ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
580       ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
581       ierr = MatSetValues(pcbddc->coarse_psi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
582       ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
583       if (i<n_vertices) {
584         ierr = MatSetValue(pcbddc->coarse_psi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr);
585       }
586       if (pcbddc->switch_static || pcbddc->dbg_flag) {
587         ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
588         ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
589         ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
590         ierr = MatSetValues(pcbddc->coarse_psi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
591         ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
592       }
593 
594       if (pcbddc->dbg_flag) {
595         /* assemble subdomain vector on nodes */
596         ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr);
597         ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
598         ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr);
599         ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
600         if (i<n_vertices) {
601           ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],one,INSERT_VALUES);CHKERRQ(ierr);
602         }
603         ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
604         ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
605         /* assemble subdomain vector of lagrange multipliers */
606         for (j=0;j<pcbddc->local_primal_size;j++) {
607           ierr = VecSetValue(pcbddc->vec1_P,j,-coarse_submat_vals[j*pcbddc->local_primal_size+i],INSERT_VALUES);CHKERRQ(ierr);
608         }
609         ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
610         ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
611         /* check saddle point solution */
612         ierr = MatMultTranspose(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr);
613         ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr);
614         ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i+pcbddc->local_primal_size]);CHKERRQ(ierr);
615         ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr);
616         /* shift by the identity matrix */
617         ierr = VecSetValue(pcbddc->vec1_P,i,m_one,ADD_VALUES);CHKERRQ(ierr);
618         ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
619         ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
620         ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i+pcbddc->local_primal_size]);CHKERRQ(ierr);
621       }
622     }
623     ierr = MatAssemblyBegin(pcbddc->coarse_psi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
624     ierr = MatAssemblyEnd(pcbddc->coarse_psi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
625     if (pcbddc->switch_static || pcbddc->dbg_flag) {
626       ierr = MatAssemblyBegin(pcbddc->coarse_psi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
627       ierr = MatAssemblyEnd(pcbddc->coarse_psi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
628     }
629     unsymmetric_check = PETSC_TRUE;
630   } else { /* take references to already computed coarse basis */
631     unsymmetric_check = PETSC_FALSE;
632     ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr);
633     pcbddc->coarse_psi_B = pcbddc->coarse_phi_B;
634     if (pcbddc->coarse_phi_D) {
635       ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr);
636       pcbddc->coarse_psi_D = pcbddc->coarse_phi_D;
637     }
638   }
639   ierr = PetscFree(idx_V_B);CHKERRQ(ierr);
640   /* Checking coarse_sub_mat and coarse basis functios */
641   /* Symmetric case     : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
642   /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
643   if (pcbddc->dbg_flag) {
644     Mat         coarse_sub_mat;
645     Mat         AUXMAT,TM1,TM2,TM3,TM4;
646     Mat         coarse_phi_D,coarse_phi_B;
647     Mat         coarse_psi_D,coarse_psi_B;
648     Mat         A_II,A_BB,A_IB,A_BI;
649     MatType     checkmattype=MATSEQAIJ;
650     PetscReal   real_value;
651 
652     ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr);
653     ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr);
654     ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr);
655     ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr);
656     ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr);
657     ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr);
658     if (unsymmetric_check) {
659       ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr);
660       ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr);
661     }
662     ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr);
663 
664     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
665     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat and local basis functions\n");CHKERRQ(ierr);
666     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
667     if (unsymmetric_check) {
668       ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
669       ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr);
670       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
671       ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
672       ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr);
673       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
674       ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
675       ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr);
676       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
677       ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
678       ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr);
679       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
680     } else {
681       ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr);
682       ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr);
683       ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
684       ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr);
685       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
686       ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
687       ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr);
688       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
689     }
690     ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
691     ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
692     ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
693     ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr);
694     ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
695     ierr = MatNorm(TM1,NORM_INFINITY,&real_value);CHKERRQ(ierr);
696     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
697     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"----------------------------------\n");CHKERRQ(ierr);
698     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d \n",PetscGlobalRank);CHKERRQ(ierr);
699     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"matrix error = % 1.14e\n",real_value);CHKERRQ(ierr);
700     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"coarse functions (phi) errors\n");CHKERRQ(ierr);
701     for (i=0;i<pcbddc->local_primal_size;i++) {
702       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i,coarsefunctions_errors[i]);CHKERRQ(ierr);
703     }
704     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"constraints (phi) errors\n");CHKERRQ(ierr);
705     for (i=0;i<pcbddc->local_primal_size;i++) {
706       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i,constraints_errors[i]);CHKERRQ(ierr);
707     }
708     if (unsymmetric_check) {
709       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"coarse functions (psi) errors\n");CHKERRQ(ierr);
710       for (i=pcbddc->local_primal_size;i<2*pcbddc->local_primal_size;i++) {
711         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i-pcbddc->local_primal_size,coarsefunctions_errors[i]);CHKERRQ(ierr);
712       }
713       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"constraints (psi) errors\n");CHKERRQ(ierr);
714       for (i=pcbddc->local_primal_size;i<2*pcbddc->local_primal_size;i++) {
715         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i-pcbddc->local_primal_size,constraints_errors[i]);CHKERRQ(ierr);
716       }
717     }
718     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
719     ierr = MatDestroy(&A_II);CHKERRQ(ierr);
720     ierr = MatDestroy(&A_BB);CHKERRQ(ierr);
721     ierr = MatDestroy(&A_IB);CHKERRQ(ierr);
722     ierr = MatDestroy(&A_BI);CHKERRQ(ierr);
723     ierr = MatDestroy(&TM1);CHKERRQ(ierr);
724     ierr = MatDestroy(&TM2);CHKERRQ(ierr);
725     ierr = MatDestroy(&TM3);CHKERRQ(ierr);
726     ierr = MatDestroy(&TM4);CHKERRQ(ierr);
727     ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr);
728     ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr);
729     if (unsymmetric_check) {
730       ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr);
731       ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr);
732     }
733     ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr);
734     ierr = ISRestoreIndices(pcbddc->is_R_local,&idx_R_local);CHKERRQ(ierr);
735     ierr = PetscFree(coarsefunctions_errors);CHKERRQ(ierr);
736     ierr = PetscFree(constraints_errors);CHKERRQ(ierr);
737   }
738   /* free memory */
739   if (n_vertices) {
740     ierr = VecDestroy(&vec1_V);CHKERRQ(ierr);
741     ierr = VecDestroy(&vec2_V);CHKERRQ(ierr);
742     ierr = MatDestroy(&A_RV);CHKERRQ(ierr);
743     ierr = MatDestroy(&A_VR);CHKERRQ(ierr);
744     ierr = MatDestroy(&A_VV);CHKERRQ(ierr);
745   }
746   if (n_constraints) {
747     ierr = VecDestroy(&vec1_C);CHKERRQ(ierr);
748     ierr = VecDestroy(&vec2_C);CHKERRQ(ierr);
749     ierr = MatDestroy(&M1);CHKERRQ(ierr);
750     ierr = MatDestroy(&C_CR);CHKERRQ(ierr);
751   }
752   ierr = PetscFree(auxindices);CHKERRQ(ierr);
753   /* get back data */
754   *coarse_submat_vals_n = coarse_submat_vals;
755   PetscFunctionReturn(0);
756 }
757 
758 #undef __FUNCT__
759 #define __FUNCT__ "PCBDDCSetUpLocalMatrices"
760 PetscErrorCode PCBDDCSetUpLocalMatrices(PC pc)
761 {
762   PC_IS*            pcis = (PC_IS*)(pc->data);
763   PC_BDDC*          pcbddc = (PC_BDDC*)pc->data;
764   Mat_IS*           matis = (Mat_IS*)pc->pmat->data;
765   PetscBool         issbaij,isseqaij;
766   /* manage repeated solves */
767   MatReuse          reuse;
768   PetscErrorCode    ierr;
769 
770   PetscFunctionBegin;
771   if ( (pcbddc->use_change_of_basis && !pcbddc->ChangeOfBasisMatrix) || (pcbddc->user_ChangeOfBasisMatrix && !pcbddc->ChangeOfBasisMatrix) ) {
772     SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Change of basis matrix has not been created");
773   }
774   /* get mat flags */
775   reuse = MAT_INITIAL_MATRIX;
776   if (pc->setupcalled) {
777     if (pc->flag == SAME_NONZERO_PATTERN) {
778       reuse = MAT_REUSE_MATRIX;
779     } else {
780       reuse = MAT_INITIAL_MATRIX;
781     }
782   }
783   if (reuse == MAT_INITIAL_MATRIX) {
784     ierr = MatDestroy(&pcis->A_II);CHKERRQ(ierr);
785     ierr = MatDestroy(&pcis->A_IB);CHKERRQ(ierr);
786     ierr = MatDestroy(&pcis->A_BI);CHKERRQ(ierr);
787     ierr = MatDestroy(&pcis->A_BB);CHKERRQ(ierr);
788     ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
789   }
790 
791   /* transform local matrices if needed */
792   if (pcbddc->ChangeOfBasisMatrix) {
793     Mat         change_mat_all;
794     PetscScalar *row_cmat_values;
795     PetscInt    *row_cmat_indices;
796     PetscInt    *nnz,*is_indices,*temp_indices;
797     PetscInt    i,j,k,n_D,n_B;
798 
799     /* Get Non-overlapping dimensions */
800     n_B = pcis->n_B;
801     n_D = pcis->n-n_B;
802 
803     /* compute nonzero structure of change of basis on all local nodes */
804     ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr);
805     ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
806     for (i=0;i<n_D;i++) nnz[is_indices[i]] = 1;
807     ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
808     ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
809     k=1;
810     for (i=0;i<n_B;i++) {
811       ierr = MatGetRow(pcbddc->ChangeOfBasisMatrix,i,&j,NULL,NULL);CHKERRQ(ierr);
812       nnz[is_indices[i]]=j;
813       if (k < j) k = j;
814       ierr = MatRestoreRow(pcbddc->ChangeOfBasisMatrix,i,&j,NULL,NULL);CHKERRQ(ierr);
815     }
816     ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
817     /* assemble change of basis matrix on the whole set of local dofs */
818     ierr = PetscMalloc1(k,&temp_indices);CHKERRQ(ierr);
819     ierr = MatCreate(PETSC_COMM_SELF,&change_mat_all);CHKERRQ(ierr);
820     ierr = MatSetSizes(change_mat_all,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr);
821     ierr = MatSetType(change_mat_all,MATSEQAIJ);CHKERRQ(ierr);
822     ierr = MatSeqAIJSetPreallocation(change_mat_all,0,nnz);CHKERRQ(ierr);
823     ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
824     for (i=0;i<n_D;i++) {
825       ierr = MatSetValue(change_mat_all,is_indices[i],is_indices[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
826     }
827     ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
828     ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
829     for (i=0;i<n_B;i++) {
830       ierr = MatGetRow(pcbddc->ChangeOfBasisMatrix,i,&j,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr);
831       for (k=0; k<j; k++) temp_indices[k]=is_indices[row_cmat_indices[k]];
832       ierr = MatSetValues(change_mat_all,1,&is_indices[i],j,temp_indices,row_cmat_values,INSERT_VALUES);CHKERRQ(ierr);
833       ierr = MatRestoreRow(pcbddc->ChangeOfBasisMatrix,i,&j,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr);
834     }
835     ierr = MatAssemblyBegin(change_mat_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
836     ierr = MatAssemblyEnd(change_mat_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
837     /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */
838     ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQBAIJ,&isseqaij);CHKERRQ(ierr);
839     if (isseqaij) {
840       ierr = MatPtAP(matis->A,change_mat_all,reuse,2.0,&pcbddc->local_mat);CHKERRQ(ierr);
841     } else {
842       Mat work_mat;
843       ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr);
844       ierr = MatPtAP(work_mat,change_mat_all,reuse,2.0,&pcbddc->local_mat);CHKERRQ(ierr);
845       ierr = MatDestroy(&work_mat);CHKERRQ(ierr);
846     }
847     /*
848     ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
849     ierr = MatView(change_mat_all,(PetscViewer)0);CHKERRQ(ierr);
850     */
851     ierr = MatDestroy(&change_mat_all);CHKERRQ(ierr);
852     ierr = PetscFree(nnz);CHKERRQ(ierr);
853     ierr = PetscFree(temp_indices);CHKERRQ(ierr);
854   } else {
855     /* without change of basis, the local matrix is unchanged */
856     if (!pcbddc->local_mat) {
857       ierr = PetscObjectReference((PetscObject)matis->A);CHKERRQ(ierr);
858       pcbddc->local_mat = matis->A;
859     }
860   }
861 
862   /* get submatrices */
863   ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_I_local,pcis->is_I_local,reuse,&pcis->A_II);CHKERRQ(ierr);
864   ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_B_local,reuse,&pcis->A_BB);CHKERRQ(ierr);
865   ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
866   if (!issbaij) {
867     ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_I_local,pcis->is_B_local,reuse,&pcis->A_IB);CHKERRQ(ierr);
868     ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_I_local,reuse,&pcis->A_BI);CHKERRQ(ierr);
869   } else {
870     Mat newmat;
871     ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
872     ierr = MatGetSubMatrix(newmat,pcis->is_I_local,pcis->is_B_local,reuse,&pcis->A_IB);CHKERRQ(ierr);
873     ierr = MatGetSubMatrix(newmat,pcis->is_B_local,pcis->is_I_local,reuse,&pcis->A_BI);CHKERRQ(ierr);
874     ierr = MatDestroy(&newmat);CHKERRQ(ierr);
875   }
876   PetscFunctionReturn(0);
877 }
878 
879 #undef __FUNCT__
880 #define __FUNCT__ "PCBDDCSetUpLocalScatters"
881 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc)
882 {
883   PC_IS*         pcis = (PC_IS*)(pc->data);
884   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
885   IS             is_aux1,is_aux2;
886   PetscInt       *aux_array1,*aux_array2,*is_indices,*idx_R_local;
887   PetscInt       n_vertices,i,j,n_R,n_D,n_B;
888   PetscInt       vbs,bs;
889   PetscBT        bitmask;
890   PetscErrorCode ierr;
891 
892   PetscFunctionBegin;
893   /*
894     No need to setup local scatters if
895       - primal space is unchanged
896         AND
897       - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains)
898         AND
899       - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine
900   */
901   if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) {
902     PetscFunctionReturn(0);
903   }
904   /* destroy old objects */
905   ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr);
906   ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr);
907   ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr);
908   /* Set Non-overlapping dimensions */
909   n_B = pcis->n_B; n_D = pcis->n - n_B;
910   n_vertices = pcbddc->n_actual_vertices;
911   /* create auxiliary bitmask */
912   ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr);
913   for (i=0;i<n_vertices;i++) {
914     ierr = PetscBTSet(bitmask,pcbddc->primal_indices_local_idxs[i]);CHKERRQ(ierr);
915   }
916 
917   /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */
918   ierr = PetscMalloc1((pcis->n-n_vertices),&idx_R_local);CHKERRQ(ierr);
919   for (i=0, n_R=0; i<pcis->n; i++) {
920     if (!PetscBTLookup(bitmask,i)) {
921       idx_R_local[n_R] = i;
922       n_R++;
923     }
924   }
925 
926   /* Block code */
927   vbs = 1;
928   ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr);
929   if (bs>1 && !(n_vertices%bs)) {
930     PetscBool is_blocked = PETSC_TRUE;
931     PetscInt  *vary;
932     /* Verify if the vertex indices correspond to each element in a block (code taken from sbaij2.c) */
933     ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr);
934     ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr);
935     for (i=0; i<n_vertices; i++) vary[pcbddc->primal_indices_local_idxs[i]/bs]++;
936     for (i=0; i<n_vertices; i++) {
937       if (vary[i]!=0 && vary[i]!=bs) {
938         is_blocked = PETSC_FALSE;
939         break;
940       }
941     }
942     if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */
943       vbs = bs;
944       for (i=0;i<n_R/vbs;i++) {
945         idx_R_local[i] = idx_R_local[vbs*i]/vbs;
946       }
947     }
948     ierr = PetscFree(vary);CHKERRQ(ierr);
949   }
950   ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr);
951   ierr = PetscFree(idx_R_local);CHKERRQ(ierr);
952 
953   /* print some info if requested */
954   if (pcbddc->dbg_flag) {
955     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
956     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
957     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
958     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr);
959     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr);
960     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices,pcbddc->local_primal_size);CHKERRQ(ierr);
961     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"pcbddc->n_vertices = %d, pcbddc->n_constraints = %d\n",pcbddc->n_vertices,pcbddc->n_constraints);CHKERRQ(ierr);
962     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
963   }
964 
965   /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */
966   ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr);
967   ierr = PetscMalloc1((pcis->n_B-n_vertices),&aux_array1);CHKERRQ(ierr);
968   ierr = PetscMalloc1((pcis->n_B-n_vertices),&aux_array2);CHKERRQ(ierr);
969   ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
970   for (i=0; i<n_D; i++) {
971     ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr);
972   }
973   ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
974   for (i=0, j=0; i<n_R; i++) {
975     if (!PetscBTLookup(bitmask,idx_R_local[i])) {
976       aux_array1[j++] = i;
977     }
978   }
979   ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr);
980   ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
981   for (i=0, j=0; i<n_B; i++) {
982     if (!PetscBTLookup(bitmask,is_indices[i])) {
983       aux_array2[j++] = i;
984     }
985   }
986   ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
987   ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr);
988   ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr);
989   ierr = ISDestroy(&is_aux1);CHKERRQ(ierr);
990   ierr = ISDestroy(&is_aux2);CHKERRQ(ierr);
991 
992   if (pcbddc->switch_static || pcbddc->dbg_flag) {
993     ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr);
994     for (i=0, j=0; i<n_R; i++) {
995       if (PetscBTLookup(bitmask,idx_R_local[i])) {
996         aux_array1[j++] = i;
997       }
998     }
999     ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr);
1000     ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr);
1001     ierr = ISDestroy(&is_aux1);CHKERRQ(ierr);
1002   }
1003   ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr);
1004   ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr);
1005   PetscFunctionReturn(0);
1006 }
1007 
1008 
1009 #undef __FUNCT__
1010 #define __FUNCT__ "PCBDDCSetUpLocalSolvers"
1011 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc)
1012 {
1013   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
1014   PC_IS          *pcis = (PC_IS*)pc->data;
1015   PC             pc_temp;
1016   Mat            A_RR;
1017   MatReuse       reuse;
1018   PetscScalar    m_one = -1.0;
1019   PetscReal      value;
1020   PetscInt       n_D,n_R,ibs,mbs;
1021   PetscBool      use_exact,use_exact_reduced,issbaij;
1022   PetscErrorCode ierr;
1023   /* prefixes stuff */
1024   char           dir_prefix[256],neu_prefix[256],str_level[16];
1025   size_t         len;
1026 
1027   PetscFunctionBegin;
1028 
1029   /* compute prefixes */
1030   ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr);
1031   ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr);
1032   if (!pcbddc->current_level) {
1033     ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
1034     ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
1035     ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr);
1036     ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr);
1037   } else {
1038     ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr);
1039     sprintf(str_level,"l%d_",(int)(pcbddc->current_level));
1040     ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr);
1041     len -= 15; /* remove "pc_bddc_coarse_" */
1042     if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */
1043     if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */
1044     ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr);
1045     ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr);
1046     ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr);
1047     ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr);
1048     ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr);
1049     ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr);
1050   }
1051 
1052   /* DIRICHLET PROBLEM */
1053   /* Matrix for Dirichlet problem is pcis->A_II */
1054   ierr = ISGetSize(pcis->is_I_local,&n_D);CHKERRQ(ierr);
1055   if (!pcbddc->ksp_D) { /* create object if not yet build */
1056     ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr);
1057     ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr);
1058     /* default */
1059     ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr);
1060     ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr);
1061     ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
1062     ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr);
1063     if (issbaij) {
1064       ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr);
1065     } else {
1066       ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr);
1067     }
1068     /* Allow user's customization */
1069     ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr);
1070     ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
1071   }
1072   ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr);
1073   /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
1074   if (!n_D) {
1075     ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr);
1076     ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr);
1077   }
1078   /* Set Up KSP for Dirichlet problem of BDDC */
1079   ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr);
1080   /* set ksp_D into pcis data */
1081   ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr);
1082   ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr);
1083   pcis->ksp_D = pcbddc->ksp_D;
1084 
1085   /* NEUMANN PROBLEM */
1086   /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */
1087   ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr);
1088   if (pcbddc->ksp_R) { /* already created ksp */
1089     PetscInt nn_R;
1090     ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr);
1091     ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr);
1092     ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr);
1093     if (nn_R != n_R) { /* old ksp is not reusable, so reset it */
1094       ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr);
1095       ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1096       reuse = MAT_INITIAL_MATRIX;
1097     } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */
1098       if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */
1099         ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1100         reuse = MAT_INITIAL_MATRIX;
1101       } else { /* safe to reuse the matrix */
1102         reuse = MAT_REUSE_MATRIX;
1103       }
1104     }
1105     /* last check */
1106     if (pc->flag == DIFFERENT_NONZERO_PATTERN) {
1107       ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1108       reuse = MAT_INITIAL_MATRIX;
1109     }
1110   } else { /* first time, so we need to create the matrix */
1111     reuse = MAT_INITIAL_MATRIX;
1112   }
1113   /* extract A_RR */
1114   ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr);
1115   ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr);
1116   if (ibs != mbs) {
1117     Mat newmat;
1118     ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
1119     ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr);
1120     ierr = MatDestroy(&newmat);CHKERRQ(ierr);
1121   } else {
1122     ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr);
1123   }
1124   if (!pcbddc->ksp_R) { /* create object if not present */
1125     ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr);
1126     ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr);
1127     /* default */
1128     ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr);
1129     ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr);
1130     ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr);
1131     ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
1132     if (issbaij) {
1133       ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr);
1134     } else {
1135       ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr);
1136     }
1137     /* Allow user's customization */
1138     ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr);
1139     ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
1140   }
1141   ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr);
1142   /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
1143   if (!n_R) {
1144     ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr);
1145     ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr);
1146   }
1147   /* Set Up KSP for Neumann problem of BDDC */
1148   ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr);
1149 
1150   /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */
1151   if (pcbddc->NullSpace || pcbddc->dbg_flag) {
1152     /* Dirichlet */
1153     ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr);
1154     ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr);
1155     ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr);
1156     ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr);
1157     ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr);
1158     /* need to be adapted? */
1159     use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE);
1160     ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
1161     ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr);
1162     /* print info */
1163     if (pcbddc->dbg_flag) {
1164       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1165       ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
1166       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
1167       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Checking solution of Dirichlet and Neumann problems\n");CHKERRQ(ierr);
1168       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr);
1169       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1170     }
1171     if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) {
1172       ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcis->is_I_local);CHKERRQ(ierr);
1173     }
1174 
1175     /* Neumann */
1176     ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr);
1177     ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
1178     ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr);
1179     ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr);
1180     ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr);
1181     /* need to be adapted? */
1182     use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE);
1183     ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
1184     /* print info */
1185     if (pcbddc->dbg_flag) {
1186       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr);
1187       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1188     }
1189     if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */
1190       ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcbddc->is_R_local);CHKERRQ(ierr);
1191     }
1192   }
1193   /* free Neumann problem's matrix */
1194   ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1195   PetscFunctionReturn(0);
1196 }
1197 
1198 #undef __FUNCT__
1199 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection"
1200 static PetscErrorCode  PCBDDCSolveSubstructureCorrection(PC pc, Vec rhs, Vec sol, Vec work, PetscBool applytranspose)
1201 {
1202   PetscErrorCode ierr;
1203   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1204 
1205   PetscFunctionBegin;
1206   if (applytranspose) {
1207     if (pcbddc->local_auxmat1) {
1208       ierr = MatMultTranspose(pcbddc->local_auxmat2,rhs,work);CHKERRQ(ierr);
1209       ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,work,rhs,rhs);CHKERRQ(ierr);
1210     }
1211     ierr = KSPSolveTranspose(pcbddc->ksp_R,rhs,sol);CHKERRQ(ierr);
1212   } else {
1213     ierr = KSPSolve(pcbddc->ksp_R,rhs,sol);CHKERRQ(ierr);
1214     if (pcbddc->local_auxmat1) {
1215       ierr = MatMult(pcbddc->local_auxmat1,sol,work);CHKERRQ(ierr);
1216       ierr = MatMultAdd(pcbddc->local_auxmat2,work,sol,sol);CHKERRQ(ierr);
1217     }
1218   }
1219   PetscFunctionReturn(0);
1220 }
1221 
1222 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */
1223 #undef __FUNCT__
1224 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner"
1225 PetscErrorCode  PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose)
1226 {
1227   PetscErrorCode ierr;
1228   PC_BDDC*        pcbddc = (PC_BDDC*)(pc->data);
1229   PC_IS*            pcis = (PC_IS*)  (pc->data);
1230   const PetscScalar zero = 0.0;
1231 
1232   PetscFunctionBegin;
1233   /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */
1234   if (applytranspose) {
1235     ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr);
1236     if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); }
1237   } else {
1238     ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr);
1239     if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); }
1240   }
1241   /* start communications from local primal nodes to rhs of coarse solver */
1242   ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr);
1243   ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1244   ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1245 
1246   /* Coarse solution -> rhs and sol updated in PCBDDCScattarCoarseDataBegin/End */
1247   /* TODO remove null space when doing multilevel */
1248   if (pcbddc->coarse_ksp) {
1249     if (applytranspose) {
1250       ierr = KSPSolveTranspose(pcbddc->coarse_ksp,NULL,NULL);CHKERRQ(ierr);
1251     } else {
1252       ierr = KSPSolve(pcbddc->coarse_ksp,NULL,NULL);CHKERRQ(ierr);
1253     }
1254   }
1255   /* start communications from coarse solver solution to local primal nodes */
1256   ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1257 
1258   /* Local solution on R nodes */
1259   ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr);
1260   ierr = VecScatterBegin(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1261   ierr = VecScatterEnd(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1262   if (pcbddc->switch_static) {
1263     ierr = VecScatterBegin(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1264     ierr = VecScatterEnd(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1265   }
1266   ierr = PCBDDCSolveSubstructureCorrection(pc,pcbddc->vec1_R,pcbddc->vec2_R,pcbddc->vec1_C,applytranspose);CHKERRQ(ierr);
1267   ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
1268   ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1269   ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1270   if (pcbddc->switch_static) {
1271     ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1272     ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1273   }
1274 
1275   /* complete communications from coarse sol to local primal nodes */
1276   ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1277 
1278   /* Sum contributions from two levels */
1279   if (applytranspose) {
1280     ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr);
1281     if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); }
1282   } else {
1283     ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr);
1284     if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); }
1285   }
1286   PetscFunctionReturn(0);
1287 }
1288 
1289 /* TODO: the following two function can be optimized using VecPlaceArray whenever possible and using overlap flag */
1290 #undef __FUNCT__
1291 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin"
1292 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode)
1293 {
1294   PetscErrorCode ierr;
1295   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1296   PetscScalar    *array,*array2;
1297   Vec            from,to;
1298 
1299   PetscFunctionBegin;
1300   if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */
1301     from = pcbddc->coarse_vec;
1302     to = pcbddc->vec1_P;
1303     if (pcbddc->coarse_ksp) { /* get array from coarse processes */
1304       Vec tvec;
1305       PetscInt lsize;
1306       ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr);
1307       ierr = VecGetLocalSize(tvec,&lsize);CHKERRQ(ierr);
1308       ierr = VecGetArrayRead(tvec,(const PetscScalar**)&array);CHKERRQ(ierr);
1309       ierr = VecGetArray(from,&array2);CHKERRQ(ierr);
1310       ierr = PetscMemcpy(array2,array,lsize*sizeof(PetscScalar));CHKERRQ(ierr);
1311       ierr = VecRestoreArrayRead(tvec,(const PetscScalar**)&array);CHKERRQ(ierr);
1312       ierr = VecRestoreArray(from,&array2);CHKERRQ(ierr);
1313     }
1314   } else { /* from local to global -> put data in coarse right hand side */
1315     from = pcbddc->vec1_P;
1316     to = pcbddc->coarse_vec;
1317   }
1318   ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr);
1319   PetscFunctionReturn(0);
1320 }
1321 
1322 #undef __FUNCT__
1323 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd"
1324 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode)
1325 {
1326   PetscErrorCode ierr;
1327   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1328   PetscScalar    *array,*array2;
1329   Vec            from,to;
1330 
1331   PetscFunctionBegin;
1332   if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */
1333     from = pcbddc->coarse_vec;
1334     to = pcbddc->vec1_P;
1335   } else { /* from local to global -> put data in coarse right hand side */
1336     from = pcbddc->vec1_P;
1337     to = pcbddc->coarse_vec;
1338   }
1339   ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr);
1340   if (smode == SCATTER_FORWARD) {
1341     if (pcbddc->coarse_ksp) { /* get array from coarse processes */
1342       Vec tvec;
1343       PetscInt lsize;
1344       ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr);
1345       ierr = VecGetLocalSize(tvec,&lsize);CHKERRQ(ierr);
1346       ierr = VecGetArrayRead(to,(const PetscScalar**)&array);CHKERRQ(ierr);
1347       ierr = VecGetArray(tvec,&array2);CHKERRQ(ierr);
1348       ierr = PetscMemcpy(array2,array,lsize*sizeof(PetscScalar));CHKERRQ(ierr);
1349       ierr = VecRestoreArrayRead(to,(const PetscScalar**)&array);CHKERRQ(ierr);
1350       ierr = VecRestoreArray(tvec,&array2);CHKERRQ(ierr);
1351     }
1352   }
1353   PetscFunctionReturn(0);
1354 }
1355 
1356 /* uncomment for testing purposes */
1357 /* #define PETSC_MISSING_LAPACK_GESVD 1 */
1358 #undef __FUNCT__
1359 #define __FUNCT__ "PCBDDCConstraintsSetUp"
1360 PetscErrorCode PCBDDCConstraintsSetUp(PC pc)
1361 {
1362   PetscErrorCode    ierr;
1363   PC_IS*            pcis = (PC_IS*)(pc->data);
1364   PC_BDDC*          pcbddc = (PC_BDDC*)pc->data;
1365   Mat_IS*           matis = (Mat_IS*)pc->pmat->data;
1366   /* constraint and (optionally) change of basis matrix implemented as SeqAIJ */
1367   MatType           impMatType=MATSEQAIJ;
1368   /* one and zero */
1369   PetscScalar       one=1.0,zero=0.0;
1370   /* space to store constraints and their local indices */
1371   PetscScalar       *temp_quadrature_constraint;
1372   PetscInt          *temp_indices,*temp_indices_to_constraint,*temp_indices_to_constraint_B;
1373   /* iterators */
1374   PetscInt          i,j,k,total_counts,temp_start_ptr;
1375   /* stuff to store connected components stored in pcbddc->mat_graph */
1376   IS                ISForVertices,*ISForFaces,*ISForEdges,*used_IS;
1377   PetscInt          n_ISForFaces,n_ISForEdges;
1378   /* near null space stuff */
1379   MatNullSpace      nearnullsp;
1380   const Vec         *nearnullvecs;
1381   Vec               *localnearnullsp;
1382   PetscBool         nnsp_has_cnst;
1383   PetscInt          nnsp_size;
1384   PetscScalar       *array;
1385   /* BLAS integers */
1386   PetscBLASInt      lwork,lierr;
1387   PetscBLASInt      Blas_N,Blas_M,Blas_K,Blas_one=1;
1388   PetscBLASInt      Blas_LDA,Blas_LDB,Blas_LDC;
1389   /* LAPACK working arrays for SVD or POD */
1390   PetscBool         skip_lapack;
1391   PetscScalar       *work;
1392   PetscReal         *singular_vals;
1393 #if defined(PETSC_USE_COMPLEX)
1394   PetscReal         *rwork;
1395 #endif
1396 #if defined(PETSC_MISSING_LAPACK_GESVD)
1397   PetscBLASInt      Blas_one_2=1;
1398   PetscScalar       *temp_basis,*correlation_mat;
1399 #else
1400   PetscBLASInt      dummy_int_1=1,dummy_int_2=1;
1401   PetscScalar       dummy_scalar_1=0.0,dummy_scalar_2=0.0;
1402 #endif
1403   /* reuse */
1404   PetscInt          olocal_primal_size;
1405   PetscInt          *oprimal_indices_local_idxs;
1406   /* change of basis */
1407   PetscInt          *aux_primal_numbering,*aux_primal_minloc,*global_indices;
1408   PetscBool         boolforchange,qr_needed;
1409   PetscBT           touched,change_basis,qr_needed_idx;
1410   /* auxiliary stuff */
1411   PetscInt          *nnz,*is_indices,*aux_primal_numbering_B;
1412   /* some quantities */
1413   PetscInt          n_vertices,total_primal_vertices,valid_constraints;
1414   PetscInt          size_of_constraint,max_size_of_constraint,max_constraints,temp_constraints;
1415 
1416 
1417   PetscFunctionBegin;
1418   /* Destroy Mat objects computed previously */
1419   ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
1420   ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
1421   /* Get index sets for faces, edges and vertices from graph */
1422   ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr);
1423   /* free unneeded index sets */
1424   if (!pcbddc->use_vertices) {
1425     ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr);
1426   }
1427   if (!pcbddc->use_edges) {
1428     for (i=0;i<n_ISForEdges;i++) {
1429       ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr);
1430     }
1431     ierr = PetscFree(ISForEdges);CHKERRQ(ierr);
1432     n_ISForEdges = 0;
1433   }
1434   if (!pcbddc->use_faces) {
1435     for (i=0;i<n_ISForFaces;i++) {
1436       ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr);
1437     }
1438     ierr = PetscFree(ISForFaces);CHKERRQ(ierr);
1439     n_ISForFaces = 0;
1440   }
1441   /* HACKS (the following two blocks of code) */
1442   if (!ISForVertices && pcbddc->NullSpace && !pcbddc->user_ChangeOfBasisMatrix) {
1443     pcbddc->use_change_of_basis = PETSC_TRUE;
1444     if (!ISForEdges) {
1445       pcbddc->use_change_on_faces = PETSC_TRUE;
1446     }
1447   }
1448   if (pcbddc->NullSpace) {
1449     /* use_change_of_basis should be consistent among processors */
1450     PetscBool tbool[2],gbool[2];
1451     tbool [0] = pcbddc->use_change_of_basis;
1452     tbool [1] = pcbddc->use_change_on_faces;
1453     ierr = MPI_Allreduce(tbool,gbool,2,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
1454     pcbddc->use_change_of_basis = gbool[0];
1455     pcbddc->use_change_on_faces = gbool[1];
1456   }
1457   /* print some info */
1458   if (pcbddc->dbg_flag) {
1459     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
1460     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr);
1461     i = 0;
1462     if (ISForVertices) {
1463       ierr = ISGetSize(ISForVertices,&i);CHKERRQ(ierr);
1464     }
1465     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices\n",PetscGlobalRank,i);CHKERRQ(ierr);
1466     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges\n",PetscGlobalRank,n_ISForEdges);CHKERRQ(ierr);
1467     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces\n",PetscGlobalRank,n_ISForFaces);CHKERRQ(ierr);
1468     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1469   }
1470   /* check if near null space is attached to global mat */
1471   ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr);
1472   if (nearnullsp) {
1473     ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr);
1474     /* remove any stored info */
1475     ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr);
1476     ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr);
1477     /* store information for BDDC solver reuse */
1478     ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr);
1479     pcbddc->onearnullspace = nearnullsp;
1480     ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr);
1481     for (i=0;i<nnsp_size;i++) {
1482       ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr);
1483     }
1484   } else { /* if near null space is not provided BDDC uses constants by default */
1485     nnsp_size = 0;
1486     nnsp_has_cnst = PETSC_TRUE;
1487   }
1488   /* get max number of constraints on a single cc */
1489   max_constraints = nnsp_size;
1490   if (nnsp_has_cnst) max_constraints++;
1491 
1492   /*
1493        Evaluate maximum storage size needed by the procedure
1494        - temp_indices will contain start index of each constraint stored as follows
1495        - temp_indices_to_constraint  [temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in local numbering) on which the constraint acts
1496        - temp_indices_to_constraint_B[temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in boundary numbering) on which the constraint acts
1497        - temp_quadrature_constraint  [temp_indices[i],...,temp[indices[i+1]-1] will contain the scalars representing the constraint itself
1498                                                                                                                                                          */
1499   total_counts = n_ISForFaces+n_ISForEdges;
1500   total_counts *= max_constraints;
1501   n_vertices = 0;
1502   if (ISForVertices) {
1503     ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr);
1504   }
1505   total_counts += n_vertices;
1506   ierr = PetscMalloc1((total_counts+1),&temp_indices);CHKERRQ(ierr);
1507   ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr);
1508   total_counts = 0;
1509   max_size_of_constraint = 0;
1510   for (i=0;i<n_ISForEdges+n_ISForFaces;i++) {
1511     if (i<n_ISForEdges) {
1512       used_IS = &ISForEdges[i];
1513     } else {
1514       used_IS = &ISForFaces[i-n_ISForEdges];
1515     }
1516     ierr = ISGetSize(*used_IS,&j);CHKERRQ(ierr);
1517     total_counts += j;
1518     max_size_of_constraint = PetscMax(j,max_size_of_constraint);
1519   }
1520   total_counts *= max_constraints;
1521   total_counts += n_vertices;
1522   ierr = PetscMalloc1(total_counts,&temp_quadrature_constraint);CHKERRQ(ierr);
1523   ierr = PetscMalloc1(total_counts,&temp_indices_to_constraint);CHKERRQ(ierr);
1524   ierr = PetscMalloc1(total_counts,&temp_indices_to_constraint_B);CHKERRQ(ierr);
1525   /* get local part of global near null space vectors */
1526   ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr);
1527   for (k=0;k<nnsp_size;k++) {
1528     ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr);
1529     ierr = VecScatterBegin(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1530     ierr = VecScatterEnd(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1531   }
1532 
1533   /* whether or not to skip lapack calls */
1534   skip_lapack = PETSC_TRUE;
1535   if (n_ISForFaces+n_ISForEdges) skip_lapack = PETSC_FALSE;
1536 
1537   /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */
1538   if (!pcbddc->use_nnsp_true && !skip_lapack) {
1539     PetscScalar temp_work;
1540 #if defined(PETSC_MISSING_LAPACK_GESVD)
1541     /* Proper Orthogonal Decomposition (POD) using the snapshot method */
1542     ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr);
1543     ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr);
1544     ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr);
1545 #if defined(PETSC_USE_COMPLEX)
1546     ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr);
1547 #endif
1548     /* now we evaluate the optimal workspace using query with lwork=-1 */
1549     ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr);
1550     ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr);
1551     lwork = -1;
1552     ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1553 #if !defined(PETSC_USE_COMPLEX)
1554     PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr));
1555 #else
1556     PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr));
1557 #endif
1558     ierr = PetscFPTrapPop();CHKERRQ(ierr);
1559     if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr);
1560 #else /* on missing GESVD */
1561     /* SVD */
1562     PetscInt max_n,min_n;
1563     max_n = max_size_of_constraint;
1564     min_n = max_constraints;
1565     if (max_size_of_constraint < max_constraints) {
1566       min_n = max_size_of_constraint;
1567       max_n = max_constraints;
1568     }
1569     ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr);
1570 #if defined(PETSC_USE_COMPLEX)
1571     ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr);
1572 #endif
1573     /* now we evaluate the optimal workspace using query with lwork=-1 */
1574     lwork = -1;
1575     ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr);
1576     ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr);
1577     ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr);
1578     ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1579 #if !defined(PETSC_USE_COMPLEX)
1580     PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,&lierr));
1581 #else
1582     PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,rwork,&lierr));
1583 #endif
1584     ierr = PetscFPTrapPop();CHKERRQ(ierr);
1585     if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr);
1586 #endif /* on missing GESVD */
1587     /* Allocate optimal workspace */
1588     ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr);
1589     ierr = PetscMalloc1((PetscInt)lwork,&work);CHKERRQ(ierr);
1590   }
1591   /* Now we can loop on constraining sets */
1592   total_counts = 0;
1593   temp_indices[0] = 0;
1594   /* vertices */
1595   if (ISForVertices) {
1596     ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1597     if (nnsp_has_cnst) { /* consider all vertices */
1598       ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr);
1599       for (i=0;i<n_vertices;i++) {
1600         temp_quadrature_constraint[temp_indices[total_counts]]=1.0;
1601         temp_indices[total_counts+1]=temp_indices[total_counts]+1;
1602         total_counts++;
1603       }
1604     } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */
1605       PetscBool used_vertex;
1606       for (i=0;i<n_vertices;i++) {
1607         used_vertex = PETSC_FALSE;
1608         k = 0;
1609         while (!used_vertex && k<nnsp_size) {
1610           ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1611           if (PetscAbsScalar(array[is_indices[i]])>0.0) {
1612             temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i];
1613             temp_quadrature_constraint[temp_indices[total_counts]]=1.0;
1614             temp_indices[total_counts+1]=temp_indices[total_counts]+1;
1615             total_counts++;
1616             used_vertex = PETSC_TRUE;
1617           }
1618           ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1619           k++;
1620         }
1621       }
1622     }
1623     ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1624     n_vertices = total_counts;
1625   }
1626 
1627   /* edges and faces */
1628   for (i=0;i<n_ISForEdges+n_ISForFaces;i++) {
1629     if (i<n_ISForEdges) {
1630       used_IS = &ISForEdges[i];
1631       boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */
1632     } else {
1633       used_IS = &ISForFaces[i-n_ISForEdges];
1634       boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */
1635     }
1636     temp_constraints = 0;          /* zero the number of constraints I have on this conn comp */
1637     temp_start_ptr = total_counts; /* need to know the starting index of constraints stored */
1638     ierr = ISGetSize(*used_IS,&size_of_constraint);CHKERRQ(ierr);
1639     ierr = ISGetIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1640     /* change of basis should not be performed on local periodic nodes */
1641     if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE;
1642     if (nnsp_has_cnst) {
1643       PetscScalar quad_value;
1644       temp_constraints++;
1645       quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint));
1646       ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr);
1647       for (j=0;j<size_of_constraint;j++) {
1648         temp_quadrature_constraint[temp_indices[total_counts]+j]=quad_value;
1649       }
1650       temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint;  /* store new starting point */
1651       total_counts++;
1652     }
1653     for (k=0;k<nnsp_size;k++) {
1654       PetscReal real_value;
1655       ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1656       ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr);
1657       for (j=0;j<size_of_constraint;j++) {
1658         temp_quadrature_constraint[temp_indices[total_counts]+j]=array[is_indices[j]];
1659       }
1660       ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1661       /* check if array is null on the connected component */
1662       ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
1663       PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_one));
1664       if (real_value > 0.0) { /* keep indices and values */
1665         temp_constraints++;
1666         temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint;  /* store new starting point */
1667         total_counts++;
1668       }
1669     }
1670     ierr = ISRestoreIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1671     valid_constraints = temp_constraints;
1672     /* perform SVD on the constraints if use_nnsp_true has not be requested by the user and there are non-null constraints on the cc */
1673     if (!pcbddc->use_nnsp_true && temp_constraints) {
1674       PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */
1675 
1676 #if defined(PETSC_MISSING_LAPACK_GESVD)
1677       /* SVD: Y = U*S*V^H                -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag
1678          POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2)
1679          -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined
1680             the constraints basis will differ (by a complex factor with absolute value equal to 1)
1681             from that computed using LAPACKgesvd
1682          -> This is due to a different computation of eigenvectors in LAPACKheev
1683          -> The quality of the POD-computed basis will be the same */
1684       ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
1685       /* Store upper triangular part of correlation matrix */
1686       ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
1687       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1688       for (j=0;j<temp_constraints;j++) {
1689         for (k=0;k<j+1;k++) {
1690           PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k]=BLASdot_(&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr+k]],&Blas_one,&temp_quadrature_constraint[temp_indices[temp_start_ptr+j]],&Blas_one_2));
1691         }
1692       }
1693       /* compute eigenvalues and eigenvectors of correlation matrix */
1694       ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
1695       ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr);
1696 #if !defined(PETSC_USE_COMPLEX)
1697       PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr));
1698 #else
1699       PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr));
1700 #endif
1701       ierr = PetscFPTrapPop();CHKERRQ(ierr);
1702       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr);
1703       /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */
1704       j=0;
1705       while (j < temp_constraints && singular_vals[j] < tol) j++;
1706       total_counts=total_counts-j;
1707       valid_constraints = temp_constraints-j;
1708       /* scale and copy POD basis into used quadrature memory */
1709       ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
1710       ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
1711       ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr);
1712       ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1713       ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr);
1714       ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr);
1715       if (j<temp_constraints) {
1716         PetscInt ii;
1717         for (k=j;k<temp_constraints;k++) singular_vals[k]=1.0/PetscSqrtReal(singular_vals[k]);
1718         ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1719         PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC));
1720         ierr = PetscFPTrapPop();CHKERRQ(ierr);
1721         for (k=0;k<temp_constraints-j;k++) {
1722           for (ii=0;ii<size_of_constraint;ii++) {
1723             temp_quadrature_constraint[temp_indices[temp_start_ptr+k]+ii]=singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii];
1724           }
1725         }
1726       }
1727 #else  /* on missing GESVD */
1728       ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
1729       ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
1730       ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1731       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1732 #if !defined(PETSC_USE_COMPLEX)
1733       PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,&lierr));
1734 #else
1735       PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,rwork,&lierr));
1736 #endif
1737       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr);
1738       ierr = PetscFPTrapPop();CHKERRQ(ierr);
1739       /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */
1740       k = temp_constraints;
1741       if (k > size_of_constraint) k = size_of_constraint;
1742       j = 0;
1743       while (j < k && singular_vals[k-j-1] < tol) j++;
1744       total_counts = total_counts-temp_constraints+k-j;
1745       valid_constraints = k-j;
1746 #endif /* on missing GESVD */
1747     }
1748     /* setting change_of_basis flag is safe now */
1749     if (boolforchange) {
1750       for (j=0;j<valid_constraints;j++) {
1751         PetscBTSet(change_basis,total_counts-j-1);
1752       }
1753     }
1754   }
1755   /* free index sets of faces, edges and vertices */
1756   for (i=0;i<n_ISForFaces;i++) {
1757     ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr);
1758   }
1759   if (n_ISForFaces) {
1760     ierr = PetscFree(ISForFaces);CHKERRQ(ierr);
1761   }
1762   for (i=0;i<n_ISForEdges;i++) {
1763     ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr);
1764   }
1765   if (n_ISForEdges) {
1766     ierr = PetscFree(ISForEdges);CHKERRQ(ierr);
1767   }
1768   ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr);
1769   /* map temp_indices_to_constraint in boundary numbering */
1770   ierr = ISGlobalToLocalMappingApply(pcbddc->BtoNmap,IS_GTOLM_DROP,temp_indices[total_counts],temp_indices_to_constraint,&i,temp_indices_to_constraint_B);CHKERRQ(ierr);
1771   if (i != temp_indices[total_counts]) {
1772     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %d != %d\n",temp_indices[total_counts],i);
1773   }
1774 
1775   /* free workspace */
1776   if (!pcbddc->use_nnsp_true && !skip_lapack) {
1777     ierr = PetscFree(work);CHKERRQ(ierr);
1778 #if defined(PETSC_USE_COMPLEX)
1779     ierr = PetscFree(rwork);CHKERRQ(ierr);
1780 #endif
1781     ierr = PetscFree(singular_vals);CHKERRQ(ierr);
1782 #if defined(PETSC_MISSING_LAPACK_GESVD)
1783     ierr = PetscFree(correlation_mat);CHKERRQ(ierr);
1784     ierr = PetscFree(temp_basis);CHKERRQ(ierr);
1785 #endif
1786   }
1787   for (k=0;k<nnsp_size;k++) {
1788     ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr);
1789   }
1790   ierr = PetscFree(localnearnullsp);CHKERRQ(ierr);
1791 
1792   /* set quantities in pcbddc data structure and store previous primal size */
1793   /* n_vertices defines the number of subdomain corners in the primal space */
1794   /* n_constraints defines the number of averages (they can be point primal dofs if change of basis is requested) */
1795   olocal_primal_size = pcbddc->local_primal_size;
1796   pcbddc->local_primal_size = total_counts;
1797   pcbddc->n_vertices = n_vertices;
1798   pcbddc->n_constraints = pcbddc->local_primal_size-pcbddc->n_vertices;
1799 
1800   /* Create constraint matrix */
1801   /* The constraint matrix is used to compute the l2g map of primal dofs */
1802   /* so we need to set it up properly either with or without change of basis */
1803   ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
1804   ierr = MatSetType(pcbddc->ConstraintMatrix,impMatType);CHKERRQ(ierr);
1805   ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr);
1806   /* array to compute a local numbering of constraints : vertices first then constraints */
1807   ierr = PetscMalloc1(pcbddc->local_primal_size,&aux_primal_numbering);CHKERRQ(ierr);
1808   /* array to select the proper local node (of minimum index with respect to global ordering) when changing the basis */
1809   /* note: it should not be needed since IS for faces and edges are already sorted by global ordering when analyzing the graph but... just in case */
1810   ierr = PetscMalloc1(pcbddc->local_primal_size,&aux_primal_minloc);CHKERRQ(ierr);
1811   /* auxiliary stuff for basis change */
1812   ierr = PetscMalloc1(max_size_of_constraint,&global_indices);CHKERRQ(ierr);
1813   ierr = PetscBTCreate(pcis->n_B,&touched);CHKERRQ(ierr);
1814 
1815   /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */
1816   total_primal_vertices=0;
1817   for (i=0;i<pcbddc->local_primal_size;i++) {
1818     size_of_constraint=temp_indices[i+1]-temp_indices[i];
1819     if (size_of_constraint == 1) {
1820       ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]]);CHKERRQ(ierr);
1821       aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]];
1822       aux_primal_minloc[total_primal_vertices]=0;
1823       total_primal_vertices++;
1824     } else if (PetscBTLookup(change_basis,i)) { /* Same procedure used in PCBDDCGetPrimalConstraintsLocalIdx */
1825       PetscInt min_loc,min_index;
1826       ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],global_indices);CHKERRQ(ierr);
1827       /* find first untouched local node */
1828       k = 0;
1829       while (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) k++;
1830       min_index = global_indices[k];
1831       min_loc = k;
1832       /* search the minimum among global nodes already untouched on the cc */
1833       for (k=1;k<size_of_constraint;k++) {
1834         /* there can be more than one constraint on a single connected component */
1835         if (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k]) && min_index > global_indices[k]) {
1836           min_index = global_indices[k];
1837           min_loc = k;
1838         }
1839       }
1840       ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]+min_loc]);CHKERRQ(ierr);
1841       aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]+min_loc];
1842       aux_primal_minloc[total_primal_vertices]=min_loc;
1843       total_primal_vertices++;
1844     }
1845   }
1846   /* determine if a QR strategy is needed for change of basis */
1847   qr_needed = PETSC_FALSE;
1848   ierr = PetscBTCreate(pcbddc->local_primal_size,&qr_needed_idx);CHKERRQ(ierr);
1849   for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
1850     if (PetscBTLookup(change_basis,i)) {
1851       size_of_constraint = temp_indices[i+1]-temp_indices[i];
1852       j = 0;
1853       for (k=0;k<size_of_constraint;k++) {
1854         if (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) {
1855           j++;
1856         }
1857       }
1858       /* found more than one primal dof on the cc */
1859       if (j > 1) {
1860         PetscBTSet(qr_needed_idx,i);
1861         qr_needed = PETSC_TRUE;
1862       }
1863     }
1864   }
1865   /* free workspace */
1866   ierr = PetscFree(global_indices);CHKERRQ(ierr);
1867 
1868   /* permute indices in order to have a sorted set of vertices */
1869   ierr = PetscSortInt(total_primal_vertices,aux_primal_numbering);CHKERRQ(ierr);
1870 
1871   /* nonzero structure of constraint matrix */
1872   ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr);
1873   for (i=0;i<total_primal_vertices;i++) nnz[i]=1;
1874   j=total_primal_vertices;
1875   for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
1876     if (!PetscBTLookup(change_basis,i)) {
1877       nnz[j]=temp_indices[i+1]-temp_indices[i];
1878       j++;
1879     }
1880   }
1881   ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr);
1882   ierr = PetscFree(nnz);CHKERRQ(ierr);
1883   /* set values in constraint matrix */
1884   for (i=0;i<total_primal_vertices;i++) {
1885     ierr = MatSetValue(pcbddc->ConstraintMatrix,i,aux_primal_numbering[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
1886   }
1887   total_counts = total_primal_vertices;
1888   for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
1889     if (!PetscBTLookup(change_basis,i)) {
1890       size_of_constraint=temp_indices[i+1]-temp_indices[i];
1891       ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&total_counts,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],&temp_quadrature_constraint[temp_indices[i]],INSERT_VALUES);CHKERRQ(ierr);
1892       total_counts++;
1893     }
1894   }
1895   /* assembling */
1896   ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1897   ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1898   /*
1899   ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
1900   ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr);
1901   */
1902   /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */
1903   if (pcbddc->use_change_of_basis) {
1904     /* dual and primal dofs on a single cc */
1905     PetscInt     dual_dofs,primal_dofs;
1906     /* iterator on aux_primal_minloc (ordered as read from nearnullspace: vertices, edges and then constraints) */
1907     PetscInt     primal_counter;
1908     /* working stuff for GEQRF */
1909     PetscScalar  *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t;
1910     PetscBLASInt lqr_work;
1911     /* working stuff for UNGQR */
1912     PetscScalar  *gqr_work,lgqr_work_t;
1913     PetscBLASInt lgqr_work;
1914     /* working stuff for TRTRS */
1915     PetscScalar  *trs_rhs;
1916     PetscBLASInt Blas_NRHS;
1917     /* pointers for values insertion into change of basis matrix */
1918     PetscInt     *start_rows,*start_cols;
1919     PetscScalar  *start_vals;
1920     /* working stuff for values insertion */
1921     PetscBT      is_primal;
1922 
1923     /* change of basis acts on local interfaces -> dimension is n_B x n_B */
1924     ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
1925     ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,impMatType);CHKERRQ(ierr);
1926     ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,pcis->n_B,pcis->n_B,pcis->n_B,pcis->n_B);CHKERRQ(ierr);
1927     /* work arrays */
1928     ierr = PetscMalloc1(pcis->n_B,&nnz);CHKERRQ(ierr);
1929     for (i=0;i<pcis->n_B;i++) nnz[i]=1;
1930     /* nonzeros per row */
1931     for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
1932       if (PetscBTLookup(change_basis,i)) {
1933         size_of_constraint = temp_indices[i+1]-temp_indices[i];
1934         if (PetscBTLookup(qr_needed_idx,i)) {
1935           for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = size_of_constraint;
1936         } else {
1937           for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = 2;
1938           /* get local primal index on the cc */
1939           j = 0;
1940           while (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+j])) j++;
1941           nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = size_of_constraint;
1942         }
1943       }
1944     }
1945     ierr = MatSeqAIJSetPreallocation(pcbddc->ChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr);
1946     ierr = PetscFree(nnz);CHKERRQ(ierr);
1947     /* Set initial identity in the matrix */
1948     for (i=0;i<pcis->n_B;i++) {
1949       ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr);
1950     }
1951 
1952     if (pcbddc->dbg_flag) {
1953       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr);
1954       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr);
1955     }
1956 
1957 
1958     /* Now we loop on the constraints which need a change of basis */
1959     /*
1960        Change of basis matrix is evaluated similarly to the FIRST APPROACH in
1961        Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1)
1962 
1963        Basic blocks of change of basis matrix T computed
1964 
1965           - Using the following block transformation if there is only a primal dof on the cc
1966             (in the example, primal dof is the last one of the edge in LOCAL ordering
1967              in this code, primal dof is the first one of the edge in GLOBAL ordering)
1968             | 1        0   ...        0     1 |
1969             | 0        1   ...        0     1 |
1970             |              ...                |
1971             | 0        ...            1     1 |
1972             | -s_1/s_n ...    -s_{n-1}/-s_n 1 |
1973 
1974           - via QR decomposition of constraints otherwise
1975     */
1976     if (qr_needed) {
1977       /* space to store Q */
1978       ierr = PetscMalloc1((max_size_of_constraint)*(max_size_of_constraint),&qr_basis);CHKERRQ(ierr);
1979       /* first we issue queries for optimal work */
1980       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr);
1981       ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr);
1982       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1983       lqr_work = -1;
1984       PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr));
1985       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr);
1986       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr);
1987       ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr);
1988       lgqr_work = -1;
1989       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr);
1990       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr);
1991       ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr);
1992       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1993       if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */
1994       PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr));
1995       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr);
1996       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr);
1997       ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr);
1998       /* array to store scaling factors for reflectors */
1999       ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr);
2000       /* array to store rhs and solution of triangular solver */
2001       ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr);
2002       /* allocating workspace for check */
2003       if (pcbddc->dbg_flag) {
2004         ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&work);CHKERRQ(ierr);
2005       }
2006     }
2007     /* array to store whether a node is primal or not */
2008     ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr);
2009     ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr);
2010     ierr = ISGlobalToLocalMappingApply(pcbddc->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,aux_primal_numbering,&i,aux_primal_numbering_B);CHKERRQ(ierr);
2011     if (i != total_primal_vertices) {
2012       SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",total_primal_vertices,i);
2013     }
2014     for (i=0;i<total_primal_vertices;i++) {
2015       ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr);
2016     }
2017     ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr);
2018 
2019     /* loop on constraints and see whether or not they need a change of basis and compute it */
2020     /* -> using implicit ordering contained in temp_indices data */
2021     total_counts = pcbddc->n_vertices;
2022     primal_counter = total_counts;
2023     while (total_counts<pcbddc->local_primal_size) {
2024       primal_dofs = 1;
2025       if (PetscBTLookup(change_basis,total_counts)) {
2026         /* get all constraints with same support: if more then one constraint is present on the cc then surely indices are stored contiguosly */
2027         while (total_counts+primal_dofs < pcbddc->local_primal_size && temp_indices_to_constraint_B[temp_indices[total_counts]] == temp_indices_to_constraint_B[temp_indices[total_counts+primal_dofs]]) {
2028           primal_dofs++;
2029         }
2030         /* get constraint info */
2031         size_of_constraint = temp_indices[total_counts+1]-temp_indices[total_counts];
2032         dual_dofs = size_of_constraint-primal_dofs;
2033 
2034         if (pcbddc->dbg_flag) {
2035           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d to %d (incl) need a change of basis (size %d)\n",total_counts,total_counts+primal_dofs-1,size_of_constraint);CHKERRQ(ierr);
2036         }
2037 
2038         if (primal_dofs > 1) { /* QR */
2039 
2040           /* copy quadrature constraints for change of basis check */
2041           if (pcbddc->dbg_flag) {
2042             ierr = PetscMemcpy(work,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
2043           }
2044           /* copy temporary constraints into larger work vector (in order to store all columns of Q) */
2045           ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
2046 
2047           /* compute QR decomposition of constraints */
2048           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2049           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
2050           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2051           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2052           PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr));
2053           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr);
2054           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2055 
2056           /* explictly compute R^-T */
2057           ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr);
2058           for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0;
2059           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
2060           ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr);
2061           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2062           ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr);
2063           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2064           PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr));
2065           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr);
2066           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2067 
2068           /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */
2069           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2070           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2071           ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr);
2072           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2073           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2074           PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr));
2075           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr);
2076           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2077 
2078           /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints
2079              i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below)
2080              where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */
2081           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2082           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
2083           ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr);
2084           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2085           ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr);
2086           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr);
2087           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2088           PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_LDC));
2089           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2090           ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
2091 
2092           /* insert values in change of basis matrix respecting global ordering of new primal dofs */
2093           start_rows = &temp_indices_to_constraint_B[temp_indices[total_counts]];
2094           /* insert cols for primal dofs */
2095           for (j=0;j<primal_dofs;j++) {
2096             start_vals = &qr_basis[j*size_of_constraint];
2097             start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+aux_primal_minloc[primal_counter+j]];
2098             ierr = MatSetValues(pcbddc->ChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr);
2099           }
2100           /* insert cols for dual dofs */
2101           for (j=0,k=0;j<dual_dofs;k++) {
2102             if (!PetscBTLookup(is_primal,temp_indices_to_constraint_B[temp_indices[total_counts]+k])) {
2103               start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint];
2104               start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+k];
2105               ierr = MatSetValues(pcbddc->ChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr);
2106               j++;
2107             }
2108           }
2109 
2110           /* check change of basis */
2111           if (pcbddc->dbg_flag) {
2112             PetscInt   ii,jj;
2113             PetscBool valid_qr=PETSC_TRUE;
2114             ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr);
2115             ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2116             ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr);
2117             ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2118             ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr);
2119             ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr);
2120             ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2121             PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&work[size_of_constraint*primal_dofs],&Blas_LDC));
2122             ierr = PetscFPTrapPop();CHKERRQ(ierr);
2123             for (jj=0;jj<size_of_constraint;jj++) {
2124               for (ii=0;ii<primal_dofs;ii++) {
2125                 if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE;
2126                 if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE;
2127               }
2128             }
2129             if (!valid_qr) {
2130               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr);
2131               for (jj=0;jj<size_of_constraint;jj++) {
2132                 for (ii=0;ii<primal_dofs;ii++) {
2133                   if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) {
2134                     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
2135                   }
2136                   if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) {
2137                     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
2138                   }
2139                 }
2140               }
2141             } else {
2142               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr);
2143             }
2144           }
2145         } else { /* simple transformation block */
2146           PetscInt row,col;
2147           PetscScalar val;
2148           for (j=0;j<size_of_constraint;j++) {
2149             row = temp_indices_to_constraint_B[temp_indices[total_counts]+j];
2150             if (!PetscBTLookup(is_primal,row)) {
2151               col = temp_indices_to_constraint_B[temp_indices[total_counts]+aux_primal_minloc[primal_counter]];
2152               ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr);
2153               ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,row,col,1.0,INSERT_VALUES);CHKERRQ(ierr);
2154             } else {
2155               for (k=0;k<size_of_constraint;k++) {
2156                 col = temp_indices_to_constraint_B[temp_indices[total_counts]+k];
2157                 if (row != col) {
2158                   val = -temp_quadrature_constraint[temp_indices[total_counts]+k]/temp_quadrature_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter]];
2159                 } else {
2160                   val = 1.0;
2161                 }
2162                 ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr);
2163               }
2164             }
2165           }
2166           if (pcbddc->dbg_flag) {
2167             ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr);
2168           }
2169         }
2170         /* increment primal counter */
2171         primal_counter += primal_dofs;
2172       } else {
2173         if (pcbddc->dbg_flag) {
2174           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,temp_indices[total_counts+1]-temp_indices[total_counts]);CHKERRQ(ierr);
2175         }
2176       }
2177       /* increment constraint counter total_counts */
2178       total_counts += primal_dofs;
2179     }
2180 
2181     /* free workspace */
2182     if (qr_needed) {
2183       if (pcbddc->dbg_flag) {
2184         ierr = PetscFree(work);CHKERRQ(ierr);
2185       }
2186       ierr = PetscFree(trs_rhs);CHKERRQ(ierr);
2187       ierr = PetscFree(qr_tau);CHKERRQ(ierr);
2188       ierr = PetscFree(qr_work);CHKERRQ(ierr);
2189       ierr = PetscFree(gqr_work);CHKERRQ(ierr);
2190       ierr = PetscFree(qr_basis);CHKERRQ(ierr);
2191     }
2192     ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr);
2193     /* assembling */
2194     ierr = MatAssemblyBegin(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2195     ierr = MatAssemblyEnd(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2196     /*
2197     ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
2198     ierr = MatView(pcbddc->ChangeOfBasisMatrix,(PetscViewer)0);CHKERRQ(ierr);
2199     */
2200   }
2201   /* Change of basis as provided by the user in local numbering (internal and boundary) or boundary only */
2202   if (pcbddc->user_ChangeOfBasisMatrix) {
2203     PetscInt rows,cols;
2204     ierr = MatGetSize(pcbddc->user_ChangeOfBasisMatrix,&rows,&cols);CHKERRQ(ierr);
2205     if (rows == pcis->n && cols == pcis->n) {
2206       ierr = MatGetSubMatrix(pcbddc->user_ChangeOfBasisMatrix,pcis->is_B_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
2207     } else {
2208       ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr);
2209       pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix;
2210     }
2211   }
2212 
2213   /* get indices in local ordering for vertices and constraints */
2214   if (olocal_primal_size == pcbddc->local_primal_size) { /* if this is true, I need to check if a new primal space has been introduced */
2215     ierr = PetscMalloc1(olocal_primal_size,&oprimal_indices_local_idxs);CHKERRQ(ierr);
2216     ierr = PetscMemcpy(oprimal_indices_local_idxs,pcbddc->primal_indices_local_idxs,olocal_primal_size*sizeof(PetscInt));CHKERRQ(ierr);
2217   }
2218   ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr);
2219   ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
2220   ierr = PetscMalloc1(pcbddc->local_primal_size,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
2221   ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&i,&aux_primal_numbering);CHKERRQ(ierr);
2222   ierr = PetscMemcpy(pcbddc->primal_indices_local_idxs,aux_primal_numbering,i*sizeof(PetscInt));CHKERRQ(ierr);
2223   ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr);
2224   ierr = PCBDDCGetPrimalConstraintsLocalIdx(pc,&j,&aux_primal_numbering);CHKERRQ(ierr);
2225   ierr = PetscMemcpy(&pcbddc->primal_indices_local_idxs[i],aux_primal_numbering,j*sizeof(PetscInt));CHKERRQ(ierr);
2226   ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr);
2227   /* set quantities in PCBDDC data struct */
2228   pcbddc->n_actual_vertices = i;
2229   /* check if a new primal space has been introduced */
2230   pcbddc->new_primal_space_local = PETSC_TRUE;
2231   if (olocal_primal_size == pcbddc->local_primal_size) {
2232     ierr = PetscMemcmp(pcbddc->primal_indices_local_idxs,oprimal_indices_local_idxs,olocal_primal_size,&pcbddc->new_primal_space_local);CHKERRQ(ierr);
2233     pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local);
2234     ierr = PetscFree(oprimal_indices_local_idxs);CHKERRQ(ierr);
2235   }
2236   /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */
2237   ierr = MPI_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
2238 
2239   /* flush dbg viewer */
2240   if (pcbddc->dbg_flag) {
2241     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2242   }
2243 
2244   /* free workspace */
2245   ierr = PetscBTDestroy(&touched);CHKERRQ(ierr);
2246   ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr);
2247   ierr = PetscFree(aux_primal_minloc);CHKERRQ(ierr);
2248   ierr = PetscFree(temp_indices);CHKERRQ(ierr);
2249   ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr);
2250   ierr = PetscFree(temp_indices_to_constraint);CHKERRQ(ierr);
2251   ierr = PetscFree(temp_indices_to_constraint_B);CHKERRQ(ierr);
2252   ierr = PetscFree(temp_quadrature_constraint);CHKERRQ(ierr);
2253   PetscFunctionReturn(0);
2254 }
2255 
2256 #undef __FUNCT__
2257 #define __FUNCT__ "PCBDDCAnalyzeInterface"
2258 PetscErrorCode PCBDDCAnalyzeInterface(PC pc)
2259 {
2260   PC_BDDC     *pcbddc = (PC_BDDC*)pc->data;
2261   PC_IS       *pcis = (PC_IS*)pc->data;
2262   Mat_IS      *matis  = (Mat_IS*)pc->pmat->data;
2263   PetscInt    ierr,i,vertex_size;
2264   PetscViewer viewer=pcbddc->dbg_viewer;
2265 
2266   PetscFunctionBegin;
2267   /* Reset previously computed graph */
2268   ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr);
2269   /* Init local Graph struct */
2270   ierr = PCBDDCGraphInit(pcbddc->mat_graph,matis->mapping);CHKERRQ(ierr);
2271 
2272   /* Check validity of the csr graph passed in by the user */
2273   if (pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) {
2274     ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr);
2275   }
2276 
2277   /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */
2278   if (pcbddc->use_local_adj && (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy)) {
2279     Mat mat_adj;
2280     const PetscInt *xadj,*adjncy;
2281     PetscBool flg_row=PETSC_TRUE;
2282 
2283     ierr = MatConvert(matis->A,MATMPIADJ,MAT_INITIAL_MATRIX,&mat_adj);CHKERRQ(ierr);
2284     ierr = MatGetRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&i,&xadj,&adjncy,&flg_row);CHKERRQ(ierr);
2285     if (!flg_row) {
2286       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatGetRowIJ called in %s\n",__FUNCT__);
2287     }
2288     ierr = PCBDDCSetLocalAdjacencyGraph(pc,i,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr);
2289     ierr = MatRestoreRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&i,&xadj,&adjncy,&flg_row);CHKERRQ(ierr);
2290     if (!flg_row) {
2291       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatRestoreRowIJ called in %s\n",__FUNCT__);
2292     }
2293     ierr = MatDestroy(&mat_adj);CHKERRQ(ierr);
2294   }
2295 
2296   /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting or PCBDDCSetDofsSplittingLocal */
2297   vertex_size = 1;
2298   if (pcbddc->user_provided_isfordofs) {
2299     if (pcbddc->n_ISForDofs) { /* need to convert from global to local and remove references to global dofs splitting */
2300       ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr);
2301       for (i=0;i<pcbddc->n_ISForDofs;i++) {
2302         ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr);
2303         ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr);
2304       }
2305       pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs;
2306       pcbddc->n_ISForDofs = 0;
2307       ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr);
2308     }
2309     /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */
2310     ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr);
2311   } else {
2312     if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering */
2313       ierr = MatGetBlockSize(pc->pmat,&pcbddc->n_ISForDofsLocal);CHKERRQ(ierr);
2314       ierr = PetscMalloc(pcbddc->n_ISForDofsLocal*sizeof(IS),&pcbddc->ISForDofsLocal);CHKERRQ(ierr);
2315       for (i=0;i<pcbddc->n_ISForDofsLocal;i++) {
2316         ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),pcis->n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr);
2317       }
2318     }
2319   }
2320 
2321   /* Setup of Graph */
2322   if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { /* need to convert from global to local */
2323     ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr);
2324   }
2325   if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { /* need to convert from global to local */
2326     ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr);
2327   }
2328   ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices);
2329 
2330   /* Graph's connected components analysis */
2331   ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr);
2332 
2333   /* print some info to stdout */
2334   if (pcbddc->dbg_flag) {
2335     ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer);
2336   }
2337 
2338   /* mark topography has done */
2339   pcbddc->recompute_topography = PETSC_FALSE;
2340   PetscFunctionReturn(0);
2341 }
2342 
2343 #undef __FUNCT__
2344 #define __FUNCT__ "PCBDDCGetPrimalVerticesLocalIdx"
2345 PetscErrorCode  PCBDDCGetPrimalVerticesLocalIdx(PC pc, PetscInt *n_vertices, PetscInt **vertices_idx)
2346 {
2347   PC_BDDC        *pcbddc = (PC_BDDC*)(pc->data);
2348   PetscInt       *vertices,*row_cmat_indices,n,i,size_of_constraint,local_primal_size;
2349   PetscErrorCode ierr;
2350 
2351   PetscFunctionBegin;
2352   n = 0;
2353   vertices = 0;
2354   if (pcbddc->ConstraintMatrix) {
2355     ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&i);CHKERRQ(ierr);
2356     for (i=0;i<local_primal_size;i++) {
2357       ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
2358       if (size_of_constraint == 1) n++;
2359       ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
2360     }
2361     if (vertices_idx) {
2362       ierr = PetscMalloc1(n,&vertices);CHKERRQ(ierr);
2363       n = 0;
2364       for (i=0;i<local_primal_size;i++) {
2365         ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
2366         if (size_of_constraint == 1) {
2367           vertices[n++]=row_cmat_indices[0];
2368         }
2369         ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
2370       }
2371     }
2372   }
2373   *n_vertices = n;
2374   if (vertices_idx) *vertices_idx = vertices;
2375   PetscFunctionReturn(0);
2376 }
2377 
2378 #undef __FUNCT__
2379 #define __FUNCT__ "PCBDDCGetPrimalConstraintsLocalIdx"
2380 PetscErrorCode  PCBDDCGetPrimalConstraintsLocalIdx(PC pc, PetscInt *n_constraints, PetscInt **constraints_idx)
2381 {
2382   PC_BDDC        *pcbddc = (PC_BDDC*)(pc->data);
2383   PetscInt       *constraints_index,*row_cmat_indices,*row_cmat_global_indices;
2384   PetscInt       n,i,j,size_of_constraint,local_primal_size,local_size,max_size_of_constraint,min_index,min_loc;
2385   PetscBT        touched;
2386   PetscErrorCode ierr;
2387 
2388     /* This function assumes that the number of local constraints per connected component
2389        is not greater than the number of nodes defined for the connected component
2390        (otherwise we will surely have linear dependence between constraints and thus a singular coarse problem) */
2391   PetscFunctionBegin;
2392   n = 0;
2393   constraints_index = 0;
2394   if (pcbddc->ConstraintMatrix) {
2395     ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&local_size);CHKERRQ(ierr);
2396     max_size_of_constraint = 0;
2397     for (i=0;i<local_primal_size;i++) {
2398       ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
2399       if (size_of_constraint > 1) {
2400         n++;
2401       }
2402       max_size_of_constraint = PetscMax(size_of_constraint,max_size_of_constraint);
2403       ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
2404     }
2405     if (constraints_idx) {
2406       ierr = PetscMalloc1(n,&constraints_index);CHKERRQ(ierr);
2407       ierr = PetscMalloc1(max_size_of_constraint,&row_cmat_global_indices);CHKERRQ(ierr);
2408       ierr = PetscBTCreate(local_size,&touched);CHKERRQ(ierr);
2409       n = 0;
2410       for (i=0;i<local_primal_size;i++) {
2411         ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
2412         if (size_of_constraint > 1) {
2413           ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,row_cmat_indices,row_cmat_global_indices);CHKERRQ(ierr);
2414           /* find first untouched local node */
2415           j = 0;
2416           while (PetscBTLookup(touched,row_cmat_indices[j])) j++;
2417           min_index = row_cmat_global_indices[j];
2418           min_loc = j;
2419           /* search the minimum among nodes not yet touched on the connected component
2420              since there can be more than one constraint on a single cc */
2421           for (j=1;j<size_of_constraint;j++) {
2422             if (!PetscBTLookup(touched,row_cmat_indices[j]) && min_index > row_cmat_global_indices[j]) {
2423               min_index = row_cmat_global_indices[j];
2424               min_loc = j;
2425             }
2426           }
2427           ierr = PetscBTSet(touched,row_cmat_indices[min_loc]);CHKERRQ(ierr);
2428           constraints_index[n++] = row_cmat_indices[min_loc];
2429         }
2430         ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
2431       }
2432       ierr = PetscBTDestroy(&touched);CHKERRQ(ierr);
2433       ierr = PetscFree(row_cmat_global_indices);CHKERRQ(ierr);
2434     }
2435   }
2436   *n_constraints = n;
2437   if (constraints_idx) *constraints_idx = constraints_index;
2438   PetscFunctionReturn(0);
2439 }
2440 
2441 /* the next two functions has been adapted from pcis.c */
2442 #undef __FUNCT__
2443 #define __FUNCT__ "PCBDDCApplySchur"
2444 PetscErrorCode  PCBDDCApplySchur(PC pc, Vec v, Vec vec1_B, Vec vec2_B, Vec vec1_D, Vec vec2_D)
2445 {
2446   PetscErrorCode ierr;
2447   PC_IS          *pcis = (PC_IS*)(pc->data);
2448 
2449   PetscFunctionBegin;
2450   if (!vec2_B) { vec2_B = v; }
2451   ierr = MatMult(pcis->A_BB,v,vec1_B);CHKERRQ(ierr);
2452   ierr = MatMult(pcis->A_IB,v,vec1_D);CHKERRQ(ierr);
2453   ierr = KSPSolve(pcis->ksp_D,vec1_D,vec2_D);CHKERRQ(ierr);
2454   ierr = MatMult(pcis->A_BI,vec2_D,vec2_B);CHKERRQ(ierr);
2455   ierr = VecAXPY(vec1_B,-1.0,vec2_B);CHKERRQ(ierr);
2456   PetscFunctionReturn(0);
2457 }
2458 
2459 #undef __FUNCT__
2460 #define __FUNCT__ "PCBDDCApplySchurTranspose"
2461 PetscErrorCode  PCBDDCApplySchurTranspose(PC pc, Vec v, Vec vec1_B, Vec vec2_B, Vec vec1_D, Vec vec2_D)
2462 {
2463   PetscErrorCode ierr;
2464   PC_IS          *pcis = (PC_IS*)(pc->data);
2465 
2466   PetscFunctionBegin;
2467   if (!vec2_B) { vec2_B = v; }
2468   ierr = MatMultTranspose(pcis->A_BB,v,vec1_B);CHKERRQ(ierr);
2469   ierr = MatMultTranspose(pcis->A_BI,v,vec1_D);CHKERRQ(ierr);
2470   ierr = KSPSolveTranspose(pcis->ksp_D,vec1_D,vec2_D);CHKERRQ(ierr);
2471   ierr = MatMultTranspose(pcis->A_IB,vec2_D,vec2_B);CHKERRQ(ierr);
2472   ierr = VecAXPY(vec1_B,-1.0,vec2_B);CHKERRQ(ierr);
2473   PetscFunctionReturn(0);
2474 }
2475 
2476 #undef __FUNCT__
2477 #define __FUNCT__ "PCBDDCSubsetNumbering"
2478 PetscErrorCode PCBDDCSubsetNumbering(MPI_Comm comm,ISLocalToGlobalMapping l2gmap, PetscInt n_local_dofs, PetscInt local_dofs[], PetscInt local_dofs_mult[], PetscInt* n_global_subset, PetscInt* global_numbering_subset[])
2479 {
2480   Vec            local_vec,global_vec;
2481   IS             seqis,paris;
2482   VecScatter     scatter_ctx;
2483   PetscScalar    *array;
2484   PetscInt       *temp_global_dofs;
2485   PetscScalar    globalsum;
2486   PetscInt       i,j,s;
2487   PetscInt       nlocals,first_index,old_index,max_local;
2488   PetscMPIInt    rank_prec_comm,size_prec_comm,max_global;
2489   PetscMPIInt    *dof_sizes,*dof_displs;
2490   PetscBool      first_found;
2491   PetscErrorCode ierr;
2492 
2493   PetscFunctionBegin;
2494   /* mpi buffers */
2495   ierr = MPI_Comm_size(comm,&size_prec_comm);CHKERRQ(ierr);
2496   ierr = MPI_Comm_rank(comm,&rank_prec_comm);CHKERRQ(ierr);
2497   j = ( !rank_prec_comm ? size_prec_comm : 0);
2498   ierr = PetscMalloc1(j,&dof_sizes);CHKERRQ(ierr);
2499   ierr = PetscMalloc1(j,&dof_displs);CHKERRQ(ierr);
2500   /* get maximum size of subset */
2501   ierr = PetscMalloc1(n_local_dofs,&temp_global_dofs);CHKERRQ(ierr);
2502   ierr = ISLocalToGlobalMappingApply(l2gmap,n_local_dofs,local_dofs,temp_global_dofs);CHKERRQ(ierr);
2503   max_local = 0;
2504   for (i=0;i<n_local_dofs;i++) {
2505     if (max_local < temp_global_dofs[i] ) {
2506       max_local = temp_global_dofs[i];
2507     }
2508   }
2509   ierr = MPI_Allreduce(&max_local,&max_global,1,MPIU_INT,MPI_MAX,comm);CHKERRQ(ierr);
2510   max_global++;
2511   max_local = 0;
2512   for (i=0;i<n_local_dofs;i++) {
2513     if (max_local < local_dofs[i] ) {
2514       max_local = local_dofs[i];
2515     }
2516   }
2517   max_local++;
2518   /* allocate workspace */
2519   ierr = VecCreate(PETSC_COMM_SELF,&local_vec);CHKERRQ(ierr);
2520   ierr = VecSetSizes(local_vec,PETSC_DECIDE,max_local);CHKERRQ(ierr);
2521   ierr = VecSetType(local_vec,VECSEQ);CHKERRQ(ierr);
2522   ierr = VecCreate(comm,&global_vec);CHKERRQ(ierr);
2523   ierr = VecSetSizes(global_vec,PETSC_DECIDE,max_global);CHKERRQ(ierr);
2524   ierr = VecSetType(global_vec,VECMPI);CHKERRQ(ierr);
2525   /* create scatter */
2526   ierr = ISCreateGeneral(PETSC_COMM_SELF,n_local_dofs,local_dofs,PETSC_COPY_VALUES,&seqis);CHKERRQ(ierr);
2527   ierr = ISCreateGeneral(comm,n_local_dofs,temp_global_dofs,PETSC_COPY_VALUES,&paris);CHKERRQ(ierr);
2528   ierr = VecScatterCreate(local_vec,seqis,global_vec,paris,&scatter_ctx);CHKERRQ(ierr);
2529   ierr = ISDestroy(&seqis);CHKERRQ(ierr);
2530   ierr = ISDestroy(&paris);CHKERRQ(ierr);
2531   /* init array */
2532   ierr = VecSet(global_vec,0.0);CHKERRQ(ierr);
2533   ierr = VecSet(local_vec,0.0);CHKERRQ(ierr);
2534   ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr);
2535   if (local_dofs_mult) {
2536     for (i=0;i<n_local_dofs;i++) {
2537       array[local_dofs[i]]=(PetscScalar)local_dofs_mult[i];
2538     }
2539   } else {
2540     for (i=0;i<n_local_dofs;i++) {
2541       array[local_dofs[i]]=1.0;
2542     }
2543   }
2544   ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr);
2545   /* scatter into global vec and get total number of global dofs */
2546   ierr = VecScatterBegin(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2547   ierr = VecScatterEnd(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2548   ierr = VecSum(global_vec,&globalsum);CHKERRQ(ierr);
2549   *n_global_subset = (PetscInt)PetscRealPart(globalsum);
2550   /* Fill global_vec with cumulative function for global numbering */
2551   ierr = VecGetArray(global_vec,&array);CHKERRQ(ierr);
2552   ierr = VecGetLocalSize(global_vec,&s);CHKERRQ(ierr);
2553   nlocals = 0;
2554   first_index = -1;
2555   first_found = PETSC_FALSE;
2556   for (i=0;i<s;i++) {
2557     if (!first_found && PetscRealPart(array[i]) > 0.1) {
2558       first_found = PETSC_TRUE;
2559       first_index = i;
2560     }
2561     nlocals += (PetscInt)PetscRealPart(array[i]);
2562   }
2563   ierr = MPI_Gather(&nlocals,1,MPIU_INT,dof_sizes,1,MPIU_INT,0,comm);CHKERRQ(ierr);
2564   if (!rank_prec_comm) {
2565     dof_displs[0]=0;
2566     for (i=1;i<size_prec_comm;i++) {
2567       dof_displs[i] = dof_displs[i-1]+dof_sizes[i-1];
2568     }
2569   }
2570   ierr = MPI_Scatter(dof_displs,1,MPIU_INT,&nlocals,1,MPIU_INT,0,comm);CHKERRQ(ierr);
2571   if (first_found) {
2572     array[first_index] += (PetscScalar)nlocals;
2573     old_index = first_index;
2574     for (i=first_index+1;i<s;i++) {
2575       if (PetscRealPart(array[i]) > 0.1) {
2576         array[i] += array[old_index];
2577         old_index = i;
2578       }
2579     }
2580   }
2581   ierr = VecRestoreArray(global_vec,&array);CHKERRQ(ierr);
2582   ierr = VecSet(local_vec,0.0);CHKERRQ(ierr);
2583   ierr = VecScatterBegin(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2584   ierr = VecScatterEnd(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2585   /* get global ordering of local dofs */
2586   ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr);
2587   if (local_dofs_mult) {
2588     for (i=0;i<n_local_dofs;i++) {
2589       temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-local_dofs_mult[i];
2590     }
2591   } else {
2592     for (i=0;i<n_local_dofs;i++) {
2593       temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-1;
2594     }
2595   }
2596   ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr);
2597   /* free workspace */
2598   ierr = VecScatterDestroy(&scatter_ctx);CHKERRQ(ierr);
2599   ierr = VecDestroy(&local_vec);CHKERRQ(ierr);
2600   ierr = VecDestroy(&global_vec);CHKERRQ(ierr);
2601   ierr = PetscFree(dof_sizes);CHKERRQ(ierr);
2602   ierr = PetscFree(dof_displs);CHKERRQ(ierr);
2603   /* return pointer to global ordering of local dofs */
2604   *global_numbering_subset = temp_global_dofs;
2605   PetscFunctionReturn(0);
2606 }
2607 
2608 #undef __FUNCT__
2609 #define __FUNCT__ "PCBDDCOrthonormalizeVecs"
2610 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[])
2611 {
2612   PetscInt       i,j;
2613   PetscScalar    *alphas;
2614   PetscErrorCode ierr;
2615 
2616   PetscFunctionBegin;
2617   /* this implements stabilized Gram-Schmidt */
2618   ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr);
2619   for (i=0;i<n;i++) {
2620     ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr);
2621     if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); }
2622     for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); }
2623   }
2624   ierr = PetscFree(alphas);CHKERRQ(ierr);
2625   PetscFunctionReturn(0);
2626 }
2627 
2628 #undef __FUNCT__
2629 #define __FUNCT__ "MatISGetSubassemblingPattern"
2630 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt n_subdomains, PetscBool contiguous, IS* is_sends)
2631 {
2632   Mat             subdomain_adj;
2633   IS              new_ranks,ranks_send_to;
2634   MatPartitioning partitioner;
2635   Mat_IS          *matis;
2636   PetscInt        n_neighs,*neighs,*n_shared,**shared;
2637   PetscInt        prank;
2638   PetscMPIInt     size,rank,color;
2639   PetscInt        *xadj,*adjncy,*oldranks;
2640   PetscInt        *adjncy_wgt,*v_wgt,*is_indices,*ranks_send_to_idx;
2641   PetscInt        i,j,local_size,threshold=0;
2642   PetscErrorCode  ierr;
2643   PetscBool       use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE;
2644   PetscSubcomm    subcomm;
2645 
2646   PetscFunctionBegin;
2647   ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr);
2648   ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr);
2649   ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr);
2650 
2651   /* Get info on mapping */
2652   matis = (Mat_IS*)(mat->data);
2653   ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&local_size);CHKERRQ(ierr);
2654   ierr = ISLocalToGlobalMappingGetInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr);
2655 
2656   /* build local CSR graph of subdomains' connectivity */
2657   ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr);
2658   xadj[0] = 0;
2659   xadj[1] = PetscMax(n_neighs-1,0);
2660   ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr);
2661   ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr);
2662 
2663   if (threshold) {
2664     PetscInt* count,min_threshold;
2665     ierr = PetscMalloc1(local_size,&count);CHKERRQ(ierr);
2666     ierr = PetscMemzero(count,local_size*sizeof(PetscInt));CHKERRQ(ierr);
2667     for (i=1;i<n_neighs;i++) {/* i=1 so I don't count myself -> faces nodes counts to 1 */
2668       for (j=0;j<n_shared[i];j++) {
2669         count[shared[i][j]] += 1;
2670       }
2671     }
2672     /* adapt threshold since we dont want to lose significant connections */
2673     min_threshold = n_neighs;
2674     for (i=1;i<n_neighs;i++) {
2675       for (j=0;j<n_shared[i];j++) {
2676         min_threshold = PetscMin(count[shared[i][j]],min_threshold);
2677       }
2678     }
2679     threshold = PetscMax(min_threshold+1,threshold);
2680     xadj[1] = 0;
2681     for (i=1;i<n_neighs;i++) {
2682       for (j=0;j<n_shared[i];j++) {
2683         if (count[shared[i][j]] < threshold) {
2684           adjncy[xadj[1]] = neighs[i];
2685           adjncy_wgt[xadj[1]] = n_shared[i];
2686           xadj[1]++;
2687           break;
2688         }
2689       }
2690     }
2691     ierr = PetscFree(count);CHKERRQ(ierr);
2692   } else {
2693     if (xadj[1]) {
2694       ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr);
2695       ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr);
2696     }
2697   }
2698   ierr = ISLocalToGlobalMappingRestoreInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr);
2699   if (use_square) {
2700     for (i=0;i<xadj[1];i++) {
2701       adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i];
2702     }
2703   }
2704   ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr);
2705 
2706   ierr = PetscMalloc(sizeof(PetscInt),&ranks_send_to_idx);CHKERRQ(ierr);
2707 
2708   /*
2709     Restrict work on active processes only.
2710   */
2711   ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr);
2712   ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */
2713   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
2714   ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr);
2715   ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr);
2716   if (color) {
2717     ierr = PetscFree(xadj);CHKERRQ(ierr);
2718     ierr = PetscFree(adjncy);CHKERRQ(ierr);
2719     ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr);
2720   } else {
2721     PetscInt coarsening_ratio;
2722     ierr = MPI_Comm_size(subcomm->comm,&size);CHKERRQ(ierr);
2723     ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr);
2724     prank = rank;
2725     ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,subcomm->comm);CHKERRQ(ierr);
2726     /*
2727     for (i=0;i<size;i++) {
2728       PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]);
2729     }
2730     */
2731     for (i=0;i<xadj[1];i++) {
2732       ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr);
2733     }
2734     ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr);
2735     ierr = MatCreateMPIAdj(subcomm->comm,1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr);
2736     /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */
2737 
2738     /* Partition */
2739     ierr = MatPartitioningCreate(subcomm->comm,&partitioner);CHKERRQ(ierr);
2740     ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr);
2741     if (use_vwgt) {
2742       ierr = PetscMalloc(sizeof(*v_wgt),&v_wgt);CHKERRQ(ierr);
2743       v_wgt[0] = local_size;
2744       ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr);
2745     }
2746     n_subdomains = PetscMin((PetscInt)size,n_subdomains);
2747     coarsening_ratio = size/n_subdomains;
2748     /* Parmetis does not always give back nparts with small graphs! this should be taken into account */
2749     ierr = MatPartitioningSetNParts(partitioner,n_subdomains);CHKERRQ(ierr);
2750     ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr);
2751     ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr);
2752     /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */
2753 
2754     ierr = ISGetIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2755     if (contiguous) {
2756       ranks_send_to_idx[0] = oldranks[is_indices[0]]; /* contiguos set of processes */
2757     } else {
2758       ranks_send_to_idx[0] = coarsening_ratio*oldranks[is_indices[0]]; /* scattered set of processes */
2759     }
2760     ierr = ISRestoreIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2761     /* clean up */
2762     ierr = PetscFree(oldranks);CHKERRQ(ierr);
2763     ierr = ISDestroy(&new_ranks);CHKERRQ(ierr);
2764     ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr);
2765     ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr);
2766   }
2767   ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr);
2768 
2769   /* assemble parallel IS for sends */
2770   i = 1;
2771   if (color) i=0;
2772   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr);
2773 
2774   /* get back IS */
2775   *is_sends = ranks_send_to;
2776   PetscFunctionReturn(0);
2777 }
2778 
2779 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate;
2780 
2781 #undef __FUNCT__
2782 #define __FUNCT__ "MatISSubassemble"
2783 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, MatReuse reuse, Mat *mat_n, PetscInt nis, IS isarray[])
2784 {
2785   Mat                    local_mat;
2786   Mat_IS                 *matis;
2787   IS                     is_sends_internal;
2788   PetscInt               rows,cols;
2789   PetscInt               i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals;
2790   PetscBool              ismatis,isdense,destroy_mat;
2791   ISLocalToGlobalMapping l2gmap;
2792   PetscInt*              l2gmap_indices;
2793   const PetscInt*        is_indices;
2794   MatType                new_local_type;
2795   /* buffers */
2796   PetscInt               *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs;
2797   PetscInt               *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is;
2798   PetscScalar            *ptr_vals,*send_buffer_vals,*recv_buffer_vals;
2799   /* MPI */
2800   MPI_Comm               comm,comm_n;
2801   PetscSubcomm           subcomm;
2802   PetscMPIInt            n_sends,n_recvs,commsize;
2803   PetscMPIInt            *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is;
2804   PetscMPIInt            *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals;
2805   PetscMPIInt            len,tag_idxs,tag_idxs_is,tag_vals,source_dest;
2806   MPI_Request            *send_req_idxs,*send_req_idxs_is,*send_req_vals;
2807   MPI_Request            *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals;
2808   PetscErrorCode         ierr;
2809 
2810   PetscFunctionBegin;
2811   /* TODO: add missing checks */
2812   PetscValidLogicalCollectiveInt(mat,n_subdomains,3);
2813   PetscValidLogicalCollectiveBool(mat,restrict_comm,4);
2814   PetscValidLogicalCollectiveEnum(mat,reuse,5);
2815   PetscValidLogicalCollectiveInt(mat,nis,7);
2816   ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr);
2817   if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__);
2818   ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr);
2819   ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr);
2820   if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE");
2821   ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr);
2822   if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square");
2823   if (reuse == MAT_REUSE_MATRIX && *mat_n) {
2824     PetscInt mrows,mcols,mnrows,mncols;
2825     ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr);
2826     if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS");
2827     ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr);
2828     ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr);
2829     if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows);
2830     if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols);
2831   }
2832   ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr);
2833   PetscValidLogicalCollectiveInt(mat,bs,0);
2834   /* prepare IS for sending if not provided */
2835   if (!is_sends) {
2836     PetscBool pcontig = PETSC_TRUE;
2837     if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains");
2838     ierr = MatISGetSubassemblingPattern(mat,n_subdomains,pcontig,&is_sends_internal);CHKERRQ(ierr);
2839   } else {
2840     ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr);
2841     is_sends_internal = is_sends;
2842   }
2843 
2844   /* get pointer of MATIS data */
2845   matis = (Mat_IS*)mat->data;
2846 
2847   /* get comm */
2848   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
2849 
2850   /* compute number of sends */
2851   ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr);
2852   ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr);
2853 
2854   /* compute number of receives */
2855   ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr);
2856   ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr);
2857   ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr);
2858   ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr);
2859   for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1;
2860   ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr);
2861   ierr = PetscFree(iflags);CHKERRQ(ierr);
2862 
2863   /* restrict comm if requested */
2864   subcomm = 0;
2865   destroy_mat = PETSC_FALSE;
2866   if (restrict_comm) {
2867     PetscMPIInt color,rank,subcommsize;
2868     ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2869     color = 0;
2870     if (n_sends && !n_recvs) color = 1; /* sending only processes will not partecipate in new comm */
2871     ierr = MPI_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
2872     subcommsize = commsize - subcommsize;
2873     /* check if reuse has been requested */
2874     if (reuse == MAT_REUSE_MATRIX) {
2875       if (*mat_n) {
2876         PetscMPIInt subcommsize2;
2877         ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr);
2878         if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2);
2879         comm_n = PetscObjectComm((PetscObject)*mat_n);
2880       } else {
2881         comm_n = PETSC_COMM_SELF;
2882       }
2883     } else { /* MAT_INITIAL_MATRIX */
2884       ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr);
2885       ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr);
2886       ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr);
2887       comm_n = subcomm->comm;
2888     }
2889     /* flag to destroy *mat_n if not significative */
2890     if (color) destroy_mat = PETSC_TRUE;
2891   } else {
2892     comm_n = comm;
2893   }
2894 
2895   /* prepare send/receive buffers */
2896   ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr);
2897   ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr);
2898   ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr);
2899   ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr);
2900   if (nis) {
2901     ierr = PetscMalloc(commsize*sizeof(*ilengths_idxs_is),&ilengths_idxs_is);CHKERRQ(ierr);
2902     ierr = PetscMemzero(ilengths_idxs_is,commsize*sizeof(*ilengths_idxs_is));CHKERRQ(ierr);
2903   }
2904 
2905   /* Get data from local matrices */
2906   if (!isdense) {
2907     /* TODO: See below some guidelines on how to prepare the local buffers */
2908     /*
2909        send_buffer_vals should contain the raw values of the local matrix
2910        send_buffer_idxs should contain:
2911        - MatType_PRIVATE type
2912        - PetscInt        size_of_l2gmap
2913        - PetscInt        global_row_indices[size_of_l2gmap]
2914        - PetscInt        all_other_info_which_is_needed_to_compute_preallocation_and_set_values
2915     */
2916   } else {
2917     ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr);
2918     ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&i);CHKERRQ(ierr);
2919     ierr = PetscMalloc1((i+2),&send_buffer_idxs);CHKERRQ(ierr);
2920     send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE;
2921     send_buffer_idxs[1] = i;
2922     ierr = ISLocalToGlobalMappingGetIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr);
2923     ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr);
2924     ierr = ISLocalToGlobalMappingRestoreIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr);
2925     ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr);
2926     for (i=0;i<n_sends;i++) {
2927       ilengths_vals[is_indices[i]] = len*len;
2928       ilengths_idxs[is_indices[i]] = len+2;
2929     }
2930   }
2931   ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr);
2932   /* additional is (if any) */
2933   if (nis) {
2934     PetscMPIInt psum;
2935     PetscInt j;
2936     for (j=0,psum=0;j<nis;j++) {
2937       PetscInt plen;
2938       ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr);
2939       ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr);
2940       psum += len+1; /* indices + lenght */
2941     }
2942     ierr = PetscMalloc(psum*sizeof(PetscInt),&send_buffer_idxs_is);CHKERRQ(ierr);
2943     for (j=0,psum=0;j<nis;j++) {
2944       PetscInt plen;
2945       const PetscInt *is_array_idxs;
2946       ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr);
2947       send_buffer_idxs_is[psum] = plen;
2948       ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr);
2949       ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr);
2950       ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr);
2951       psum += plen+1; /* indices + lenght */
2952     }
2953     for (i=0;i<n_sends;i++) {
2954       ilengths_idxs_is[is_indices[i]] = psum;
2955     }
2956     ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr);
2957   }
2958 
2959   buf_size_idxs = 0;
2960   buf_size_vals = 0;
2961   buf_size_idxs_is = 0;
2962   for (i=0;i<n_recvs;i++) {
2963     buf_size_idxs += (PetscInt)olengths_idxs[i];
2964     buf_size_vals += (PetscInt)olengths_vals[i];
2965     if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i];
2966   }
2967   ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr);
2968   ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr);
2969   ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr);
2970 
2971   /* get new tags for clean communications */
2972   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr);
2973   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr);
2974   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr);
2975 
2976   /* allocate for requests */
2977   ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr);
2978   ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr);
2979   ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr);
2980   ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr);
2981   ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr);
2982   ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr);
2983 
2984   /* communications */
2985   ptr_idxs = recv_buffer_idxs;
2986   ptr_vals = recv_buffer_vals;
2987   ptr_idxs_is = recv_buffer_idxs_is;
2988   for (i=0;i<n_recvs;i++) {
2989     source_dest = onodes[i];
2990     ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr);
2991     ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr);
2992     ptr_idxs += olengths_idxs[i];
2993     ptr_vals += olengths_vals[i];
2994     if (nis) {
2995       ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr);
2996       ptr_idxs_is += olengths_idxs_is[i];
2997     }
2998   }
2999   for (i=0;i<n_sends;i++) {
3000     ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr);
3001     ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr);
3002     ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr);
3003     if (nis) {
3004       ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr);
3005     }
3006   }
3007   ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr);
3008   ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr);
3009 
3010   /* assemble new l2g map */
3011   ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3012   ptr_idxs = recv_buffer_idxs;
3013   buf_size_idxs = 0;
3014   for (i=0;i<n_recvs;i++) {
3015     buf_size_idxs += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
3016     ptr_idxs += olengths_idxs[i];
3017   }
3018   ierr = PetscMalloc1(buf_size_idxs,&l2gmap_indices);CHKERRQ(ierr);
3019   ptr_idxs = recv_buffer_idxs;
3020   buf_size_idxs = 0;
3021   for (i=0;i<n_recvs;i++) {
3022     ierr = PetscMemcpy(&l2gmap_indices[buf_size_idxs],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr);
3023     buf_size_idxs += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
3024     ptr_idxs += olengths_idxs[i];
3025   }
3026   ierr = PetscSortRemoveDupsInt(&buf_size_idxs,l2gmap_indices);CHKERRQ(ierr);
3027   ierr = ISLocalToGlobalMappingCreate(comm_n,1,buf_size_idxs,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr);
3028   ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr);
3029 
3030   /* infer new local matrix type from received local matrices type */
3031   /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */
3032   /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */
3033   if (n_recvs) {
3034     MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0];
3035     ptr_idxs = recv_buffer_idxs;
3036     for (i=0;i<n_recvs;i++) {
3037       if ((PetscInt)new_local_type_private != *ptr_idxs) {
3038         new_local_type_private = MATAIJ_PRIVATE;
3039         break;
3040       }
3041       ptr_idxs += olengths_idxs[i];
3042     }
3043     switch (new_local_type_private) {
3044       case MATDENSE_PRIVATE:
3045         if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */
3046           new_local_type = MATSEQAIJ;
3047           bs = 1;
3048         } else { /* if I receive only 1 dense matrix */
3049           new_local_type = MATSEQDENSE;
3050           bs = 1;
3051         }
3052         break;
3053       case MATAIJ_PRIVATE:
3054         new_local_type = MATSEQAIJ;
3055         bs = 1;
3056         break;
3057       case MATBAIJ_PRIVATE:
3058         new_local_type = MATSEQBAIJ;
3059         break;
3060       case MATSBAIJ_PRIVATE:
3061         new_local_type = MATSEQSBAIJ;
3062         break;
3063       default:
3064         SETERRQ2(comm,PETSC_ERR_PLIB,"Unkwown private type %d in %s",new_local_type_private,__FUNCT__);
3065         break;
3066     }
3067   } else { /* by default, new_local_type is seqdense */
3068     new_local_type = MATSEQDENSE;
3069     bs = 1;
3070   }
3071 
3072   /* create MATIS object if needed */
3073   if (reuse == MAT_INITIAL_MATRIX) {
3074     ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr);
3075     ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,mat_n);CHKERRQ(ierr);
3076   } else {
3077     /* it also destroys the local matrices */
3078     ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr);
3079   }
3080   ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr);
3081   ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr);
3082   ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr);
3083   ierr = MatSetUp(local_mat);CHKERRQ(ierr); /* WARNING -> no preallocation yet */
3084 
3085   /* set values */
3086   ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3087   ptr_vals = recv_buffer_vals;
3088   ptr_idxs = recv_buffer_idxs;
3089   for (i=0;i<n_recvs;i++) {
3090     if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */
3091       ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
3092       ierr = MatSetValues(*mat_n,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr);
3093       ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
3094       ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
3095       ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr);
3096     } else {
3097       /* TODO */
3098     }
3099     ptr_idxs += olengths_idxs[i];
3100     ptr_vals += olengths_vals[i];
3101   }
3102   ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3103   ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3104   ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3105   ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3106 
3107 #if 0
3108   if (!restrict_comm) { /* check */
3109     Vec       lvec,rvec;
3110     PetscReal infty_error;
3111 
3112     ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr);
3113     ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr);
3114     ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr);
3115     ierr = VecScale(lvec,-1.0);CHKERRQ(ierr);
3116     ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr);
3117     ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr);
3118     ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error);
3119     ierr = VecDestroy(&rvec);CHKERRQ(ierr);
3120     ierr = VecDestroy(&lvec);CHKERRQ(ierr);
3121   }
3122 #endif
3123 
3124   /* assemble new additional is (if any) */
3125   if (nis) {
3126     PetscInt **temp_idxs,*count_is,j,psum;
3127 
3128     ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3129     ierr = PetscMalloc(nis*sizeof(PetscInt),&count_is);CHKERRQ(ierr);
3130     ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr);
3131     ptr_idxs = recv_buffer_idxs_is;
3132     psum = 0;
3133     for (i=0;i<n_recvs;i++) {
3134       for (j=0;j<nis;j++) {
3135         PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */
3136         count_is[j] += plen; /* increment counting of buffer for j-th IS */
3137         psum += plen;
3138         ptr_idxs += plen+1; /* shift pointer to received data */
3139       }
3140     }
3141     ierr = PetscMalloc(nis*sizeof(PetscInt*),&temp_idxs);CHKERRQ(ierr);
3142     ierr = PetscMalloc(psum*sizeof(PetscInt),&temp_idxs[0]);CHKERRQ(ierr);
3143     for (i=1;i<nis;i++) {
3144       temp_idxs[i] = temp_idxs[i-1]+count_is[i-1];
3145     }
3146     ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr);
3147     ptr_idxs = recv_buffer_idxs_is;
3148     for (i=0;i<n_recvs;i++) {
3149       for (j=0;j<nis;j++) {
3150         PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */
3151         ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr);
3152         count_is[j] += plen; /* increment starting point of buffer for j-th IS */
3153         ptr_idxs += plen+1; /* shift pointer to received data */
3154       }
3155     }
3156     for (i=0;i<nis;i++) {
3157       ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr);
3158       ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr);
3159       ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr);
3160     }
3161     ierr = PetscFree(count_is);CHKERRQ(ierr);
3162     ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr);
3163     ierr = PetscFree(temp_idxs);CHKERRQ(ierr);
3164   }
3165   /* free workspace */
3166   ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr);
3167   ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr);
3168   ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr);
3169   ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3170   ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr);
3171   ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3172   if (isdense) {
3173     ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr);
3174     ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr);
3175   } else {
3176     /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */
3177   }
3178   if (nis) {
3179     ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3180     ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr);
3181   }
3182   ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr);
3183   ierr = PetscFree(recv_req_vals);CHKERRQ(ierr);
3184   ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr);
3185   ierr = PetscFree(send_req_idxs);CHKERRQ(ierr);
3186   ierr = PetscFree(send_req_vals);CHKERRQ(ierr);
3187   ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr);
3188   ierr = PetscFree(ilengths_vals);CHKERRQ(ierr);
3189   ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr);
3190   ierr = PetscFree(olengths_vals);CHKERRQ(ierr);
3191   ierr = PetscFree(olengths_idxs);CHKERRQ(ierr);
3192   ierr = PetscFree(onodes);CHKERRQ(ierr);
3193   if (nis) {
3194     ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr);
3195     ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr);
3196     ierr = PetscFree(onodes_is);CHKERRQ(ierr);
3197   }
3198   ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr);
3199   if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */
3200     ierr = MatDestroy(mat_n);CHKERRQ(ierr);
3201     for (i=0;i<nis;i++) {
3202       ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr);
3203     }
3204   }
3205   PetscFunctionReturn(0);
3206 }
3207 
3208 /* temporary hack into ksp private data structure */
3209 #include <petsc-private/kspimpl.h>
3210 
3211 #undef __FUNCT__
3212 #define __FUNCT__ "PCBDDCSetUpCoarseSolver"
3213 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals)
3214 {
3215   PC_BDDC                *pcbddc = (PC_BDDC*)pc->data;
3216   PC_IS                  *pcis = (PC_IS*)pc->data;
3217   Mat                    coarse_mat,coarse_mat_is,coarse_submat_dense;
3218   MatNullSpace           CoarseNullSpace=NULL;
3219   ISLocalToGlobalMapping coarse_islg;
3220   IS                     coarse_is,*isarray;
3221   PetscInt               i,im_active=-1,active_procs=-1;
3222   PetscInt               nis,nisdofs,nisneu;
3223   PC                     pc_temp;
3224   PCType                 coarse_pc_type;
3225   KSPType                coarse_ksp_type;
3226   PetscBool              multilevel_requested,multilevel_allowed;
3227   PetscBool              isredundant,isbddc,isnn,coarse_reuse;
3228   Mat                    t_coarse_mat_is;
3229   PetscInt               void_procs,ncoarse_ml,ncoarse_ds,ncoarse;
3230   PetscMPIInt            all_procs;
3231   PetscBool              csin_ml,csin_ds,csin,csin_type_simple;
3232   PetscBool              compute_vecs = PETSC_FALSE;
3233   PetscErrorCode         ierr;
3234 
3235   PetscFunctionBegin;
3236   /* Assign global numbering to coarse dofs */
3237   if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */
3238     compute_vecs = PETSC_TRUE;
3239     PetscInt ocoarse_size;
3240     ocoarse_size = pcbddc->coarse_size;
3241     ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr);
3242     ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr);
3243     /* see if we can avoid some work */
3244     if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */
3245       if (ocoarse_size != pcbddc->coarse_size) { /* ...but with different size, so reset it and set reuse flag to false */
3246         ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr);
3247         coarse_reuse = PETSC_FALSE;
3248       } else { /* we can safely reuse already computed coarse matrix */
3249         coarse_reuse = PETSC_TRUE;
3250       }
3251     } else { /* there's no coarse ksp, so we need to create the coarse matrix too */
3252       coarse_reuse = PETSC_FALSE;
3253     }
3254     /* reset any subassembling information */
3255     ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr);
3256     ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
3257   } else { /* primal space is unchanged, so we can reuse coarse matrix */
3258     coarse_reuse = PETSC_TRUE;
3259   }
3260 
3261   /* count "active" (i.e. with positive local size) and "void" processes */
3262   im_active = !!(pcis->n);
3263   ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
3264   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&all_procs);CHKERRQ(ierr);
3265   void_procs = all_procs-active_procs;
3266   csin_type_simple = PETSC_TRUE;
3267   if (pcbddc->current_level) {
3268     csin_ml = PETSC_TRUE;
3269     ncoarse_ml = void_procs;
3270     csin_ds = PETSC_TRUE;
3271     ncoarse_ds = void_procs;
3272     if (!void_procs) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen");
3273   } else {
3274     csin_ml = PETSC_FALSE;
3275     ncoarse_ml = all_procs;
3276     if (void_procs) {
3277       csin_ds = PETSC_TRUE;
3278       ncoarse_ds = void_procs;
3279       csin_type_simple = PETSC_FALSE;
3280     } else {
3281       csin_ds = PETSC_FALSE;
3282       ncoarse_ds = all_procs;
3283     }
3284   }
3285 
3286   /*
3287     test if we can go multilevel: three conditions must be satisfied:
3288     - we have not exceeded the number of levels requested
3289     - we can actually subassemble the active processes
3290     - we can find a suitable number of MPI processes where we can place the subassembled problem
3291   */
3292   multilevel_allowed = PETSC_FALSE;
3293   multilevel_requested = PETSC_FALSE;
3294   if (pcbddc->current_level < pcbddc->max_levels) {
3295     multilevel_requested = PETSC_TRUE;
3296     if (active_procs/pcbddc->coarsening_ratio < 2 || ncoarse_ml/pcbddc->coarsening_ratio < 2) {
3297       multilevel_allowed = PETSC_FALSE;
3298     } else {
3299       multilevel_allowed = PETSC_TRUE;
3300     }
3301   }
3302   /* determine number of process partecipating to coarse solver */
3303   if (multilevel_allowed) {
3304     ncoarse = ncoarse_ml;
3305     csin = csin_ml;
3306   } else {
3307     ncoarse = ncoarse_ds;
3308     csin = csin_ds;
3309   }
3310 
3311   /* creates temporary l2gmap and IS for coarse indexes */
3312   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr);
3313   ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr);
3314 
3315   /* creates temporary MATIS object for coarse matrix */
3316   ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_submat_dense);CHKERRQ(ierr);
3317 #if 0
3318   {
3319     PetscViewer viewer;
3320     char filename[256];
3321     sprintf(filename,"local_coarse_mat%d.m",PetscGlobalRank);
3322     ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr);
3323     ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
3324     ierr = MatView(coarse_submat_dense,viewer);CHKERRQ(ierr);
3325     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
3326   }
3327 #endif
3328   ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,&t_coarse_mat_is);CHKERRQ(ierr);
3329   ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr);
3330   ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3331   ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3332   ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr);
3333 
3334   /* compute dofs splitting and neumann boundaries for coarse dofs */
3335   if (multilevel_allowed && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal) ) { /* protects from unneded computations */
3336     PetscInt               *tidxs,*tidxs2,nout,tsize,i;
3337     const PetscInt         *idxs;
3338     ISLocalToGlobalMapping tmap;
3339 
3340     /* create map between primal indices (in local representative ordering) and local primal numbering */
3341     ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr);
3342     /* allocate space for temporary storage */
3343     ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&tidxs);CHKERRQ(ierr);
3344     ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&tidxs2);CHKERRQ(ierr);
3345     /* allocate for IS array */
3346     nisdofs = pcbddc->n_ISForDofsLocal;
3347     nisneu = !!pcbddc->NeumannBoundariesLocal;
3348     nis = nisdofs + nisneu;
3349     ierr = PetscMalloc(nis*sizeof(IS),&isarray);CHKERRQ(ierr);
3350     /* dofs splitting */
3351     for (i=0;i<nisdofs;i++) {
3352       /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */
3353       ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr);
3354       ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr);
3355       ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr);
3356       ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr);
3357       ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr);
3358       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->ISForDofsLocal[i]),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr);
3359       /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */
3360     }
3361     /* neumann boundaries */
3362     if (pcbddc->NeumannBoundariesLocal) {
3363       /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */
3364       ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr);
3365       ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr);
3366       ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr);
3367       ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr);
3368       ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr);
3369       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->NeumannBoundariesLocal),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr);
3370       /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */
3371     }
3372     /* free memory */
3373     ierr = PetscFree(tidxs);CHKERRQ(ierr);
3374     ierr = PetscFree(tidxs2);CHKERRQ(ierr);
3375     ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr);
3376   } else {
3377     nis = 0;
3378     nisdofs = 0;
3379     nisneu = 0;
3380     isarray = NULL;
3381   }
3382   /* destroy no longer needed map */
3383   ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr);
3384 
3385   /* restrict on coarse candidates (if needed) */
3386   coarse_mat_is = NULL;
3387   if (csin) {
3388     if (!pcbddc->coarse_subassembling_init ) { /* creates subassembling init pattern if not present */
3389       PetscInt j,tissize,*nisindices;
3390       PetscInt *coarse_candidates;
3391       const PetscInt* tisindices;
3392       /* get coarse candidates' ranks in pc communicator */
3393       ierr = PetscMalloc(all_procs*sizeof(PetscInt),&coarse_candidates);CHKERRQ(ierr);
3394       ierr = MPI_Allgather(&im_active,1,MPIU_INT,coarse_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
3395       for (i=0,j=0;i<all_procs;i++) {
3396         if (!coarse_candidates[i]) {
3397           coarse_candidates[j]=i;
3398           j++;
3399         }
3400       }
3401       if (j < ncoarse) SETERRQ2(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen! %d < %d",j,ncoarse);
3402       /* get a suitable subassembling pattern */
3403       if (csin_type_simple) {
3404         PetscMPIInt rank;
3405         PetscInt    issize,isidx;
3406         ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr);
3407         if (im_active) {
3408           issize = 1;
3409           isidx = (PetscInt)rank;
3410         } else {
3411           issize = 0;
3412           isidx = -1;
3413         }
3414         ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),issize,&isidx,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
3415       } else {
3416         ierr = MatISGetSubassemblingPattern(t_coarse_mat_is,ncoarse,PETSC_TRUE,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
3417       }
3418       if (pcbddc->dbg_flag) {
3419         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
3420         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init (before shift)\n");CHKERRQ(ierr);
3421         ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr);
3422         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse candidates\n");CHKERRQ(ierr);
3423         for (i=0;i<j;i++) {
3424           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"%d ",coarse_candidates[i]);CHKERRQ(ierr);
3425         }
3426         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"\n");CHKERRQ(ierr);
3427         ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3428       }
3429       /* shift the pattern on coarse candidates */
3430       ierr = ISGetLocalSize(pcbddc->coarse_subassembling_init,&tissize);CHKERRQ(ierr);
3431       ierr = ISGetIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr);
3432       ierr = PetscMalloc(tissize*sizeof(PetscInt),&nisindices);CHKERRQ(ierr);
3433       for (i=0;i<tissize;i++) nisindices[i] = coarse_candidates[tisindices[i]];
3434       ierr = ISRestoreIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr);
3435       ierr = ISGeneralSetIndices(pcbddc->coarse_subassembling_init,tissize,nisindices,PETSC_OWN_POINTER);CHKERRQ(ierr);
3436       ierr = PetscFree(coarse_candidates);CHKERRQ(ierr);
3437     }
3438     if (pcbddc->dbg_flag) {
3439       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
3440       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init\n");CHKERRQ(ierr);
3441       ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr);
3442       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3443     }
3444     /* get temporary coarse mat in IS format restricted on coarse procs (plus additional index sets of isarray) */
3445     ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr);
3446   } else {
3447     if (pcbddc->dbg_flag) {
3448       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
3449       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init not needed\n");CHKERRQ(ierr);
3450       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3451     }
3452     ierr = PetscObjectReference((PetscObject)t_coarse_mat_is);CHKERRQ(ierr);
3453     coarse_mat_is = t_coarse_mat_is;
3454   }
3455 
3456   /* create local to global scatters for coarse problem */
3457   if (compute_vecs) {
3458     PetscInt lrows;
3459     ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr);
3460     if (coarse_mat_is) {
3461       ierr = MatGetLocalSize(coarse_mat_is,&lrows,NULL);CHKERRQ(ierr);
3462     } else {
3463       lrows = 0;
3464     }
3465     ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr);
3466     ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr);
3467     ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr);
3468     ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
3469     ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
3470   }
3471   ierr = ISDestroy(&coarse_is);CHKERRQ(ierr);
3472   ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr);
3473 
3474   /* set defaults for coarse KSP and PC */
3475   if (multilevel_allowed) {
3476     coarse_ksp_type = KSPRICHARDSON;
3477     coarse_pc_type = PCBDDC;
3478   } else {
3479     coarse_ksp_type = KSPPREONLY;
3480     coarse_pc_type = PCREDUNDANT;
3481   }
3482 
3483   /* print some info if requested */
3484   if (pcbddc->dbg_flag) {
3485     if (!multilevel_allowed) {
3486       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
3487       if (multilevel_requested) {
3488         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr);
3489       } else if (pcbddc->max_levels) {
3490         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr);
3491       }
3492       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3493     }
3494   }
3495 
3496   /* create the coarse KSP object only once with defaults */
3497   if (coarse_mat_is) {
3498     MatReuse coarse_mat_reuse;
3499     PetscViewer dbg_viewer = NULL;
3500     if (pcbddc->dbg_flag) {
3501       dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat_is));
3502       ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr);
3503     }
3504     if (!pcbddc->coarse_ksp) {
3505       char prefix[256],str_level[16];
3506       size_t len;
3507       ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat_is),&pcbddc->coarse_ksp);CHKERRQ(ierr);
3508       ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr);
3509       ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr);
3510       ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat_is,coarse_mat_is);CHKERRQ(ierr);
3511       ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr);
3512       ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr);
3513       ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr);
3514       ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr);
3515       /* prefix */
3516       ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr);
3517       ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr);
3518       if (!pcbddc->current_level) {
3519         ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
3520         ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr);
3521       } else {
3522         ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr);
3523         if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */
3524         if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */
3525         ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr);
3526         sprintf(str_level,"l%d_",(int)(pcbddc->current_level));
3527         ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr);
3528       }
3529       ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr);
3530       /* allow user customization */
3531       ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr);
3532       ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
3533     }
3534 
3535     /* get some info after set from options */
3536     ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr);
3537     ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr);
3538     ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr);
3539     ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr);
3540     if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */
3541       ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr);
3542       isbddc = PETSC_FALSE;
3543     }
3544     if (isredundant) {
3545       KSP inner_ksp;
3546       PC inner_pc;
3547       ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr);
3548       ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr);
3549       ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr);
3550     }
3551 
3552     /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */
3553     ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr);
3554     ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr);
3555     ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr);
3556     if (nisdofs) {
3557       ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr);
3558       for (i=0;i<nisdofs;i++) {
3559         ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr);
3560       }
3561     }
3562     if (nisneu) {
3563       ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr);
3564       ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr);
3565     }
3566 
3567     /* assemble coarse matrix */
3568     if (coarse_reuse) {
3569       ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr);
3570       ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr);
3571       coarse_mat_reuse = MAT_REUSE_MATRIX;
3572     } else {
3573       coarse_mat_reuse = MAT_INITIAL_MATRIX;
3574     }
3575     if (isbddc || isnn) {
3576       if (!pcbddc->coarse_subassembling) { /* subassembling info is not present */
3577         ierr = MatISGetSubassemblingPattern(coarse_mat_is,active_procs/pcbddc->coarsening_ratio,PETSC_TRUE,&pcbddc->coarse_subassembling);CHKERRQ(ierr);
3578         if (pcbddc->dbg_flag) {
3579           ierr = PetscViewerASCIIPrintf(dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
3580           ierr = PetscViewerASCIIPrintf(dbg_viewer,"Subassembling pattern\n");CHKERRQ(ierr);
3581           ierr = ISView(pcbddc->coarse_subassembling,dbg_viewer);CHKERRQ(ierr);
3582           ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr);
3583         }
3584       }
3585       ierr = MatISSubassemble(coarse_mat_is,pcbddc->coarse_subassembling,0,PETSC_FALSE,coarse_mat_reuse,&coarse_mat,0,NULL);CHKERRQ(ierr);
3586     } else {
3587       ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr);
3588     }
3589     ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr);
3590 
3591     /* propagate symmetry info to coarse matrix */
3592     ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pcbddc->issym);CHKERRQ(ierr);
3593     ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
3594 
3595     /* set operators */
3596     ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr);
3597     if (pcbddc->dbg_flag) {
3598       ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr);
3599     }
3600   } else { /* processes non partecipating to coarse solver (if any) */
3601     coarse_mat = 0;
3602   }
3603   ierr = PetscFree(isarray);CHKERRQ(ierr);
3604 #if 0
3605   {
3606     PetscViewer viewer;
3607     char filename[256];
3608     sprintf(filename,"coarse_mat.m");
3609     ierr = PetscViewerASCIIOpen(PETSC_COMM_WORLD,filename,&viewer);CHKERRQ(ierr);
3610     ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
3611     ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr);
3612     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
3613   }
3614 #endif
3615 
3616   /* Compute coarse null space (special handling by BDDC only) */
3617   if (pcbddc->NullSpace) {
3618     ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr);
3619   }
3620 
3621   if (pcbddc->coarse_ksp) {
3622     Vec crhs,csol;
3623     PetscBool ispreonly;
3624     if (CoarseNullSpace) {
3625       if (isbddc) {
3626         ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr);
3627       } else {
3628         ierr = KSPSetNullSpace(pcbddc->coarse_ksp,CoarseNullSpace);CHKERRQ(ierr);
3629       }
3630     }
3631     /* setup coarse ksp */
3632     ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr);
3633     ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr);
3634     ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr);
3635     /* hack */
3636     if (!csol) {
3637       ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr);
3638     }
3639     if (!crhs) {
3640       ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr);
3641     }
3642     /* Check coarse problem if in debug mode or if solving with an iterative method */
3643     ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr);
3644     if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) {
3645       KSP       check_ksp;
3646       KSPType   check_ksp_type;
3647       PC        check_pc;
3648       Vec       check_vec,coarse_vec;
3649       PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0;
3650       PetscInt  its;
3651       PetscBool compute_eigs;
3652       PetscReal *eigs_r,*eigs_c;
3653       PetscInt  neigs;
3654       const char *prefix;
3655 
3656       /* Create ksp object suitable for estimation of extreme eigenvalues */
3657       ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr);
3658       ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr);
3659       ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr);
3660       if (ispreonly) {
3661         check_ksp_type = KSPPREONLY;
3662         compute_eigs = PETSC_FALSE;
3663       } else {
3664         check_ksp_type = KSPGMRES;
3665         compute_eigs = PETSC_TRUE;
3666       }
3667       ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr);
3668       ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr);
3669       ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr);
3670       ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr);
3671       ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr);
3672       ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr);
3673       ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr);
3674       ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr);
3675       ierr = KSPSetUp(check_ksp);CHKERRQ(ierr);
3676       ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr);
3677       ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr);
3678       /* create random vec */
3679       ierr = KSPGetSolution(pcbddc->coarse_ksp,&coarse_vec);CHKERRQ(ierr);
3680       ierr = VecDuplicate(coarse_vec,&check_vec);CHKERRQ(ierr);
3681       ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr);
3682       if (CoarseNullSpace) {
3683         ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr);
3684       }
3685       ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr);
3686       /* solve coarse problem */
3687       ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr);
3688       if (CoarseNullSpace) {
3689         ierr = MatNullSpaceRemove(CoarseNullSpace,coarse_vec);CHKERRQ(ierr);
3690       }
3691       /* set eigenvalue estimation if preonly has not been requested */
3692       if (compute_eigs) {
3693         ierr = PetscMalloc((pcbddc->coarse_size+1)*sizeof(PetscReal),&eigs_r);CHKERRQ(ierr);
3694         ierr = PetscMalloc((pcbddc->coarse_size+1)*sizeof(PetscReal),&eigs_c);CHKERRQ(ierr);
3695         ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr);
3696         lambda_max = eigs_r[neigs-1];
3697         lambda_min = eigs_r[0];
3698         if (pcbddc->use_coarse_estimates) {
3699           if (lambda_max>lambda_min) {
3700             ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max,lambda_min);CHKERRQ(ierr);
3701             ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr);
3702           }
3703         }
3704       }
3705 
3706       /* check coarse problem residual error */
3707       if (pcbddc->dbg_flag) {
3708         PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp));
3709         ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr);
3710         ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr);
3711         ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr);
3712         ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr);
3713         ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr);
3714         ierr = VecDestroy(&check_vec);CHKERRQ(ierr);
3715         ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (%d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr);
3716         ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr);
3717         ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr);
3718         ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error   : %1.6e\n",infty_error);CHKERRQ(ierr);
3719         ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr);
3720         if (compute_eigs) {
3721           PetscReal lambda_max_s,lambda_min_s;
3722           ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr);
3723           ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr);
3724           ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr);
3725           for (i=0;i<neigs;i++) {
3726             ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr);
3727           }
3728         }
3729         ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr);
3730         ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr);
3731       }
3732       ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr);
3733       if (compute_eigs) {
3734         ierr = PetscFree(eigs_r);CHKERRQ(ierr);
3735         ierr = PetscFree(eigs_c);CHKERRQ(ierr);
3736       }
3737     }
3738   }
3739   /* print additional info */
3740   if (pcbddc->dbg_flag) {
3741     /* waits until all processes reaches this point */
3742     ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr);
3743     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr);
3744     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3745   }
3746 
3747   /* free memory */
3748   ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr);
3749   ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr);
3750   PetscFunctionReturn(0);
3751 }
3752 
3753 #undef __FUNCT__
3754 #define __FUNCT__ "PCBDDCComputePrimalNumbering"
3755 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n)
3756 {
3757   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
3758   PC_IS*         pcis = (PC_IS*)pc->data;
3759   Mat_IS*        matis = (Mat_IS*)pc->pmat->data;
3760   PetscInt       i,coarse_size;
3761   PetscInt       *local_primal_indices;
3762   PetscErrorCode ierr;
3763 
3764   PetscFunctionBegin;
3765   /* Compute global number of coarse dofs */
3766   if (!pcbddc->primal_indices_local_idxs && pcbddc->local_primal_size) {
3767     SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Local primal indices have not been created");
3768   }
3769   ierr = PCBDDCSubsetNumbering(PetscObjectComm((PetscObject)(pc->pmat)),matis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,NULL,&coarse_size,&local_primal_indices);CHKERRQ(ierr);
3770 
3771   /* check numbering */
3772   if (pcbddc->dbg_flag) {
3773     PetscScalar coarsesum,*array;
3774     PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE;
3775 
3776     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3777     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
3778     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr);
3779     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
3780     ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr);
3781     for (i=0;i<pcbddc->local_primal_size;i++) {
3782       ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
3783     }
3784     ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
3785     ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
3786     ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
3787     ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3788     ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3789     ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3790     ierr = VecScatterEnd(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3791     ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr);
3792     for (i=0;i<pcis->n;i++) {
3793       if (array[i] == 1.0) {
3794         set_error = PETSC_TRUE;
3795         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a single process!\n",PetscGlobalRank,i);CHKERRQ(ierr);
3796       }
3797     }
3798     ierr = MPI_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
3799     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3800     for (i=0;i<pcis->n;i++) {
3801       if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]);
3802     }
3803     ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr);
3804     ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
3805     ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3806     ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3807     ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr);
3808     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr);
3809     if (pcbddc->dbg_flag > 1 || set_error_reduced) {
3810       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr);
3811       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3812       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr);
3813       for (i=0;i<pcbddc->local_primal_size;i++) {
3814         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i]);
3815       }
3816       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3817     }
3818     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3819     if (set_error_reduced) {
3820       SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed");
3821     }
3822   }
3823   /* get back data */
3824   *coarse_size_n = coarse_size;
3825   *local_primal_indices_n = local_primal_indices;
3826   PetscFunctionReturn(0);
3827 }
3828 
3829 #undef __FUNCT__
3830 #define __FUNCT__ "PCBDDCGlobalToLocal"
3831 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis)
3832 {
3833   IS             localis_t;
3834   PetscInt       i,lsize,*idxs,n;
3835   PetscScalar    *vals;
3836   PetscErrorCode ierr;
3837 
3838   PetscFunctionBegin;
3839   /* get indices in local ordering exploiting local to global map */
3840   ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr);
3841   ierr = PetscMalloc(lsize*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3842   for (i=0;i<lsize;i++) vals[i] = 1.0;
3843   ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr);
3844   ierr = VecSet(gwork,0.0);CHKERRQ(ierr);
3845   ierr = VecSet(lwork,0.0);CHKERRQ(ierr);
3846   if (idxs) { /* multilevel guard */
3847     ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr);
3848   }
3849   ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr);
3850   ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr);
3851   ierr = PetscFree(vals);CHKERRQ(ierr);
3852   ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr);
3853   /* now compute set in local ordering */
3854   ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3855   ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3856   ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr);
3857   ierr = VecGetSize(lwork,&n);CHKERRQ(ierr);
3858   for (i=0,lsize=0;i<n;i++) {
3859     if (PetscRealPart(vals[i]) > 0.5) {
3860       lsize++;
3861     }
3862   }
3863   ierr = PetscMalloc(lsize*sizeof(PetscInt),&idxs);CHKERRQ(ierr);
3864   for (i=0,lsize=0;i<n;i++) {
3865     if (PetscRealPart(vals[i]) > 0.5) {
3866       idxs[lsize++] = i;
3867     }
3868   }
3869   ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr);
3870   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr);
3871   *localis = localis_t;
3872   PetscFunctionReturn(0);
3873 }
3874