xref: /petsc/src/ksp/pc/impls/bddc/bddcprivate.c (revision bebe2cf65d55febe21a5af8db2bd2e168caaa2e7)
1 #include <../src/ksp/pc/impls/bddc/bddc.h>
2 #include <../src/ksp/pc/impls/bddc/bddcprivate.h>
3 #include <petscblaslapack.h>
4 
5 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y);
6 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y);
7 
8 #undef __FUNCT__
9 #define __FUNCT__ "PCBDDCSetUpSolvers"
10 PetscErrorCode PCBDDCSetUpSolvers(PC pc)
11 {
12   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
13   PetscScalar    *coarse_submat_vals;
14   PetscErrorCode ierr;
15 
16   PetscFunctionBegin;
17   /* Compute matrix after change of basis and extract local submatrices */
18   ierr = PCBDDCSetUpLocalMatrices(pc);CHKERRQ(ierr);
19 
20   /* Setup local scatters R_to_B and (optionally) R_to_D */
21   /* PCBDDCSetUpLocalWorkVectors and PCBDDCSetUpLocalMatrices should be called first! */
22   ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr);
23 
24   /* Setup local solvers ksp_D and ksp_R */
25   /* PCBDDCSetUpLocalScatters should be called first! */
26   ierr = PCBDDCSetUpLocalSolvers(pc);CHKERRQ(ierr);
27 
28   /* Change global null space passed in by the user if change of basis has been requested */
29   if (pcbddc->NullSpace && pcbddc->ChangeOfBasisMatrix) {
30     ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr);
31   }
32 
33   /*
34      Setup local correction and local part of coarse basis.
35      Gives back the dense local part of the coarse matrix in column major ordering
36   */
37   ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr);
38 
39   /* Compute total number of coarse nodes and setup coarse solver */
40   ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr);
41 
42   /* free */
43   ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr);
44   PetscFunctionReturn(0);
45 }
46 
47 #undef __FUNCT__
48 #define __FUNCT__ "PCBDDCResetCustomization"
49 PetscErrorCode PCBDDCResetCustomization(PC pc)
50 {
51   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
52   PetscErrorCode ierr;
53 
54   PetscFunctionBegin;
55   ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr);
56   ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr);
57   ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr);
58   ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr);
59   ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr);
60   ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr);
61   ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr);
62   ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr);
63   ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr);
64   ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr);
65   ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr);
66   PetscFunctionReturn(0);
67 }
68 
69 #undef __FUNCT__
70 #define __FUNCT__ "PCBDDCResetTopography"
71 PetscErrorCode PCBDDCResetTopography(PC pc)
72 {
73   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
74   PetscErrorCode ierr;
75 
76   PetscFunctionBegin;
77   ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr);
78   ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
79   ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
80   ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr);
81   PetscFunctionReturn(0);
82 }
83 
84 #undef __FUNCT__
85 #define __FUNCT__ "PCBDDCResetSolvers"
86 PetscErrorCode PCBDDCResetSolvers(PC pc)
87 {
88   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
89   PetscErrorCode ierr;
90 
91   PetscFunctionBegin;
92   ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr);
93   ierr = VecDestroy(&pcbddc->coarse_rhs);CHKERRQ(ierr);
94   ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr);
95   ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr);
96   ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr);
97   ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr);
98   ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr);
99   ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr);
100   ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr);
101   ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr);
102   ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr);
103   ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr);
104   ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr);
105   ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr);
106   ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr);
107   ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
108   ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr);
109   ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr);
110   ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr);
111   ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
112   ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
113   ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr);
114   ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr);
115   ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
116   PetscFunctionReturn(0);
117 }
118 
119 #undef __FUNCT__
120 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors"
121 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc)
122 {
123   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
124   PC_IS          *pcis = (PC_IS*)pc->data;
125   VecType        impVecType;
126   PetscInt       n_constraints,n_R,old_size;
127   PetscErrorCode ierr;
128 
129   PetscFunctionBegin;
130   if (!pcbddc->ConstraintMatrix) {
131     SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created");
132   }
133   /* get sizes */
134   n_constraints = pcbddc->local_primal_size - pcbddc->n_actual_vertices;
135   n_R = pcis->n-pcbddc->n_actual_vertices;
136   ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr);
137   /* local work vectors (try to avoid unneeded work)*/
138   /* R nodes */
139   old_size = -1;
140   if (pcbddc->vec1_R) {
141     ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr);
142   }
143   if (n_R != old_size) {
144     ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr);
145     ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr);
146     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr);
147     ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr);
148     ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr);
149     ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr);
150   }
151   /* local primal dofs */
152   old_size = -1;
153   if (pcbddc->vec1_P) {
154     ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr);
155   }
156   if (pcbddc->local_primal_size != old_size) {
157     ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr);
158     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr);
159     ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr);
160     ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr);
161   }
162   /* local explicit constraints */
163   old_size = -1;
164   if (pcbddc->vec1_C) {
165     ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr);
166   }
167   if (n_constraints && n_constraints != old_size) {
168     ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr);
169     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr);
170     ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr);
171     ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr);
172   }
173   PetscFunctionReturn(0);
174 }
175 
176 #undef __FUNCT__
177 #define __FUNCT__ "PCBDDCSetUpCorrection"
178 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n)
179 {
180   PetscErrorCode         ierr;
181   /* pointers to pcis and pcbddc */
182   PC_IS*                 pcis = (PC_IS*)pc->data;
183   PC_BDDC*               pcbddc = (PC_BDDC*)pc->data;
184   /* submatrices of local problem */
185   Mat                    A_RV,A_VR,A_VV;
186   /* working matrices */
187   Mat                    M1,M2,M3,C_CR;
188   /* working vectors */
189   Vec                    vec1_C,vec2_C,vec1_V,vec2_V;
190   /* additional working stuff */
191   IS                     is_aux;
192   PetscScalar            *coarse_submat_vals; /* TODO: use a PETSc matrix */
193   const PetscScalar      *array,*row_cmat_values;
194   const PetscInt         *row_cmat_indices,*idx_R_local;
195   PetscInt               *idx_V_B,*auxindices;
196   PetscInt               n_vertices,n_constraints,size_of_constraint;
197   PetscInt               i,j,n_R,n_D,n_B;
198   PetscBool              unsymmetric_check;
199   /* matrix type (vector type propagated downstream from vec1_C and local matrix type) */
200   MatType                impMatType;
201   /* some shortcuts to scalars */
202   PetscScalar            zero=0.0,one=1.0,m_one=-1.0;
203   /* for debugging purposes */
204   PetscReal              *coarsefunctions_errors,*constraints_errors;
205 
206   PetscFunctionBegin;
207   /* get number of vertices (corners plus constraints with change of basis)
208      pcbddc->n_actual_vertices stores the actual number of vertices, pcbddc->n_vertices the number of corners computed */
209   n_vertices = pcbddc->n_actual_vertices;
210   n_constraints = pcbddc->local_primal_size-n_vertices;
211   /* Set Non-overlapping dimensions */
212   n_B = pcis->n_B; n_D = pcis->n - n_B;
213   n_R = pcis->n-n_vertices;
214 
215   /* Set types for local objects needed by BDDC precondtioner */
216   impMatType = MATSEQDENSE;
217 
218   /* Allocating some extra storage just to be safe */
219   ierr = PetscMalloc (pcis->n*sizeof(PetscInt),&auxindices);CHKERRQ(ierr);
220   for (i=0;i<pcis->n;i++) auxindices[i]=i;
221 
222   /* vertices in boundary numbering */
223   ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr);
224   ierr = ISGlobalToLocalMappingApply(pcbddc->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->primal_indices_local_idxs,&i,idx_V_B);CHKERRQ(ierr);
225   if (i != n_vertices) {
226     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i);
227   }
228 
229   /* Precompute stuffs needed for preprocessing and application of BDDC*/
230   if (n_constraints) {
231     /* see if we can save some allocations */
232     if (pcbddc->local_auxmat2) {
233       PetscInt on_R,on_constraints;
234       ierr = MatGetSize(pcbddc->local_auxmat2,&on_R,&on_constraints);CHKERRQ(ierr);
235       if (on_R != n_R || on_constraints != n_constraints) {
236         ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr);
237         ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr);
238       }
239     }
240     /* work vectors */
241     ierr = VecDuplicate(pcbddc->vec1_C,&vec1_C);CHKERRQ(ierr);
242     ierr = VecDuplicate(pcbddc->vec1_C,&vec2_C);CHKERRQ(ierr);
243     /* auxiliary matrices */
244     if (!pcbddc->local_auxmat2) {
245       ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->local_auxmat2);CHKERRQ(ierr);
246       ierr = MatSetSizes(pcbddc->local_auxmat2,n_R,n_constraints,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
247       ierr = MatSetType(pcbddc->local_auxmat2,impMatType);CHKERRQ(ierr);
248       ierr = MatSetUp(pcbddc->local_auxmat2);CHKERRQ(ierr);
249     }
250 
251     /* Extract constraints on R nodes: C_{CR}  */
252     ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr);
253     ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr);
254     ierr = ISDestroy(&is_aux);CHKERRQ(ierr);
255 
256     /* Assemble local_auxmat2 = - A_{RR}^{-1} C^T_{CR} needed by BDDC application */
257     for (i=0;i<n_constraints;i++) {
258       ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr);
259       /* Get row of constraint matrix in R numbering */
260       ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr);
261       ierr = VecSetValues(pcbddc->vec1_R,size_of_constraint,row_cmat_indices,row_cmat_values,INSERT_VALUES);CHKERRQ(ierr);
262       ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr);
263       ierr = VecAssemblyBegin(pcbddc->vec1_R);CHKERRQ(ierr);
264       ierr = VecAssemblyEnd(pcbddc->vec1_R);CHKERRQ(ierr);
265       /* Solve for row of constraint matrix in R numbering */
266       ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
267       /* Set values in local_auxmat2 */
268       ierr = VecGetArrayRead(pcbddc->vec2_R,&array);CHKERRQ(ierr);
269       ierr = MatSetValues(pcbddc->local_auxmat2,n_R,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
270       ierr = VecRestoreArrayRead(pcbddc->vec2_R,&array);CHKERRQ(ierr);
271     }
272     ierr = MatAssemblyBegin(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
273     ierr = MatAssemblyEnd(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
274     ierr = MatScale(pcbddc->local_auxmat2,m_one);CHKERRQ(ierr);
275 
276     /* Assemble explicitly M1 = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} needed in preproc  */
277     ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr);
278     ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr);
279     ierr = MatCreate(PETSC_COMM_SELF,&M1);CHKERRQ(ierr);
280     ierr = MatSetSizes(M1,n_constraints,n_constraints,n_constraints,n_constraints);CHKERRQ(ierr);
281     ierr = MatSetType(M1,impMatType);CHKERRQ(ierr);
282     ierr = MatSetUp(M1);CHKERRQ(ierr);
283     ierr = MatDuplicate(M1,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr);
284     ierr = MatZeroEntries(M2);CHKERRQ(ierr);
285     ierr = VecSet(vec1_C,m_one);CHKERRQ(ierr);
286     ierr = MatDiagonalSet(M2,vec1_C,INSERT_VALUES);CHKERRQ(ierr);
287     ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr);
288     ierr = MatDestroy(&M2);CHKERRQ(ierr);
289     ierr = MatDestroy(&M3);CHKERRQ(ierr);
290     /* Assemble local_auxmat1 = M1*C_{CR} needed by BDDC application in KSP and in preproc */
291     if (!pcbddc->local_auxmat1) {
292       ierr = MatMatMult(M1,C_CR,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr);
293     } else {
294       ierr = MatMatMult(M1,C_CR,MAT_REUSE_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr);
295     }
296   }
297 
298   /* Get submatrices from subdomain matrix */
299   if (n_vertices) {
300     PetscInt ibs,mbs;
301     PetscBool issbaij;
302     Mat newmat;
303 
304     ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr);
305     ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr);
306     ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr);
307     if (ibs != mbs) { /* need to convert to SEQAIJ */
308       ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
309       ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
310       ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
311       ierr = MatGetSubMatrix(newmat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr);
312       ierr = MatDestroy(&newmat);CHKERRQ(ierr);
313     } else {
314       /* this is safe */
315       ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr);
316       ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
317       if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */
318         ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
319         /* which of the two approaches is faster? */
320         /* ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
321         ierr = MatCreateTranspose(A_RV,&A_VR);CHKERRQ(ierr);*/
322         ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
323         ierr = MatCreateTranspose(A_VR,&A_RV);CHKERRQ(ierr);
324         ierr = MatDestroy(&newmat);CHKERRQ(ierr);
325       } else {
326         ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
327         ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
328       }
329     }
330     ierr = MatCreateVecs(A_RV,&vec1_V,NULL);CHKERRQ(ierr);
331     ierr = VecDuplicate(vec1_V,&vec2_V);CHKERRQ(ierr);
332     ierr = ISDestroy(&is_aux);CHKERRQ(ierr);
333   }
334 
335   /* Matrix of coarse basis functions (local) */
336   if (pcbddc->coarse_phi_B) {
337     PetscInt on_B,on_primal;
338     ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr);
339     if (on_B != n_B || on_primal != pcbddc->local_primal_size) {
340       ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr);
341       ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr);
342     }
343   }
344   if (pcbddc->coarse_phi_D) {
345     PetscInt on_D,on_primal;
346     ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,&on_primal);CHKERRQ(ierr);
347     if (on_D != n_D || on_primal != pcbddc->local_primal_size) {
348       ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr);
349       ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr);
350     }
351   }
352   if (!pcbddc->coarse_phi_B) {
353     ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_B);CHKERRQ(ierr);
354     ierr = MatSetSizes(pcbddc->coarse_phi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr);
355     ierr = MatSetType(pcbddc->coarse_phi_B,impMatType);CHKERRQ(ierr);
356     ierr = MatSetUp(pcbddc->coarse_phi_B);CHKERRQ(ierr);
357   }
358   if ( (pcbddc->switch_static || pcbddc->dbg_flag) && !pcbddc->coarse_phi_D ) {
359     ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_D);CHKERRQ(ierr);
360     ierr = MatSetSizes(pcbddc->coarse_phi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr);
361     ierr = MatSetType(pcbddc->coarse_phi_D,impMatType);CHKERRQ(ierr);
362     ierr = MatSetUp(pcbddc->coarse_phi_D);CHKERRQ(ierr);
363   }
364 
365   if (pcbddc->dbg_flag) {
366     ierr = ISGetIndices(pcbddc->is_R_local,&idx_R_local);CHKERRQ(ierr);
367     ierr = PetscMalloc1(2*pcbddc->local_primal_size,&coarsefunctions_errors);CHKERRQ(ierr);
368     ierr = PetscMalloc1(2*pcbddc->local_primal_size,&constraints_errors);CHKERRQ(ierr);
369   }
370   /* Subdomain contribution (Non-overlapping) to coarse matrix  */
371   ierr = PetscMalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr);
372 
373   /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */
374 
375   /* vertices */
376   for (i=0;i<n_vertices;i++) {
377     /* this should not be needed, but MatMult_BAIJ is broken when using compressed row routines */
378     ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); /* TODO: REMOVE IT */
379     ierr = VecSet(vec1_V,zero);CHKERRQ(ierr);
380     ierr = VecSetValue(vec1_V,i,one,INSERT_VALUES);CHKERRQ(ierr);
381     ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr);
382     ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr);
383     /* simplified solution of saddle point problem with null rhs on constraints multipliers */
384     ierr = MatMult(A_RV,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr);
385     ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
386     ierr = VecScale(pcbddc->vec1_R,m_one);CHKERRQ(ierr);
387     if (n_constraints) {
388       ierr = MatMult(pcbddc->local_auxmat1,pcbddc->vec1_R,vec1_C);CHKERRQ(ierr);
389       ierr = MatMultAdd(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
390       ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr);
391     }
392     ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr);
393     ierr = MatMultAdd(A_VV,vec1_V,vec2_V,vec2_V);CHKERRQ(ierr);
394 
395     /* Set values in coarse basis function and subdomain part of coarse_mat */
396     /* coarse basis functions */
397     ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
398     ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
399     ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
400     ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
401     ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
402     ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
403     ierr = MatSetValue(pcbddc->coarse_phi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr);
404     if (pcbddc->switch_static || pcbddc->dbg_flag) {
405       ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
406       ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
407       ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
408       ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
409       ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
410     }
411     /* subdomain contribution to coarse matrix. WARNING -> column major ordering */
412     ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr);
413     ierr = PetscMemcpy(&coarse_submat_vals[i*pcbddc->local_primal_size],array,n_vertices*sizeof(PetscScalar));CHKERRQ(ierr);
414     ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr);
415     if (n_constraints) {
416       ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr);
417       ierr = PetscMemcpy(&coarse_submat_vals[i*pcbddc->local_primal_size+n_vertices],array,n_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
418       ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr);
419     }
420 
421     /* check */
422     if (pcbddc->dbg_flag) {
423       /* assemble subdomain vector on local nodes */
424       ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr);
425       ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
426       if (n_R) {
427         ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr);
428       }
429       ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
430       ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],one,INSERT_VALUES);CHKERRQ(ierr);
431       ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
432       ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
433       /* assemble subdomain vector of lagrange multipliers (i.e. primal nodes) */
434       ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr);
435       ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr);
436       ierr = VecSetValues(pcbddc->vec1_P,n_vertices,auxindices,array,INSERT_VALUES);CHKERRQ(ierr);
437       ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr);
438       if (n_constraints) {
439         ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr);
440         ierr = VecSetValues(pcbddc->vec1_P,n_constraints,&auxindices[n_vertices],array,INSERT_VALUES);CHKERRQ(ierr);
441         ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr);
442       }
443       ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
444       ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
445       ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr);
446       /* check saddle point solution */
447       ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr);
448       ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr);
449       ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i]);CHKERRQ(ierr);
450       ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr);
451       /* shift by the identity matrix */
452       ierr = VecSetValue(pcbddc->vec1_P,i,m_one,ADD_VALUES);CHKERRQ(ierr);
453       ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
454       ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
455       ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i]);CHKERRQ(ierr);
456     }
457   }
458 
459   /* constraints */
460   for (i=0;i<n_constraints;i++) {
461     ierr = VecSet(vec2_C,zero);CHKERRQ(ierr);
462     ierr = VecSetValue(vec2_C,i,m_one,INSERT_VALUES);CHKERRQ(ierr);
463     ierr = VecAssemblyBegin(vec2_C);CHKERRQ(ierr);
464     ierr = VecAssemblyEnd(vec2_C);CHKERRQ(ierr);
465     /* simplified solution of saddle point problem with null rhs on vertices multipliers */
466     ierr = MatMult(M1,vec2_C,vec1_C);CHKERRQ(ierr);
467     ierr = MatMult(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr);
468     ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr);
469     if (n_vertices) {
470       ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr);
471     }
472     /* Set values in coarse basis function and subdomain part of coarse_mat */
473     /* coarse basis functions */
474     j = i+n_vertices; /* don't touch this! */
475     ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
476     ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
477     ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
478     ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
479     ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&j,array,INSERT_VALUES);CHKERRQ(ierr);
480     ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
481     if (pcbddc->switch_static || pcbddc->dbg_flag) {
482       ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
483       ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
484       ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
485       ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&j,array,INSERT_VALUES);CHKERRQ(ierr);
486       ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
487     }
488     /* subdomain contribution to coarse matrix. WARNING -> column major ordering */
489     if (n_vertices) {
490       ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr);
491       ierr = PetscMemcpy(&coarse_submat_vals[j*pcbddc->local_primal_size],array,n_vertices*sizeof(PetscScalar));CHKERRQ(ierr);
492       ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr);
493     }
494     ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr);
495     ierr = PetscMemcpy(&coarse_submat_vals[j*pcbddc->local_primal_size+n_vertices],array,n_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
496     ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr);
497 
498     if (pcbddc->dbg_flag) {
499       /* assemble subdomain vector on nodes */
500       ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr);
501       ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
502       if (n_R) {
503         ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr);
504       }
505       ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
506       ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
507       ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
508       /* assemble subdomain vector of lagrange multipliers */
509       ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr);
510       if (n_vertices) {
511         ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr);
512         ierr = VecSetValues(pcbddc->vec1_P,n_vertices,auxindices,array,INSERT_VALUES);CHKERRQ(ierr);
513         ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr);
514       }
515       ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr);
516       ierr = VecSetValues(pcbddc->vec1_P,n_constraints,&auxindices[n_vertices],array,INSERT_VALUES);CHKERRQ(ierr);
517       ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr);
518       ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
519       ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
520       ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr);
521       /* check saddle point solution */
522       ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr);
523       ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr);
524       ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[j]);CHKERRQ(ierr);
525       ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr);
526       /* shift by the identity matrix */
527       ierr = VecSetValue(pcbddc->vec1_P,j,m_one,ADD_VALUES);CHKERRQ(ierr);
528       ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
529       ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
530       ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[j]);CHKERRQ(ierr);
531     }
532   }
533   /* call assembling routines for local coarse basis */
534   ierr = MatAssemblyBegin(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
535   ierr = MatAssemblyEnd(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
536   if (pcbddc->switch_static || pcbddc->dbg_flag) {
537     ierr = MatAssemblyBegin(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
538     ierr = MatAssemblyEnd(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
539   }
540 
541   /* compute other basis functions for non-symmetric problems */
542   /*ierr = MatIsSymmetric(pc->pmat,0.,&pcbddc->issym);CHKERRQ(ierr);*/
543   { /* this is a temporary workaround since seqbaij matrices does not have support for symmetry checking */
544     PetscBool setsym;
545     ierr = MatIsSymmetricKnown(pc->pmat,&setsym,&pcbddc->issym);CHKERRQ(ierr);
546     if (!setsym) pcbddc->issym = PETSC_FALSE;
547   }
548 
549   if (!pcbddc->issym) {
550     if (!pcbddc->coarse_psi_B) {
551       ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_psi_B);CHKERRQ(ierr);
552       ierr = MatSetSizes(pcbddc->coarse_psi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr);
553       ierr = MatSetType(pcbddc->coarse_psi_B,impMatType);CHKERRQ(ierr);
554       ierr = MatSetUp(pcbddc->coarse_psi_B);CHKERRQ(ierr);
555     }
556     if ( (pcbddc->switch_static || pcbddc->dbg_flag) && !pcbddc->coarse_psi_D) {
557       ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_psi_D);CHKERRQ(ierr);
558       ierr = MatSetSizes(pcbddc->coarse_psi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr);
559       ierr = MatSetType(pcbddc->coarse_psi_D,impMatType);CHKERRQ(ierr);
560       ierr = MatSetUp(pcbddc->coarse_psi_D);CHKERRQ(ierr);
561     }
562     for (i=0;i<pcbddc->local_primal_size;i++) {
563       if (n_constraints) {
564         ierr = VecSet(vec1_C,zero);CHKERRQ(ierr);
565         for (j=0;j<n_constraints;j++) {
566           ierr = VecSetValue(vec1_C,j,coarse_submat_vals[(j+n_vertices)*pcbddc->local_primal_size+i],INSERT_VALUES);CHKERRQ(ierr);
567         }
568         ierr = VecAssemblyBegin(vec1_C);CHKERRQ(ierr);
569         ierr = VecAssemblyEnd(vec1_C);CHKERRQ(ierr);
570       }
571       if (i<n_vertices) {
572         ierr = VecSet(vec1_V,zero);CHKERRQ(ierr);
573         ierr = VecSetValue(vec1_V,i,m_one,INSERT_VALUES);CHKERRQ(ierr);
574         ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr);
575         ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr);
576         ierr = MatMultTranspose(A_VR,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr);
577         if (n_constraints) {
578           ierr = MatMultTransposeAdd(C_CR,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
579         }
580       } else {
581         ierr = MatMultTranspose(C_CR,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr);
582       }
583       ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
584       ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
585       ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
586       ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
587       ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
588       ierr = MatSetValues(pcbddc->coarse_psi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
589       ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
590       if (i<n_vertices) {
591         ierr = MatSetValue(pcbddc->coarse_psi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr);
592       }
593       if (pcbddc->switch_static || pcbddc->dbg_flag) {
594         ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
595         ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
596         ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
597         ierr = MatSetValues(pcbddc->coarse_psi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
598         ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
599       }
600 
601       if (pcbddc->dbg_flag) {
602         /* assemble subdomain vector on nodes */
603         ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr);
604         ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
605         if (n_R) {
606           ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr);
607         }
608         ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
609         if (i<n_vertices) {
610           ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],one,INSERT_VALUES);CHKERRQ(ierr);
611         }
612         ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
613         ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
614         /* assemble subdomain vector of lagrange multipliers */
615         for (j=0;j<pcbddc->local_primal_size;j++) {
616           ierr = VecSetValue(pcbddc->vec1_P,j,-coarse_submat_vals[j*pcbddc->local_primal_size+i],INSERT_VALUES);CHKERRQ(ierr);
617         }
618         ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
619         ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
620         /* check saddle point solution */
621         ierr = MatMultTranspose(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr);
622         ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr);
623         ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i+pcbddc->local_primal_size]);CHKERRQ(ierr);
624         ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr);
625         /* shift by the identity matrix */
626         ierr = VecSetValue(pcbddc->vec1_P,i,m_one,ADD_VALUES);CHKERRQ(ierr);
627         ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
628         ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
629         ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i+pcbddc->local_primal_size]);CHKERRQ(ierr);
630       }
631     }
632     ierr = MatAssemblyBegin(pcbddc->coarse_psi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
633     ierr = MatAssemblyEnd(pcbddc->coarse_psi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
634     if (pcbddc->switch_static || pcbddc->dbg_flag) {
635       ierr = MatAssemblyBegin(pcbddc->coarse_psi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
636       ierr = MatAssemblyEnd(pcbddc->coarse_psi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
637     }
638     unsymmetric_check = PETSC_TRUE;
639   } else { /* take references to already computed coarse basis */
640     unsymmetric_check = PETSC_FALSE;
641     ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr);
642     pcbddc->coarse_psi_B = pcbddc->coarse_phi_B;
643     if (pcbddc->coarse_phi_D) {
644       ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr);
645       pcbddc->coarse_psi_D = pcbddc->coarse_phi_D;
646     }
647   }
648   ierr = PetscFree(idx_V_B);CHKERRQ(ierr);
649   /* Checking coarse_sub_mat and coarse basis functios */
650   /* Symmetric case     : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
651   /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
652   if (pcbddc->dbg_flag) {
653     Mat         coarse_sub_mat;
654     Mat         AUXMAT,TM1,TM2,TM3,TM4;
655     Mat         coarse_phi_D,coarse_phi_B;
656     Mat         coarse_psi_D,coarse_psi_B;
657     Mat         A_II,A_BB,A_IB,A_BI;
658     MatType     checkmattype=MATSEQAIJ;
659     PetscReal   real_value;
660 
661     ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr);
662     ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr);
663     ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr);
664     ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr);
665     ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr);
666     ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr);
667     if (unsymmetric_check) {
668       ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr);
669       ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr);
670     }
671     ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr);
672 
673     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
674     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat and local basis functions\n");CHKERRQ(ierr);
675     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
676     if (unsymmetric_check) {
677       ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
678       ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr);
679       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
680       ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
681       ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr);
682       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
683       ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
684       ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr);
685       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
686       ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
687       ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr);
688       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
689     } else {
690       ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr);
691       ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr);
692       ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
693       ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr);
694       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
695       ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
696       ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr);
697       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
698     }
699     ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
700     ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
701     ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
702     ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr);
703     ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
704     ierr = MatNorm(TM1,NORM_INFINITY,&real_value);CHKERRQ(ierr);
705     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
706     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"----------------------------------\n");CHKERRQ(ierr);
707     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d \n",PetscGlobalRank);CHKERRQ(ierr);
708     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"matrix error = % 1.14e\n",real_value);CHKERRQ(ierr);
709     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"coarse functions (phi) errors\n");CHKERRQ(ierr);
710     for (i=0;i<pcbddc->local_primal_size;i++) {
711       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i,coarsefunctions_errors[i]);CHKERRQ(ierr);
712     }
713     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"constraints (phi) errors\n");CHKERRQ(ierr);
714     for (i=0;i<pcbddc->local_primal_size;i++) {
715       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i,constraints_errors[i]);CHKERRQ(ierr);
716     }
717     if (unsymmetric_check) {
718       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"coarse functions (psi) errors\n");CHKERRQ(ierr);
719       for (i=pcbddc->local_primal_size;i<2*pcbddc->local_primal_size;i++) {
720         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i-pcbddc->local_primal_size,coarsefunctions_errors[i]);CHKERRQ(ierr);
721       }
722       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"constraints (psi) errors\n");CHKERRQ(ierr);
723       for (i=pcbddc->local_primal_size;i<2*pcbddc->local_primal_size;i++) {
724         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i-pcbddc->local_primal_size,constraints_errors[i]);CHKERRQ(ierr);
725       }
726     }
727     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
728     ierr = MatDestroy(&A_II);CHKERRQ(ierr);
729     ierr = MatDestroy(&A_BB);CHKERRQ(ierr);
730     ierr = MatDestroy(&A_IB);CHKERRQ(ierr);
731     ierr = MatDestroy(&A_BI);CHKERRQ(ierr);
732     ierr = MatDestroy(&TM1);CHKERRQ(ierr);
733     ierr = MatDestroy(&TM2);CHKERRQ(ierr);
734     ierr = MatDestroy(&TM3);CHKERRQ(ierr);
735     ierr = MatDestroy(&TM4);CHKERRQ(ierr);
736     ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr);
737     ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr);
738     if (unsymmetric_check) {
739       ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr);
740       ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr);
741     }
742     ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr);
743     ierr = ISRestoreIndices(pcbddc->is_R_local,&idx_R_local);CHKERRQ(ierr);
744     ierr = PetscFree(coarsefunctions_errors);CHKERRQ(ierr);
745     ierr = PetscFree(constraints_errors);CHKERRQ(ierr);
746   }
747   /* free memory */
748   if (n_vertices) {
749     ierr = VecDestroy(&vec1_V);CHKERRQ(ierr);
750     ierr = VecDestroy(&vec2_V);CHKERRQ(ierr);
751     ierr = MatDestroy(&A_RV);CHKERRQ(ierr);
752     ierr = MatDestroy(&A_VR);CHKERRQ(ierr);
753     ierr = MatDestroy(&A_VV);CHKERRQ(ierr);
754   }
755   if (n_constraints) {
756     ierr = VecDestroy(&vec1_C);CHKERRQ(ierr);
757     ierr = VecDestroy(&vec2_C);CHKERRQ(ierr);
758     ierr = MatDestroy(&M1);CHKERRQ(ierr);
759     ierr = MatDestroy(&C_CR);CHKERRQ(ierr);
760   }
761   ierr = PetscFree(auxindices);CHKERRQ(ierr);
762   /* get back data */
763   *coarse_submat_vals_n = coarse_submat_vals;
764   PetscFunctionReturn(0);
765 }
766 
767 #undef __FUNCT__
768 #define __FUNCT__ "PCBDDCSetUpLocalMatrices"
769 PetscErrorCode PCBDDCSetUpLocalMatrices(PC pc)
770 {
771   PC_IS*            pcis = (PC_IS*)(pc->data);
772   PC_BDDC*          pcbddc = (PC_BDDC*)pc->data;
773   Mat_IS*           matis = (Mat_IS*)pc->pmat->data;
774   PetscBool         issbaij,isseqaij;
775   /* manage repeated solves */
776   MatReuse          reuse;
777   PetscErrorCode    ierr;
778 
779   PetscFunctionBegin;
780   if ( (pcbddc->use_change_of_basis && !pcbddc->ChangeOfBasisMatrix) || (pcbddc->user_ChangeOfBasisMatrix && !pcbddc->ChangeOfBasisMatrix) ) {
781     SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Change of basis matrix has not been created");
782   }
783   /* get mat flags */
784   reuse = MAT_INITIAL_MATRIX;
785   if (pc->setupcalled) {
786     if (pc->flag == SAME_NONZERO_PATTERN) {
787       reuse = MAT_REUSE_MATRIX;
788     } else {
789       reuse = MAT_INITIAL_MATRIX;
790     }
791   }
792   if (reuse == MAT_INITIAL_MATRIX) {
793     ierr = MatDestroy(&pcis->A_II);CHKERRQ(ierr);
794     ierr = MatDestroy(&pcis->A_IB);CHKERRQ(ierr);
795     ierr = MatDestroy(&pcis->A_BI);CHKERRQ(ierr);
796     ierr = MatDestroy(&pcis->A_BB);CHKERRQ(ierr);
797     ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
798   }
799 
800   /* transform local matrices if needed */
801   if (pcbddc->ChangeOfBasisMatrix) {
802     Mat       *change_mat_all;
803     IS        is_local,is_global;
804     PetscBool sorted;
805     PetscInt  *idxs_perm;
806 
807     ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n,0,1,&is_local);CHKERRQ(ierr);
808     ierr = ISLocalToGlobalMappingApplyIS(matis->mapping,is_local,&is_global);CHKERRQ(ierr);
809     ierr = ISDestroy(&is_local);CHKERRQ(ierr);
810     ierr = ISSorted(is_global,&sorted);CHKERRQ(ierr);
811     if (!sorted) {
812       const PetscInt *idxs;
813       PetscInt *idxs_sorted,i;
814 
815       ierr = PetscMalloc1(pcis->n,&idxs_perm);CHKERRQ(ierr);
816       ierr = PetscMalloc1(pcis->n,&idxs_sorted);CHKERRQ(ierr);
817       for (i=0;i<pcis->n;i++) {
818         idxs_perm[i] = i;
819       }
820       ierr = ISGetIndices(is_global,&idxs);CHKERRQ(ierr);
821       ierr = PetscSortIntWithPermutation(pcis->n,idxs,idxs_perm);CHKERRQ(ierr);
822       for (i=0;i<pcis->n;i++) {
823         idxs_sorted[i] = idxs[idxs_perm[i]];
824       }
825       ierr = ISRestoreIndices(is_global,&idxs);CHKERRQ(ierr);
826       ierr = ISDestroy(&is_global);CHKERRQ(ierr);
827       ierr = ISCreateGeneral(PETSC_COMM_SELF,pcis->n,idxs_sorted,PETSC_OWN_POINTER,&is_global);CHKERRQ(ierr);
828     }
829 
830     /* get change of basis on the whole set of local dofs */
831     ierr = MatGetSubMatrices(pcbddc->ChangeOfBasisMatrix,1,&is_global,&is_global,MAT_INITIAL_MATRIX,&change_mat_all);CHKERRQ(ierr);
832 
833     if (!sorted) {
834       Mat      new_mat;
835       IS       is_perm;
836       PetscInt *idxs,i;
837 
838       ierr = PetscMalloc1(pcis->n,&idxs);CHKERRQ(ierr);
839       for (i=0;i<pcis->n;i++) {
840         idxs[idxs_perm[i]] = i;
841       }
842       ierr = PetscFree(idxs_perm);CHKERRQ(ierr);
843       ierr = ISCreateGeneral(PETSC_COMM_SELF,pcis->n,idxs,PETSC_OWN_POINTER,&is_perm);CHKERRQ(ierr);
844       ierr = ISSetPermutation(is_perm);CHKERRQ(ierr);
845       ierr = MatPermute(change_mat_all[0],is_perm,is_perm,&new_mat);CHKERRQ(ierr);
846       ierr = MatDestroy(&change_mat_all[0]);CHKERRQ(ierr);
847       change_mat_all[0] = new_mat;
848       ierr = ISDestroy(&is_perm);CHKERRQ(ierr);
849     }
850 
851     /* check */
852     if (pcbddc->dbg_flag) {
853       Vec       x,x_change;
854       PetscReal error;
855 
856       ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr);
857       ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr);
858       ierr = VecSetRandom(x,NULL);CHKERRQ(ierr);
859       ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr);
860       ierr = VecScatterBegin(matis->ctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
861       ierr = VecScatterEnd(matis->ctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
862       ierr = MatMult(change_mat_all[0],pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr);
863       ierr = VecScatterBegin(matis->ctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
864       ierr = VecScatterEnd(matis->ctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
865       ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr);
866       ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr);
867       ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr);
868       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
869       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on N: %1.6e\n",error);CHKERRQ(ierr);
870       ierr = VecDestroy(&x);CHKERRQ(ierr);
871       ierr = VecDestroy(&x_change);CHKERRQ(ierr);
872     }
873 
874     /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */
875     ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr);
876     if (isseqaij) {
877       ierr = MatPtAP(matis->A,change_mat_all[0],reuse,2.0,&pcbddc->local_mat);CHKERRQ(ierr);
878     } else {
879       Mat work_mat;
880       ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr);
881       ierr = MatPtAP(work_mat,change_mat_all[0],reuse,2.0,&pcbddc->local_mat);CHKERRQ(ierr);
882       ierr = MatDestroy(&work_mat);CHKERRQ(ierr);
883     }
884     /*
885     ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
886     ierr = MatView(change_mat_all,(PetscViewer)0);CHKERRQ(ierr);
887     */
888     ierr = MatDestroyMatrices(1,&change_mat_all);CHKERRQ(ierr);
889     ierr = ISDestroy(&is_global);CHKERRQ(ierr);
890   } else {
891     /* without change of basis, the local matrix is unchanged */
892     if (!pcbddc->local_mat) {
893       ierr = PetscObjectReference((PetscObject)matis->A);CHKERRQ(ierr);
894       pcbddc->local_mat = matis->A;
895     }
896   }
897 
898   /* get submatrices */
899   ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_I_local,pcis->is_I_local,reuse,&pcis->A_II);CHKERRQ(ierr);
900   ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_B_local,reuse,&pcis->A_BB);CHKERRQ(ierr);
901   ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
902   if (!issbaij) {
903     ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_I_local,pcis->is_B_local,reuse,&pcis->A_IB);CHKERRQ(ierr);
904     ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_I_local,reuse,&pcis->A_BI);CHKERRQ(ierr);
905   } else {
906     Mat newmat;
907     ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
908     ierr = MatGetSubMatrix(newmat,pcis->is_I_local,pcis->is_B_local,reuse,&pcis->A_IB);CHKERRQ(ierr);
909     ierr = MatGetSubMatrix(newmat,pcis->is_B_local,pcis->is_I_local,reuse,&pcis->A_BI);CHKERRQ(ierr);
910     ierr = MatDestroy(&newmat);CHKERRQ(ierr);
911   }
912   PetscFunctionReturn(0);
913 }
914 
915 #undef __FUNCT__
916 #define __FUNCT__ "PCBDDCSetUpLocalScatters"
917 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc)
918 {
919   PC_IS*         pcis = (PC_IS*)(pc->data);
920   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
921   IS             is_aux1,is_aux2;
922   PetscInt       *aux_array1,*aux_array2,*is_indices,*idx_R_local;
923   PetscInt       n_vertices,i,j,n_R,n_D,n_B;
924   PetscInt       vbs,bs;
925   PetscBT        bitmask;
926   PetscErrorCode ierr;
927 
928   PetscFunctionBegin;
929   /*
930     No need to setup local scatters if
931       - primal space is unchanged
932         AND
933       - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains)
934         AND
935       - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine
936   */
937   if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) {
938     PetscFunctionReturn(0);
939   }
940   /* destroy old objects */
941   ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr);
942   ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr);
943   ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr);
944   /* Set Non-overlapping dimensions */
945   n_B = pcis->n_B; n_D = pcis->n - n_B;
946   n_vertices = pcbddc->n_actual_vertices;
947   /* create auxiliary bitmask */
948   ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr);
949   for (i=0;i<n_vertices;i++) {
950     ierr = PetscBTSet(bitmask,pcbddc->primal_indices_local_idxs[i]);CHKERRQ(ierr);
951   }
952 
953   /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */
954   ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr);
955   for (i=0, n_R=0; i<pcis->n; i++) {
956     if (!PetscBTLookup(bitmask,i)) {
957       idx_R_local[n_R] = i;
958       n_R++;
959     }
960   }
961 
962   /* Block code */
963   vbs = 1;
964   ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr);
965   if (bs>1 && !(n_vertices%bs)) {
966     PetscBool is_blocked = PETSC_TRUE;
967     PetscInt  *vary;
968     /* Verify if the vertex indices correspond to each element in a block (code taken from sbaij2.c) */
969     ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr);
970     ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr);
971     for (i=0; i<n_vertices; i++) vary[pcbddc->primal_indices_local_idxs[i]/bs]++;
972     for (i=0; i<n_vertices; i++) {
973       if (vary[i]!=0 && vary[i]!=bs) {
974         is_blocked = PETSC_FALSE;
975         break;
976       }
977     }
978     if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */
979       vbs = bs;
980       for (i=0;i<n_R/vbs;i++) {
981         idx_R_local[i] = idx_R_local[vbs*i]/vbs;
982       }
983     }
984     ierr = PetscFree(vary);CHKERRQ(ierr);
985   }
986   ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr);
987   ierr = PetscFree(idx_R_local);CHKERRQ(ierr);
988 
989   /* print some info if requested */
990   if (pcbddc->dbg_flag) {
991     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
992     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
993     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
994     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr);
995     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr);
996     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices,pcbddc->local_primal_size);CHKERRQ(ierr);
997     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"pcbddc->n_vertices = %d, pcbddc->n_constraints = %d\n",pcbddc->n_vertices,pcbddc->n_constraints);CHKERRQ(ierr);
998     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
999   }
1000 
1001   /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */
1002   ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr);
1003   ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr);
1004   ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr);
1005   ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1006   for (i=0; i<n_D; i++) {
1007     ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr);
1008   }
1009   ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1010   for (i=0, j=0; i<n_R; i++) {
1011     if (!PetscBTLookup(bitmask,idx_R_local[i])) {
1012       aux_array1[j++] = i;
1013     }
1014   }
1015   ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr);
1016   ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1017   for (i=0, j=0; i<n_B; i++) {
1018     if (!PetscBTLookup(bitmask,is_indices[i])) {
1019       aux_array2[j++] = i;
1020     }
1021   }
1022   ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1023   ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr);
1024   ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr);
1025   ierr = ISDestroy(&is_aux1);CHKERRQ(ierr);
1026   ierr = ISDestroy(&is_aux2);CHKERRQ(ierr);
1027 
1028   if (pcbddc->switch_static || pcbddc->dbg_flag) {
1029     ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr);
1030     for (i=0, j=0; i<n_R; i++) {
1031       if (PetscBTLookup(bitmask,idx_R_local[i])) {
1032         aux_array1[j++] = i;
1033       }
1034     }
1035     ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr);
1036     ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr);
1037     ierr = ISDestroy(&is_aux1);CHKERRQ(ierr);
1038   }
1039   ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr);
1040   ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr);
1041   PetscFunctionReturn(0);
1042 }
1043 
1044 
1045 #undef __FUNCT__
1046 #define __FUNCT__ "PCBDDCSetUpLocalSolvers"
1047 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc)
1048 {
1049   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
1050   PC_IS          *pcis = (PC_IS*)pc->data;
1051   PC             pc_temp;
1052   Mat            A_RR;
1053   MatReuse       reuse;
1054   PetscScalar    m_one = -1.0;
1055   PetscReal      value;
1056   PetscInt       n_D,n_R,ibs,mbs;
1057   PetscBool      use_exact,use_exact_reduced,issbaij;
1058   PetscErrorCode ierr;
1059   /* prefixes stuff */
1060   char           dir_prefix[256],neu_prefix[256],str_level[16];
1061   size_t         len;
1062 
1063   PetscFunctionBegin;
1064 
1065   /* compute prefixes */
1066   ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr);
1067   ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr);
1068   if (!pcbddc->current_level) {
1069     ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
1070     ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
1071     ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr);
1072     ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr);
1073   } else {
1074     ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr);
1075     sprintf(str_level,"l%d_",(int)(pcbddc->current_level));
1076     ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr);
1077     len -= 15; /* remove "pc_bddc_coarse_" */
1078     if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */
1079     if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */
1080     ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr);
1081     ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr);
1082     ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr);
1083     ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr);
1084     ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr);
1085     ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr);
1086   }
1087 
1088   /* DIRICHLET PROBLEM */
1089   /* Matrix for Dirichlet problem is pcis->A_II */
1090   ierr = ISGetSize(pcis->is_I_local,&n_D);CHKERRQ(ierr);
1091   if (!pcbddc->ksp_D) { /* create object if not yet build */
1092     ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr);
1093     ierr = KSPSetErrorIfNotConverged(pcbddc->ksp_D,pc->erroriffailure);CHKERRQ(ierr);
1094     ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr);
1095     /* default */
1096     ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr);
1097     ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr);
1098     ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
1099     ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr);
1100     if (issbaij) {
1101       ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr);
1102     } else {
1103       ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr);
1104     }
1105     /* Allow user's customization */
1106     ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr);
1107     ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
1108   }
1109   ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr);
1110   /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
1111   if (!n_D) {
1112     ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr);
1113     ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr);
1114   }
1115   /* Set Up KSP for Dirichlet problem of BDDC */
1116   ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr);
1117   /* set ksp_D into pcis data */
1118   ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr);
1119   ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr);
1120   pcis->ksp_D = pcbddc->ksp_D;
1121 
1122   /* NEUMANN PROBLEM */
1123   /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */
1124   ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr);
1125   if (pcbddc->ksp_R) { /* already created ksp */
1126     PetscInt nn_R;
1127     ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr);
1128     ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr);
1129     ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr);
1130     if (nn_R != n_R) { /* old ksp is not reusable, so reset it */
1131       ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr);
1132       ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1133       reuse = MAT_INITIAL_MATRIX;
1134     } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */
1135       if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */
1136         ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1137         reuse = MAT_INITIAL_MATRIX;
1138       } else { /* safe to reuse the matrix */
1139         reuse = MAT_REUSE_MATRIX;
1140       }
1141     }
1142     /* last check */
1143     if (pc->flag == DIFFERENT_NONZERO_PATTERN) {
1144       ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1145       reuse = MAT_INITIAL_MATRIX;
1146     }
1147   } else { /* first time, so we need to create the matrix */
1148     reuse = MAT_INITIAL_MATRIX;
1149   }
1150   /* extract A_RR */
1151   ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr);
1152   ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr);
1153   if (ibs != mbs) {
1154     Mat newmat;
1155     ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
1156     ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr);
1157     ierr = MatDestroy(&newmat);CHKERRQ(ierr);
1158   } else {
1159     ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr);
1160   }
1161   if (!pcbddc->ksp_R) { /* create object if not present */
1162     ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr);
1163     ierr = KSPSetErrorIfNotConverged(pcbddc->ksp_R,pc->erroriffailure);CHKERRQ(ierr);
1164     ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr);
1165     /* default */
1166     ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr);
1167     ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr);
1168     ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr);
1169     ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
1170     if (issbaij) {
1171       ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr);
1172     } else {
1173       ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr);
1174     }
1175     /* Allow user's customization */
1176     ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr);
1177     ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
1178   }
1179   ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr);
1180   /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
1181   if (!n_R) {
1182     ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr);
1183     ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr);
1184   }
1185   /* Set Up KSP for Neumann problem of BDDC */
1186   ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr);
1187 
1188   /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */
1189   if (pcbddc->NullSpace || pcbddc->dbg_flag) {
1190     /* Dirichlet */
1191     ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr);
1192     ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr);
1193     ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr);
1194     ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr);
1195     ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr);
1196     /* need to be adapted? */
1197     use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE);
1198     ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
1199     ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr);
1200     /* print info */
1201     if (pcbddc->dbg_flag) {
1202       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1203       ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
1204       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
1205       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Checking solution of Dirichlet and Neumann problems\n");CHKERRQ(ierr);
1206       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr);
1207       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1208     }
1209     if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) {
1210       ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcis->is_I_local);CHKERRQ(ierr);
1211     }
1212 
1213     /* Neumann */
1214     ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr);
1215     ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
1216     ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr);
1217     ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr);
1218     ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr);
1219     /* need to be adapted? */
1220     use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE);
1221     ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
1222     /* print info */
1223     if (pcbddc->dbg_flag) {
1224       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr);
1225       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1226     }
1227     if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */
1228       ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcbddc->is_R_local);CHKERRQ(ierr);
1229     }
1230   }
1231   /* free Neumann problem's matrix */
1232   ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1233   PetscFunctionReturn(0);
1234 }
1235 
1236 #undef __FUNCT__
1237 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection"
1238 static PetscErrorCode  PCBDDCSolveSubstructureCorrection(PC pc, Vec rhs, Vec sol, Vec work, PetscBool applytranspose)
1239 {
1240   PetscErrorCode ierr;
1241   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1242 
1243   PetscFunctionBegin;
1244   if (applytranspose) {
1245     if (pcbddc->local_auxmat1) {
1246       ierr = MatMultTranspose(pcbddc->local_auxmat2,rhs,work);CHKERRQ(ierr);
1247       ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,work,rhs,rhs);CHKERRQ(ierr);
1248     }
1249     ierr = KSPSolveTranspose(pcbddc->ksp_R,rhs,sol);CHKERRQ(ierr);
1250   } else {
1251     ierr = KSPSolve(pcbddc->ksp_R,rhs,sol);CHKERRQ(ierr);
1252     if (pcbddc->local_auxmat1) {
1253       ierr = MatMult(pcbddc->local_auxmat1,sol,work);CHKERRQ(ierr);
1254       ierr = MatMultAdd(pcbddc->local_auxmat2,work,sol,sol);CHKERRQ(ierr);
1255     }
1256   }
1257   PetscFunctionReturn(0);
1258 }
1259 
1260 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */
1261 #undef __FUNCT__
1262 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner"
1263 PetscErrorCode  PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose)
1264 {
1265   PetscErrorCode ierr;
1266   PC_BDDC*        pcbddc = (PC_BDDC*)(pc->data);
1267   PC_IS*            pcis = (PC_IS*)  (pc->data);
1268   const PetscScalar zero = 0.0;
1269 
1270   PetscFunctionBegin;
1271   /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */
1272   if (applytranspose) {
1273     ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr);
1274     if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); }
1275   } else {
1276     ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr);
1277     if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); }
1278   }
1279   /* start communications from local primal nodes to rhs of coarse solver */
1280   ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr);
1281   ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1282   ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1283 
1284   /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */
1285   /* TODO remove null space when doing multilevel */
1286   if (pcbddc->coarse_ksp) {
1287     if (applytranspose) {
1288       ierr = KSPSolveTranspose(pcbddc->coarse_ksp,NULL,NULL);CHKERRQ(ierr);
1289     } else {
1290       ierr = KSPSolve(pcbddc->coarse_ksp,NULL,NULL);CHKERRQ(ierr);
1291     }
1292   }
1293 
1294   /* Local solution on R nodes */
1295   if (pcis->n) {
1296     ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr);
1297     ierr = VecScatterBegin(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1298     ierr = VecScatterEnd(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1299     if (pcbddc->switch_static) {
1300       ierr = VecScatterBegin(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1301       ierr = VecScatterEnd(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1302     }
1303     ierr = PCBDDCSolveSubstructureCorrection(pc,pcbddc->vec1_R,pcbddc->vec2_R,pcbddc->vec1_C,applytranspose);CHKERRQ(ierr);
1304     ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
1305     ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1306     ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1307     if (pcbddc->switch_static) {
1308       ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1309       ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1310     }
1311   }
1312 
1313   /* communications from coarse sol to local primal nodes */
1314   ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1315   ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1316 
1317   /* Sum contributions from two levels */
1318   if (applytranspose) {
1319     ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr);
1320     if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); }
1321   } else {
1322     ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr);
1323     if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); }
1324   }
1325   PetscFunctionReturn(0);
1326 }
1327 
1328 /* TODO: the following two function can be optimized using VecPlaceArray whenever possible and using overlap flag */
1329 #undef __FUNCT__
1330 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin"
1331 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode)
1332 {
1333   PetscErrorCode ierr;
1334   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1335   PetscScalar    *array,*array2;
1336   Vec            from,to;
1337 
1338   PetscFunctionBegin;
1339   if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */
1340     from = pcbddc->coarse_vec;
1341     to = pcbddc->vec1_P;
1342     if (pcbddc->coarse_ksp) { /* get array from coarse processes */
1343       Vec tvec;
1344       PetscInt lsize;
1345       ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr);
1346       ierr = VecGetLocalSize(tvec,&lsize);CHKERRQ(ierr);
1347       ierr = VecGetArrayRead(tvec,(const PetscScalar**)&array);CHKERRQ(ierr);
1348       ierr = VecGetArray(from,&array2);CHKERRQ(ierr);
1349       ierr = PetscMemcpy(array2,array,lsize*sizeof(PetscScalar));CHKERRQ(ierr);
1350       ierr = VecRestoreArrayRead(tvec,(const PetscScalar**)&array);CHKERRQ(ierr);
1351       ierr = VecRestoreArray(from,&array2);CHKERRQ(ierr);
1352     }
1353   } else { /* from local to global -> put data in coarse right hand side */
1354     from = pcbddc->vec1_P;
1355     to = pcbddc->coarse_vec;
1356   }
1357   ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr);
1358   PetscFunctionReturn(0);
1359 }
1360 
1361 #undef __FUNCT__
1362 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd"
1363 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode)
1364 {
1365   PetscErrorCode ierr;
1366   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1367   PetscScalar    *array,*array2;
1368   Vec            from,to;
1369 
1370   PetscFunctionBegin;
1371   if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */
1372     from = pcbddc->coarse_vec;
1373     to = pcbddc->vec1_P;
1374   } else { /* from local to global -> put data in coarse right hand side */
1375     from = pcbddc->vec1_P;
1376     to = pcbddc->coarse_vec;
1377   }
1378   ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr);
1379   if (smode == SCATTER_FORWARD) {
1380     if (pcbddc->coarse_ksp) { /* get array from coarse processes */
1381       Vec tvec;
1382       PetscInt lsize;
1383       ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr);
1384       ierr = VecGetLocalSize(tvec,&lsize);CHKERRQ(ierr);
1385       ierr = VecGetArrayRead(to,(const PetscScalar**)&array);CHKERRQ(ierr);
1386       ierr = VecGetArray(tvec,&array2);CHKERRQ(ierr);
1387       ierr = PetscMemcpy(array2,array,lsize*sizeof(PetscScalar));CHKERRQ(ierr);
1388       ierr = VecRestoreArrayRead(to,(const PetscScalar**)&array);CHKERRQ(ierr);
1389       ierr = VecRestoreArray(tvec,&array2);CHKERRQ(ierr);
1390     }
1391   }
1392   PetscFunctionReturn(0);
1393 }
1394 
1395 /* uncomment for testing purposes */
1396 /* #define PETSC_MISSING_LAPACK_GESVD 1 */
1397 #undef __FUNCT__
1398 #define __FUNCT__ "PCBDDCConstraintsSetUp"
1399 PetscErrorCode PCBDDCConstraintsSetUp(PC pc)
1400 {
1401   PetscErrorCode    ierr;
1402   PC_IS*            pcis = (PC_IS*)(pc->data);
1403   PC_BDDC*          pcbddc = (PC_BDDC*)pc->data;
1404   Mat_IS*           matis = (Mat_IS*)pc->pmat->data;
1405   /* constraint and (optionally) change of basis matrix implemented as SeqAIJ */
1406   MatType           impMatType=MATSEQAIJ;
1407   /* one and zero */
1408   PetscScalar       one=1.0,zero=0.0;
1409   /* space to store constraints and their local indices */
1410   PetscScalar       *temp_quadrature_constraint;
1411   PetscInt          *temp_indices,*temp_indices_to_constraint,*temp_indices_to_constraint_B;
1412   /* iterators */
1413   PetscInt          i,j,k,total_counts,temp_start_ptr;
1414   /* stuff to store connected components stored in pcbddc->mat_graph */
1415   IS                ISForVertices,*ISForFaces,*ISForEdges,*used_IS;
1416   PetscInt          n_ISForFaces,n_ISForEdges;
1417   /* near null space stuff */
1418   MatNullSpace      nearnullsp;
1419   const Vec         *nearnullvecs;
1420   Vec               *localnearnullsp;
1421   PetscBool         nnsp_has_cnst;
1422   PetscInt          nnsp_size;
1423   PetscScalar       *array;
1424   /* BLAS integers */
1425   PetscBLASInt      lwork,lierr;
1426   PetscBLASInt      Blas_N,Blas_M,Blas_K,Blas_one=1;
1427   PetscBLASInt      Blas_LDA,Blas_LDB,Blas_LDC;
1428   /* LAPACK working arrays for SVD or POD */
1429   PetscBool         skip_lapack;
1430   PetscScalar       *work;
1431   PetscReal         *singular_vals;
1432 #if defined(PETSC_USE_COMPLEX)
1433   PetscReal         *rwork;
1434 #endif
1435 #if defined(PETSC_MISSING_LAPACK_GESVD)
1436   PetscBLASInt      Blas_one_2=1;
1437   PetscScalar       *temp_basis,*correlation_mat;
1438 #else
1439   PetscBLASInt      dummy_int_1=1,dummy_int_2=1;
1440   PetscScalar       dummy_scalar_1=0.0,dummy_scalar_2=0.0;
1441 #endif
1442   /* reuse */
1443   PetscInt          olocal_primal_size;
1444   PetscInt          *oprimal_indices_local_idxs;
1445   /* change of basis */
1446   PetscInt          *aux_primal_numbering,*aux_primal_minloc,*global_indices;
1447   PetscBool         boolforchange,qr_needed;
1448   PetscBT           touched,change_basis,qr_needed_idx;
1449   /* auxiliary stuff */
1450   PetscInt          *nnz,*is_indices,*aux_primal_numbering_B;
1451   PetscInt          ncc,*gidxs,*permutation,*temp_indices_to_constraint_work;
1452   PetscScalar       *temp_quadrature_constraint_work;
1453   /* some quantities */
1454   PetscInt          n_vertices,total_primal_vertices,valid_constraints;
1455   PetscInt          size_of_constraint,max_size_of_constraint,max_constraints,temp_constraints;
1456 
1457 
1458   PetscFunctionBegin;
1459   /* Destroy Mat objects computed previously */
1460   ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
1461   ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
1462   /* Get index sets for faces, edges and vertices from graph */
1463   ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr);
1464   /* free unneeded index sets */
1465   if (!pcbddc->use_vertices) {
1466     ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr);
1467   }
1468   if (!pcbddc->use_edges) {
1469     for (i=0;i<n_ISForEdges;i++) {
1470       ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr);
1471     }
1472     ierr = PetscFree(ISForEdges);CHKERRQ(ierr);
1473     n_ISForEdges = 0;
1474   }
1475   if (!pcbddc->use_faces) {
1476     for (i=0;i<n_ISForFaces;i++) {
1477       ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr);
1478     }
1479     ierr = PetscFree(ISForFaces);CHKERRQ(ierr);
1480     n_ISForFaces = 0;
1481   }
1482   /* HACKS (the following two blocks of code) */
1483   if (!ISForVertices && pcbddc->NullSpace && !pcbddc->user_ChangeOfBasisMatrix) {
1484     pcbddc->use_change_of_basis = PETSC_TRUE;
1485     if (!ISForEdges) {
1486       pcbddc->use_change_on_faces = PETSC_TRUE;
1487     }
1488   }
1489   if (pcbddc->NullSpace) {
1490     /* use_change_of_basis should be consistent among processors */
1491     PetscBool tbool[2],gbool[2];
1492     tbool [0] = pcbddc->use_change_of_basis;
1493     tbool [1] = pcbddc->use_change_on_faces;
1494     ierr = MPI_Allreduce(tbool,gbool,2,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
1495     pcbddc->use_change_of_basis = gbool[0];
1496     pcbddc->use_change_on_faces = gbool[1];
1497   }
1498   /* print some info */
1499   if (pcbddc->dbg_flag) {
1500     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
1501     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr);
1502     i = 0;
1503     if (ISForVertices) {
1504       ierr = ISGetSize(ISForVertices,&i);CHKERRQ(ierr);
1505     }
1506     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices\n",PetscGlobalRank,i);CHKERRQ(ierr);
1507     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges\n",PetscGlobalRank,n_ISForEdges);CHKERRQ(ierr);
1508     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces\n",PetscGlobalRank,n_ISForFaces);CHKERRQ(ierr);
1509     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1510   }
1511   /* check if near null space is attached to global mat */
1512   ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr);
1513   if (nearnullsp) {
1514     ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr);
1515     /* remove any stored info */
1516     ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr);
1517     ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr);
1518     /* store information for BDDC solver reuse */
1519     ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr);
1520     pcbddc->onearnullspace = nearnullsp;
1521     ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr);
1522     for (i=0;i<nnsp_size;i++) {
1523       ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr);
1524     }
1525   } else { /* if near null space is not provided BDDC uses constants by default */
1526     nnsp_size = 0;
1527     nnsp_has_cnst = PETSC_TRUE;
1528   }
1529   /* get max number of constraints on a single cc */
1530   max_constraints = nnsp_size;
1531   if (nnsp_has_cnst) max_constraints++;
1532 
1533   /*
1534        Evaluate maximum storage size needed by the procedure
1535        - temp_indices will contain start index of each constraint stored as follows
1536        - temp_indices_to_constraint  [temp_indices[i],...,temp_indices[i+1]-1] will contain the indices (in local numbering) on which the constraint acts
1537        - temp_indices_to_constraint_B[temp_indices[i],...,temp_indices[i+1]-1] will contain the indices (in boundary numbering) on which the constraint acts
1538        - temp_quadrature_constraint  [temp_indices[i],...,temp_indices[i+1]-1] will contain the scalars representing the constraint itself
1539                                                                                                                                                          */
1540   total_counts = n_ISForFaces+n_ISForEdges;
1541   total_counts *= max_constraints;
1542   n_vertices = 0;
1543   if (ISForVertices) {
1544     ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr);
1545   }
1546   total_counts += n_vertices;
1547   ierr = PetscMalloc1(total_counts+1,&temp_indices);CHKERRQ(ierr);
1548   ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr);
1549   total_counts = 0;
1550   max_size_of_constraint = 0;
1551   for (i=0;i<n_ISForEdges+n_ISForFaces;i++) {
1552     if (i<n_ISForEdges) {
1553       used_IS = &ISForEdges[i];
1554     } else {
1555       used_IS = &ISForFaces[i-n_ISForEdges];
1556     }
1557     ierr = ISGetSize(*used_IS,&j);CHKERRQ(ierr);
1558     total_counts += j;
1559     max_size_of_constraint = PetscMax(j,max_size_of_constraint);
1560   }
1561   total_counts *= max_constraints;
1562   total_counts += n_vertices;
1563   ierr = PetscMalloc3(total_counts,&temp_quadrature_constraint,total_counts,&temp_indices_to_constraint,total_counts,&temp_indices_to_constraint_B);CHKERRQ(ierr);
1564   /* get local part of global near null space vectors */
1565   ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr);
1566   for (k=0;k<nnsp_size;k++) {
1567     ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr);
1568     ierr = VecScatterBegin(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1569     ierr = VecScatterEnd(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1570   }
1571 
1572   /* whether or not to skip lapack calls */
1573   skip_lapack = PETSC_TRUE;
1574   if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE;
1575 
1576   /* allocate some auxiliary stuff */
1577   if (!skip_lapack || pcbddc->use_qr_single) {
1578     ierr = PetscMalloc4(max_size_of_constraint,&gidxs,max_size_of_constraint,&permutation,max_size_of_constraint,&temp_indices_to_constraint_work,max_size_of_constraint,&temp_quadrature_constraint_work);CHKERRQ(ierr);
1579   } else {
1580     gidxs = NULL;
1581     permutation = NULL;
1582     temp_indices_to_constraint_work = NULL;
1583     temp_quadrature_constraint_work = NULL;
1584   }
1585 
1586   /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */
1587   if (!skip_lapack) {
1588     PetscScalar temp_work;
1589 
1590 #if defined(PETSC_MISSING_LAPACK_GESVD)
1591     /* Proper Orthogonal Decomposition (POD) using the snapshot method */
1592     ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr);
1593     ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr);
1594     ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr);
1595 #if defined(PETSC_USE_COMPLEX)
1596     ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr);
1597 #endif
1598     /* now we evaluate the optimal workspace using query with lwork=-1 */
1599     ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr);
1600     ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr);
1601     lwork = -1;
1602     ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1603 #if !defined(PETSC_USE_COMPLEX)
1604     PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr));
1605 #else
1606     PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr));
1607 #endif
1608     ierr = PetscFPTrapPop();CHKERRQ(ierr);
1609     if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr);
1610 #else /* on missing GESVD */
1611     /* SVD */
1612     PetscInt max_n,min_n;
1613     max_n = max_size_of_constraint;
1614     min_n = max_constraints;
1615     if (max_size_of_constraint < max_constraints) {
1616       min_n = max_size_of_constraint;
1617       max_n = max_constraints;
1618     }
1619     ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr);
1620 #if defined(PETSC_USE_COMPLEX)
1621     ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr);
1622 #endif
1623     /* now we evaluate the optimal workspace using query with lwork=-1 */
1624     lwork = -1;
1625     ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr);
1626     ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr);
1627     ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr);
1628     ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1629 #if !defined(PETSC_USE_COMPLEX)
1630     PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,&lierr));
1631 #else
1632     PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,rwork,&lierr));
1633 #endif
1634     ierr = PetscFPTrapPop();CHKERRQ(ierr);
1635     if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr);
1636 #endif /* on missing GESVD */
1637     /* Allocate optimal workspace */
1638     ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr);
1639     ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr);
1640   }
1641   /* Now we can loop on constraining sets */
1642   total_counts = 0;
1643   temp_indices[0] = 0;
1644   /* vertices */
1645   if (ISForVertices) {
1646     ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1647     if (nnsp_has_cnst) { /* consider all vertices */
1648       ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr);
1649       for (i=0;i<n_vertices;i++) {
1650         temp_quadrature_constraint[temp_indices[total_counts]]=1.0;
1651         temp_indices[total_counts+1]=temp_indices[total_counts]+1;
1652         total_counts++;
1653       }
1654     } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */
1655       PetscBool used_vertex;
1656       for (i=0;i<n_vertices;i++) {
1657         used_vertex = PETSC_FALSE;
1658         k = 0;
1659         while (!used_vertex && k<nnsp_size) {
1660           ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1661           if (PetscAbsScalar(array[is_indices[i]])>0.0) {
1662             temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i];
1663             temp_quadrature_constraint[temp_indices[total_counts]]=1.0;
1664             temp_indices[total_counts+1]=temp_indices[total_counts]+1;
1665             total_counts++;
1666             used_vertex = PETSC_TRUE;
1667           }
1668           ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1669           k++;
1670         }
1671       }
1672     }
1673     ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1674     n_vertices = total_counts;
1675   }
1676 
1677   /* edges and faces */
1678   for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) {
1679     if (ncc<n_ISForEdges) {
1680       used_IS = &ISForEdges[ncc];
1681       boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */
1682     } else {
1683       used_IS = &ISForFaces[ncc-n_ISForEdges];
1684       boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */
1685     }
1686     temp_constraints = 0;          /* zero the number of constraints I have on this conn comp */
1687     temp_start_ptr = total_counts; /* need to know the starting index of constraints stored */
1688     ierr = ISGetSize(*used_IS,&size_of_constraint);CHKERRQ(ierr);
1689     ierr = ISGetIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1690     /* change of basis should not be performed on local periodic nodes */
1691     if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE;
1692     if (nnsp_has_cnst) {
1693       PetscScalar quad_value;
1694       temp_constraints++;
1695       if (!pcbddc->use_nnsp_true) {
1696         quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint));
1697       } else {
1698         quad_value = 1.0;
1699       }
1700       ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr);
1701       for (j=0;j<size_of_constraint;j++) {
1702         temp_quadrature_constraint[temp_indices[total_counts]+j]=quad_value;
1703       }
1704       /* sort by global ordering if using lapack subroutines */
1705       if (!skip_lapack || pcbddc->use_qr_single) {
1706         ierr = ISLocalToGlobalMappingApply(matis->mapping,size_of_constraint,temp_indices_to_constraint+temp_indices[total_counts],gidxs);CHKERRQ(ierr);
1707         for (j=0;j<size_of_constraint;j++) {
1708           permutation[j]=j;
1709         }
1710         ierr = PetscSortIntWithPermutation(size_of_constraint,gidxs,permutation);CHKERRQ(ierr);
1711         for (j=0;j<size_of_constraint;j++) {
1712           temp_indices_to_constraint_work[j] = temp_indices_to_constraint[temp_indices[total_counts]+permutation[j]];
1713           temp_quadrature_constraint_work[j] = temp_quadrature_constraint[temp_indices[total_counts]+permutation[j]];
1714         }
1715         ierr = PetscMemcpy(temp_indices_to_constraint+temp_indices[total_counts],temp_indices_to_constraint_work,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr);
1716         ierr = PetscMemcpy(temp_quadrature_constraint+temp_indices[total_counts],temp_quadrature_constraint_work,size_of_constraint*sizeof(PetscScalar));CHKERRQ(ierr);
1717       }
1718       temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint;  /* store new starting point */
1719       total_counts++;
1720     }
1721     for (k=0;k<nnsp_size;k++) {
1722       PetscReal real_value;
1723       ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1724       ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr);
1725       for (j=0;j<size_of_constraint;j++) {
1726         temp_quadrature_constraint[temp_indices[total_counts]+j]=array[is_indices[j]];
1727       }
1728       ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1729       /* check if array is null on the connected component */
1730       ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
1731       PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_one));
1732       if (real_value > 0.0) { /* keep indices and values */
1733         /* sort by global ordering if using lapack subroutines */
1734         if (!skip_lapack || pcbddc->use_qr_single) {
1735           ierr = ISLocalToGlobalMappingApply(matis->mapping,size_of_constraint,temp_indices_to_constraint+temp_indices[total_counts],gidxs);CHKERRQ(ierr);
1736           for (j=0;j<size_of_constraint;j++) {
1737             permutation[j]=j;
1738           }
1739           ierr = PetscSortIntWithPermutation(size_of_constraint,gidxs,permutation);CHKERRQ(ierr);
1740           for (j=0;j<size_of_constraint;j++) {
1741             temp_indices_to_constraint_work[j] = temp_indices_to_constraint[temp_indices[total_counts]+permutation[j]];
1742             temp_quadrature_constraint_work[j] = temp_quadrature_constraint[temp_indices[total_counts]+permutation[j]];
1743           }
1744           ierr = PetscMemcpy(temp_indices_to_constraint+temp_indices[total_counts],temp_indices_to_constraint_work,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr);
1745           ierr = PetscMemcpy(temp_quadrature_constraint+temp_indices[total_counts],temp_quadrature_constraint_work,size_of_constraint*sizeof(PetscScalar));CHKERRQ(ierr);
1746         }
1747         temp_constraints++;
1748         temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint;  /* store new starting point */
1749         total_counts++;
1750       }
1751     }
1752     ierr = ISRestoreIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1753     valid_constraints = temp_constraints;
1754     if (!pcbddc->use_nnsp_true && temp_constraints) {
1755       if (temp_constraints == 1) { /* just normalize the constraint */
1756         PetscScalar norm;
1757         ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
1758         PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,temp_quadrature_constraint+temp_indices[temp_start_ptr],&Blas_one,temp_quadrature_constraint+temp_indices[temp_start_ptr],&Blas_one));
1759         norm = 1.0/PetscSqrtReal(PetscRealPart(norm));
1760         PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,temp_quadrature_constraint+temp_indices[temp_start_ptr],&Blas_one));
1761       } else { /* perform SVD */
1762         PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */
1763 
1764 #if defined(PETSC_MISSING_LAPACK_GESVD)
1765         /* SVD: Y = U*S*V^H                -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag
1766            POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2)
1767            -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined
1768               the constraints basis will differ (by a complex factor with absolute value equal to 1)
1769               from that computed using LAPACKgesvd
1770            -> This is due to a different computation of eigenvectors in LAPACKheev
1771            -> The quality of the POD-computed basis will be the same */
1772         ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
1773         /* Store upper triangular part of correlation matrix */
1774         ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
1775         ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1776         for (j=0;j<temp_constraints;j++) {
1777           for (k=0;k<j+1;k++) {
1778             PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k]=BLASdot_(&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr+k]],&Blas_one,&temp_quadrature_constraint[temp_indices[temp_start_ptr+j]],&Blas_one_2));
1779           }
1780         }
1781         /* compute eigenvalues and eigenvectors of correlation matrix */
1782         ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
1783         ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr);
1784 #if !defined(PETSC_USE_COMPLEX)
1785         PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr));
1786 #else
1787         PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr));
1788 #endif
1789         ierr = PetscFPTrapPop();CHKERRQ(ierr);
1790         if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr);
1791         /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */
1792         j = 0;
1793         while (j < temp_constraints && singular_vals[j] < tol) j++;
1794         total_counts = total_counts-j;
1795         valid_constraints = temp_constraints-j;
1796         /* scale and copy POD basis into used quadrature memory */
1797         ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
1798         ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
1799         ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr);
1800         ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1801         ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr);
1802         ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr);
1803         if (j<temp_constraints) {
1804           PetscInt ii;
1805           for (k=j;k<temp_constraints;k++) singular_vals[k]=1.0/PetscSqrtReal(singular_vals[k]);
1806           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1807           PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC));
1808           ierr = PetscFPTrapPop();CHKERRQ(ierr);
1809           for (k=0;k<temp_constraints-j;k++) {
1810             for (ii=0;ii<size_of_constraint;ii++) {
1811               temp_quadrature_constraint[temp_indices[temp_start_ptr+k]+ii]=singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii];
1812             }
1813           }
1814         }
1815 #else  /* on missing GESVD */
1816         ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
1817         ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
1818         ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1819         ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1820 #if !defined(PETSC_USE_COMPLEX)
1821         PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,&lierr));
1822 #else
1823         PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,rwork,&lierr));
1824 #endif
1825         if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr);
1826         ierr = PetscFPTrapPop();CHKERRQ(ierr);
1827         /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */
1828         k = temp_constraints;
1829         if (k > size_of_constraint) k = size_of_constraint;
1830         j = 0;
1831         while (j < k && singular_vals[k-j-1] < tol) j++;
1832         valid_constraints = k-j;
1833         total_counts = total_counts-temp_constraints+valid_constraints;
1834 #endif /* on missing GESVD */
1835       }
1836     }
1837     /* setting change_of_basis flag is safe now */
1838     if (boolforchange) {
1839       for (j=0;j<valid_constraints;j++) {
1840         PetscBTSet(change_basis,total_counts-j-1);
1841       }
1842     }
1843   }
1844   /* free index sets of faces, edges and vertices */
1845   for (i=0;i<n_ISForFaces;i++) {
1846     ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr);
1847   }
1848   if (n_ISForFaces) {
1849     ierr = PetscFree(ISForFaces);CHKERRQ(ierr);
1850   }
1851   for (i=0;i<n_ISForEdges;i++) {
1852     ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr);
1853   }
1854   if (n_ISForEdges) {
1855     ierr = PetscFree(ISForEdges);CHKERRQ(ierr);
1856   }
1857   ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr);
1858   /* map temp_indices_to_constraint in boundary numbering */
1859   ierr = ISGlobalToLocalMappingApply(pcbddc->BtoNmap,IS_GTOLM_DROP,temp_indices[total_counts],temp_indices_to_constraint,&i,temp_indices_to_constraint_B);CHKERRQ(ierr);
1860   if (i != temp_indices[total_counts]) {
1861     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %d != %d\n",temp_indices[total_counts],i);
1862   }
1863 
1864   /* free workspace */
1865   ierr = PetscFree4(gidxs,permutation,temp_indices_to_constraint_work,temp_quadrature_constraint_work);CHKERRQ(ierr);
1866   if (!skip_lapack) {
1867     ierr = PetscFree(work);CHKERRQ(ierr);
1868 #if defined(PETSC_USE_COMPLEX)
1869     ierr = PetscFree(rwork);CHKERRQ(ierr);
1870 #endif
1871     ierr = PetscFree(singular_vals);CHKERRQ(ierr);
1872 #if defined(PETSC_MISSING_LAPACK_GESVD)
1873     ierr = PetscFree(correlation_mat);CHKERRQ(ierr);
1874     ierr = PetscFree(temp_basis);CHKERRQ(ierr);
1875 #endif
1876   }
1877   for (k=0;k<nnsp_size;k++) {
1878     ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr);
1879   }
1880   ierr = PetscFree(localnearnullsp);CHKERRQ(ierr);
1881 
1882   /* set quantities in pcbddc data structure and store previous primal size */
1883   /* n_vertices defines the number of subdomain corners in the primal space */
1884   /* n_constraints defines the number of averages (they can be point primal dofs if change of basis is requested) */
1885   olocal_primal_size = pcbddc->local_primal_size;
1886   pcbddc->local_primal_size = total_counts;
1887   pcbddc->n_vertices = n_vertices;
1888   pcbddc->n_constraints = pcbddc->local_primal_size-pcbddc->n_vertices;
1889 
1890   /* Create constraint matrix */
1891   /* The constraint matrix is used to compute the l2g map of primal dofs */
1892   /* so we need to set it up properly either with or without change of basis */
1893   ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
1894   ierr = MatSetType(pcbddc->ConstraintMatrix,impMatType);CHKERRQ(ierr);
1895   ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr);
1896   /* array to compute a local numbering of constraints : vertices first then constraints */
1897   ierr = PetscMalloc1(pcbddc->local_primal_size,&aux_primal_numbering);CHKERRQ(ierr);
1898   /* array to select the proper local node (of minimum index with respect to global ordering) when changing the basis */
1899   /* note: it should not be needed since IS for faces and edges are already sorted by global ordering when analyzing the graph but... just in case */
1900   ierr = PetscMalloc1(pcbddc->local_primal_size,&aux_primal_minloc);CHKERRQ(ierr);
1901   /* auxiliary stuff for basis change */
1902   ierr = PetscMalloc1(max_size_of_constraint,&global_indices);CHKERRQ(ierr);
1903   ierr = PetscBTCreate(pcis->n_B,&touched);CHKERRQ(ierr);
1904 
1905   /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */
1906   total_primal_vertices=0;
1907   for (i=0;i<pcbddc->local_primal_size;i++) {
1908     size_of_constraint=temp_indices[i+1]-temp_indices[i];
1909     if (size_of_constraint == 1) {
1910       ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]]);CHKERRQ(ierr);
1911       aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]];
1912       aux_primal_minloc[total_primal_vertices]=0;
1913       total_primal_vertices++;
1914     } else if (PetscBTLookup(change_basis,i)) { /* Same procedure used in PCBDDCGetPrimalConstraintsLocalIdx */
1915       PetscInt min_loc,min_index;
1916       ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],global_indices);CHKERRQ(ierr);
1917       /* find first untouched local node */
1918       k = 0;
1919       while (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) k++;
1920       min_index = global_indices[k];
1921       min_loc = k;
1922       /* search the minimum among global nodes already untouched on the cc */
1923       for (k=1;k<size_of_constraint;k++) {
1924         /* there can be more than one constraint on a single connected component */
1925         if (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k]) && min_index > global_indices[k]) {
1926           min_index = global_indices[k];
1927           min_loc = k;
1928         }
1929       }
1930       ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]+min_loc]);CHKERRQ(ierr);
1931       aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]+min_loc];
1932       aux_primal_minloc[total_primal_vertices]=min_loc;
1933       total_primal_vertices++;
1934     }
1935   }
1936   /* determine if a QR strategy is needed for change of basis */
1937   qr_needed = PETSC_FALSE;
1938   ierr = PetscBTCreate(pcbddc->local_primal_size,&qr_needed_idx);CHKERRQ(ierr);
1939   for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
1940     if (PetscBTLookup(change_basis,i)) {
1941       if (!pcbddc->use_qr_single) {
1942         size_of_constraint = temp_indices[i+1]-temp_indices[i];
1943         j = 0;
1944         for (k=0;k<size_of_constraint;k++) {
1945           if (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) {
1946             j++;
1947           }
1948         }
1949         /* found more than one primal dof on the cc */
1950         if (j > 1) {
1951           PetscBTSet(qr_needed_idx,i);
1952           qr_needed = PETSC_TRUE;
1953         }
1954       } else {
1955         PetscBTSet(qr_needed_idx,i);
1956         qr_needed = PETSC_TRUE;
1957       }
1958     }
1959   }
1960   /* free workspace */
1961   ierr = PetscFree(global_indices);CHKERRQ(ierr);
1962 
1963   /* permute indices in order to have a sorted set of vertices */
1964   ierr = PetscSortInt(total_primal_vertices,aux_primal_numbering);CHKERRQ(ierr);
1965 
1966   /* nonzero structure of constraint matrix */
1967   ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr);
1968   for (i=0;i<total_primal_vertices;i++) nnz[i]=1;
1969   j=total_primal_vertices;
1970   for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
1971     if (!PetscBTLookup(change_basis,i)) {
1972       nnz[j]=temp_indices[i+1]-temp_indices[i];
1973       j++;
1974     }
1975   }
1976   ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr);
1977   ierr = PetscFree(nnz);CHKERRQ(ierr);
1978   /* set values in constraint matrix */
1979   for (i=0;i<total_primal_vertices;i++) {
1980     ierr = MatSetValue(pcbddc->ConstraintMatrix,i,aux_primal_numbering[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
1981   }
1982   total_counts = total_primal_vertices;
1983   for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
1984     if (!PetscBTLookup(change_basis,i)) {
1985       size_of_constraint=temp_indices[i+1]-temp_indices[i];
1986       ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&total_counts,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],&temp_quadrature_constraint[temp_indices[i]],INSERT_VALUES);CHKERRQ(ierr);
1987       total_counts++;
1988     }
1989   }
1990   /* assembling */
1991   ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1992   ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1993   /*
1994   ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
1995   ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr);
1996   */
1997   /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */
1998   if (pcbddc->use_change_of_basis) {
1999     /* dual and primal dofs on a single cc */
2000     PetscInt     dual_dofs,primal_dofs;
2001     /* iterator on aux_primal_minloc (ordered as read from nearnullspace: vertices, edges and then constraints) */
2002     PetscInt     primal_counter;
2003     /* working stuff for GEQRF */
2004     PetscScalar  *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t;
2005     PetscBLASInt lqr_work;
2006     /* working stuff for UNGQR */
2007     PetscScalar  *gqr_work,lgqr_work_t;
2008     PetscBLASInt lgqr_work;
2009     /* working stuff for TRTRS */
2010     PetscScalar  *trs_rhs;
2011     PetscBLASInt Blas_NRHS;
2012     /* pointers for values insertion into change of basis matrix */
2013     PetscInt     *start_rows,*start_cols;
2014     PetscScalar  *start_vals;
2015     /* working stuff for values insertion */
2016     PetscBT      is_primal;
2017     /* matrix sizes */
2018     PetscInt     global_size,local_size;
2019     /* work array for nonzeros */
2020     PetscScalar  *nnz_array;
2021     /* temporary change of basis */
2022     Mat          localChangeOfBasisMatrix;
2023     /* auxiliary work for global change of basis */
2024     Vec          nnz_vec;
2025     PetscInt     *idxs_I,*idxs_B,*idxs_all,*d_nnz,*o_nnz;
2026     PetscInt     nvtxs,*xadj,*adjncy,*idxs_mapped;
2027     PetscScalar  *vals;
2028     PetscBool    done;
2029 
2030     /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */
2031     ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr);
2032     ierr = MatSetType(localChangeOfBasisMatrix,impMatType);CHKERRQ(ierr);
2033     ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n_B,pcis->n_B,pcis->n_B,pcis->n_B);CHKERRQ(ierr);
2034 
2035     /* nonzeros for local mat */
2036     ierr = PetscMalloc1(pcis->n_B,&nnz);CHKERRQ(ierr);
2037     for (i=0;i<pcis->n_B;i++) nnz[i]=1;
2038     for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
2039       if (PetscBTLookup(change_basis,i)) {
2040         size_of_constraint = temp_indices[i+1]-temp_indices[i];
2041         if (PetscBTLookup(qr_needed_idx,i)) {
2042           for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = size_of_constraint;
2043         } else {
2044           for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = 2;
2045           /* get local primal index on the cc */
2046           j = 0;
2047           while (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+j])) j++;
2048           nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = size_of_constraint;
2049         }
2050       }
2051     }
2052     ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr);
2053     /* Set initial identity in the matrix */
2054     for (i=0;i<pcis->n_B;i++) {
2055       ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr);
2056     }
2057 
2058     if (pcbddc->dbg_flag) {
2059       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr);
2060       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr);
2061     }
2062 
2063 
2064     /* Now we loop on the constraints which need a change of basis */
2065     /*
2066        Change of basis matrix is evaluated similarly to the FIRST APPROACH in
2067        Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1)
2068 
2069        Basic blocks of change of basis matrix T computed by
2070 
2071           - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified)
2072 
2073             | 1        0   ...        0         s_1/S |
2074             | 0        1   ...        0         s_2/S |
2075             |              ...                        |
2076             | 0        ...            1     s_{n-1}/S |
2077             | -s_1/s_n ...    -s_{n-1}/s_n      s_n/S |
2078 
2079             with S = \sum_{i=1}^n s_i^2
2080             NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering
2081                   in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering
2082 
2083           - QR decomposition of constraints otherwise
2084     */
2085     if (qr_needed) {
2086       /* space to store Q */
2087       ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr);
2088       /* first we issue queries for optimal work */
2089       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr);
2090       ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr);
2091       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2092       lqr_work = -1;
2093       PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr));
2094       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr);
2095       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr);
2096       ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr);
2097       lgqr_work = -1;
2098       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr);
2099       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr);
2100       ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr);
2101       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2102       if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */
2103       PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr));
2104       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr);
2105       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr);
2106       ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr);
2107       /* array to store scaling factors for reflectors */
2108       ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr);
2109       /* array to store rhs and solution of triangular solver */
2110       ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr);
2111       /* allocating workspace for check */
2112       if (pcbddc->dbg_flag) {
2113         ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&work);CHKERRQ(ierr);
2114       }
2115     }
2116     /* array to store whether a node is primal or not */
2117     ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr);
2118     ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr);
2119     ierr = ISGlobalToLocalMappingApply(pcbddc->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,aux_primal_numbering,&i,aux_primal_numbering_B);CHKERRQ(ierr);
2120     if (i != total_primal_vertices) {
2121       SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",total_primal_vertices,i);
2122     }
2123     for (i=0;i<total_primal_vertices;i++) {
2124       ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr);
2125     }
2126     ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr);
2127 
2128     /* loop on constraints and see whether or not they need a change of basis and compute it */
2129     /* -> using implicit ordering contained in temp_indices data */
2130     total_counts = pcbddc->n_vertices;
2131     primal_counter = total_counts;
2132     while (total_counts<pcbddc->local_primal_size) {
2133       primal_dofs = 1;
2134       if (PetscBTLookup(change_basis,total_counts)) {
2135         /* get all constraints with same support: if more then one constraint is present on the cc then surely indices are stored contiguosly */
2136         while (total_counts+primal_dofs < pcbddc->local_primal_size && temp_indices_to_constraint_B[temp_indices[total_counts]] == temp_indices_to_constraint_B[temp_indices[total_counts+primal_dofs]]) {
2137           primal_dofs++;
2138         }
2139         /* get constraint info */
2140         size_of_constraint = temp_indices[total_counts+1]-temp_indices[total_counts];
2141         dual_dofs = size_of_constraint-primal_dofs;
2142 
2143         if (pcbddc->dbg_flag) {
2144           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d to %d (incl) need a change of basis (size %d)\n",total_counts,total_counts+primal_dofs-1,size_of_constraint);CHKERRQ(ierr);
2145         }
2146 
2147         if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */
2148 
2149           /* copy quadrature constraints for change of basis check */
2150           if (pcbddc->dbg_flag) {
2151             ierr = PetscMemcpy(work,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
2152           }
2153           /* copy temporary constraints into larger work vector (in order to store all columns of Q) */
2154           ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
2155 
2156           /* compute QR decomposition of constraints */
2157           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2158           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
2159           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2160           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2161           PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr));
2162           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr);
2163           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2164 
2165           /* explictly compute R^-T */
2166           ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr);
2167           for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0;
2168           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
2169           ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr);
2170           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2171           ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr);
2172           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2173           PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr));
2174           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr);
2175           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2176 
2177           /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */
2178           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2179           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2180           ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr);
2181           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2182           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2183           PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr));
2184           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr);
2185           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2186 
2187           /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints
2188              i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below)
2189              where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */
2190           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2191           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
2192           ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr);
2193           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2194           ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr);
2195           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr);
2196           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2197           PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_LDC));
2198           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2199           ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
2200 
2201           /* insert values in change of basis matrix respecting global ordering of new primal dofs */
2202           start_rows = &temp_indices_to_constraint_B[temp_indices[total_counts]];
2203           /* insert cols for primal dofs */
2204           for (j=0;j<primal_dofs;j++) {
2205             start_vals = &qr_basis[j*size_of_constraint];
2206             start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+aux_primal_minloc[primal_counter+j]];
2207             ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr);
2208           }
2209           /* insert cols for dual dofs */
2210           for (j=0,k=0;j<dual_dofs;k++) {
2211             if (!PetscBTLookup(is_primal,temp_indices_to_constraint_B[temp_indices[total_counts]+k])) {
2212               start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint];
2213               start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+k];
2214               ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr);
2215               j++;
2216             }
2217           }
2218 
2219           /* check change of basis */
2220           if (pcbddc->dbg_flag) {
2221             PetscInt   ii,jj;
2222             PetscBool valid_qr=PETSC_TRUE;
2223             ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr);
2224             ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2225             ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr);
2226             ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2227             ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr);
2228             ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr);
2229             ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2230             PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&work[size_of_constraint*primal_dofs],&Blas_LDC));
2231             ierr = PetscFPTrapPop();CHKERRQ(ierr);
2232             for (jj=0;jj<size_of_constraint;jj++) {
2233               for (ii=0;ii<primal_dofs;ii++) {
2234                 if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE;
2235                 if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE;
2236               }
2237             }
2238             if (!valid_qr) {
2239               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr);
2240               for (jj=0;jj<size_of_constraint;jj++) {
2241                 for (ii=0;ii<primal_dofs;ii++) {
2242                   if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) {
2243                     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
2244                   }
2245                   if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) {
2246                     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
2247                   }
2248                 }
2249               }
2250             } else {
2251               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr);
2252             }
2253           }
2254         } else { /* simple transformation block */
2255           PetscInt    row,col;
2256           PetscScalar val,norm;
2257 
2258           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2259           PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,temp_quadrature_constraint+temp_indices[total_counts],&Blas_one,temp_quadrature_constraint+temp_indices[total_counts],&Blas_one));
2260           for (j=0;j<size_of_constraint;j++) {
2261             row = temp_indices_to_constraint_B[temp_indices[total_counts]+j];
2262             if (!PetscBTLookup(is_primal,row)) {
2263               col = temp_indices_to_constraint_B[temp_indices[total_counts]+aux_primal_minloc[primal_counter]];
2264               ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr);
2265               ierr = MatSetValue(localChangeOfBasisMatrix,row,col,temp_quadrature_constraint[temp_indices[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr);
2266             } else {
2267               for (k=0;k<size_of_constraint;k++) {
2268                 col = temp_indices_to_constraint_B[temp_indices[total_counts]+k];
2269                 if (row != col) {
2270                   val = -temp_quadrature_constraint[temp_indices[total_counts]+k]/temp_quadrature_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter]];
2271                 } else {
2272                   val = temp_quadrature_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter]]/norm;
2273                 }
2274                 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr);
2275               }
2276             }
2277           }
2278           if (pcbddc->dbg_flag) {
2279             ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr);
2280           }
2281         }
2282         /* increment primal counter */
2283         primal_counter += primal_dofs;
2284       } else {
2285         if (pcbddc->dbg_flag) {
2286           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,temp_indices[total_counts+1]-temp_indices[total_counts]);CHKERRQ(ierr);
2287         }
2288       }
2289       /* increment constraint counter total_counts */
2290       total_counts += primal_dofs;
2291     }
2292 
2293     /* free workspace */
2294     if (qr_needed) {
2295       if (pcbddc->dbg_flag) {
2296         ierr = PetscFree(work);CHKERRQ(ierr);
2297       }
2298       ierr = PetscFree(trs_rhs);CHKERRQ(ierr);
2299       ierr = PetscFree(qr_tau);CHKERRQ(ierr);
2300       ierr = PetscFree(qr_work);CHKERRQ(ierr);
2301       ierr = PetscFree(gqr_work);CHKERRQ(ierr);
2302       ierr = PetscFree(qr_basis);CHKERRQ(ierr);
2303     }
2304     ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr);
2305     ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2306     ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2307 
2308     /* assembling of global change of variable */
2309     ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
2310     ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr);
2311     ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr);
2312     ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr);
2313     ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr);
2314     ierr = MatSetLocalToGlobalMapping(pcbddc->ChangeOfBasisMatrix,matis->mapping,matis->mapping);CHKERRQ(ierr);
2315 
2316     /* nonzeros (overestimated) */
2317     ierr = VecDuplicate(pcis->vec1_global,&nnz_vec);CHKERRQ(ierr);
2318     ierr = VecSetLocalToGlobalMapping(nnz_vec,matis->mapping);CHKERRQ(ierr);
2319     ierr = PetscMalloc2(pcis->n,&nnz_array,pcis->n,&idxs_all);CHKERRQ(ierr);
2320     for (i=0;i<pcis->n;i++) {
2321       nnz_array[i] = 1.0;
2322       idxs_all[i] = i;
2323     }
2324     ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&idxs_B);CHKERRQ(ierr);
2325     for (i=0;i<pcis->n_B;i++) {
2326       nnz_array[idxs_B[i]] = nnz[i];
2327     }
2328     if (pcis->n) {
2329       ierr = VecSetValuesLocal(nnz_vec,pcis->n,idxs_all,nnz_array,INSERT_VALUES);CHKERRQ(ierr);
2330     }
2331     ierr = VecAssemblyBegin(nnz_vec);CHKERRQ(ierr);
2332     ierr = VecAssemblyEnd(nnz_vec);CHKERRQ(ierr);
2333     ierr = PetscFree(nnz);CHKERRQ(ierr);
2334     ierr = PetscFree2(nnz_array,idxs_all);CHKERRQ(ierr);
2335     ierr = PetscMalloc2(local_size,&d_nnz,local_size,&o_nnz);CHKERRQ(ierr);
2336     ierr = VecGetArray(nnz_vec,&nnz_array);CHKERRQ(ierr);
2337     for (i=0;i<local_size;i++) {
2338       d_nnz[i] = PetscMin((PetscInt)(PetscRealPart(nnz_array[i])),local_size);
2339       o_nnz[i] = PetscMin((PetscInt)(PetscRealPart(nnz_array[i])),global_size-local_size);
2340     }
2341     ierr = VecRestoreArray(nnz_vec,&nnz_array);CHKERRQ(ierr);
2342     ierr = VecDestroy(&nnz_vec);CHKERRQ(ierr);
2343     ierr = MatMPIAIJSetPreallocation(pcbddc->ChangeOfBasisMatrix,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2344     ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
2345 
2346     /* Set identity on dirichlet dofs */
2347     ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&idxs_I);CHKERRQ(ierr);
2348     for (i=0;i<pcis->n-pcis->n_B;i++) {
2349       PetscScalar one=1.0;
2350       ierr = MatSetValuesLocal(pcbddc->ChangeOfBasisMatrix,1,idxs_I+i,1,idxs_I+i,&one,INSERT_VALUES);CHKERRQ(ierr);
2351     }
2352     ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&idxs_I);CHKERRQ(ierr);
2353 
2354     /* Set values at interface dofs */
2355     done = PETSC_TRUE;
2356     ierr = MatGetRowIJ(localChangeOfBasisMatrix,0,PETSC_FALSE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&done);CHKERRQ(ierr);
2357     if (!done) {
2358       SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Error in MatGetRowIJ called in %s\n",__FUNCT__);
2359     }
2360     ierr = MatSeqAIJGetArray(localChangeOfBasisMatrix,&vals);CHKERRQ(ierr);
2361     ierr = PetscMalloc1(xadj[nvtxs],&idxs_mapped);CHKERRQ(ierr);
2362     ierr = ISLocalToGlobalMappingApply(pcbddc->BtoNmap,xadj[nvtxs],adjncy,idxs_mapped);CHKERRQ(ierr);
2363     for (i=0;i<nvtxs;i++) {
2364       PetscInt    row,*cols,ncols;
2365       PetscScalar *mat_vals;
2366 
2367       row = idxs_B[i];
2368       ncols = xadj[i+1]-xadj[i];
2369       cols = idxs_mapped+xadj[i];
2370       mat_vals = vals+xadj[i];
2371       ierr = MatSetValuesLocal(pcbddc->ChangeOfBasisMatrix,1,&row,ncols,cols,mat_vals,INSERT_VALUES);CHKERRQ(ierr);
2372     }
2373     ierr = MatRestoreRowIJ(localChangeOfBasisMatrix,0,PETSC_FALSE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&done);CHKERRQ(ierr);
2374     if (!done) {
2375       SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Error in MatRestoreRowIJ called in %s\n",__FUNCT__);
2376     }
2377     ierr = MatSeqAIJRestoreArray(localChangeOfBasisMatrix,&vals);CHKERRQ(ierr);
2378     ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&idxs_B);CHKERRQ(ierr);
2379     ierr = PetscFree(idxs_mapped);CHKERRQ(ierr);
2380     ierr = MatAssemblyBegin(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2381     ierr = MatAssemblyEnd(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2382 
2383     /* check */
2384     if (pcbddc->dbg_flag) {
2385       PetscReal error;
2386       Vec       x,x_change;
2387 
2388       ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr);
2389       ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr);
2390       ierr = VecSetRandom(x,NULL);CHKERRQ(ierr);
2391       ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr);
2392       ierr = VecScatterBegin(pcis->global_to_B,x,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2393       ierr = VecScatterEnd(pcis->global_to_B,x,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2394       ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_B,pcis->vec2_B);CHKERRQ(ierr);
2395       ierr = VecScatterBegin(pcis->global_to_B,pcis->vec2_B,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2396       ierr = VecScatterEnd(pcis->global_to_B,pcis->vec2_B,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2397       ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr);
2398       ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr);
2399       ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr);
2400       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2401       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on B: %1.6e\n",error);CHKERRQ(ierr);
2402       ierr = VecDestroy(&x);CHKERRQ(ierr);
2403       ierr = VecDestroy(&x_change);CHKERRQ(ierr);
2404     }
2405     ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr);
2406   } else if (pcbddc->user_ChangeOfBasisMatrix) {
2407     ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr);
2408     pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix;
2409   }
2410 
2411   /* set up change of basis context */
2412   if (pcbddc->ChangeOfBasisMatrix) {
2413     PCBDDCChange_ctx change_ctx;
2414 
2415     if (!pcbddc->new_global_mat) {
2416       PetscInt global_size,local_size;
2417 
2418       ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr);
2419       ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr);
2420       ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->new_global_mat);CHKERRQ(ierr);
2421       ierr = MatSetSizes(pcbddc->new_global_mat,local_size,local_size,global_size,global_size);CHKERRQ(ierr);
2422       ierr = MatSetType(pcbddc->new_global_mat,MATSHELL);CHKERRQ(ierr);
2423       ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT,(void (*)(void))PCBDDCMatMult_Private);CHKERRQ(ierr);
2424       ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCMatMultTranspose_Private);CHKERRQ(ierr);
2425       ierr = PetscNew(&change_ctx);CHKERRQ(ierr);
2426       ierr = MatShellSetContext(pcbddc->new_global_mat,change_ctx);CHKERRQ(ierr);
2427     } else {
2428       ierr = MatShellGetContext(pcbddc->new_global_mat,&change_ctx);CHKERRQ(ierr);
2429       ierr = MatDestroy(&change_ctx->global_change);CHKERRQ(ierr);
2430       ierr = VecDestroyVecs(2,&change_ctx->work);CHKERRQ(ierr);
2431     }
2432     if (!pcbddc->user_ChangeOfBasisMatrix) {
2433       ierr = PetscObjectReference((PetscObject)pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
2434       change_ctx->global_change = pcbddc->ChangeOfBasisMatrix;
2435     } else {
2436       ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr);
2437       change_ctx->global_change = pcbddc->user_ChangeOfBasisMatrix;
2438     }
2439     ierr = VecDuplicateVecs(pcis->vec1_global,2,&change_ctx->work);CHKERRQ(ierr);
2440     ierr = MatSetUp(pcbddc->new_global_mat);CHKERRQ(ierr);
2441     ierr = MatAssemblyBegin(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2442     ierr = MatAssemblyEnd(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2443   }
2444 
2445   /* get indices in local ordering for vertices and constraints */
2446   if (olocal_primal_size == pcbddc->local_primal_size) { /* if this is true, I need to check if a new primal space has been introduced */
2447     ierr = PetscMalloc1(olocal_primal_size,&oprimal_indices_local_idxs);CHKERRQ(ierr);
2448     ierr = PetscMemcpy(oprimal_indices_local_idxs,pcbddc->primal_indices_local_idxs,olocal_primal_size*sizeof(PetscInt));CHKERRQ(ierr);
2449   }
2450   ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr);
2451   ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
2452   ierr = PetscMalloc1(pcbddc->local_primal_size,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
2453   ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&i,&aux_primal_numbering);CHKERRQ(ierr);
2454   ierr = PetscMemcpy(pcbddc->primal_indices_local_idxs,aux_primal_numbering,i*sizeof(PetscInt));CHKERRQ(ierr);
2455   ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr);
2456   ierr = PCBDDCGetPrimalConstraintsLocalIdx(pc,&j,&aux_primal_numbering);CHKERRQ(ierr);
2457   ierr = PetscMemcpy(&pcbddc->primal_indices_local_idxs[i],aux_primal_numbering,j*sizeof(PetscInt));CHKERRQ(ierr);
2458   ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr);
2459   /* set quantities in PCBDDC data struct */
2460   pcbddc->n_actual_vertices = i;
2461   /* check if a new primal space has been introduced */
2462   pcbddc->new_primal_space_local = PETSC_TRUE;
2463   if (olocal_primal_size == pcbddc->local_primal_size) {
2464     ierr = PetscMemcmp(pcbddc->primal_indices_local_idxs,oprimal_indices_local_idxs,olocal_primal_size,&pcbddc->new_primal_space_local);CHKERRQ(ierr);
2465     pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local);
2466     ierr = PetscFree(oprimal_indices_local_idxs);CHKERRQ(ierr);
2467   }
2468   /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */
2469   ierr = MPI_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
2470 
2471   /* flush dbg viewer */
2472   if (pcbddc->dbg_flag) {
2473     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2474   }
2475 
2476   /* free workspace */
2477   ierr = PetscBTDestroy(&touched);CHKERRQ(ierr);
2478   ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr);
2479   ierr = PetscFree(aux_primal_minloc);CHKERRQ(ierr);
2480   ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr);
2481   ierr = PetscFree(temp_indices);CHKERRQ(ierr);
2482   ierr = PetscFree3(temp_quadrature_constraint,temp_indices_to_constraint,temp_indices_to_constraint_B);CHKERRQ(ierr);
2483   PetscFunctionReturn(0);
2484 }
2485 
2486 #undef __FUNCT__
2487 #define __FUNCT__ "PCBDDCAnalyzeInterface"
2488 PetscErrorCode PCBDDCAnalyzeInterface(PC pc)
2489 {
2490   PC_BDDC     *pcbddc = (PC_BDDC*)pc->data;
2491   PC_IS       *pcis = (PC_IS*)pc->data;
2492   Mat_IS      *matis  = (Mat_IS*)pc->pmat->data;
2493   PetscInt    ierr,i,vertex_size;
2494   PetscViewer viewer=pcbddc->dbg_viewer;
2495 
2496   PetscFunctionBegin;
2497   /* Reset previously computed graph */
2498   ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr);
2499   /* Init local Graph struct */
2500   ierr = PCBDDCGraphInit(pcbddc->mat_graph,matis->mapping);CHKERRQ(ierr);
2501 
2502   /* Check validity of the csr graph passed in by the user */
2503   if (pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) {
2504     ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr);
2505   }
2506 
2507   /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */
2508   if (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) {
2509     Mat       mat_adj;
2510     PetscInt  *xadj,*adjncy;
2511     PetscInt  nvtxs;
2512     PetscBool flg_row=PETSC_TRUE;
2513 
2514     ierr = MatConvert(matis->A,MATMPIADJ,MAT_INITIAL_MATRIX,&mat_adj);CHKERRQ(ierr);
2515     ierr = MatGetRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr);
2516     if (!flg_row) {
2517       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatGetRowIJ called in %s\n",__FUNCT__);
2518     }
2519     if (pcbddc->use_local_adj) {
2520       ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr);
2521       pcbddc->deluxe_compute_rowadj = PETSC_FALSE;
2522     } else { /* just compute subdomain's connected components */
2523       IS                     is_dummy;
2524       ISLocalToGlobalMapping l2gmap_dummy;
2525       PetscInt               j,sum;
2526       PetscInt               *cxadj,*cadjncy;
2527       const PetscInt         *idxs;
2528       PCBDDCGraph            graph;
2529       PetscBT                is_on_boundary;
2530 
2531       ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n,0,1,&is_dummy);CHKERRQ(ierr);
2532       ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr);
2533       ierr = ISDestroy(&is_dummy);CHKERRQ(ierr);
2534       ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr);
2535       ierr = PCBDDCGraphInit(graph,l2gmap_dummy);CHKERRQ(ierr);
2536       ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr);
2537       graph->xadj = xadj;
2538       graph->adjncy = adjncy;
2539       ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr);
2540       ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr);
2541 
2542       if (pcbddc->dbg_flag) {
2543         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] Found %d subdomains\n",PetscGlobalRank,graph->ncc);CHKERRQ(ierr);
2544         for (i=0;i<graph->ncc;i++) {
2545           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] %d cc size %d\n",PetscGlobalRank,i,graph->cptr[i+1]-graph->cptr[i]);CHKERRQ(ierr);
2546         }
2547         ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2548       }
2549 
2550       ierr = PetscBTCreate(nvtxs,&is_on_boundary);CHKERRQ(ierr);
2551       ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr);
2552       for (i=0;i<pcis->n_B;i++) {
2553         ierr = PetscBTSet(is_on_boundary,idxs[i]);CHKERRQ(ierr);
2554       }
2555       ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr);
2556 
2557       ierr = PetscCalloc1(nvtxs+1,&cxadj);CHKERRQ(ierr);
2558       sum = 0;
2559       for (i=0;i<graph->ncc;i++) {
2560         PetscInt sizecc = 0;
2561         for (j=graph->cptr[i];j<graph->cptr[i+1];j++) {
2562           if (PetscBTLookup(is_on_boundary,graph->queue[j])) {
2563             sizecc++;
2564           }
2565         }
2566         for (j=graph->cptr[i];j<graph->cptr[i+1];j++) {
2567           if (PetscBTLookup(is_on_boundary,graph->queue[j])) {
2568             cxadj[graph->queue[j]] = sizecc;
2569           }
2570         }
2571         sum += sizecc*sizecc;
2572       }
2573       ierr = PetscMalloc1(sum,&cadjncy);CHKERRQ(ierr);
2574       sum = 0;
2575       for (i=0;i<nvtxs;i++) {
2576         PetscInt temp = cxadj[i];
2577         cxadj[i] = sum;
2578         sum += temp;
2579       }
2580       cxadj[nvtxs] = sum;
2581       for (i=0;i<graph->ncc;i++) {
2582         for (j=graph->cptr[i];j<graph->cptr[i+1];j++) {
2583           if (PetscBTLookup(is_on_boundary,graph->queue[j])) {
2584             PetscInt k,sizecc = 0;
2585             for (k=graph->cptr[i];k<graph->cptr[i+1];k++) {
2586               if (PetscBTLookup(is_on_boundary,graph->queue[k])) {
2587                 cadjncy[cxadj[graph->queue[j]]+sizecc]=graph->queue[k];
2588                 sizecc++;
2589               }
2590             }
2591           }
2592         }
2593       }
2594       if (nvtxs) {
2595         ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,cxadj,cadjncy,PETSC_OWN_POINTER);CHKERRQ(ierr);
2596       } else {
2597         ierr = PetscFree(cxadj);CHKERRQ(ierr);
2598         ierr = PetscFree(cadjncy);CHKERRQ(ierr);
2599       }
2600       graph->xadj = 0;
2601       graph->adjncy = 0;
2602       ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr);
2603       ierr = PetscBTDestroy(&is_on_boundary);CHKERRQ(ierr);
2604     }
2605     ierr = MatRestoreRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr);
2606     if (!flg_row) {
2607       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatRestoreRowIJ called in %s\n",__FUNCT__);
2608     }
2609     ierr = MatDestroy(&mat_adj);CHKERRQ(ierr);
2610   }
2611 
2612   /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting or PCBDDCSetDofsSplittingLocal */
2613   vertex_size = 1;
2614   if (pcbddc->user_provided_isfordofs) {
2615     if (pcbddc->n_ISForDofs) { /* need to convert from global to local and remove references to global dofs splitting */
2616       ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr);
2617       for (i=0;i<pcbddc->n_ISForDofs;i++) {
2618         ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr);
2619         ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr);
2620       }
2621       pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs;
2622       pcbddc->n_ISForDofs = 0;
2623       ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr);
2624     }
2625     /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */
2626     ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr);
2627   } else {
2628     if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering */
2629       ierr = MatGetBlockSize(pc->pmat,&pcbddc->n_ISForDofsLocal);CHKERRQ(ierr);
2630       ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr);
2631       for (i=0;i<pcbddc->n_ISForDofsLocal;i++) {
2632         ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),pcis->n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr);
2633       }
2634     }
2635   }
2636 
2637   /* Setup of Graph */
2638   if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { /* need to convert from global to local */
2639     ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr);
2640   }
2641   if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { /* need to convert from global to local */
2642     ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr);
2643   }
2644   ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices);CHKERRQ(ierr);
2645 
2646   /* Graph's connected components analysis */
2647   ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr);
2648 
2649   /* print some info to stdout */
2650   if (pcbddc->dbg_flag) {
2651     ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer);CHKERRQ(ierr);
2652   }
2653 
2654   /* mark topography has done */
2655   pcbddc->recompute_topography = PETSC_FALSE;
2656   PetscFunctionReturn(0);
2657 }
2658 
2659 #undef __FUNCT__
2660 #define __FUNCT__ "PCBDDCGetPrimalVerticesLocalIdx"
2661 PetscErrorCode  PCBDDCGetPrimalVerticesLocalIdx(PC pc, PetscInt *n_vertices, PetscInt **vertices_idx)
2662 {
2663   PC_BDDC        *pcbddc = (PC_BDDC*)(pc->data);
2664   PetscInt       *vertices,*row_cmat_indices,n,i,size_of_constraint,local_primal_size;
2665   PetscErrorCode ierr;
2666 
2667   PetscFunctionBegin;
2668   n = 0;
2669   vertices = 0;
2670   if (pcbddc->ConstraintMatrix) {
2671     ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&i);CHKERRQ(ierr);
2672     for (i=0;i<local_primal_size;i++) {
2673       ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
2674       if (size_of_constraint == 1) n++;
2675       ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
2676     }
2677     if (vertices_idx) {
2678       ierr = PetscMalloc1(n,&vertices);CHKERRQ(ierr);
2679       n = 0;
2680       for (i=0;i<local_primal_size;i++) {
2681         ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
2682         if (size_of_constraint == 1) {
2683           vertices[n++]=row_cmat_indices[0];
2684         }
2685         ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
2686       }
2687     }
2688   }
2689   *n_vertices = n;
2690   if (vertices_idx) *vertices_idx = vertices;
2691   PetscFunctionReturn(0);
2692 }
2693 
2694 #undef __FUNCT__
2695 #define __FUNCT__ "PCBDDCGetPrimalConstraintsLocalIdx"
2696 PetscErrorCode  PCBDDCGetPrimalConstraintsLocalIdx(PC pc, PetscInt *n_constraints, PetscInt **constraints_idx)
2697 {
2698   PC_BDDC        *pcbddc = (PC_BDDC*)(pc->data);
2699   PetscInt       *constraints_index,*row_cmat_indices,*row_cmat_global_indices;
2700   PetscInt       n,i,j,size_of_constraint,local_primal_size,local_size,max_size_of_constraint,min_index,min_loc;
2701   PetscBT        touched;
2702   PetscErrorCode ierr;
2703 
2704     /* This function assumes that the number of local constraints per connected component
2705        is not greater than the number of nodes defined for the connected component
2706        (otherwise we will surely have linear dependence between constraints and thus a singular coarse problem) */
2707   PetscFunctionBegin;
2708   n = 0;
2709   constraints_index = 0;
2710   if (pcbddc->ConstraintMatrix) {
2711     ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&local_size);CHKERRQ(ierr);
2712     max_size_of_constraint = 0;
2713     for (i=0;i<local_primal_size;i++) {
2714       ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
2715       if (size_of_constraint > 1) {
2716         n++;
2717       }
2718       max_size_of_constraint = PetscMax(size_of_constraint,max_size_of_constraint);
2719       ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
2720     }
2721     if (constraints_idx) {
2722       ierr = PetscMalloc1(n,&constraints_index);CHKERRQ(ierr);
2723       ierr = PetscMalloc1(max_size_of_constraint,&row_cmat_global_indices);CHKERRQ(ierr);
2724       ierr = PetscBTCreate(local_size,&touched);CHKERRQ(ierr);
2725       n = 0;
2726       for (i=0;i<local_primal_size;i++) {
2727         ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
2728         if (size_of_constraint > 1) {
2729           ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,row_cmat_indices,row_cmat_global_indices);CHKERRQ(ierr);
2730           /* find first untouched local node */
2731           j = 0;
2732           while (PetscBTLookup(touched,row_cmat_indices[j])) j++;
2733           min_index = row_cmat_global_indices[j];
2734           min_loc = j;
2735           /* search the minimum among nodes not yet touched on the connected component
2736              since there can be more than one constraint on a single cc */
2737           for (j=1;j<size_of_constraint;j++) {
2738             if (!PetscBTLookup(touched,row_cmat_indices[j]) && min_index > row_cmat_global_indices[j]) {
2739               min_index = row_cmat_global_indices[j];
2740               min_loc = j;
2741             }
2742           }
2743           ierr = PetscBTSet(touched,row_cmat_indices[min_loc]);CHKERRQ(ierr);
2744           constraints_index[n++] = row_cmat_indices[min_loc];
2745         }
2746         ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
2747       }
2748       ierr = PetscBTDestroy(&touched);CHKERRQ(ierr);
2749       ierr = PetscFree(row_cmat_global_indices);CHKERRQ(ierr);
2750     }
2751   }
2752   *n_constraints = n;
2753   if (constraints_idx) *constraints_idx = constraints_index;
2754   PetscFunctionReturn(0);
2755 }
2756 
2757 #undef __FUNCT__
2758 #define __FUNCT__ "PCBDDCSubsetNumbering"
2759 PetscErrorCode PCBDDCSubsetNumbering(MPI_Comm comm,ISLocalToGlobalMapping l2gmap, PetscInt n_local_dofs, PetscInt local_dofs[], PetscInt local_dofs_mult[], PetscInt* n_global_subset, PetscInt* global_numbering_subset[])
2760 {
2761   Vec            local_vec,global_vec;
2762   IS             seqis,paris;
2763   VecScatter     scatter_ctx;
2764   PetscScalar    *array;
2765   PetscInt       *temp_global_dofs;
2766   PetscScalar    globalsum;
2767   PetscInt       i,j,s;
2768   PetscInt       nlocals,first_index,old_index,max_local,max_global;
2769   PetscMPIInt    rank_prec_comm,size_prec_comm;
2770   PetscInt       *dof_sizes,*dof_displs;
2771   PetscBool      first_found;
2772   PetscErrorCode ierr;
2773 
2774   PetscFunctionBegin;
2775   /* mpi buffers */
2776   ierr = MPI_Comm_size(comm,&size_prec_comm);CHKERRQ(ierr);
2777   ierr = MPI_Comm_rank(comm,&rank_prec_comm);CHKERRQ(ierr);
2778   j = ( !rank_prec_comm ? size_prec_comm : 0);
2779   ierr = PetscMalloc2(j,&dof_sizes,j,&dof_displs);CHKERRQ(ierr);
2780   /* get maximum size of subset */
2781   ierr = PetscMalloc1(n_local_dofs,&temp_global_dofs);CHKERRQ(ierr);
2782   ierr = ISLocalToGlobalMappingApply(l2gmap,n_local_dofs,local_dofs,temp_global_dofs);CHKERRQ(ierr);
2783   max_local = 0;
2784   for (i=0;i<n_local_dofs;i++) {
2785     if (max_local < temp_global_dofs[i] ) {
2786       max_local = temp_global_dofs[i];
2787     }
2788   }
2789   ierr = MPI_Allreduce(&max_local,&max_global,1,MPIU_INT,MPI_MAX,comm);CHKERRQ(ierr);
2790   max_global++;
2791   max_local = 0;
2792   for (i=0;i<n_local_dofs;i++) {
2793     if (max_local < local_dofs[i] ) {
2794       max_local = local_dofs[i];
2795     }
2796   }
2797   max_local++;
2798   /* allocate workspace */
2799   ierr = VecCreate(PETSC_COMM_SELF,&local_vec);CHKERRQ(ierr);
2800   ierr = VecSetSizes(local_vec,PETSC_DECIDE,max_local);CHKERRQ(ierr);
2801   ierr = VecSetType(local_vec,VECSEQ);CHKERRQ(ierr);
2802   ierr = VecCreate(comm,&global_vec);CHKERRQ(ierr);
2803   ierr = VecSetSizes(global_vec,PETSC_DECIDE,max_global);CHKERRQ(ierr);
2804   ierr = VecSetType(global_vec,VECMPI);CHKERRQ(ierr);
2805   /* create scatter */
2806   ierr = ISCreateGeneral(PETSC_COMM_SELF,n_local_dofs,local_dofs,PETSC_COPY_VALUES,&seqis);CHKERRQ(ierr);
2807   ierr = ISCreateGeneral(comm,n_local_dofs,temp_global_dofs,PETSC_COPY_VALUES,&paris);CHKERRQ(ierr);
2808   ierr = VecScatterCreate(local_vec,seqis,global_vec,paris,&scatter_ctx);CHKERRQ(ierr);
2809   ierr = ISDestroy(&seqis);CHKERRQ(ierr);
2810   ierr = ISDestroy(&paris);CHKERRQ(ierr);
2811   /* init array */
2812   ierr = VecSet(global_vec,0.0);CHKERRQ(ierr);
2813   ierr = VecSet(local_vec,0.0);CHKERRQ(ierr);
2814   ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr);
2815   if (local_dofs_mult) {
2816     for (i=0;i<n_local_dofs;i++) {
2817       array[local_dofs[i]]=(PetscScalar)local_dofs_mult[i];
2818     }
2819   } else {
2820     for (i=0;i<n_local_dofs;i++) {
2821       array[local_dofs[i]]=1.0;
2822     }
2823   }
2824   ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr);
2825   /* scatter into global vec and get total number of global dofs */
2826   ierr = VecScatterBegin(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2827   ierr = VecScatterEnd(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2828   ierr = VecSum(global_vec,&globalsum);CHKERRQ(ierr);
2829   *n_global_subset = (PetscInt)PetscRealPart(globalsum);
2830   /* Fill global_vec with cumulative function for global numbering */
2831   ierr = VecGetArray(global_vec,&array);CHKERRQ(ierr);
2832   ierr = VecGetLocalSize(global_vec,&s);CHKERRQ(ierr);
2833   nlocals = 0;
2834   first_index = -1;
2835   first_found = PETSC_FALSE;
2836   for (i=0;i<s;i++) {
2837     if (!first_found && PetscRealPart(array[i]) > 0.1) {
2838       first_found = PETSC_TRUE;
2839       first_index = i;
2840     }
2841     nlocals += (PetscInt)PetscRealPart(array[i]);
2842   }
2843   ierr = MPI_Gather(&nlocals,1,MPIU_INT,dof_sizes,1,MPIU_INT,0,comm);CHKERRQ(ierr);
2844   if (!rank_prec_comm) {
2845     dof_displs[0]=0;
2846     for (i=1;i<size_prec_comm;i++) {
2847       dof_displs[i] = dof_displs[i-1]+dof_sizes[i-1];
2848     }
2849   }
2850   ierr = MPI_Scatter(dof_displs,1,MPIU_INT,&nlocals,1,MPIU_INT,0,comm);CHKERRQ(ierr);
2851   if (first_found) {
2852     array[first_index] += (PetscScalar)nlocals;
2853     old_index = first_index;
2854     for (i=first_index+1;i<s;i++) {
2855       if (PetscRealPart(array[i]) > 0.1) {
2856         array[i] += array[old_index];
2857         old_index = i;
2858       }
2859     }
2860   }
2861   ierr = VecRestoreArray(global_vec,&array);CHKERRQ(ierr);
2862   ierr = VecSet(local_vec,0.0);CHKERRQ(ierr);
2863   ierr = VecScatterBegin(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2864   ierr = VecScatterEnd(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2865   /* get global ordering of local dofs */
2866   ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr);
2867   if (local_dofs_mult) {
2868     for (i=0;i<n_local_dofs;i++) {
2869       temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-local_dofs_mult[i];
2870     }
2871   } else {
2872     for (i=0;i<n_local_dofs;i++) {
2873       temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-1;
2874     }
2875   }
2876   ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr);
2877   /* free workspace */
2878   ierr = VecScatterDestroy(&scatter_ctx);CHKERRQ(ierr);
2879   ierr = VecDestroy(&local_vec);CHKERRQ(ierr);
2880   ierr = VecDestroy(&global_vec);CHKERRQ(ierr);
2881   ierr = PetscFree2(dof_sizes,dof_displs);CHKERRQ(ierr);
2882   /* return pointer to global ordering of local dofs */
2883   *global_numbering_subset = temp_global_dofs;
2884   PetscFunctionReturn(0);
2885 }
2886 
2887 #undef __FUNCT__
2888 #define __FUNCT__ "PCBDDCOrthonormalizeVecs"
2889 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[])
2890 {
2891   PetscInt       i,j;
2892   PetscScalar    *alphas;
2893   PetscErrorCode ierr;
2894 
2895   PetscFunctionBegin;
2896   /* this implements stabilized Gram-Schmidt */
2897   ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr);
2898   for (i=0;i<n;i++) {
2899     ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr);
2900     if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); }
2901     for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); }
2902   }
2903   ierr = PetscFree(alphas);CHKERRQ(ierr);
2904   PetscFunctionReturn(0);
2905 }
2906 
2907 #undef __FUNCT__
2908 #define __FUNCT__ "MatISGetSubassemblingPattern"
2909 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt n_subdomains, PetscBool contiguous, IS* is_sends)
2910 {
2911   Mat             subdomain_adj;
2912   IS              new_ranks,ranks_send_to;
2913   MatPartitioning partitioner;
2914   Mat_IS          *matis;
2915   PetscInt        n_neighs,*neighs,*n_shared,**shared;
2916   PetscInt        prank;
2917   PetscMPIInt     size,rank,color;
2918   PetscInt        *xadj,*adjncy,*oldranks;
2919   PetscInt        *adjncy_wgt,*v_wgt,*is_indices,*ranks_send_to_idx;
2920   PetscInt        i,local_size,threshold=0;
2921   PetscErrorCode  ierr;
2922   PetscBool       use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE;
2923   PetscSubcomm    subcomm;
2924 
2925   PetscFunctionBegin;
2926   ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr);
2927   ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr);
2928   ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr);
2929 
2930   /* Get info on mapping */
2931   matis = (Mat_IS*)(mat->data);
2932   ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&local_size);CHKERRQ(ierr);
2933   ierr = ISLocalToGlobalMappingGetInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr);
2934 
2935   /* build local CSR graph of subdomains' connectivity */
2936   ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr);
2937   xadj[0] = 0;
2938   xadj[1] = PetscMax(n_neighs-1,0);
2939   ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr);
2940   ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr);
2941 
2942   if (threshold) {
2943     PetscInt xadj_count = 0;
2944     for (i=1;i<n_neighs;i++) {
2945       if (n_shared[i] > threshold) {
2946         adjncy[xadj_count] = neighs[i];
2947         adjncy_wgt[xadj_count] = n_shared[i];
2948         xadj_count++;
2949       }
2950     }
2951     xadj[1] = xadj_count;
2952   } else {
2953     if (xadj[1]) {
2954       ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr);
2955       ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr);
2956     }
2957   }
2958   ierr = ISLocalToGlobalMappingRestoreInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr);
2959   if (use_square) {
2960     for (i=0;i<xadj[1];i++) {
2961       adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i];
2962     }
2963   }
2964   ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr);
2965 
2966   ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr);
2967 
2968   /*
2969     Restrict work on active processes only.
2970   */
2971   ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr);
2972   ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */
2973   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
2974   ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr);
2975   ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr);
2976   if (color) {
2977     ierr = PetscFree(xadj);CHKERRQ(ierr);
2978     ierr = PetscFree(adjncy);CHKERRQ(ierr);
2979     ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr);
2980   } else {
2981     PetscInt coarsening_ratio;
2982     ierr = MPI_Comm_size(PetscSubcommChild(subcomm),&size);CHKERRQ(ierr);
2983     ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr);
2984     prank = rank;
2985     ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,PetscSubcommChild(subcomm));CHKERRQ(ierr);
2986     /*
2987     for (i=0;i<size;i++) {
2988       PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]);
2989     }
2990     */
2991     for (i=0;i<xadj[1];i++) {
2992       ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr);
2993     }
2994     ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr);
2995     ierr = MatCreateMPIAdj(PetscSubcommChild(subcomm),1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr);
2996     /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */
2997 
2998     /* Partition */
2999     ierr = MatPartitioningCreate(PetscSubcommChild(subcomm),&partitioner);CHKERRQ(ierr);
3000     ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr);
3001     if (use_vwgt) {
3002       ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr);
3003       v_wgt[0] = local_size;
3004       ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr);
3005     }
3006     n_subdomains = PetscMin((PetscInt)size,n_subdomains);
3007     coarsening_ratio = size/n_subdomains;
3008     ierr = MatPartitioningSetNParts(partitioner,n_subdomains);CHKERRQ(ierr);
3009     ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr);
3010     ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr);
3011     /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */
3012 
3013     ierr = ISGetIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr);
3014     if (contiguous) {
3015       ranks_send_to_idx[0] = oldranks[is_indices[0]]; /* contiguos set of processes */
3016     } else {
3017       ranks_send_to_idx[0] = coarsening_ratio*oldranks[is_indices[0]]; /* scattered set of processes */
3018     }
3019     ierr = ISRestoreIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr);
3020     /* clean up */
3021     ierr = PetscFree(oldranks);CHKERRQ(ierr);
3022     ierr = ISDestroy(&new_ranks);CHKERRQ(ierr);
3023     ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr);
3024     ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr);
3025   }
3026   ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr);
3027 
3028   /* assemble parallel IS for sends */
3029   i = 1;
3030   if (color) i=0;
3031   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr);
3032 
3033   /* get back IS */
3034   *is_sends = ranks_send_to;
3035   PetscFunctionReturn(0);
3036 }
3037 
3038 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate;
3039 
3040 #undef __FUNCT__
3041 #define __FUNCT__ "MatISSubassemble"
3042 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, MatReuse reuse, Mat *mat_n, PetscInt nis, IS isarray[])
3043 {
3044   Mat                    local_mat;
3045   Mat_IS                 *matis;
3046   IS                     is_sends_internal;
3047   PetscInt               rows,cols,new_local_rows;
3048   PetscInt               i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals;
3049   PetscBool              ismatis,isdense,newisdense,destroy_mat;
3050   ISLocalToGlobalMapping l2gmap;
3051   PetscInt*              l2gmap_indices;
3052   const PetscInt*        is_indices;
3053   MatType                new_local_type;
3054   /* buffers */
3055   PetscInt               *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs;
3056   PetscInt               *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is;
3057   PetscInt               *recv_buffer_idxs_local;
3058   PetscScalar            *ptr_vals,*send_buffer_vals,*recv_buffer_vals;
3059   /* MPI */
3060   MPI_Comm               comm,comm_n;
3061   PetscSubcomm           subcomm;
3062   PetscMPIInt            n_sends,n_recvs,commsize;
3063   PetscMPIInt            *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is;
3064   PetscMPIInt            *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals;
3065   PetscMPIInt            len,tag_idxs,tag_idxs_is,tag_vals,source_dest;
3066   MPI_Request            *send_req_idxs,*send_req_idxs_is,*send_req_vals;
3067   MPI_Request            *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals;
3068   PetscErrorCode         ierr;
3069 
3070   PetscFunctionBegin;
3071   /* TODO: add missing checks */
3072   PetscValidLogicalCollectiveInt(mat,n_subdomains,3);
3073   PetscValidLogicalCollectiveBool(mat,restrict_comm,4);
3074   PetscValidLogicalCollectiveEnum(mat,reuse,5);
3075   PetscValidLogicalCollectiveInt(mat,nis,7);
3076   ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr);
3077   if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__);
3078   ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr);
3079   ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr);
3080   if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE");
3081   ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr);
3082   if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square");
3083   if (reuse == MAT_REUSE_MATRIX && *mat_n) {
3084     PetscInt mrows,mcols,mnrows,mncols;
3085     ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr);
3086     if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS");
3087     ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr);
3088     ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr);
3089     if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows);
3090     if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols);
3091   }
3092   ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr);
3093   PetscValidLogicalCollectiveInt(mat,bs,0);
3094   /* prepare IS for sending if not provided */
3095   if (!is_sends) {
3096     PetscBool pcontig = PETSC_TRUE;
3097     if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains");
3098     ierr = MatISGetSubassemblingPattern(mat,n_subdomains,pcontig,&is_sends_internal);CHKERRQ(ierr);
3099   } else {
3100     ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr);
3101     is_sends_internal = is_sends;
3102   }
3103 
3104   /* get pointer of MATIS data */
3105   matis = (Mat_IS*)mat->data;
3106 
3107   /* get comm */
3108   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3109 
3110   /* compute number of sends */
3111   ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr);
3112   ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr);
3113 
3114   /* compute number of receives */
3115   ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr);
3116   ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr);
3117   ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr);
3118   ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr);
3119   for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1;
3120   ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr);
3121   ierr = PetscFree(iflags);CHKERRQ(ierr);
3122 
3123   /* restrict comm if requested */
3124   subcomm = 0;
3125   destroy_mat = PETSC_FALSE;
3126   if (restrict_comm) {
3127     PetscMPIInt color,rank,subcommsize;
3128     ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3129     color = 0;
3130     if (n_sends && !n_recvs) color = 1; /* sending only processes will not partecipate in new comm */
3131     ierr = MPI_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
3132     subcommsize = commsize - subcommsize;
3133     /* check if reuse has been requested */
3134     if (reuse == MAT_REUSE_MATRIX) {
3135       if (*mat_n) {
3136         PetscMPIInt subcommsize2;
3137         ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr);
3138         if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2);
3139         comm_n = PetscObjectComm((PetscObject)*mat_n);
3140       } else {
3141         comm_n = PETSC_COMM_SELF;
3142       }
3143     } else { /* MAT_INITIAL_MATRIX */
3144       ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr);
3145       ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr);
3146       ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr);
3147       comm_n = PetscSubcommChild(subcomm);
3148     }
3149     /* flag to destroy *mat_n if not significative */
3150     if (color) destroy_mat = PETSC_TRUE;
3151   } else {
3152     comm_n = comm;
3153   }
3154 
3155   /* prepare send/receive buffers */
3156   ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr);
3157   ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr);
3158   ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr);
3159   ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr);
3160   if (nis) {
3161     ierr = PetscCalloc1(commsize,&ilengths_idxs_is);CHKERRQ(ierr);
3162   }
3163 
3164   /* Get data from local matrices */
3165   if (!isdense) {
3166     SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented");
3167     /* TODO: See below some guidelines on how to prepare the local buffers */
3168     /*
3169        send_buffer_vals should contain the raw values of the local matrix
3170        send_buffer_idxs should contain:
3171        - MatType_PRIVATE type
3172        - PetscInt        size_of_l2gmap
3173        - PetscInt        global_row_indices[size_of_l2gmap]
3174        - PetscInt        all_other_info_which_is_needed_to_compute_preallocation_and_set_values
3175     */
3176   } else {
3177     ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr);
3178     ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&i);CHKERRQ(ierr);
3179     ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr);
3180     send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE;
3181     send_buffer_idxs[1] = i;
3182     ierr = ISLocalToGlobalMappingGetIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr);
3183     ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr);
3184     ierr = ISLocalToGlobalMappingRestoreIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr);
3185     ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr);
3186     for (i=0;i<n_sends;i++) {
3187       ilengths_vals[is_indices[i]] = len*len;
3188       ilengths_idxs[is_indices[i]] = len+2;
3189     }
3190   }
3191   ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr);
3192   /* additional is (if any) */
3193   if (nis) {
3194     PetscMPIInt psum;
3195     PetscInt j;
3196     for (j=0,psum=0;j<nis;j++) {
3197       PetscInt plen;
3198       ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr);
3199       ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr);
3200       psum += len+1; /* indices + lenght */
3201     }
3202     ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr);
3203     for (j=0,psum=0;j<nis;j++) {
3204       PetscInt plen;
3205       const PetscInt *is_array_idxs;
3206       ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr);
3207       send_buffer_idxs_is[psum] = plen;
3208       ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr);
3209       ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr);
3210       ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr);
3211       psum += plen+1; /* indices + lenght */
3212     }
3213     for (i=0;i<n_sends;i++) {
3214       ilengths_idxs_is[is_indices[i]] = psum;
3215     }
3216     ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr);
3217   }
3218 
3219   buf_size_idxs = 0;
3220   buf_size_vals = 0;
3221   buf_size_idxs_is = 0;
3222   for (i=0;i<n_recvs;i++) {
3223     buf_size_idxs += (PetscInt)olengths_idxs[i];
3224     buf_size_vals += (PetscInt)olengths_vals[i];
3225     if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i];
3226   }
3227   ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr);
3228   ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr);
3229   ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr);
3230 
3231   /* get new tags for clean communications */
3232   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr);
3233   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr);
3234   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr);
3235 
3236   /* allocate for requests */
3237   ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr);
3238   ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr);
3239   ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr);
3240   ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr);
3241   ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr);
3242   ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr);
3243 
3244   /* communications */
3245   ptr_idxs = recv_buffer_idxs;
3246   ptr_vals = recv_buffer_vals;
3247   ptr_idxs_is = recv_buffer_idxs_is;
3248   for (i=0;i<n_recvs;i++) {
3249     source_dest = onodes[i];
3250     ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr);
3251     ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr);
3252     ptr_idxs += olengths_idxs[i];
3253     ptr_vals += olengths_vals[i];
3254     if (nis) {
3255       ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr);
3256       ptr_idxs_is += olengths_idxs_is[i];
3257     }
3258   }
3259   for (i=0;i<n_sends;i++) {
3260     ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr);
3261     ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr);
3262     ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr);
3263     if (nis) {
3264       ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr);
3265     }
3266   }
3267   ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr);
3268   ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr);
3269 
3270   /* assemble new l2g map */
3271   ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3272   ptr_idxs = recv_buffer_idxs;
3273   new_local_rows = 0;
3274   for (i=0;i<n_recvs;i++) {
3275     new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
3276     ptr_idxs += olengths_idxs[i];
3277   }
3278   ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr);
3279   ptr_idxs = recv_buffer_idxs;
3280   new_local_rows = 0;
3281   for (i=0;i<n_recvs;i++) {
3282     ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr);
3283     new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
3284     ptr_idxs += olengths_idxs[i];
3285   }
3286   ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr);
3287   ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr);
3288   ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr);
3289 
3290   /* infer new local matrix type from received local matrices type */
3291   /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */
3292   /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */
3293   if (n_recvs) {
3294     MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0];
3295     ptr_idxs = recv_buffer_idxs;
3296     for (i=0;i<n_recvs;i++) {
3297       if ((PetscInt)new_local_type_private != *ptr_idxs) {
3298         new_local_type_private = MATAIJ_PRIVATE;
3299         break;
3300       }
3301       ptr_idxs += olengths_idxs[i];
3302     }
3303     switch (new_local_type_private) {
3304       case MATDENSE_PRIVATE:
3305         if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */
3306           new_local_type = MATSEQAIJ;
3307           bs = 1;
3308         } else { /* if I receive only 1 dense matrix */
3309           new_local_type = MATSEQDENSE;
3310           bs = 1;
3311         }
3312         break;
3313       case MATAIJ_PRIVATE:
3314         new_local_type = MATSEQAIJ;
3315         bs = 1;
3316         break;
3317       case MATBAIJ_PRIVATE:
3318         new_local_type = MATSEQBAIJ;
3319         break;
3320       case MATSBAIJ_PRIVATE:
3321         new_local_type = MATSEQSBAIJ;
3322         break;
3323       default:
3324         SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,__FUNCT__);
3325         break;
3326     }
3327   } else { /* by default, new_local_type is seqdense */
3328     new_local_type = MATSEQDENSE;
3329     bs = 1;
3330   }
3331 
3332   /* create MATIS object if needed */
3333   if (reuse == MAT_INITIAL_MATRIX) {
3334     ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr);
3335     ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,mat_n);CHKERRQ(ierr);
3336   } else {
3337     /* it also destroys the local matrices */
3338     ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr);
3339   }
3340   ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr);
3341   ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr);
3342 
3343   ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3344 
3345   /* Global to local map of received indices */
3346   ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */
3347   ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr);
3348   ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr);
3349 
3350   /* restore attributes -> type of incoming data and its size */
3351   buf_size_idxs = 0;
3352   for (i=0;i<n_recvs;i++) {
3353     recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs];
3354     recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1];
3355     buf_size_idxs += (PetscInt)olengths_idxs[i];
3356   }
3357   ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr);
3358 
3359   /* set preallocation */
3360   ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr);
3361   if (!newisdense) {
3362     PetscInt *new_local_nnz=0;
3363 
3364     ptr_vals = recv_buffer_vals;
3365     ptr_idxs = recv_buffer_idxs_local;
3366     if (n_recvs) {
3367       ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr);
3368     }
3369     for (i=0;i<n_recvs;i++) {
3370       PetscInt j;
3371       if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */
3372         for (j=0;j<*(ptr_idxs+1);j++) {
3373           new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1);
3374         }
3375       } else {
3376         /* TODO */
3377       }
3378       ptr_idxs += olengths_idxs[i];
3379     }
3380     if (new_local_nnz) {
3381       for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows);
3382       ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr);
3383       for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs;
3384       ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr);
3385       for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0);
3386       ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr);
3387     } else {
3388       ierr = MatSetUp(local_mat);CHKERRQ(ierr);
3389     }
3390     ierr = PetscFree(new_local_nnz);CHKERRQ(ierr);
3391   } else {
3392     ierr = MatSetUp(local_mat);CHKERRQ(ierr);
3393   }
3394 
3395   /* set values */
3396   ptr_vals = recv_buffer_vals;
3397   ptr_idxs = recv_buffer_idxs_local;
3398   for (i=0;i<n_recvs;i++) {
3399     if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */
3400       ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
3401       ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr);
3402       ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
3403       ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
3404       ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr);
3405     } else {
3406       /* TODO */
3407     }
3408     ptr_idxs += olengths_idxs[i];
3409     ptr_vals += olengths_vals[i];
3410   }
3411   ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3412   ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3413   ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3414   ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3415   ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr);
3416   ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr);
3417 
3418 #if 0
3419   if (!restrict_comm) { /* check */
3420     Vec       lvec,rvec;
3421     PetscReal infty_error;
3422 
3423     ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr);
3424     ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr);
3425     ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr);
3426     ierr = VecScale(lvec,-1.0);CHKERRQ(ierr);
3427     ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr);
3428     ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr);
3429     ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error);
3430     ierr = VecDestroy(&rvec);CHKERRQ(ierr);
3431     ierr = VecDestroy(&lvec);CHKERRQ(ierr);
3432   }
3433 #endif
3434 
3435   /* assemble new additional is (if any) */
3436   if (nis) {
3437     PetscInt **temp_idxs,*count_is,j,psum;
3438 
3439     ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3440     ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr);
3441     ptr_idxs = recv_buffer_idxs_is;
3442     psum = 0;
3443     for (i=0;i<n_recvs;i++) {
3444       for (j=0;j<nis;j++) {
3445         PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */
3446         count_is[j] += plen; /* increment counting of buffer for j-th IS */
3447         psum += plen;
3448         ptr_idxs += plen+1; /* shift pointer to received data */
3449       }
3450     }
3451     ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr);
3452     ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr);
3453     for (i=1;i<nis;i++) {
3454       temp_idxs[i] = temp_idxs[i-1]+count_is[i-1];
3455     }
3456     ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr);
3457     ptr_idxs = recv_buffer_idxs_is;
3458     for (i=0;i<n_recvs;i++) {
3459       for (j=0;j<nis;j++) {
3460         PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */
3461         ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr);
3462         count_is[j] += plen; /* increment starting point of buffer for j-th IS */
3463         ptr_idxs += plen+1; /* shift pointer to received data */
3464       }
3465     }
3466     for (i=0;i<nis;i++) {
3467       ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr);
3468       ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr);
3469       ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr);
3470     }
3471     ierr = PetscFree(count_is);CHKERRQ(ierr);
3472     ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr);
3473     ierr = PetscFree(temp_idxs);CHKERRQ(ierr);
3474   }
3475   /* free workspace */
3476   ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr);
3477   ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3478   ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr);
3479   ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3480   if (isdense) {
3481     ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr);
3482     ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr);
3483   } else {
3484     /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */
3485   }
3486   if (nis) {
3487     ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3488     ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr);
3489   }
3490   ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr);
3491   ierr = PetscFree(recv_req_vals);CHKERRQ(ierr);
3492   ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr);
3493   ierr = PetscFree(send_req_idxs);CHKERRQ(ierr);
3494   ierr = PetscFree(send_req_vals);CHKERRQ(ierr);
3495   ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr);
3496   ierr = PetscFree(ilengths_vals);CHKERRQ(ierr);
3497   ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr);
3498   ierr = PetscFree(olengths_vals);CHKERRQ(ierr);
3499   ierr = PetscFree(olengths_idxs);CHKERRQ(ierr);
3500   ierr = PetscFree(onodes);CHKERRQ(ierr);
3501   if (nis) {
3502     ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr);
3503     ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr);
3504     ierr = PetscFree(onodes_is);CHKERRQ(ierr);
3505   }
3506   ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr);
3507   if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */
3508     ierr = MatDestroy(mat_n);CHKERRQ(ierr);
3509     for (i=0;i<nis;i++) {
3510       ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr);
3511     }
3512   }
3513   PetscFunctionReturn(0);
3514 }
3515 
3516 /* temporary hack into ksp private data structure */
3517 #include <petsc/private/kspimpl.h>
3518 
3519 #undef __FUNCT__
3520 #define __FUNCT__ "PCBDDCSetUpCoarseSolver"
3521 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals)
3522 {
3523   PC_BDDC                *pcbddc = (PC_BDDC*)pc->data;
3524   PC_IS                  *pcis = (PC_IS*)pc->data;
3525   Mat                    coarse_mat,coarse_mat_is,coarse_submat_dense;
3526   MatNullSpace           CoarseNullSpace=NULL;
3527   ISLocalToGlobalMapping coarse_islg;
3528   IS                     coarse_is,*isarray;
3529   PetscInt               i,im_active=-1,active_procs=-1;
3530   PetscInt               nis,nisdofs,nisneu;
3531   PC                     pc_temp;
3532   PCType                 coarse_pc_type;
3533   KSPType                coarse_ksp_type;
3534   PetscBool              multilevel_requested,multilevel_allowed;
3535   PetscBool              isredundant,isbddc,isnn,coarse_reuse;
3536   Mat                    t_coarse_mat_is;
3537   PetscInt               void_procs,ncoarse_ml,ncoarse_ds,ncoarse;
3538   PetscMPIInt            all_procs;
3539   PetscBool              csin_ml,csin_ds,csin,csin_type_simple,redist;
3540   PetscBool              compute_vecs = PETSC_FALSE;
3541   PetscScalar            *array;
3542   PetscErrorCode         ierr;
3543 
3544   PetscFunctionBegin;
3545   /* Assign global numbering to coarse dofs */
3546   if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */
3547     PetscInt ocoarse_size;
3548     compute_vecs = PETSC_TRUE;
3549     ocoarse_size = pcbddc->coarse_size;
3550     ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr);
3551     ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr);
3552     /* see if we can avoid some work */
3553     if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */
3554       if (ocoarse_size != pcbddc->coarse_size) { /* ...but with different size, so reset it and set reuse flag to false */
3555         ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr);
3556         coarse_reuse = PETSC_FALSE;
3557       } else { /* we can safely reuse already computed coarse matrix */
3558         coarse_reuse = PETSC_TRUE;
3559       }
3560     } else { /* there's no coarse ksp, so we need to create the coarse matrix too */
3561       coarse_reuse = PETSC_FALSE;
3562     }
3563     /* reset any subassembling information */
3564     ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr);
3565     ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
3566   } else { /* primal space is unchanged, so we can reuse coarse matrix */
3567     coarse_reuse = PETSC_TRUE;
3568   }
3569 
3570   /* count "active" (i.e. with positive local size) and "void" processes */
3571   im_active = !!(pcis->n);
3572   ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
3573   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&all_procs);CHKERRQ(ierr);
3574   void_procs = all_procs-active_procs;
3575   csin_type_simple = PETSC_TRUE;
3576   redist = PETSC_FALSE;
3577   if (pcbddc->current_level && void_procs) {
3578     csin_ml = PETSC_TRUE;
3579     ncoarse_ml = void_procs;
3580     csin_ds = PETSC_TRUE;
3581     ncoarse_ds = void_procs;
3582   } else {
3583     csin_ml = PETSC_FALSE;
3584     ncoarse_ml = all_procs;
3585     if (void_procs) {
3586       csin_ds = PETSC_TRUE;
3587       ncoarse_ds = void_procs;
3588       csin_type_simple = PETSC_FALSE;
3589     } else {
3590       if (pcbddc->redistribute_coarse && pcbddc->redistribute_coarse < all_procs) {
3591         csin_ds = PETSC_TRUE;
3592         ncoarse_ds = pcbddc->redistribute_coarse;
3593         redist = PETSC_TRUE;
3594       } else {
3595         csin_ds = PETSC_FALSE;
3596         ncoarse_ds = all_procs;
3597       }
3598     }
3599   }
3600 
3601   /*
3602     test if we can go multilevel: three conditions must be satisfied:
3603     - we have not exceeded the number of levels requested
3604     - we can actually subassemble the active processes
3605     - we can find a suitable number of MPI processes where we can place the subassembled problem
3606   */
3607   multilevel_allowed = PETSC_FALSE;
3608   multilevel_requested = PETSC_FALSE;
3609   if (pcbddc->current_level < pcbddc->max_levels) {
3610     multilevel_requested = PETSC_TRUE;
3611     if (active_procs/pcbddc->coarsening_ratio < 2 || ncoarse_ml/pcbddc->coarsening_ratio < 2) {
3612       multilevel_allowed = PETSC_FALSE;
3613     } else {
3614       multilevel_allowed = PETSC_TRUE;
3615     }
3616   }
3617   /* determine number of process partecipating to coarse solver */
3618   if (multilevel_allowed) {
3619     ncoarse = ncoarse_ml;
3620     csin = csin_ml;
3621   } else {
3622     ncoarse = ncoarse_ds;
3623     csin = csin_ds;
3624   }
3625 
3626   /* creates temporary l2gmap and IS for coarse indexes */
3627   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr);
3628   ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr);
3629 
3630   /* creates temporary MATIS object for coarse matrix */
3631   ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr);
3632   ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr);
3633   ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr);
3634   ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr);
3635 #if 0
3636   {
3637     PetscViewer viewer;
3638     char filename[256];
3639     sprintf(filename,"local_coarse_mat%d.m",PetscGlobalRank);
3640     ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr);
3641     ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
3642     ierr = MatView(coarse_submat_dense,viewer);CHKERRQ(ierr);
3643     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
3644   }
3645 #endif
3646   ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,&t_coarse_mat_is);CHKERRQ(ierr);
3647   ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr);
3648   ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3649   ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3650   ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr);
3651 
3652   /* compute dofs splitting and neumann boundaries for coarse dofs */
3653   if (multilevel_allowed && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal) ) { /* protects from unneded computations */
3654     PetscInt               *tidxs,*tidxs2,nout,tsize,i;
3655     const PetscInt         *idxs;
3656     ISLocalToGlobalMapping tmap;
3657 
3658     /* create map between primal indices (in local representative ordering) and local primal numbering */
3659     ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr);
3660     /* allocate space for temporary storage */
3661     ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr);
3662     ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr);
3663     /* allocate for IS array */
3664     nisdofs = pcbddc->n_ISForDofsLocal;
3665     nisneu = !!pcbddc->NeumannBoundariesLocal;
3666     nis = nisdofs + nisneu;
3667     ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr);
3668     /* dofs splitting */
3669     for (i=0;i<nisdofs;i++) {
3670       /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */
3671       ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr);
3672       ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr);
3673       ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr);
3674       ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr);
3675       ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr);
3676       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->ISForDofsLocal[i]),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr);
3677       /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */
3678     }
3679     /* neumann boundaries */
3680     if (pcbddc->NeumannBoundariesLocal) {
3681       /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */
3682       ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr);
3683       ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr);
3684       ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr);
3685       ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr);
3686       ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr);
3687       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->NeumannBoundariesLocal),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr);
3688       /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */
3689     }
3690     /* free memory */
3691     ierr = PetscFree(tidxs);CHKERRQ(ierr);
3692     ierr = PetscFree(tidxs2);CHKERRQ(ierr);
3693     ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr);
3694   } else {
3695     nis = 0;
3696     nisdofs = 0;
3697     nisneu = 0;
3698     isarray = NULL;
3699   }
3700   /* destroy no longer needed map */
3701   ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr);
3702 
3703   /* restrict on coarse candidates (if needed) */
3704   coarse_mat_is = NULL;
3705   if (csin) {
3706     if (!pcbddc->coarse_subassembling_init ) { /* creates subassembling init pattern if not present */
3707       if (redist) {
3708         PetscMPIInt rank;
3709         PetscInt spc,n_spc_p1,dest[1];
3710 
3711         ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr);
3712         spc = all_procs/pcbddc->redistribute_coarse;
3713         n_spc_p1 = all_procs%pcbddc->redistribute_coarse;
3714         if (rank > n_spc_p1*(spc+1)-1) {
3715           dest[0] = n_spc_p1+(rank-(n_spc_p1*(spc+1)))/spc;
3716         } else {
3717           dest[0] = rank/(spc+1);
3718         }
3719         ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),1,dest,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
3720       } else {
3721         PetscInt j,tissize,*nisindices;
3722         PetscInt *coarse_candidates;
3723         const PetscInt* tisindices;
3724         /* get coarse candidates' ranks in pc communicator */
3725         ierr = PetscMalloc1(all_procs,&coarse_candidates);CHKERRQ(ierr);
3726         ierr = MPI_Allgather(&im_active,1,MPIU_INT,coarse_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
3727         for (i=0,j=0;i<all_procs;i++) {
3728           if (!coarse_candidates[i]) {
3729             coarse_candidates[j]=i;
3730             j++;
3731           }
3732         }
3733         if (j < ncoarse) SETERRQ2(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen! %d < %d",j,ncoarse);
3734         /* get a suitable subassembling pattern */
3735         if (csin_type_simple) {
3736           PetscMPIInt rank;
3737           PetscInt    issize,isidx;
3738           ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr);
3739           if (im_active) {
3740             issize = 1;
3741             isidx = (PetscInt)rank;
3742           } else {
3743             issize = 0;
3744             isidx = -1;
3745           }
3746           ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),issize,&isidx,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
3747         } else {
3748           ierr = MatISGetSubassemblingPattern(t_coarse_mat_is,ncoarse,PETSC_TRUE,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
3749         }
3750         if (pcbddc->dbg_flag) {
3751           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
3752           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init (before shift)\n");CHKERRQ(ierr);
3753           ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr);
3754           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse candidates\n");CHKERRQ(ierr);
3755           for (i=0;i<j;i++) {
3756             ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"%d ",coarse_candidates[i]);CHKERRQ(ierr);
3757           }
3758           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"\n");CHKERRQ(ierr);
3759           ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3760         }
3761         /* shift the pattern on coarse candidates */
3762         ierr = ISGetLocalSize(pcbddc->coarse_subassembling_init,&tissize);CHKERRQ(ierr);
3763         ierr = ISGetIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr);
3764         ierr = PetscMalloc1(tissize,&nisindices);CHKERRQ(ierr);
3765         for (i=0;i<tissize;i++) nisindices[i] = coarse_candidates[tisindices[i]];
3766         ierr = ISRestoreIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr);
3767         ierr = ISGeneralSetIndices(pcbddc->coarse_subassembling_init,tissize,nisindices,PETSC_OWN_POINTER);CHKERRQ(ierr);
3768         ierr = PetscFree(coarse_candidates);CHKERRQ(ierr);
3769       }
3770     }
3771     if (pcbddc->dbg_flag) {
3772       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
3773       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init\n");CHKERRQ(ierr);
3774       ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr);
3775       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3776     }
3777     /* get temporary coarse mat in IS format restricted on coarse procs (plus additional index sets of isarray) */
3778     ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr);
3779   } else {
3780     if (pcbddc->dbg_flag) {
3781       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
3782       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init not needed\n");CHKERRQ(ierr);
3783       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3784     }
3785     ierr = PetscObjectReference((PetscObject)t_coarse_mat_is);CHKERRQ(ierr);
3786     coarse_mat_is = t_coarse_mat_is;
3787   }
3788 
3789   /* create local to global scatters for coarse problem */
3790   if (compute_vecs) {
3791     PetscInt lrows;
3792     ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr);
3793     if (coarse_mat_is) {
3794       ierr = MatGetLocalSize(coarse_mat_is,&lrows,NULL);CHKERRQ(ierr);
3795     } else {
3796       lrows = 0;
3797     }
3798     ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr);
3799     ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr);
3800     ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr);
3801     ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
3802     ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
3803   }
3804   ierr = ISDestroy(&coarse_is);CHKERRQ(ierr);
3805   ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr);
3806 
3807   /* set defaults for coarse KSP and PC */
3808   if (multilevel_allowed) {
3809     coarse_ksp_type = KSPRICHARDSON;
3810     coarse_pc_type = PCBDDC;
3811   } else {
3812     coarse_ksp_type = KSPPREONLY;
3813     coarse_pc_type = PCREDUNDANT;
3814   }
3815 
3816   /* print some info if requested */
3817   if (pcbddc->dbg_flag) {
3818     if (!multilevel_allowed) {
3819       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
3820       if (multilevel_requested) {
3821         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr);
3822       } else if (pcbddc->max_levels) {
3823         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr);
3824       }
3825       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3826     }
3827   }
3828 
3829   /* create the coarse KSP object only once with defaults */
3830   if (coarse_mat_is) {
3831     MatReuse coarse_mat_reuse;
3832     PetscViewer dbg_viewer = NULL;
3833     if (pcbddc->dbg_flag) {
3834       dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat_is));
3835       ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr);
3836     }
3837     if (!pcbddc->coarse_ksp) {
3838       char prefix[256],str_level[16];
3839       size_t len;
3840       ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat_is),&pcbddc->coarse_ksp);CHKERRQ(ierr);
3841       ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr);
3842       ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr);
3843       ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr);
3844       ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat_is,coarse_mat_is);CHKERRQ(ierr);
3845       ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr);
3846       ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr);
3847       ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr);
3848       ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr);
3849       /* prefix */
3850       ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr);
3851       ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr);
3852       if (!pcbddc->current_level) {
3853         ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
3854         ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr);
3855       } else {
3856         ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr);
3857         if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */
3858         if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */
3859         ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr);
3860         sprintf(str_level,"l%d_",(int)(pcbddc->current_level));
3861         ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr);
3862       }
3863       ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr);
3864       /* allow user customization */
3865       ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr);
3866       ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
3867     }
3868 
3869     /* get some info after set from options */
3870     ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr);
3871     ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr);
3872     ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr);
3873     ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr);
3874     if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */
3875       ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr);
3876       isbddc = PETSC_FALSE;
3877     }
3878     if (isredundant) {
3879       KSP inner_ksp;
3880       PC inner_pc;
3881       ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr);
3882       ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr);
3883       ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr);
3884     }
3885 
3886     /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */
3887     ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr);
3888     ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr);
3889     ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr);
3890     if (nisdofs) {
3891       ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr);
3892       for (i=0;i<nisdofs;i++) {
3893         ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr);
3894       }
3895     }
3896     if (nisneu) {
3897       ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr);
3898       ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr);
3899     }
3900 
3901     /* assemble coarse matrix */
3902     if (coarse_reuse) {
3903       ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr);
3904       ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr);
3905       coarse_mat_reuse = MAT_REUSE_MATRIX;
3906     } else {
3907       coarse_mat_reuse = MAT_INITIAL_MATRIX;
3908     }
3909     if (isbddc || isnn) {
3910       if (pcbddc->coarsening_ratio > 1) {
3911         if (!pcbddc->coarse_subassembling) { /* subassembling info is not present */
3912           ierr = MatISGetSubassemblingPattern(coarse_mat_is,active_procs/pcbddc->coarsening_ratio,PETSC_TRUE,&pcbddc->coarse_subassembling);CHKERRQ(ierr);
3913           if (pcbddc->dbg_flag) {
3914             ierr = PetscViewerASCIIPrintf(dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
3915             ierr = PetscViewerASCIIPrintf(dbg_viewer,"Subassembling pattern\n");CHKERRQ(ierr);
3916             ierr = ISView(pcbddc->coarse_subassembling,dbg_viewer);CHKERRQ(ierr);
3917             ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr);
3918           }
3919         }
3920         ierr = MatISSubassemble(coarse_mat_is,pcbddc->coarse_subassembling,0,PETSC_FALSE,coarse_mat_reuse,&coarse_mat,0,NULL);CHKERRQ(ierr);
3921       } else {
3922         ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr);
3923         coarse_mat = coarse_mat_is;
3924       }
3925     } else {
3926       ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr);
3927     }
3928     ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr);
3929 
3930     /* propagate symmetry info to coarse matrix */
3931     ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pcbddc->issym);CHKERRQ(ierr);
3932     ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
3933 
3934     /* set operators */
3935     ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr);
3936     if (pcbddc->dbg_flag) {
3937       ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr);
3938     }
3939   } else { /* processes non partecipating to coarse solver (if any) */
3940     coarse_mat = 0;
3941   }
3942   ierr = PetscFree(isarray);CHKERRQ(ierr);
3943 #if 0
3944   {
3945     PetscViewer viewer;
3946     char filename[256];
3947     sprintf(filename,"coarse_mat.m");
3948     ierr = PetscViewerASCIIOpen(PETSC_COMM_WORLD,filename,&viewer);CHKERRQ(ierr);
3949     ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
3950     ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr);
3951     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
3952   }
3953 #endif
3954 
3955   /* Compute coarse null space (special handling by BDDC only) */
3956   if (pcbddc->NullSpace) {
3957     ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr);
3958   }
3959 
3960   if (pcbddc->coarse_ksp) {
3961     Vec crhs,csol;
3962     PetscBool ispreonly;
3963     if (CoarseNullSpace) {
3964       if (isbddc) {
3965         ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr);
3966       } else {
3967         ierr = MatSetNullSpace(coarse_mat,CoarseNullSpace);CHKERRQ(ierr);
3968       }
3969     }
3970     /* setup coarse ksp */
3971     ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr);
3972     ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr);
3973     ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr);
3974     /* hack */
3975     if (!csol) {
3976       ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr);
3977     }
3978     if (!crhs) {
3979       ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr);
3980     }
3981     /* Check coarse problem if in debug mode or if solving with an iterative method */
3982     ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr);
3983     if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) {
3984       KSP       check_ksp;
3985       KSPType   check_ksp_type;
3986       PC        check_pc;
3987       Vec       check_vec,coarse_vec;
3988       PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0;
3989       PetscInt  its;
3990       PetscBool compute_eigs;
3991       PetscReal *eigs_r,*eigs_c;
3992       PetscInt  neigs;
3993       const char *prefix;
3994 
3995       /* Create ksp object suitable for estimation of extreme eigenvalues */
3996       ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr);
3997       ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr);
3998       ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr);
3999       ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr);
4000       if (ispreonly) {
4001         check_ksp_type = KSPPREONLY;
4002         compute_eigs = PETSC_FALSE;
4003       } else {
4004         check_ksp_type = KSPGMRES;
4005         compute_eigs = PETSC_TRUE;
4006       }
4007       ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr);
4008       ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr);
4009       ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr);
4010       ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr);
4011       ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr);
4012       ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr);
4013       ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr);
4014       ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr);
4015       ierr = KSPSetUp(check_ksp);CHKERRQ(ierr);
4016       ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr);
4017       ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr);
4018       /* create random vec */
4019       ierr = KSPGetSolution(pcbddc->coarse_ksp,&coarse_vec);CHKERRQ(ierr);
4020       ierr = VecDuplicate(coarse_vec,&check_vec);CHKERRQ(ierr);
4021       ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr);
4022       if (CoarseNullSpace) {
4023         ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr);
4024       }
4025       ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr);
4026       /* solve coarse problem */
4027       ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr);
4028       if (CoarseNullSpace) {
4029         ierr = MatNullSpaceRemove(CoarseNullSpace,coarse_vec);CHKERRQ(ierr);
4030       }
4031       /* set eigenvalue estimation if preonly has not been requested */
4032       if (compute_eigs) {
4033         ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr);
4034         ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr);
4035         ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr);
4036         lambda_max = eigs_r[neigs-1];
4037         lambda_min = eigs_r[0];
4038         if (pcbddc->use_coarse_estimates) {
4039           if (lambda_max>lambda_min) {
4040             ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max,lambda_min);CHKERRQ(ierr);
4041             ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr);
4042           }
4043         }
4044       }
4045 
4046       /* check coarse problem residual error */
4047       if (pcbddc->dbg_flag) {
4048         PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp));
4049         ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr);
4050         ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr);
4051         ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr);
4052         ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr);
4053         ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr);
4054         ierr = VecDestroy(&check_vec);CHKERRQ(ierr);
4055         ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (%d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr);
4056         ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr);
4057         ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr);
4058         ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error   : %1.6e\n",infty_error);CHKERRQ(ierr);
4059         ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr);
4060         if (compute_eigs) {
4061           PetscReal lambda_max_s,lambda_min_s;
4062           ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr);
4063           ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr);
4064           ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr);
4065           ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr);
4066           for (i=0;i<neigs;i++) {
4067             ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr);
4068           }
4069         }
4070         ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr);
4071         ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr);
4072       }
4073       ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr);
4074       if (compute_eigs) {
4075         ierr = PetscFree(eigs_r);CHKERRQ(ierr);
4076         ierr = PetscFree(eigs_c);CHKERRQ(ierr);
4077       }
4078     }
4079   }
4080   /* print additional info */
4081   if (pcbddc->dbg_flag) {
4082     /* waits until all processes reaches this point */
4083     ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr);
4084     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr);
4085     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4086   }
4087 
4088   /* free memory */
4089   ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr);
4090   ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr);
4091   PetscFunctionReturn(0);
4092 }
4093 
4094 #undef __FUNCT__
4095 #define __FUNCT__ "PCBDDCComputePrimalNumbering"
4096 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n)
4097 {
4098   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
4099   PC_IS*         pcis = (PC_IS*)pc->data;
4100   Mat_IS*        matis = (Mat_IS*)pc->pmat->data;
4101   PetscInt       i,coarse_size;
4102   PetscInt       *local_primal_indices;
4103   PetscErrorCode ierr;
4104 
4105   PetscFunctionBegin;
4106   /* Compute global number of coarse dofs */
4107   if (!pcbddc->primal_indices_local_idxs && pcbddc->local_primal_size) {
4108     SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Local primal indices have not been created");
4109   }
4110   ierr = PCBDDCSubsetNumbering(PetscObjectComm((PetscObject)(pc->pmat)),matis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,NULL,&coarse_size,&local_primal_indices);CHKERRQ(ierr);
4111 
4112   /* check numbering */
4113   if (pcbddc->dbg_flag) {
4114     PetscScalar coarsesum,*array;
4115     PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE;
4116 
4117     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4118     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4119     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr);
4120     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
4121     ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr);
4122     for (i=0;i<pcbddc->local_primal_size;i++) {
4123       ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
4124     }
4125     ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
4126     ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
4127     ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
4128     ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
4129     ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
4130     ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
4131     ierr = VecScatterEnd(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
4132     ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr);
4133     for (i=0;i<pcis->n;i++) {
4134       if (array[i] == 1.0) {
4135         set_error = PETSC_TRUE;
4136         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a single process!\n",PetscGlobalRank,i);CHKERRQ(ierr);
4137       }
4138     }
4139     ierr = MPI_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
4140     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4141     for (i=0;i<pcis->n;i++) {
4142       if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]);
4143     }
4144     ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr);
4145     ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
4146     ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
4147     ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
4148     ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr);
4149     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr);
4150     if (pcbddc->dbg_flag > 1 || set_error_reduced) {
4151       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr);
4152       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4153       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr);
4154       for (i=0;i<pcbddc->local_primal_size;i++) {
4155         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i]);CHKERRQ(ierr);
4156       }
4157       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4158     }
4159     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4160     if (set_error_reduced) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed");
4161   }
4162   /* get back data */
4163   *coarse_size_n = coarse_size;
4164   *local_primal_indices_n = local_primal_indices;
4165   PetscFunctionReturn(0);
4166 }
4167 
4168 #undef __FUNCT__
4169 #define __FUNCT__ "PCBDDCGlobalToLocal"
4170 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis)
4171 {
4172   IS             localis_t;
4173   PetscInt       i,lsize,*idxs,n;
4174   PetscScalar    *vals;
4175   PetscErrorCode ierr;
4176 
4177   PetscFunctionBegin;
4178   /* get indices in local ordering exploiting local to global map */
4179   ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr);
4180   ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr);
4181   for (i=0;i<lsize;i++) vals[i] = 1.0;
4182   ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr);
4183   ierr = VecSet(gwork,0.0);CHKERRQ(ierr);
4184   ierr = VecSet(lwork,0.0);CHKERRQ(ierr);
4185   if (idxs) { /* multilevel guard */
4186     ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr);
4187   }
4188   ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr);
4189   ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr);
4190   ierr = PetscFree(vals);CHKERRQ(ierr);
4191   ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr);
4192   /* now compute set in local ordering */
4193   ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
4194   ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
4195   ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr);
4196   ierr = VecGetSize(lwork,&n);CHKERRQ(ierr);
4197   for (i=0,lsize=0;i<n;i++) {
4198     if (PetscRealPart(vals[i]) > 0.5) {
4199       lsize++;
4200     }
4201   }
4202   ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr);
4203   for (i=0,lsize=0;i<n;i++) {
4204     if (PetscRealPart(vals[i]) > 0.5) {
4205       idxs[lsize++] = i;
4206     }
4207   }
4208   ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr);
4209   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr);
4210   *localis = localis_t;
4211   PetscFunctionReturn(0);
4212 }
4213 
4214 /* the next two functions will be called in KSPMatMult if a change of basis has been requested */
4215 #undef __FUNCT__
4216 #define __FUNCT__ "PCBDDCMatMult_Private"
4217 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y)
4218 {
4219   PCBDDCChange_ctx change_ctx;
4220   PetscErrorCode   ierr;
4221 
4222   PetscFunctionBegin;
4223   ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr);
4224   ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr);
4225   ierr = MatMult(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr);
4226   ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr);
4227   PetscFunctionReturn(0);
4228 }
4229 
4230 #undef __FUNCT__
4231 #define __FUNCT__ "PCBDDCMatMultTranspose_Private"
4232 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y)
4233 {
4234   PCBDDCChange_ctx change_ctx;
4235   PetscErrorCode   ierr;
4236 
4237   PetscFunctionBegin;
4238   ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr);
4239   ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr);
4240   ierr = MatMultTranspose(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr);
4241   ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr);
4242   PetscFunctionReturn(0);
4243 }
4244