xref: /petsc/src/ksp/pc/impls/bddc/bddcprivate.c (revision ae5cfb4a3679d07b6904bd2c794eee8dc00ed47b)
1 #include <../src/ksp/pc/impls/bddc/bddc.h>
2 #include <../src/ksp/pc/impls/bddc/bddcprivate.h>
3 #include <petscblaslapack.h>
4 
5 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y);
6 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y);
7 
8 #undef __FUNCT__
9 #define __FUNCT__ "PCBDDCSetUpSolvers"
10 PetscErrorCode PCBDDCSetUpSolvers(PC pc)
11 {
12   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
13   PetscScalar    *coarse_submat_vals;
14   PetscErrorCode ierr;
15 
16   PetscFunctionBegin;
17   /* Compute matrix after change of basis and extract local submatrices */
18   ierr = PCBDDCSetUpLocalMatrices(pc);CHKERRQ(ierr);
19 
20   /* Setup local scatters R_to_B and (optionally) R_to_D */
21   /* PCBDDCSetUpLocalWorkVectors and PCBDDCSetUpLocalMatrices should be called first! */
22   ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr);
23 
24   /* Setup local solvers ksp_D and ksp_R */
25   /* PCBDDCSetUpLocalScatters should be called first! */
26   ierr = PCBDDCSetUpLocalSolvers(pc);CHKERRQ(ierr);
27 
28   /* Change global null space passed in by the user if change of basis has been requested */
29   if (pcbddc->NullSpace && pcbddc->ChangeOfBasisMatrix) {
30     ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr);
31   }
32 
33   /*
34      Setup local correction and local part of coarse basis.
35      Gives back the dense local part of the coarse matrix in column major ordering
36   */
37   ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr);
38 
39   /* Compute total number of coarse nodes and setup coarse solver */
40   ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr);
41 
42   /* free */
43   ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr);
44   PetscFunctionReturn(0);
45 }
46 
47 #undef __FUNCT__
48 #define __FUNCT__ "PCBDDCResetCustomization"
49 PetscErrorCode PCBDDCResetCustomization(PC pc)
50 {
51   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
52   PetscErrorCode ierr;
53 
54   PetscFunctionBegin;
55   ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr);
56   ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr);
57   ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr);
58   ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr);
59   ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr);
60   ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr);
61   ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr);
62   ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr);
63   ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr);
64   ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr);
65   ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr);
66   PetscFunctionReturn(0);
67 }
68 
69 #undef __FUNCT__
70 #define __FUNCT__ "PCBDDCResetTopography"
71 PetscErrorCode PCBDDCResetTopography(PC pc)
72 {
73   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
74   PetscErrorCode ierr;
75 
76   PetscFunctionBegin;
77   ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr);
78   ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
79   ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
80   ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr);
81   PetscFunctionReturn(0);
82 }
83 
84 #undef __FUNCT__
85 #define __FUNCT__ "PCBDDCResetSolvers"
86 PetscErrorCode PCBDDCResetSolvers(PC pc)
87 {
88   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
89   PetscErrorCode ierr;
90 
91   PetscFunctionBegin;
92   ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr);
93   ierr = VecDestroy(&pcbddc->coarse_rhs);CHKERRQ(ierr);
94   ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr);
95   ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr);
96   ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr);
97   ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr);
98   ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr);
99   ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr);
100   ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr);
101   ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr);
102   ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr);
103   ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr);
104   ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr);
105   ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr);
106   ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr);
107   ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
108   ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr);
109   ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr);
110   ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr);
111   ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
112   ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
113   ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr);
114   ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr);
115   ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
116   PetscFunctionReturn(0);
117 }
118 
119 #undef __FUNCT__
120 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors"
121 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc)
122 {
123   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
124   PC_IS          *pcis = (PC_IS*)pc->data;
125   VecType        impVecType;
126   PetscInt       n_constraints,n_R,old_size;
127   PetscErrorCode ierr;
128 
129   PetscFunctionBegin;
130   if (!pcbddc->ConstraintMatrix) {
131     SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created");
132   }
133   /* get sizes */
134   n_constraints = pcbddc->local_primal_size - pcbddc->n_actual_vertices;
135   n_R = pcis->n-pcbddc->n_actual_vertices;
136   ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr);
137   /* local work vectors (try to avoid unneeded work)*/
138   /* R nodes */
139   old_size = -1;
140   if (pcbddc->vec1_R) {
141     ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr);
142   }
143   if (n_R != old_size) {
144     ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr);
145     ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr);
146     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr);
147     ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr);
148     ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr);
149     ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr);
150   }
151   /* local primal dofs */
152   old_size = -1;
153   if (pcbddc->vec1_P) {
154     ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr);
155   }
156   if (pcbddc->local_primal_size != old_size) {
157     ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr);
158     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr);
159     ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr);
160     ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr);
161   }
162   /* local explicit constraints */
163   old_size = -1;
164   if (pcbddc->vec1_C) {
165     ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr);
166   }
167   if (n_constraints && n_constraints != old_size) {
168     ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr);
169     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr);
170     ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr);
171     ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr);
172   }
173   PetscFunctionReturn(0);
174 }
175 
176 #undef __FUNCT__
177 #define __FUNCT__ "PCBDDCSetUpCorrection"
178 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n)
179 {
180   PetscErrorCode         ierr;
181   /* pointers to pcis and pcbddc */
182   PC_IS*                 pcis = (PC_IS*)pc->data;
183   PC_BDDC*               pcbddc = (PC_BDDC*)pc->data;
184   /* submatrices of local problem */
185   Mat                    A_RV,A_VR,A_VV;
186   /* working matrices */
187   Mat                    M1,M2,M3,C_CR;
188   /* working vectors */
189   Vec                    vec1_C,vec2_C,vec1_V,vec2_V;
190   /* additional working stuff */
191   IS                     is_aux;
192   PetscScalar            *coarse_submat_vals; /* TODO: use a PETSc matrix */
193   const PetscScalar      *array,*row_cmat_values;
194   const PetscInt         *row_cmat_indices,*idx_R_local;
195   PetscInt               *idx_V_B,*auxindices;
196   PetscInt               n_vertices,n_constraints,size_of_constraint;
197   PetscInt               i,j,n_R,n_D,n_B;
198   PetscBool              unsymmetric_check;
199   /* matrix type (vector type propagated downstream from vec1_C and local matrix type) */
200   MatType                impMatType;
201   /* some shortcuts to scalars */
202   PetscScalar            zero=0.0,one=1.0,m_one=-1.0;
203   /* for debugging purposes */
204   PetscReal              *coarsefunctions_errors,*constraints_errors;
205 
206   PetscFunctionBegin;
207   /* get number of vertices (corners plus constraints with change of basis)
208      pcbddc->n_actual_vertices stores the actual number of vertices, pcbddc->n_vertices the number of corners computed */
209   n_vertices = pcbddc->n_actual_vertices;
210   n_constraints = pcbddc->local_primal_size-n_vertices;
211   /* Set Non-overlapping dimensions */
212   n_B = pcis->n_B; n_D = pcis->n - n_B;
213   n_R = pcis->n-n_vertices;
214 
215   /* Set types for local objects needed by BDDC precondtioner */
216   impMatType = MATSEQDENSE;
217 
218   /* Allocating some extra storage just to be safe */
219   ierr = PetscMalloc (pcis->n*sizeof(PetscInt),&auxindices);CHKERRQ(ierr);
220   for (i=0;i<pcis->n;i++) auxindices[i]=i;
221 
222   /* vertices in boundary numbering */
223   ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr);
224   ierr = ISGlobalToLocalMappingApply(pcbddc->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->primal_indices_local_idxs,&i,idx_V_B);CHKERRQ(ierr);
225   if (i != n_vertices) {
226     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i);
227   }
228 
229   /* Precompute stuffs needed for preprocessing and application of BDDC*/
230   if (n_constraints) {
231     /* see if we can save some allocations */
232     if (pcbddc->local_auxmat2) {
233       PetscInt on_R,on_constraints;
234       ierr = MatGetSize(pcbddc->local_auxmat2,&on_R,&on_constraints);CHKERRQ(ierr);
235       if (on_R != n_R || on_constraints != n_constraints) {
236         ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr);
237         ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr);
238       }
239     }
240     /* work vectors */
241     ierr = VecDuplicate(pcbddc->vec1_C,&vec1_C);CHKERRQ(ierr);
242     ierr = VecDuplicate(pcbddc->vec1_C,&vec2_C);CHKERRQ(ierr);
243     /* auxiliary matrices */
244     if (!pcbddc->local_auxmat2) {
245       ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->local_auxmat2);CHKERRQ(ierr);
246       ierr = MatSetSizes(pcbddc->local_auxmat2,n_R,n_constraints,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
247       ierr = MatSetType(pcbddc->local_auxmat2,impMatType);CHKERRQ(ierr);
248       ierr = MatSetUp(pcbddc->local_auxmat2);CHKERRQ(ierr);
249     }
250 
251     /* Extract constraints on R nodes: C_{CR}  */
252     ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr);
253     ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr);
254     ierr = ISDestroy(&is_aux);CHKERRQ(ierr);
255 
256     /* Assemble local_auxmat2 = - A_{RR}^{-1} C^T_{CR} needed by BDDC application */
257     for (i=0;i<n_constraints;i++) {
258       ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr);
259       /* Get row of constraint matrix in R numbering */
260       ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr);
261       ierr = VecSetValues(pcbddc->vec1_R,size_of_constraint,row_cmat_indices,row_cmat_values,INSERT_VALUES);CHKERRQ(ierr);
262       ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr);
263       ierr = VecAssemblyBegin(pcbddc->vec1_R);CHKERRQ(ierr);
264       ierr = VecAssemblyEnd(pcbddc->vec1_R);CHKERRQ(ierr);
265       /* Solve for row of constraint matrix in R numbering */
266       ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
267       /* Set values in local_auxmat2 */
268       ierr = VecGetArrayRead(pcbddc->vec2_R,&array);CHKERRQ(ierr);
269       ierr = MatSetValues(pcbddc->local_auxmat2,n_R,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
270       ierr = VecRestoreArrayRead(pcbddc->vec2_R,&array);CHKERRQ(ierr);
271     }
272     ierr = MatAssemblyBegin(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
273     ierr = MatAssemblyEnd(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
274     ierr = MatScale(pcbddc->local_auxmat2,m_one);CHKERRQ(ierr);
275 
276     /* Assemble explicitly M1 = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} needed in preproc  */
277     ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr);
278     ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr);
279     ierr = MatCreate(PETSC_COMM_SELF,&M1);CHKERRQ(ierr);
280     ierr = MatSetSizes(M1,n_constraints,n_constraints,n_constraints,n_constraints);CHKERRQ(ierr);
281     ierr = MatSetType(M1,impMatType);CHKERRQ(ierr);
282     ierr = MatSetUp(M1);CHKERRQ(ierr);
283     ierr = MatDuplicate(M1,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr);
284     ierr = MatZeroEntries(M2);CHKERRQ(ierr);
285     ierr = VecSet(vec1_C,m_one);CHKERRQ(ierr);
286     ierr = MatDiagonalSet(M2,vec1_C,INSERT_VALUES);CHKERRQ(ierr);
287     ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr);
288     ierr = MatDestroy(&M2);CHKERRQ(ierr);
289     ierr = MatDestroy(&M3);CHKERRQ(ierr);
290     /* Assemble local_auxmat1 = M1*C_{CR} needed by BDDC application in KSP and in preproc */
291     if (!pcbddc->local_auxmat1) {
292       ierr = MatMatMult(M1,C_CR,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr);
293     } else {
294       ierr = MatMatMult(M1,C_CR,MAT_REUSE_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr);
295     }
296   }
297 
298   /* Get submatrices from subdomain matrix */
299   if (n_vertices) {
300     PetscInt ibs,mbs;
301     PetscBool issbaij;
302     Mat newmat;
303 
304     ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr);
305     ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr);
306     ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr);
307     if (ibs != mbs) { /* need to convert to SEQAIJ */
308       ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
309       ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
310       ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
311       ierr = MatGetSubMatrix(newmat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr);
312       ierr = MatDestroy(&newmat);CHKERRQ(ierr);
313     } else {
314       /* this is safe */
315       ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr);
316       ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
317       if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */
318         ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
319         /* which of the two approaches is faster? */
320         /* ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
321         ierr = MatCreateTranspose(A_RV,&A_VR);CHKERRQ(ierr);*/
322         ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
323         ierr = MatCreateTranspose(A_VR,&A_RV);CHKERRQ(ierr);
324         ierr = MatDestroy(&newmat);CHKERRQ(ierr);
325       } else {
326         ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
327         ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
328       }
329     }
330     ierr = MatCreateVecs(A_RV,&vec1_V,NULL);CHKERRQ(ierr);
331     ierr = VecDuplicate(vec1_V,&vec2_V);CHKERRQ(ierr);
332     ierr = ISDestroy(&is_aux);CHKERRQ(ierr);
333   }
334 
335   /* Matrix of coarse basis functions (local) */
336   if (pcbddc->coarse_phi_B) {
337     PetscInt on_B,on_primal;
338     ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr);
339     if (on_B != n_B || on_primal != pcbddc->local_primal_size) {
340       ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr);
341       ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr);
342     }
343   }
344   if (pcbddc->coarse_phi_D) {
345     PetscInt on_D,on_primal;
346     ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,&on_primal);CHKERRQ(ierr);
347     if (on_D != n_D || on_primal != pcbddc->local_primal_size) {
348       ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr);
349       ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr);
350     }
351   }
352   if (!pcbddc->coarse_phi_B) {
353     ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_B);CHKERRQ(ierr);
354     ierr = MatSetSizes(pcbddc->coarse_phi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr);
355     ierr = MatSetType(pcbddc->coarse_phi_B,impMatType);CHKERRQ(ierr);
356     ierr = MatSetUp(pcbddc->coarse_phi_B);CHKERRQ(ierr);
357   }
358   if ( (pcbddc->switch_static || pcbddc->dbg_flag) && !pcbddc->coarse_phi_D ) {
359     ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_D);CHKERRQ(ierr);
360     ierr = MatSetSizes(pcbddc->coarse_phi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr);
361     ierr = MatSetType(pcbddc->coarse_phi_D,impMatType);CHKERRQ(ierr);
362     ierr = MatSetUp(pcbddc->coarse_phi_D);CHKERRQ(ierr);
363   }
364 
365   if (pcbddc->dbg_flag) {
366     ierr = ISGetIndices(pcbddc->is_R_local,&idx_R_local);CHKERRQ(ierr);
367     ierr = PetscMalloc1(2*pcbddc->local_primal_size,&coarsefunctions_errors);CHKERRQ(ierr);
368     ierr = PetscMalloc1(2*pcbddc->local_primal_size,&constraints_errors);CHKERRQ(ierr);
369   }
370   /* Subdomain contribution (Non-overlapping) to coarse matrix  */
371   ierr = PetscMalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr);
372 
373   /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */
374 
375   /* vertices */
376   for (i=0;i<n_vertices;i++) {
377     /* this should not be needed, but MatMult_BAIJ is broken when using compressed row routines */
378     ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); /* TODO: REMOVE IT */
379     ierr = VecSet(vec1_V,zero);CHKERRQ(ierr);
380     ierr = VecSetValue(vec1_V,i,one,INSERT_VALUES);CHKERRQ(ierr);
381     ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr);
382     ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr);
383     /* simplified solution of saddle point problem with null rhs on constraints multipliers */
384     ierr = MatMult(A_RV,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr);
385     ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
386     ierr = VecScale(pcbddc->vec1_R,m_one);CHKERRQ(ierr);
387     if (n_constraints) {
388       ierr = MatMult(pcbddc->local_auxmat1,pcbddc->vec1_R,vec1_C);CHKERRQ(ierr);
389       ierr = MatMultAdd(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
390       ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr);
391     }
392     ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr);
393     ierr = MatMultAdd(A_VV,vec1_V,vec2_V,vec2_V);CHKERRQ(ierr);
394 
395     /* Set values in coarse basis function and subdomain part of coarse_mat */
396     /* coarse basis functions */
397     ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
398     ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
399     ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
400     ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
401     ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
402     ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
403     ierr = MatSetValue(pcbddc->coarse_phi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr);
404     if (pcbddc->switch_static || pcbddc->dbg_flag) {
405       ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
406       ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
407       ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
408       ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
409       ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
410     }
411     /* subdomain contribution to coarse matrix. WARNING -> column major ordering */
412     ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr);
413     ierr = PetscMemcpy(&coarse_submat_vals[i*pcbddc->local_primal_size],array,n_vertices*sizeof(PetscScalar));CHKERRQ(ierr);
414     ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr);
415     if (n_constraints) {
416       ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr);
417       ierr = PetscMemcpy(&coarse_submat_vals[i*pcbddc->local_primal_size+n_vertices],array,n_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
418       ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr);
419     }
420 
421     /* check */
422     if (pcbddc->dbg_flag) {
423       /* assemble subdomain vector on local nodes */
424       ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr);
425       ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
426       if (n_R) {
427         ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr);
428       }
429       ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
430       ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],one,INSERT_VALUES);CHKERRQ(ierr);
431       ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
432       ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
433       /* assemble subdomain vector of lagrange multipliers (i.e. primal nodes) */
434       ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr);
435       ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr);
436       ierr = VecSetValues(pcbddc->vec1_P,n_vertices,auxindices,array,INSERT_VALUES);CHKERRQ(ierr);
437       ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr);
438       if (n_constraints) {
439         ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr);
440         ierr = VecSetValues(pcbddc->vec1_P,n_constraints,&auxindices[n_vertices],array,INSERT_VALUES);CHKERRQ(ierr);
441         ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr);
442       }
443       ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
444       ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
445       ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr);
446       /* check saddle point solution */
447       ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr);
448       ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr);
449       ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i]);CHKERRQ(ierr);
450       ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr);
451       /* shift by the identity matrix */
452       ierr = VecSetValue(pcbddc->vec1_P,i,m_one,ADD_VALUES);CHKERRQ(ierr);
453       ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
454       ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
455       ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i]);CHKERRQ(ierr);
456     }
457   }
458 
459   /* constraints */
460   for (i=0;i<n_constraints;i++) {
461     ierr = VecSet(vec2_C,zero);CHKERRQ(ierr);
462     ierr = VecSetValue(vec2_C,i,m_one,INSERT_VALUES);CHKERRQ(ierr);
463     ierr = VecAssemblyBegin(vec2_C);CHKERRQ(ierr);
464     ierr = VecAssemblyEnd(vec2_C);CHKERRQ(ierr);
465     /* simplified solution of saddle point problem with null rhs on vertices multipliers */
466     ierr = MatMult(M1,vec2_C,vec1_C);CHKERRQ(ierr);
467     ierr = MatMult(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr);
468     ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr);
469     if (n_vertices) {
470       ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr);
471     }
472     /* Set values in coarse basis function and subdomain part of coarse_mat */
473     /* coarse basis functions */
474     j = i+n_vertices; /* don't touch this! */
475     ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
476     ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
477     ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
478     ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
479     ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&j,array,INSERT_VALUES);CHKERRQ(ierr);
480     ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
481     if (pcbddc->switch_static || pcbddc->dbg_flag) {
482       ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
483       ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
484       ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
485       ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&j,array,INSERT_VALUES);CHKERRQ(ierr);
486       ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
487     }
488     /* subdomain contribution to coarse matrix. WARNING -> column major ordering */
489     if (n_vertices) {
490       ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr);
491       ierr = PetscMemcpy(&coarse_submat_vals[j*pcbddc->local_primal_size],array,n_vertices*sizeof(PetscScalar));CHKERRQ(ierr);
492       ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr);
493     }
494     ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr);
495     ierr = PetscMemcpy(&coarse_submat_vals[j*pcbddc->local_primal_size+n_vertices],array,n_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
496     ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr);
497 
498     if (pcbddc->dbg_flag) {
499       /* assemble subdomain vector on nodes */
500       ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr);
501       ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
502       if (n_R) {
503         ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr);
504       }
505       ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
506       ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
507       ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
508       /* assemble subdomain vector of lagrange multipliers */
509       ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr);
510       if (n_vertices) {
511         ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr);
512         ierr = VecSetValues(pcbddc->vec1_P,n_vertices,auxindices,array,INSERT_VALUES);CHKERRQ(ierr);
513         ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr);
514       }
515       ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr);
516       ierr = VecSetValues(pcbddc->vec1_P,n_constraints,&auxindices[n_vertices],array,INSERT_VALUES);CHKERRQ(ierr);
517       ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr);
518       ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
519       ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
520       ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr);
521       /* check saddle point solution */
522       ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr);
523       ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr);
524       ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[j]);CHKERRQ(ierr);
525       ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr);
526       /* shift by the identity matrix */
527       ierr = VecSetValue(pcbddc->vec1_P,j,m_one,ADD_VALUES);CHKERRQ(ierr);
528       ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
529       ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
530       ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[j]);CHKERRQ(ierr);
531     }
532   }
533   /* call assembling routines for local coarse basis */
534   ierr = MatAssemblyBegin(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
535   ierr = MatAssemblyEnd(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
536   if (pcbddc->switch_static || pcbddc->dbg_flag) {
537     ierr = MatAssemblyBegin(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
538     ierr = MatAssemblyEnd(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
539   }
540 
541   /* compute other basis functions for non-symmetric problems */
542   /*ierr = MatIsSymmetric(pc->pmat,0.,&pcbddc->issym);CHKERRQ(ierr);*/
543   { /* this is a temporary workaround since seqbaij matrices does not have support for symmetry checking */
544     PetscBool setsym;
545     ierr = MatIsSymmetricKnown(pc->pmat,&setsym,&pcbddc->issym);CHKERRQ(ierr);
546     if (!setsym) pcbddc->issym = PETSC_FALSE;
547   }
548 
549   if (!pcbddc->issym) {
550     if (!pcbddc->coarse_psi_B) {
551       ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_psi_B);CHKERRQ(ierr);
552       ierr = MatSetSizes(pcbddc->coarse_psi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr);
553       ierr = MatSetType(pcbddc->coarse_psi_B,impMatType);CHKERRQ(ierr);
554       ierr = MatSetUp(pcbddc->coarse_psi_B);CHKERRQ(ierr);
555     }
556     if ( (pcbddc->switch_static || pcbddc->dbg_flag) && !pcbddc->coarse_psi_D) {
557       ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_psi_D);CHKERRQ(ierr);
558       ierr = MatSetSizes(pcbddc->coarse_psi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr);
559       ierr = MatSetType(pcbddc->coarse_psi_D,impMatType);CHKERRQ(ierr);
560       ierr = MatSetUp(pcbddc->coarse_psi_D);CHKERRQ(ierr);
561     }
562     for (i=0;i<pcbddc->local_primal_size;i++) {
563       if (n_constraints) {
564         ierr = VecSet(vec1_C,zero);CHKERRQ(ierr);
565         for (j=0;j<n_constraints;j++) {
566           ierr = VecSetValue(vec1_C,j,coarse_submat_vals[(j+n_vertices)*pcbddc->local_primal_size+i],INSERT_VALUES);CHKERRQ(ierr);
567         }
568         ierr = VecAssemblyBegin(vec1_C);CHKERRQ(ierr);
569         ierr = VecAssemblyEnd(vec1_C);CHKERRQ(ierr);
570       }
571       if (i<n_vertices) {
572         ierr = VecSet(vec1_V,zero);CHKERRQ(ierr);
573         ierr = VecSetValue(vec1_V,i,m_one,INSERT_VALUES);CHKERRQ(ierr);
574         ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr);
575         ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr);
576         ierr = MatMultTranspose(A_VR,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr);
577         if (n_constraints) {
578           ierr = MatMultTransposeAdd(C_CR,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
579         }
580       } else {
581         ierr = MatMultTranspose(C_CR,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr);
582       }
583       ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
584       ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
585       ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
586       ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
587       ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
588       ierr = MatSetValues(pcbddc->coarse_psi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
589       ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
590       if (i<n_vertices) {
591         ierr = MatSetValue(pcbddc->coarse_psi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr);
592       }
593       if (pcbddc->switch_static || pcbddc->dbg_flag) {
594         ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
595         ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
596         ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
597         ierr = MatSetValues(pcbddc->coarse_psi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
598         ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
599       }
600 
601       if (pcbddc->dbg_flag) {
602         /* assemble subdomain vector on nodes */
603         ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr);
604         ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
605         if (n_R) {
606           ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr);
607         }
608         ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
609         if (i<n_vertices) {
610           ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],one,INSERT_VALUES);CHKERRQ(ierr);
611         }
612         ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
613         ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
614         /* assemble subdomain vector of lagrange multipliers */
615         for (j=0;j<pcbddc->local_primal_size;j++) {
616           ierr = VecSetValue(pcbddc->vec1_P,j,-coarse_submat_vals[j*pcbddc->local_primal_size+i],INSERT_VALUES);CHKERRQ(ierr);
617         }
618         ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
619         ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
620         /* check saddle point solution */
621         ierr = MatMultTranspose(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr);
622         ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr);
623         ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i+pcbddc->local_primal_size]);CHKERRQ(ierr);
624         ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr);
625         /* shift by the identity matrix */
626         ierr = VecSetValue(pcbddc->vec1_P,i,m_one,ADD_VALUES);CHKERRQ(ierr);
627         ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
628         ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
629         ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i+pcbddc->local_primal_size]);CHKERRQ(ierr);
630       }
631     }
632     ierr = MatAssemblyBegin(pcbddc->coarse_psi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
633     ierr = MatAssemblyEnd(pcbddc->coarse_psi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
634     if (pcbddc->switch_static || pcbddc->dbg_flag) {
635       ierr = MatAssemblyBegin(pcbddc->coarse_psi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
636       ierr = MatAssemblyEnd(pcbddc->coarse_psi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
637     }
638     unsymmetric_check = PETSC_TRUE;
639   } else { /* take references to already computed coarse basis */
640     unsymmetric_check = PETSC_FALSE;
641     ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr);
642     pcbddc->coarse_psi_B = pcbddc->coarse_phi_B;
643     if (pcbddc->coarse_phi_D) {
644       ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr);
645       pcbddc->coarse_psi_D = pcbddc->coarse_phi_D;
646     }
647   }
648   ierr = PetscFree(idx_V_B);CHKERRQ(ierr);
649   /* Checking coarse_sub_mat and coarse basis functios */
650   /* Symmetric case     : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
651   /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
652   if (pcbddc->dbg_flag) {
653     Mat         coarse_sub_mat;
654     Mat         AUXMAT,TM1,TM2,TM3,TM4;
655     Mat         coarse_phi_D,coarse_phi_B;
656     Mat         coarse_psi_D,coarse_psi_B;
657     Mat         A_II,A_BB,A_IB,A_BI;
658     MatType     checkmattype=MATSEQAIJ;
659     PetscReal   real_value;
660 
661     ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr);
662     ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr);
663     ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr);
664     ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr);
665     ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr);
666     ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr);
667     if (unsymmetric_check) {
668       ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr);
669       ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr);
670     }
671     ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr);
672 
673     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
674     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat and local basis functions\n");CHKERRQ(ierr);
675     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
676     if (unsymmetric_check) {
677       ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
678       ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr);
679       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
680       ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
681       ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr);
682       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
683       ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
684       ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr);
685       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
686       ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
687       ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr);
688       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
689     } else {
690       ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr);
691       ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr);
692       ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
693       ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr);
694       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
695       ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
696       ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr);
697       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
698     }
699     ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
700     ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
701     ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
702     ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr);
703     ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
704     ierr = MatNorm(TM1,NORM_INFINITY,&real_value);CHKERRQ(ierr);
705     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
706     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"----------------------------------\n");CHKERRQ(ierr);
707     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d \n",PetscGlobalRank);CHKERRQ(ierr);
708     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"matrix error = % 1.14e\n",real_value);CHKERRQ(ierr);
709     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"coarse functions (phi) errors\n");CHKERRQ(ierr);
710     for (i=0;i<pcbddc->local_primal_size;i++) {
711       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i,coarsefunctions_errors[i]);CHKERRQ(ierr);
712     }
713     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"constraints (phi) errors\n");CHKERRQ(ierr);
714     for (i=0;i<pcbddc->local_primal_size;i++) {
715       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i,constraints_errors[i]);CHKERRQ(ierr);
716     }
717     if (unsymmetric_check) {
718       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"coarse functions (psi) errors\n");CHKERRQ(ierr);
719       for (i=pcbddc->local_primal_size;i<2*pcbddc->local_primal_size;i++) {
720         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i-pcbddc->local_primal_size,coarsefunctions_errors[i]);CHKERRQ(ierr);
721       }
722       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"constraints (psi) errors\n");CHKERRQ(ierr);
723       for (i=pcbddc->local_primal_size;i<2*pcbddc->local_primal_size;i++) {
724         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i-pcbddc->local_primal_size,constraints_errors[i]);CHKERRQ(ierr);
725       }
726     }
727     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
728     ierr = MatDestroy(&A_II);CHKERRQ(ierr);
729     ierr = MatDestroy(&A_BB);CHKERRQ(ierr);
730     ierr = MatDestroy(&A_IB);CHKERRQ(ierr);
731     ierr = MatDestroy(&A_BI);CHKERRQ(ierr);
732     ierr = MatDestroy(&TM1);CHKERRQ(ierr);
733     ierr = MatDestroy(&TM2);CHKERRQ(ierr);
734     ierr = MatDestroy(&TM3);CHKERRQ(ierr);
735     ierr = MatDestroy(&TM4);CHKERRQ(ierr);
736     ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr);
737     ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr);
738     if (unsymmetric_check) {
739       ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr);
740       ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr);
741     }
742     ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr);
743     ierr = ISRestoreIndices(pcbddc->is_R_local,&idx_R_local);CHKERRQ(ierr);
744     ierr = PetscFree(coarsefunctions_errors);CHKERRQ(ierr);
745     ierr = PetscFree(constraints_errors);CHKERRQ(ierr);
746   }
747   /* free memory */
748   if (n_vertices) {
749     ierr = VecDestroy(&vec1_V);CHKERRQ(ierr);
750     ierr = VecDestroy(&vec2_V);CHKERRQ(ierr);
751     ierr = MatDestroy(&A_RV);CHKERRQ(ierr);
752     ierr = MatDestroy(&A_VR);CHKERRQ(ierr);
753     ierr = MatDestroy(&A_VV);CHKERRQ(ierr);
754   }
755   if (n_constraints) {
756     ierr = VecDestroy(&vec1_C);CHKERRQ(ierr);
757     ierr = VecDestroy(&vec2_C);CHKERRQ(ierr);
758     ierr = MatDestroy(&M1);CHKERRQ(ierr);
759     ierr = MatDestroy(&C_CR);CHKERRQ(ierr);
760   }
761   ierr = PetscFree(auxindices);CHKERRQ(ierr);
762   /* get back data */
763   *coarse_submat_vals_n = coarse_submat_vals;
764   PetscFunctionReturn(0);
765 }
766 
767 #undef __FUNCT__
768 #define __FUNCT__ "PCBDDCSetUpLocalMatrices"
769 PetscErrorCode PCBDDCSetUpLocalMatrices(PC pc)
770 {
771   PC_IS*            pcis = (PC_IS*)(pc->data);
772   PC_BDDC*          pcbddc = (PC_BDDC*)pc->data;
773   Mat_IS*           matis = (Mat_IS*)pc->pmat->data;
774   PetscBool         issbaij,isseqaij;
775   /* manage repeated solves */
776   MatReuse          reuse;
777   PetscErrorCode    ierr;
778 
779   PetscFunctionBegin;
780   if ( (pcbddc->use_change_of_basis && !pcbddc->ChangeOfBasisMatrix) || (pcbddc->user_ChangeOfBasisMatrix && !pcbddc->ChangeOfBasisMatrix) ) {
781     SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Change of basis matrix has not been created");
782   }
783   /* get mat flags */
784   reuse = MAT_INITIAL_MATRIX;
785   if (pc->setupcalled) {
786     if (pc->flag == SAME_NONZERO_PATTERN) {
787       reuse = MAT_REUSE_MATRIX;
788     } else {
789       reuse = MAT_INITIAL_MATRIX;
790     }
791   }
792   if (reuse == MAT_INITIAL_MATRIX) {
793     ierr = MatDestroy(&pcis->A_II);CHKERRQ(ierr);
794     ierr = MatDestroy(&pcis->A_IB);CHKERRQ(ierr);
795     ierr = MatDestroy(&pcis->A_BI);CHKERRQ(ierr);
796     ierr = MatDestroy(&pcis->A_BB);CHKERRQ(ierr);
797     ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
798   }
799 
800   /* transform local matrices if needed */
801   if (pcbddc->ChangeOfBasisMatrix) {
802     Mat       *change_mat_all;
803     IS        is_local,is_global;
804     PetscBool sorted;
805     PetscInt  *idxs_perm;
806 
807     ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n,0,1,&is_local);CHKERRQ(ierr);
808     ierr = ISLocalToGlobalMappingApplyIS(matis->mapping,is_local,&is_global);CHKERRQ(ierr);
809     ierr = ISDestroy(&is_local);CHKERRQ(ierr);
810     ierr = ISSorted(is_global,&sorted);CHKERRQ(ierr);
811     if (!sorted) {
812       const PetscInt *idxs;
813       PetscInt *idxs_sorted,i;
814 
815       ierr = PetscMalloc1(pcis->n,&idxs_perm);CHKERRQ(ierr);
816       ierr = PetscMalloc1(pcis->n,&idxs_sorted);CHKERRQ(ierr);
817       for (i=0;i<pcis->n;i++) {
818         idxs_perm[i] = i;
819       }
820       ierr = ISGetIndices(is_global,&idxs);CHKERRQ(ierr);
821       ierr = PetscSortIntWithPermutation(pcis->n,idxs,idxs_perm);CHKERRQ(ierr);
822       for (i=0;i<pcis->n;i++) {
823         idxs_sorted[i] = idxs[idxs_perm[i]];
824       }
825       ierr = ISRestoreIndices(is_global,&idxs);CHKERRQ(ierr);
826       ierr = ISDestroy(&is_global);CHKERRQ(ierr);
827       ierr = ISCreateGeneral(PETSC_COMM_SELF,pcis->n,idxs_sorted,PETSC_OWN_POINTER,&is_global);CHKERRQ(ierr);
828     }
829 
830     /* get change of basis on the whole set of local dofs */
831     ierr = MatGetSubMatrices(pcbddc->ChangeOfBasisMatrix,1,&is_global,&is_global,MAT_INITIAL_MATRIX,&change_mat_all);CHKERRQ(ierr);
832 
833     if (!sorted) {
834       Mat      new_mat;
835       IS       is_perm;
836       PetscInt *idxs,i;
837 
838       ierr = PetscMalloc1(pcis->n,&idxs);CHKERRQ(ierr);
839       for (i=0;i<pcis->n;i++) {
840         idxs[idxs_perm[i]] = i;
841       }
842       ierr = PetscFree(idxs_perm);CHKERRQ(ierr);
843       ierr = ISCreateGeneral(PETSC_COMM_SELF,pcis->n,idxs,PETSC_OWN_POINTER,&is_perm);CHKERRQ(ierr);
844       ierr = ISSetPermutation(is_perm);CHKERRQ(ierr);
845       ierr = MatPermute(change_mat_all[0],is_perm,is_perm,&new_mat);CHKERRQ(ierr);
846       ierr = MatDestroy(&change_mat_all[0]);CHKERRQ(ierr);
847       change_mat_all[0] = new_mat;
848       ierr = ISDestroy(&is_perm);CHKERRQ(ierr);
849     }
850 
851     /* check */
852     if (pcbddc->dbg_flag) {
853       Vec       x,x_change;
854       PetscReal error;
855 
856       ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr);
857       ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr);
858       ierr = VecSetRandom(x,NULL);CHKERRQ(ierr);
859       ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr);
860       ierr = VecScatterBegin(matis->ctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
861       ierr = VecScatterEnd(matis->ctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
862       ierr = MatMult(change_mat_all[0],pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr);
863       ierr = VecScatterBegin(matis->ctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
864       ierr = VecScatterEnd(matis->ctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
865       ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr);
866       ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr);
867       ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr);
868       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
869       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on N: %1.6e\n",error);CHKERRQ(ierr);
870       ierr = VecDestroy(&x);CHKERRQ(ierr);
871       ierr = VecDestroy(&x_change);CHKERRQ(ierr);
872     }
873 
874     /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */
875     ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr);
876     if (isseqaij) {
877       ierr = MatPtAP(matis->A,change_mat_all[0],reuse,2.0,&pcbddc->local_mat);CHKERRQ(ierr);
878     } else {
879       Mat work_mat;
880       ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr);
881       ierr = MatPtAP(work_mat,change_mat_all[0],reuse,2.0,&pcbddc->local_mat);CHKERRQ(ierr);
882       ierr = MatDestroy(&work_mat);CHKERRQ(ierr);
883     }
884     /*
885     ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
886     ierr = MatView(change_mat_all,(PetscViewer)0);CHKERRQ(ierr);
887     */
888     ierr = MatDestroyMatrices(1,&change_mat_all);CHKERRQ(ierr);
889     ierr = ISDestroy(&is_global);CHKERRQ(ierr);
890   } else {
891     /* without change of basis, the local matrix is unchanged */
892     if (!pcbddc->local_mat) {
893       ierr = PetscObjectReference((PetscObject)matis->A);CHKERRQ(ierr);
894       pcbddc->local_mat = matis->A;
895     }
896   }
897 
898   /* get submatrices */
899   ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_I_local,pcis->is_I_local,reuse,&pcis->A_II);CHKERRQ(ierr);
900   ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_B_local,reuse,&pcis->A_BB);CHKERRQ(ierr);
901   ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
902   if (!issbaij) {
903     ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_I_local,pcis->is_B_local,reuse,&pcis->A_IB);CHKERRQ(ierr);
904     ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_I_local,reuse,&pcis->A_BI);CHKERRQ(ierr);
905   } else {
906     Mat newmat;
907     ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
908     ierr = MatGetSubMatrix(newmat,pcis->is_I_local,pcis->is_B_local,reuse,&pcis->A_IB);CHKERRQ(ierr);
909     ierr = MatGetSubMatrix(newmat,pcis->is_B_local,pcis->is_I_local,reuse,&pcis->A_BI);CHKERRQ(ierr);
910     ierr = MatDestroy(&newmat);CHKERRQ(ierr);
911   }
912   PetscFunctionReturn(0);
913 }
914 
915 #undef __FUNCT__
916 #define __FUNCT__ "PCBDDCSetUpLocalScatters"
917 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc)
918 {
919   PC_IS*         pcis = (PC_IS*)(pc->data);
920   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
921   IS             is_aux1,is_aux2;
922   PetscInt       *aux_array1,*aux_array2,*is_indices,*idx_R_local;
923   PetscInt       n_vertices,i,j,n_R,n_D,n_B;
924   PetscInt       vbs,bs;
925   PetscBT        bitmask;
926   PetscErrorCode ierr;
927 
928   PetscFunctionBegin;
929   /*
930     No need to setup local scatters if
931       - primal space is unchanged
932         AND
933       - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains)
934         AND
935       - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine
936   */
937   if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) {
938     PetscFunctionReturn(0);
939   }
940   /* destroy old objects */
941   ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr);
942   ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr);
943   ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr);
944   /* Set Non-overlapping dimensions */
945   n_B = pcis->n_B; n_D = pcis->n - n_B;
946   n_vertices = pcbddc->n_actual_vertices;
947   /* create auxiliary bitmask */
948   ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr);
949   for (i=0;i<n_vertices;i++) {
950     ierr = PetscBTSet(bitmask,pcbddc->primal_indices_local_idxs[i]);CHKERRQ(ierr);
951   }
952 
953   /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */
954   ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr);
955   for (i=0, n_R=0; i<pcis->n; i++) {
956     if (!PetscBTLookup(bitmask,i)) {
957       idx_R_local[n_R] = i;
958       n_R++;
959     }
960   }
961 
962   /* Block code */
963   vbs = 1;
964   ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr);
965   if (bs>1 && !(n_vertices%bs)) {
966     PetscBool is_blocked = PETSC_TRUE;
967     PetscInt  *vary;
968     /* Verify if the vertex indices correspond to each element in a block (code taken from sbaij2.c) */
969     ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr);
970     ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr);
971     for (i=0; i<n_vertices; i++) vary[pcbddc->primal_indices_local_idxs[i]/bs]++;
972     for (i=0; i<n_vertices; i++) {
973       if (vary[i]!=0 && vary[i]!=bs) {
974         is_blocked = PETSC_FALSE;
975         break;
976       }
977     }
978     if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */
979       vbs = bs;
980       for (i=0;i<n_R/vbs;i++) {
981         idx_R_local[i] = idx_R_local[vbs*i]/vbs;
982       }
983     }
984     ierr = PetscFree(vary);CHKERRQ(ierr);
985   }
986   ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr);
987   ierr = PetscFree(idx_R_local);CHKERRQ(ierr);
988 
989   /* print some info if requested */
990   if (pcbddc->dbg_flag) {
991     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
992     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
993     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
994     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr);
995     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr);
996     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices,pcbddc->local_primal_size);CHKERRQ(ierr);
997     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"pcbddc->n_vertices = %d, pcbddc->n_constraints = %d\n",pcbddc->n_vertices,pcbddc->n_constraints);CHKERRQ(ierr);
998     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
999   }
1000 
1001   /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */
1002   ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr);
1003   ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr);
1004   ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr);
1005   ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1006   for (i=0; i<n_D; i++) {
1007     ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr);
1008   }
1009   ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1010   for (i=0, j=0; i<n_R; i++) {
1011     if (!PetscBTLookup(bitmask,idx_R_local[i])) {
1012       aux_array1[j++] = i;
1013     }
1014   }
1015   ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr);
1016   ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1017   for (i=0, j=0; i<n_B; i++) {
1018     if (!PetscBTLookup(bitmask,is_indices[i])) {
1019       aux_array2[j++] = i;
1020     }
1021   }
1022   ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1023   ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr);
1024   ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr);
1025   ierr = ISDestroy(&is_aux1);CHKERRQ(ierr);
1026   ierr = ISDestroy(&is_aux2);CHKERRQ(ierr);
1027 
1028   if (pcbddc->switch_static || pcbddc->dbg_flag) {
1029     ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr);
1030     for (i=0, j=0; i<n_R; i++) {
1031       if (PetscBTLookup(bitmask,idx_R_local[i])) {
1032         aux_array1[j++] = i;
1033       }
1034     }
1035     ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr);
1036     ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr);
1037     ierr = ISDestroy(&is_aux1);CHKERRQ(ierr);
1038   }
1039   ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr);
1040   ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr);
1041   PetscFunctionReturn(0);
1042 }
1043 
1044 
1045 #undef __FUNCT__
1046 #define __FUNCT__ "PCBDDCSetUpLocalSolvers"
1047 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc)
1048 {
1049   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
1050   PC_IS          *pcis = (PC_IS*)pc->data;
1051   PC             pc_temp;
1052   Mat            A_RR;
1053   MatReuse       reuse;
1054   PetscScalar    m_one = -1.0;
1055   PetscReal      value;
1056   PetscInt       n_D,n_R,ibs,mbs;
1057   PetscBool      use_exact,use_exact_reduced,issbaij;
1058   PetscErrorCode ierr;
1059   /* prefixes stuff */
1060   char           dir_prefix[256],neu_prefix[256],str_level[16];
1061   size_t         len;
1062 
1063   PetscFunctionBegin;
1064 
1065   /* compute prefixes */
1066   ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr);
1067   ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr);
1068   if (!pcbddc->current_level) {
1069     ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
1070     ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
1071     ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr);
1072     ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr);
1073   } else {
1074     ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr);
1075     sprintf(str_level,"l%d_",(int)(pcbddc->current_level));
1076     ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr);
1077     len -= 15; /* remove "pc_bddc_coarse_" */
1078     if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */
1079     if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */
1080     ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr);
1081     ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr);
1082     ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr);
1083     ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr);
1084     ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr);
1085     ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr);
1086   }
1087 
1088   /* DIRICHLET PROBLEM */
1089   /* Matrix for Dirichlet problem is pcis->A_II */
1090   ierr = ISGetSize(pcis->is_I_local,&n_D);CHKERRQ(ierr);
1091   if (!pcbddc->ksp_D) { /* create object if not yet build */
1092     ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr);
1093     ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr);
1094     /* default */
1095     ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr);
1096     ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr);
1097     ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
1098     ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr);
1099     if (issbaij) {
1100       ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr);
1101     } else {
1102       ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr);
1103     }
1104     /* Allow user's customization */
1105     ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr);
1106     ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
1107   }
1108   ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr);
1109   /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
1110   if (!n_D) {
1111     ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr);
1112     ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr);
1113   }
1114   /* Set Up KSP for Dirichlet problem of BDDC */
1115   ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr);
1116   /* set ksp_D into pcis data */
1117   ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr);
1118   ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr);
1119   pcis->ksp_D = pcbddc->ksp_D;
1120 
1121   /* NEUMANN PROBLEM */
1122   /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */
1123   ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr);
1124   if (pcbddc->ksp_R) { /* already created ksp */
1125     PetscInt nn_R;
1126     ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr);
1127     ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr);
1128     ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr);
1129     if (nn_R != n_R) { /* old ksp is not reusable, so reset it */
1130       ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr);
1131       ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1132       reuse = MAT_INITIAL_MATRIX;
1133     } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */
1134       if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */
1135         ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1136         reuse = MAT_INITIAL_MATRIX;
1137       } else { /* safe to reuse the matrix */
1138         reuse = MAT_REUSE_MATRIX;
1139       }
1140     }
1141     /* last check */
1142     if (pc->flag == DIFFERENT_NONZERO_PATTERN) {
1143       ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1144       reuse = MAT_INITIAL_MATRIX;
1145     }
1146   } else { /* first time, so we need to create the matrix */
1147     reuse = MAT_INITIAL_MATRIX;
1148   }
1149   /* extract A_RR */
1150   ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr);
1151   ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr);
1152   if (ibs != mbs) {
1153     Mat newmat;
1154     ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
1155     ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr);
1156     ierr = MatDestroy(&newmat);CHKERRQ(ierr);
1157   } else {
1158     ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr);
1159   }
1160   if (!pcbddc->ksp_R) { /* create object if not present */
1161     ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr);
1162     ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr);
1163     /* default */
1164     ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr);
1165     ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr);
1166     ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr);
1167     ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
1168     if (issbaij) {
1169       ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr);
1170     } else {
1171       ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr);
1172     }
1173     /* Allow user's customization */
1174     ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr);
1175     ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
1176   }
1177   ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr);
1178   /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
1179   if (!n_R) {
1180     ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr);
1181     ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr);
1182   }
1183   /* Set Up KSP for Neumann problem of BDDC */
1184   ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr);
1185 
1186   /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */
1187   if (pcbddc->NullSpace || pcbddc->dbg_flag) {
1188     /* Dirichlet */
1189     ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr);
1190     ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr);
1191     ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr);
1192     ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr);
1193     ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr);
1194     /* need to be adapted? */
1195     use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE);
1196     ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
1197     ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr);
1198     /* print info */
1199     if (pcbddc->dbg_flag) {
1200       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1201       ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
1202       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
1203       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Checking solution of Dirichlet and Neumann problems\n");CHKERRQ(ierr);
1204       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr);
1205       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1206     }
1207     if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) {
1208       ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcis->is_I_local);CHKERRQ(ierr);
1209     }
1210 
1211     /* Neumann */
1212     ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr);
1213     ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
1214     ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr);
1215     ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr);
1216     ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr);
1217     /* need to be adapted? */
1218     use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE);
1219     ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
1220     /* print info */
1221     if (pcbddc->dbg_flag) {
1222       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr);
1223       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1224     }
1225     if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */
1226       ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcbddc->is_R_local);CHKERRQ(ierr);
1227     }
1228   }
1229   /* free Neumann problem's matrix */
1230   ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1231   PetscFunctionReturn(0);
1232 }
1233 
1234 #undef __FUNCT__
1235 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection"
1236 static PetscErrorCode  PCBDDCSolveSubstructureCorrection(PC pc, Vec rhs, Vec sol, Vec work, PetscBool applytranspose)
1237 {
1238   PetscErrorCode ierr;
1239   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1240 
1241   PetscFunctionBegin;
1242   if (applytranspose) {
1243     if (pcbddc->local_auxmat1) {
1244       ierr = MatMultTranspose(pcbddc->local_auxmat2,rhs,work);CHKERRQ(ierr);
1245       ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,work,rhs,rhs);CHKERRQ(ierr);
1246     }
1247     ierr = KSPSolveTranspose(pcbddc->ksp_R,rhs,sol);CHKERRQ(ierr);
1248   } else {
1249     ierr = KSPSolve(pcbddc->ksp_R,rhs,sol);CHKERRQ(ierr);
1250     if (pcbddc->local_auxmat1) {
1251       ierr = MatMult(pcbddc->local_auxmat1,sol,work);CHKERRQ(ierr);
1252       ierr = MatMultAdd(pcbddc->local_auxmat2,work,sol,sol);CHKERRQ(ierr);
1253     }
1254   }
1255   PetscFunctionReturn(0);
1256 }
1257 
1258 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */
1259 #undef __FUNCT__
1260 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner"
1261 PetscErrorCode  PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose)
1262 {
1263   PetscErrorCode ierr;
1264   PC_BDDC*        pcbddc = (PC_BDDC*)(pc->data);
1265   PC_IS*            pcis = (PC_IS*)  (pc->data);
1266   const PetscScalar zero = 0.0;
1267 
1268   PetscFunctionBegin;
1269   /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */
1270   if (applytranspose) {
1271     ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr);
1272     if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); }
1273   } else {
1274     ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr);
1275     if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); }
1276   }
1277   /* start communications from local primal nodes to rhs of coarse solver */
1278   ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr);
1279   ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1280   ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1281 
1282   /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */
1283   /* TODO remove null space when doing multilevel */
1284   if (pcbddc->coarse_ksp) {
1285     if (applytranspose) {
1286       ierr = KSPSolveTranspose(pcbddc->coarse_ksp,NULL,NULL);CHKERRQ(ierr);
1287     } else {
1288       ierr = KSPSolve(pcbddc->coarse_ksp,NULL,NULL);CHKERRQ(ierr);
1289     }
1290   }
1291 
1292   /* Local solution on R nodes */
1293   if (pcis->n) {
1294     ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr);
1295     ierr = VecScatterBegin(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1296     ierr = VecScatterEnd(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1297     if (pcbddc->switch_static) {
1298       ierr = VecScatterBegin(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1299       ierr = VecScatterEnd(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1300     }
1301     ierr = PCBDDCSolveSubstructureCorrection(pc,pcbddc->vec1_R,pcbddc->vec2_R,pcbddc->vec1_C,applytranspose);CHKERRQ(ierr);
1302     ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
1303     ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1304     ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1305     if (pcbddc->switch_static) {
1306       ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1307       ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1308     }
1309   }
1310 
1311   /* communications from coarse sol to local primal nodes */
1312   ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1313   ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1314 
1315   /* Sum contributions from two levels */
1316   if (applytranspose) {
1317     ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr);
1318     if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); }
1319   } else {
1320     ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr);
1321     if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); }
1322   }
1323   PetscFunctionReturn(0);
1324 }
1325 
1326 /* TODO: the following two function can be optimized using VecPlaceArray whenever possible and using overlap flag */
1327 #undef __FUNCT__
1328 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin"
1329 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode)
1330 {
1331   PetscErrorCode ierr;
1332   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1333   PetscScalar    *array,*array2;
1334   Vec            from,to;
1335 
1336   PetscFunctionBegin;
1337   if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */
1338     from = pcbddc->coarse_vec;
1339     to = pcbddc->vec1_P;
1340     if (pcbddc->coarse_ksp) { /* get array from coarse processes */
1341       Vec tvec;
1342       PetscInt lsize;
1343       ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr);
1344       ierr = VecGetLocalSize(tvec,&lsize);CHKERRQ(ierr);
1345       ierr = VecGetArrayRead(tvec,(const PetscScalar**)&array);CHKERRQ(ierr);
1346       ierr = VecGetArray(from,&array2);CHKERRQ(ierr);
1347       ierr = PetscMemcpy(array2,array,lsize*sizeof(PetscScalar));CHKERRQ(ierr);
1348       ierr = VecRestoreArrayRead(tvec,(const PetscScalar**)&array);CHKERRQ(ierr);
1349       ierr = VecRestoreArray(from,&array2);CHKERRQ(ierr);
1350     }
1351   } else { /* from local to global -> put data in coarse right hand side */
1352     from = pcbddc->vec1_P;
1353     to = pcbddc->coarse_vec;
1354   }
1355   ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr);
1356   PetscFunctionReturn(0);
1357 }
1358 
1359 #undef __FUNCT__
1360 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd"
1361 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode)
1362 {
1363   PetscErrorCode ierr;
1364   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1365   PetscScalar    *array,*array2;
1366   Vec            from,to;
1367 
1368   PetscFunctionBegin;
1369   if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */
1370     from = pcbddc->coarse_vec;
1371     to = pcbddc->vec1_P;
1372   } else { /* from local to global -> put data in coarse right hand side */
1373     from = pcbddc->vec1_P;
1374     to = pcbddc->coarse_vec;
1375   }
1376   ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr);
1377   if (smode == SCATTER_FORWARD) {
1378     if (pcbddc->coarse_ksp) { /* get array from coarse processes */
1379       Vec tvec;
1380       PetscInt lsize;
1381       ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr);
1382       ierr = VecGetLocalSize(tvec,&lsize);CHKERRQ(ierr);
1383       ierr = VecGetArrayRead(to,(const PetscScalar**)&array);CHKERRQ(ierr);
1384       ierr = VecGetArray(tvec,&array2);CHKERRQ(ierr);
1385       ierr = PetscMemcpy(array2,array,lsize*sizeof(PetscScalar));CHKERRQ(ierr);
1386       ierr = VecRestoreArrayRead(to,(const PetscScalar**)&array);CHKERRQ(ierr);
1387       ierr = VecRestoreArray(tvec,&array2);CHKERRQ(ierr);
1388     }
1389   }
1390   PetscFunctionReturn(0);
1391 }
1392 
1393 /* uncomment for testing purposes */
1394 /* #define PETSC_MISSING_LAPACK_GESVD 1 */
1395 #undef __FUNCT__
1396 #define __FUNCT__ "PCBDDCConstraintsSetUp"
1397 PetscErrorCode PCBDDCConstraintsSetUp(PC pc)
1398 {
1399   PetscErrorCode    ierr;
1400   PC_IS*            pcis = (PC_IS*)(pc->data);
1401   PC_BDDC*          pcbddc = (PC_BDDC*)pc->data;
1402   Mat_IS*           matis = (Mat_IS*)pc->pmat->data;
1403   /* constraint and (optionally) change of basis matrix implemented as SeqAIJ */
1404   MatType           impMatType=MATSEQAIJ;
1405   /* one and zero */
1406   PetscScalar       one=1.0,zero=0.0;
1407   /* space to store constraints and their local indices */
1408   PetscScalar       *temp_quadrature_constraint;
1409   PetscInt          *temp_indices,*temp_indices_to_constraint,*temp_indices_to_constraint_B;
1410   /* iterators */
1411   PetscInt          i,j,k,total_counts,temp_start_ptr;
1412   /* stuff to store connected components stored in pcbddc->mat_graph */
1413   IS                ISForVertices,*ISForFaces,*ISForEdges,*used_IS;
1414   PetscInt          n_ISForFaces,n_ISForEdges;
1415   /* near null space stuff */
1416   MatNullSpace      nearnullsp;
1417   const Vec         *nearnullvecs;
1418   Vec               *localnearnullsp;
1419   PetscBool         nnsp_has_cnst;
1420   PetscInt          nnsp_size;
1421   PetscScalar       *array;
1422   /* BLAS integers */
1423   PetscBLASInt      lwork,lierr;
1424   PetscBLASInt      Blas_N,Blas_M,Blas_K,Blas_one=1;
1425   PetscBLASInt      Blas_LDA,Blas_LDB,Blas_LDC;
1426   /* LAPACK working arrays for SVD or POD */
1427   PetscBool         skip_lapack;
1428   PetscScalar       *work;
1429   PetscReal         *singular_vals;
1430 #if defined(PETSC_USE_COMPLEX)
1431   PetscReal         *rwork;
1432 #endif
1433 #if defined(PETSC_MISSING_LAPACK_GESVD)
1434   PetscBLASInt      Blas_one_2=1;
1435   PetscScalar       *temp_basis,*correlation_mat;
1436 #else
1437   PetscBLASInt      dummy_int_1=1,dummy_int_2=1;
1438   PetscScalar       dummy_scalar_1=0.0,dummy_scalar_2=0.0;
1439 #endif
1440   /* reuse */
1441   PetscInt          olocal_primal_size;
1442   PetscInt          *oprimal_indices_local_idxs;
1443   /* change of basis */
1444   PetscInt          *aux_primal_numbering,*aux_primal_minloc,*global_indices;
1445   PetscBool         boolforchange,qr_needed;
1446   PetscBT           touched,change_basis,qr_needed_idx;
1447   /* auxiliary stuff */
1448   PetscInt          *nnz,*is_indices,*aux_primal_numbering_B;
1449   PetscInt          ncc,*gidxs,*permutation,*temp_indices_to_constraint_work;
1450   PetscScalar       *temp_quadrature_constraint_work;
1451   /* some quantities */
1452   PetscInt          n_vertices,total_primal_vertices,valid_constraints;
1453   PetscInt          size_of_constraint,max_size_of_constraint,max_constraints,temp_constraints;
1454 
1455 
1456   PetscFunctionBegin;
1457   /* Destroy Mat objects computed previously */
1458   ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
1459   ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
1460   /* Get index sets for faces, edges and vertices from graph */
1461   ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr);
1462   /* free unneeded index sets */
1463   if (!pcbddc->use_vertices) {
1464     ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr);
1465   }
1466   if (!pcbddc->use_edges) {
1467     for (i=0;i<n_ISForEdges;i++) {
1468       ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr);
1469     }
1470     ierr = PetscFree(ISForEdges);CHKERRQ(ierr);
1471     n_ISForEdges = 0;
1472   }
1473   if (!pcbddc->use_faces) {
1474     for (i=0;i<n_ISForFaces;i++) {
1475       ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr);
1476     }
1477     ierr = PetscFree(ISForFaces);CHKERRQ(ierr);
1478     n_ISForFaces = 0;
1479   }
1480   /* HACKS (the following two blocks of code) */
1481   if (!ISForVertices && pcbddc->NullSpace && !pcbddc->user_ChangeOfBasisMatrix) {
1482     pcbddc->use_change_of_basis = PETSC_TRUE;
1483     if (!ISForEdges) {
1484       pcbddc->use_change_on_faces = PETSC_TRUE;
1485     }
1486   }
1487   if (pcbddc->NullSpace) {
1488     /* use_change_of_basis should be consistent among processors */
1489     PetscBool tbool[2],gbool[2];
1490     tbool [0] = pcbddc->use_change_of_basis;
1491     tbool [1] = pcbddc->use_change_on_faces;
1492     ierr = MPI_Allreduce(tbool,gbool,2,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
1493     pcbddc->use_change_of_basis = gbool[0];
1494     pcbddc->use_change_on_faces = gbool[1];
1495   }
1496   /* print some info */
1497   if (pcbddc->dbg_flag) {
1498     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
1499     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr);
1500     i = 0;
1501     if (ISForVertices) {
1502       ierr = ISGetSize(ISForVertices,&i);CHKERRQ(ierr);
1503     }
1504     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices\n",PetscGlobalRank,i);CHKERRQ(ierr);
1505     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges\n",PetscGlobalRank,n_ISForEdges);CHKERRQ(ierr);
1506     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces\n",PetscGlobalRank,n_ISForFaces);CHKERRQ(ierr);
1507     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1508   }
1509   /* check if near null space is attached to global mat */
1510   ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr);
1511   if (nearnullsp) {
1512     ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr);
1513     /* remove any stored info */
1514     ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr);
1515     ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr);
1516     /* store information for BDDC solver reuse */
1517     ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr);
1518     pcbddc->onearnullspace = nearnullsp;
1519     ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr);
1520     for (i=0;i<nnsp_size;i++) {
1521       ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr);
1522     }
1523   } else { /* if near null space is not provided BDDC uses constants by default */
1524     nnsp_size = 0;
1525     nnsp_has_cnst = PETSC_TRUE;
1526   }
1527   /* get max number of constraints on a single cc */
1528   max_constraints = nnsp_size;
1529   if (nnsp_has_cnst) max_constraints++;
1530 
1531   /*
1532        Evaluate maximum storage size needed by the procedure
1533        - temp_indices will contain start index of each constraint stored as follows
1534        - temp_indices_to_constraint  [temp_indices[i],...,temp_indices[i+1]-1] will contain the indices (in local numbering) on which the constraint acts
1535        - temp_indices_to_constraint_B[temp_indices[i],...,temp_indices[i+1]-1] will contain the indices (in boundary numbering) on which the constraint acts
1536        - temp_quadrature_constraint  [temp_indices[i],...,temp_indices[i+1]-1] will contain the scalars representing the constraint itself
1537                                                                                                                                                          */
1538   total_counts = n_ISForFaces+n_ISForEdges;
1539   total_counts *= max_constraints;
1540   n_vertices = 0;
1541   if (ISForVertices) {
1542     ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr);
1543   }
1544   total_counts += n_vertices;
1545   ierr = PetscMalloc1(total_counts+1,&temp_indices);CHKERRQ(ierr);
1546   ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr);
1547   total_counts = 0;
1548   max_size_of_constraint = 0;
1549   for (i=0;i<n_ISForEdges+n_ISForFaces;i++) {
1550     if (i<n_ISForEdges) {
1551       used_IS = &ISForEdges[i];
1552     } else {
1553       used_IS = &ISForFaces[i-n_ISForEdges];
1554     }
1555     ierr = ISGetSize(*used_IS,&j);CHKERRQ(ierr);
1556     total_counts += j;
1557     max_size_of_constraint = PetscMax(j,max_size_of_constraint);
1558   }
1559   total_counts *= max_constraints;
1560   total_counts += n_vertices;
1561   ierr = PetscMalloc3(total_counts,&temp_quadrature_constraint,total_counts,&temp_indices_to_constraint,total_counts,&temp_indices_to_constraint_B);CHKERRQ(ierr);
1562   /* get local part of global near null space vectors */
1563   ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr);
1564   for (k=0;k<nnsp_size;k++) {
1565     ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr);
1566     ierr = VecScatterBegin(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1567     ierr = VecScatterEnd(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1568   }
1569 
1570   /* whether or not to skip lapack calls */
1571   skip_lapack = PETSC_TRUE;
1572   if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE;
1573 
1574   /* allocate some auxiliary stuff */
1575   if (!skip_lapack || pcbddc->use_qr_single) {
1576     ierr = PetscMalloc4(max_size_of_constraint,&gidxs,max_size_of_constraint,&permutation,max_size_of_constraint,&temp_indices_to_constraint_work,max_size_of_constraint,&temp_quadrature_constraint_work);CHKERRQ(ierr);
1577   } else {
1578     gidxs = NULL;
1579     permutation = NULL;
1580     temp_indices_to_constraint_work = NULL;
1581     temp_quadrature_constraint_work = NULL;
1582   }
1583 
1584   /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */
1585   if (!skip_lapack) {
1586     PetscScalar temp_work;
1587 
1588 #if defined(PETSC_MISSING_LAPACK_GESVD)
1589     /* Proper Orthogonal Decomposition (POD) using the snapshot method */
1590     ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr);
1591     ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr);
1592     ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr);
1593 #if defined(PETSC_USE_COMPLEX)
1594     ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr);
1595 #endif
1596     /* now we evaluate the optimal workspace using query with lwork=-1 */
1597     ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr);
1598     ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr);
1599     lwork = -1;
1600     ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1601 #if !defined(PETSC_USE_COMPLEX)
1602     PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr));
1603 #else
1604     PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr));
1605 #endif
1606     ierr = PetscFPTrapPop();CHKERRQ(ierr);
1607     if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr);
1608 #else /* on missing GESVD */
1609     /* SVD */
1610     PetscInt max_n,min_n;
1611     max_n = max_size_of_constraint;
1612     min_n = max_constraints;
1613     if (max_size_of_constraint < max_constraints) {
1614       min_n = max_size_of_constraint;
1615       max_n = max_constraints;
1616     }
1617     ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr);
1618 #if defined(PETSC_USE_COMPLEX)
1619     ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr);
1620 #endif
1621     /* now we evaluate the optimal workspace using query with lwork=-1 */
1622     lwork = -1;
1623     ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr);
1624     ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr);
1625     ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr);
1626     ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1627 #if !defined(PETSC_USE_COMPLEX)
1628     PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,&lierr));
1629 #else
1630     PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,rwork,&lierr));
1631 #endif
1632     ierr = PetscFPTrapPop();CHKERRQ(ierr);
1633     if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr);
1634 #endif /* on missing GESVD */
1635     /* Allocate optimal workspace */
1636     ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr);
1637     ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr);
1638   }
1639   /* Now we can loop on constraining sets */
1640   total_counts = 0;
1641   temp_indices[0] = 0;
1642   /* vertices */
1643   if (ISForVertices) {
1644     ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1645     if (nnsp_has_cnst) { /* consider all vertices */
1646       ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr);
1647       for (i=0;i<n_vertices;i++) {
1648         temp_quadrature_constraint[temp_indices[total_counts]]=1.0;
1649         temp_indices[total_counts+1]=temp_indices[total_counts]+1;
1650         total_counts++;
1651       }
1652     } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */
1653       PetscBool used_vertex;
1654       for (i=0;i<n_vertices;i++) {
1655         used_vertex = PETSC_FALSE;
1656         k = 0;
1657         while (!used_vertex && k<nnsp_size) {
1658           ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1659           if (PetscAbsScalar(array[is_indices[i]])>0.0) {
1660             temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i];
1661             temp_quadrature_constraint[temp_indices[total_counts]]=1.0;
1662             temp_indices[total_counts+1]=temp_indices[total_counts]+1;
1663             total_counts++;
1664             used_vertex = PETSC_TRUE;
1665           }
1666           ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1667           k++;
1668         }
1669       }
1670     }
1671     ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1672     n_vertices = total_counts;
1673   }
1674 
1675   /* edges and faces */
1676   for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) {
1677     if (ncc<n_ISForEdges) {
1678       used_IS = &ISForEdges[ncc];
1679       boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */
1680     } else {
1681       used_IS = &ISForFaces[ncc-n_ISForEdges];
1682       boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */
1683     }
1684     temp_constraints = 0;          /* zero the number of constraints I have on this conn comp */
1685     temp_start_ptr = total_counts; /* need to know the starting index of constraints stored */
1686     ierr = ISGetSize(*used_IS,&size_of_constraint);CHKERRQ(ierr);
1687     ierr = ISGetIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1688     /* change of basis should not be performed on local periodic nodes */
1689     if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE;
1690     if (nnsp_has_cnst) {
1691       PetscScalar quad_value;
1692       temp_constraints++;
1693       if (!pcbddc->use_nnsp_true) {
1694         quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint));
1695       } else {
1696         quad_value = 1.0;
1697       }
1698       ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr);
1699       for (j=0;j<size_of_constraint;j++) {
1700         temp_quadrature_constraint[temp_indices[total_counts]+j]=quad_value;
1701       }
1702       /* sort by global ordering if using lapack subroutines */
1703       if (!skip_lapack || pcbddc->use_qr_single) {
1704         ierr = ISLocalToGlobalMappingApply(matis->mapping,size_of_constraint,temp_indices_to_constraint+temp_indices[total_counts],gidxs);CHKERRQ(ierr);
1705         for (j=0;j<size_of_constraint;j++) {
1706           permutation[j]=j;
1707         }
1708         ierr = PetscSortIntWithPermutation(size_of_constraint,gidxs,permutation);CHKERRQ(ierr);
1709         for (j=0;j<size_of_constraint;j++) {
1710           temp_indices_to_constraint_work[j] = temp_indices_to_constraint[temp_indices[total_counts]+permutation[j]];
1711           temp_quadrature_constraint_work[j] = temp_quadrature_constraint[temp_indices[total_counts]+permutation[j]];
1712         }
1713         ierr = PetscMemcpy(temp_indices_to_constraint+temp_indices[total_counts],temp_indices_to_constraint_work,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr);
1714         ierr = PetscMemcpy(temp_quadrature_constraint+temp_indices[total_counts],temp_quadrature_constraint_work,size_of_constraint*sizeof(PetscScalar));CHKERRQ(ierr);
1715       }
1716       temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint;  /* store new starting point */
1717       total_counts++;
1718     }
1719     for (k=0;k<nnsp_size;k++) {
1720       PetscReal real_value;
1721       ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1722       ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr);
1723       for (j=0;j<size_of_constraint;j++) {
1724         temp_quadrature_constraint[temp_indices[total_counts]+j]=array[is_indices[j]];
1725       }
1726       ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1727       /* check if array is null on the connected component */
1728       ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
1729       PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_one));
1730       if (real_value > 0.0) { /* keep indices and values */
1731         /* sort by global ordering if using lapack subroutines */
1732         if (!skip_lapack || pcbddc->use_qr_single) {
1733           ierr = ISLocalToGlobalMappingApply(matis->mapping,size_of_constraint,temp_indices_to_constraint+temp_indices[total_counts],gidxs);CHKERRQ(ierr);
1734           for (j=0;j<size_of_constraint;j++) {
1735             permutation[j]=j;
1736           }
1737           ierr = PetscSortIntWithPermutation(size_of_constraint,gidxs,permutation);CHKERRQ(ierr);
1738           for (j=0;j<size_of_constraint;j++) {
1739             temp_indices_to_constraint_work[j] = temp_indices_to_constraint[temp_indices[total_counts]+permutation[j]];
1740             temp_quadrature_constraint_work[j] = temp_quadrature_constraint[temp_indices[total_counts]+permutation[j]];
1741           }
1742           ierr = PetscMemcpy(temp_indices_to_constraint+temp_indices[total_counts],temp_indices_to_constraint_work,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr);
1743           ierr = PetscMemcpy(temp_quadrature_constraint+temp_indices[total_counts],temp_quadrature_constraint_work,size_of_constraint*sizeof(PetscScalar));CHKERRQ(ierr);
1744         }
1745         temp_constraints++;
1746         temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint;  /* store new starting point */
1747         total_counts++;
1748       }
1749     }
1750     ierr = ISRestoreIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1751     valid_constraints = temp_constraints;
1752     if (!pcbddc->use_nnsp_true && temp_constraints) {
1753       if (temp_constraints == 1) { /* just normalize the constraint */
1754         PetscScalar norm;
1755         ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
1756         PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,temp_quadrature_constraint+temp_indices[temp_start_ptr],&Blas_one,temp_quadrature_constraint+temp_indices[temp_start_ptr],&Blas_one));
1757         norm = 1.0/PetscSqrtReal(PetscRealPart(norm));
1758         PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,temp_quadrature_constraint+temp_indices[temp_start_ptr],&Blas_one));
1759       } else { /* perform SVD */
1760         PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */
1761 
1762 #if defined(PETSC_MISSING_LAPACK_GESVD)
1763         /* SVD: Y = U*S*V^H                -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag
1764            POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2)
1765            -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined
1766               the constraints basis will differ (by a complex factor with absolute value equal to 1)
1767               from that computed using LAPACKgesvd
1768            -> This is due to a different computation of eigenvectors in LAPACKheev
1769            -> The quality of the POD-computed basis will be the same */
1770         ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
1771         /* Store upper triangular part of correlation matrix */
1772         ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
1773         ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1774         for (j=0;j<temp_constraints;j++) {
1775           for (k=0;k<j+1;k++) {
1776             PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k]=BLASdot_(&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr+k]],&Blas_one,&temp_quadrature_constraint[temp_indices[temp_start_ptr+j]],&Blas_one_2));
1777           }
1778         }
1779         /* compute eigenvalues and eigenvectors of correlation matrix */
1780         ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
1781         ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr);
1782 #if !defined(PETSC_USE_COMPLEX)
1783         PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr));
1784 #else
1785         PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr));
1786 #endif
1787         ierr = PetscFPTrapPop();CHKERRQ(ierr);
1788         if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr);
1789         /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */
1790         j = 0;
1791         while (j < temp_constraints && singular_vals[j] < tol) j++;
1792         total_counts = total_counts-j;
1793         valid_constraints = temp_constraints-j;
1794         /* scale and copy POD basis into used quadrature memory */
1795         ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
1796         ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
1797         ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr);
1798         ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1799         ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr);
1800         ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr);
1801         if (j<temp_constraints) {
1802           PetscInt ii;
1803           for (k=j;k<temp_constraints;k++) singular_vals[k]=1.0/PetscSqrtReal(singular_vals[k]);
1804           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1805           PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC));
1806           ierr = PetscFPTrapPop();CHKERRQ(ierr);
1807           for (k=0;k<temp_constraints-j;k++) {
1808             for (ii=0;ii<size_of_constraint;ii++) {
1809               temp_quadrature_constraint[temp_indices[temp_start_ptr+k]+ii]=singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii];
1810             }
1811           }
1812         }
1813 #else  /* on missing GESVD */
1814         ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
1815         ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
1816         ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1817         ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1818 #if !defined(PETSC_USE_COMPLEX)
1819         PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,&lierr));
1820 #else
1821         PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,rwork,&lierr));
1822 #endif
1823         if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr);
1824         ierr = PetscFPTrapPop();CHKERRQ(ierr);
1825         /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */
1826         k = temp_constraints;
1827         if (k > size_of_constraint) k = size_of_constraint;
1828         j = 0;
1829         while (j < k && singular_vals[k-j-1] < tol) j++;
1830         valid_constraints = k-j;
1831         total_counts = total_counts-temp_constraints+valid_constraints;
1832 #endif /* on missing GESVD */
1833       }
1834     }
1835     /* setting change_of_basis flag is safe now */
1836     if (boolforchange) {
1837       for (j=0;j<valid_constraints;j++) {
1838         PetscBTSet(change_basis,total_counts-j-1);
1839       }
1840     }
1841   }
1842   /* free index sets of faces, edges and vertices */
1843   for (i=0;i<n_ISForFaces;i++) {
1844     ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr);
1845   }
1846   if (n_ISForFaces) {
1847     ierr = PetscFree(ISForFaces);CHKERRQ(ierr);
1848   }
1849   for (i=0;i<n_ISForEdges;i++) {
1850     ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr);
1851   }
1852   if (n_ISForEdges) {
1853     ierr = PetscFree(ISForEdges);CHKERRQ(ierr);
1854   }
1855   ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr);
1856   /* map temp_indices_to_constraint in boundary numbering */
1857   ierr = ISGlobalToLocalMappingApply(pcbddc->BtoNmap,IS_GTOLM_DROP,temp_indices[total_counts],temp_indices_to_constraint,&i,temp_indices_to_constraint_B);CHKERRQ(ierr);
1858   if (i != temp_indices[total_counts]) {
1859     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %d != %d\n",temp_indices[total_counts],i);
1860   }
1861 
1862   /* free workspace */
1863   ierr = PetscFree4(gidxs,permutation,temp_indices_to_constraint_work,temp_quadrature_constraint_work);CHKERRQ(ierr);
1864   if (!skip_lapack) {
1865     ierr = PetscFree(work);CHKERRQ(ierr);
1866 #if defined(PETSC_USE_COMPLEX)
1867     ierr = PetscFree(rwork);CHKERRQ(ierr);
1868 #endif
1869     ierr = PetscFree(singular_vals);CHKERRQ(ierr);
1870 #if defined(PETSC_MISSING_LAPACK_GESVD)
1871     ierr = PetscFree(correlation_mat);CHKERRQ(ierr);
1872     ierr = PetscFree(temp_basis);CHKERRQ(ierr);
1873 #endif
1874   }
1875   for (k=0;k<nnsp_size;k++) {
1876     ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr);
1877   }
1878   ierr = PetscFree(localnearnullsp);CHKERRQ(ierr);
1879 
1880   /* set quantities in pcbddc data structure and store previous primal size */
1881   /* n_vertices defines the number of subdomain corners in the primal space */
1882   /* n_constraints defines the number of averages (they can be point primal dofs if change of basis is requested) */
1883   olocal_primal_size = pcbddc->local_primal_size;
1884   pcbddc->local_primal_size = total_counts;
1885   pcbddc->n_vertices = n_vertices;
1886   pcbddc->n_constraints = pcbddc->local_primal_size-pcbddc->n_vertices;
1887 
1888   /* Create constraint matrix */
1889   /* The constraint matrix is used to compute the l2g map of primal dofs */
1890   /* so we need to set it up properly either with or without change of basis */
1891   ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
1892   ierr = MatSetType(pcbddc->ConstraintMatrix,impMatType);CHKERRQ(ierr);
1893   ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr);
1894   /* array to compute a local numbering of constraints : vertices first then constraints */
1895   ierr = PetscMalloc1(pcbddc->local_primal_size,&aux_primal_numbering);CHKERRQ(ierr);
1896   /* array to select the proper local node (of minimum index with respect to global ordering) when changing the basis */
1897   /* note: it should not be needed since IS for faces and edges are already sorted by global ordering when analyzing the graph but... just in case */
1898   ierr = PetscMalloc1(pcbddc->local_primal_size,&aux_primal_minloc);CHKERRQ(ierr);
1899   /* auxiliary stuff for basis change */
1900   ierr = PetscMalloc1(max_size_of_constraint,&global_indices);CHKERRQ(ierr);
1901   ierr = PetscBTCreate(pcis->n_B,&touched);CHKERRQ(ierr);
1902 
1903   /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */
1904   total_primal_vertices=0;
1905   for (i=0;i<pcbddc->local_primal_size;i++) {
1906     size_of_constraint=temp_indices[i+1]-temp_indices[i];
1907     if (size_of_constraint == 1) {
1908       ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]]);CHKERRQ(ierr);
1909       aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]];
1910       aux_primal_minloc[total_primal_vertices]=0;
1911       total_primal_vertices++;
1912     } else if (PetscBTLookup(change_basis,i)) { /* Same procedure used in PCBDDCGetPrimalConstraintsLocalIdx */
1913       PetscInt min_loc,min_index;
1914       ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],global_indices);CHKERRQ(ierr);
1915       /* find first untouched local node */
1916       k = 0;
1917       while (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) k++;
1918       min_index = global_indices[k];
1919       min_loc = k;
1920       /* search the minimum among global nodes already untouched on the cc */
1921       for (k=1;k<size_of_constraint;k++) {
1922         /* there can be more than one constraint on a single connected component */
1923         if (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k]) && min_index > global_indices[k]) {
1924           min_index = global_indices[k];
1925           min_loc = k;
1926         }
1927       }
1928       ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]+min_loc]);CHKERRQ(ierr);
1929       aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]+min_loc];
1930       aux_primal_minloc[total_primal_vertices]=min_loc;
1931       total_primal_vertices++;
1932     }
1933   }
1934   /* determine if a QR strategy is needed for change of basis */
1935   qr_needed = PETSC_FALSE;
1936   ierr = PetscBTCreate(pcbddc->local_primal_size,&qr_needed_idx);CHKERRQ(ierr);
1937   for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
1938     if (PetscBTLookup(change_basis,i)) {
1939       if (!pcbddc->use_qr_single) {
1940         size_of_constraint = temp_indices[i+1]-temp_indices[i];
1941         j = 0;
1942         for (k=0;k<size_of_constraint;k++) {
1943           if (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) {
1944             j++;
1945           }
1946         }
1947         /* found more than one primal dof on the cc */
1948         if (j > 1) {
1949           PetscBTSet(qr_needed_idx,i);
1950           qr_needed = PETSC_TRUE;
1951         }
1952       } else {
1953         PetscBTSet(qr_needed_idx,i);
1954         qr_needed = PETSC_TRUE;
1955       }
1956     }
1957   }
1958   /* free workspace */
1959   ierr = PetscFree(global_indices);CHKERRQ(ierr);
1960 
1961   /* permute indices in order to have a sorted set of vertices */
1962   ierr = PetscSortInt(total_primal_vertices,aux_primal_numbering);CHKERRQ(ierr);
1963 
1964   /* nonzero structure of constraint matrix */
1965   ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr);
1966   for (i=0;i<total_primal_vertices;i++) nnz[i]=1;
1967   j=total_primal_vertices;
1968   for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
1969     if (!PetscBTLookup(change_basis,i)) {
1970       nnz[j]=temp_indices[i+1]-temp_indices[i];
1971       j++;
1972     }
1973   }
1974   ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr);
1975   ierr = PetscFree(nnz);CHKERRQ(ierr);
1976   /* set values in constraint matrix */
1977   for (i=0;i<total_primal_vertices;i++) {
1978     ierr = MatSetValue(pcbddc->ConstraintMatrix,i,aux_primal_numbering[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
1979   }
1980   total_counts = total_primal_vertices;
1981   for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
1982     if (!PetscBTLookup(change_basis,i)) {
1983       size_of_constraint=temp_indices[i+1]-temp_indices[i];
1984       ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&total_counts,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],&temp_quadrature_constraint[temp_indices[i]],INSERT_VALUES);CHKERRQ(ierr);
1985       total_counts++;
1986     }
1987   }
1988   /* assembling */
1989   ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1990   ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1991   /*
1992   ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
1993   ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr);
1994   */
1995   /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */
1996   if (pcbddc->use_change_of_basis) {
1997     /* dual and primal dofs on a single cc */
1998     PetscInt     dual_dofs,primal_dofs;
1999     /* iterator on aux_primal_minloc (ordered as read from nearnullspace: vertices, edges and then constraints) */
2000     PetscInt     primal_counter;
2001     /* working stuff for GEQRF */
2002     PetscScalar  *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t;
2003     PetscBLASInt lqr_work;
2004     /* working stuff for UNGQR */
2005     PetscScalar  *gqr_work,lgqr_work_t;
2006     PetscBLASInt lgqr_work;
2007     /* working stuff for TRTRS */
2008     PetscScalar  *trs_rhs;
2009     PetscBLASInt Blas_NRHS;
2010     /* pointers for values insertion into change of basis matrix */
2011     PetscInt     *start_rows,*start_cols;
2012     PetscScalar  *start_vals;
2013     /* working stuff for values insertion */
2014     PetscBT      is_primal;
2015     /* matrix sizes */
2016     PetscInt     global_size,local_size;
2017     /* work array for nonzeros */
2018     PetscScalar  *nnz_array;
2019     /* temporary change of basis */
2020     Mat          localChangeOfBasisMatrix;
2021     /* auxiliary work for global change of basis */
2022     Vec          nnz_vec;
2023     PetscInt     *idxs_I,*idxs_B,*idxs_all,*d_nnz,*o_nnz;
2024     PetscInt     nvtxs,*xadj,*adjncy,*idxs_mapped;
2025     PetscScalar  *vals;
2026     PetscBool    done;
2027 
2028     /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */
2029     ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr);
2030     ierr = MatSetType(localChangeOfBasisMatrix,impMatType);CHKERRQ(ierr);
2031     ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n_B,pcis->n_B,pcis->n_B,pcis->n_B);CHKERRQ(ierr);
2032 
2033     /* nonzeros for local mat */
2034     ierr = PetscMalloc1(pcis->n_B,&nnz);CHKERRQ(ierr);
2035     for (i=0;i<pcis->n_B;i++) nnz[i]=1;
2036     for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
2037       if (PetscBTLookup(change_basis,i)) {
2038         size_of_constraint = temp_indices[i+1]-temp_indices[i];
2039         if (PetscBTLookup(qr_needed_idx,i)) {
2040           for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = size_of_constraint;
2041         } else {
2042           for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = 2;
2043           /* get local primal index on the cc */
2044           j = 0;
2045           while (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+j])) j++;
2046           nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = size_of_constraint;
2047         }
2048       }
2049     }
2050     ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr);
2051     /* Set initial identity in the matrix */
2052     for (i=0;i<pcis->n_B;i++) {
2053       ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr);
2054     }
2055 
2056     if (pcbddc->dbg_flag) {
2057       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr);
2058       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr);
2059     }
2060 
2061 
2062     /* Now we loop on the constraints which need a change of basis */
2063     /*
2064        Change of basis matrix is evaluated similarly to the FIRST APPROACH in
2065        Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1)
2066 
2067        Basic blocks of change of basis matrix T computed by
2068 
2069           - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified)
2070 
2071             | 1        0   ...        0         s_1/S |
2072             | 0        1   ...        0         s_2/S |
2073             |              ...                        |
2074             | 0        ...            1     s_{n-1}/S |
2075             | -s_1/s_n ...    -s_{n-1}/s_n      s_n/S |
2076 
2077             with S = \sum_{i=1}^n s_i^2
2078             NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering
2079                   in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering
2080 
2081           - QR decomposition of constraints otherwise
2082     */
2083     if (qr_needed) {
2084       /* space to store Q */
2085       ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr);
2086       /* first we issue queries for optimal work */
2087       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr);
2088       ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr);
2089       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2090       lqr_work = -1;
2091       PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr));
2092       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr);
2093       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr);
2094       ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr);
2095       lgqr_work = -1;
2096       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr);
2097       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr);
2098       ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr);
2099       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2100       if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */
2101       PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr));
2102       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr);
2103       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr);
2104       ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr);
2105       /* array to store scaling factors for reflectors */
2106       ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr);
2107       /* array to store rhs and solution of triangular solver */
2108       ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr);
2109       /* allocating workspace for check */
2110       if (pcbddc->dbg_flag) {
2111         ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&work);CHKERRQ(ierr);
2112       }
2113     }
2114     /* array to store whether a node is primal or not */
2115     ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr);
2116     ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr);
2117     ierr = ISGlobalToLocalMappingApply(pcbddc->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,aux_primal_numbering,&i,aux_primal_numbering_B);CHKERRQ(ierr);
2118     if (i != total_primal_vertices) {
2119       SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",total_primal_vertices,i);
2120     }
2121     for (i=0;i<total_primal_vertices;i++) {
2122       ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr);
2123     }
2124     ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr);
2125 
2126     /* loop on constraints and see whether or not they need a change of basis and compute it */
2127     /* -> using implicit ordering contained in temp_indices data */
2128     total_counts = pcbddc->n_vertices;
2129     primal_counter = total_counts;
2130     while (total_counts<pcbddc->local_primal_size) {
2131       primal_dofs = 1;
2132       if (PetscBTLookup(change_basis,total_counts)) {
2133         /* get all constraints with same support: if more then one constraint is present on the cc then surely indices are stored contiguosly */
2134         while (total_counts+primal_dofs < pcbddc->local_primal_size && temp_indices_to_constraint_B[temp_indices[total_counts]] == temp_indices_to_constraint_B[temp_indices[total_counts+primal_dofs]]) {
2135           primal_dofs++;
2136         }
2137         /* get constraint info */
2138         size_of_constraint = temp_indices[total_counts+1]-temp_indices[total_counts];
2139         dual_dofs = size_of_constraint-primal_dofs;
2140 
2141         if (pcbddc->dbg_flag) {
2142           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d to %d (incl) need a change of basis (size %d)\n",total_counts,total_counts+primal_dofs-1,size_of_constraint);CHKERRQ(ierr);
2143         }
2144 
2145         if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */
2146 
2147           /* copy quadrature constraints for change of basis check */
2148           if (pcbddc->dbg_flag) {
2149             ierr = PetscMemcpy(work,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
2150           }
2151           /* copy temporary constraints into larger work vector (in order to store all columns of Q) */
2152           ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
2153 
2154           /* compute QR decomposition of constraints */
2155           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2156           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
2157           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2158           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2159           PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr));
2160           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr);
2161           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2162 
2163           /* explictly compute R^-T */
2164           ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr);
2165           for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0;
2166           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
2167           ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr);
2168           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2169           ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr);
2170           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2171           PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr));
2172           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr);
2173           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2174 
2175           /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */
2176           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2177           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2178           ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr);
2179           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2180           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2181           PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr));
2182           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr);
2183           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2184 
2185           /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints
2186              i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below)
2187              where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */
2188           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2189           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
2190           ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr);
2191           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2192           ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr);
2193           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr);
2194           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2195           PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_LDC));
2196           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2197           ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
2198 
2199           /* insert values in change of basis matrix respecting global ordering of new primal dofs */
2200           start_rows = &temp_indices_to_constraint_B[temp_indices[total_counts]];
2201           /* insert cols for primal dofs */
2202           for (j=0;j<primal_dofs;j++) {
2203             start_vals = &qr_basis[j*size_of_constraint];
2204             start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+aux_primal_minloc[primal_counter+j]];
2205             ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr);
2206           }
2207           /* insert cols for dual dofs */
2208           for (j=0,k=0;j<dual_dofs;k++) {
2209             if (!PetscBTLookup(is_primal,temp_indices_to_constraint_B[temp_indices[total_counts]+k])) {
2210               start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint];
2211               start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+k];
2212               ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr);
2213               j++;
2214             }
2215           }
2216 
2217           /* check change of basis */
2218           if (pcbddc->dbg_flag) {
2219             PetscInt   ii,jj;
2220             PetscBool valid_qr=PETSC_TRUE;
2221             ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr);
2222             ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2223             ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr);
2224             ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2225             ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr);
2226             ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr);
2227             ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2228             PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&work[size_of_constraint*primal_dofs],&Blas_LDC));
2229             ierr = PetscFPTrapPop();CHKERRQ(ierr);
2230             for (jj=0;jj<size_of_constraint;jj++) {
2231               for (ii=0;ii<primal_dofs;ii++) {
2232                 if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE;
2233                 if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE;
2234               }
2235             }
2236             if (!valid_qr) {
2237               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr);
2238               for (jj=0;jj<size_of_constraint;jj++) {
2239                 for (ii=0;ii<primal_dofs;ii++) {
2240                   if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) {
2241                     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
2242                   }
2243                   if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) {
2244                     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
2245                   }
2246                 }
2247               }
2248             } else {
2249               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr);
2250             }
2251           }
2252         } else { /* simple transformation block */
2253           PetscInt    row,col;
2254           PetscScalar val,norm;
2255 
2256           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2257           PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,temp_quadrature_constraint+temp_indices[total_counts],&Blas_one,temp_quadrature_constraint+temp_indices[total_counts],&Blas_one));
2258           for (j=0;j<size_of_constraint;j++) {
2259             row = temp_indices_to_constraint_B[temp_indices[total_counts]+j];
2260             if (!PetscBTLookup(is_primal,row)) {
2261               col = temp_indices_to_constraint_B[temp_indices[total_counts]+aux_primal_minloc[primal_counter]];
2262               ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr);
2263               ierr = MatSetValue(localChangeOfBasisMatrix,row,col,temp_quadrature_constraint[temp_indices[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr);
2264             } else {
2265               for (k=0;k<size_of_constraint;k++) {
2266                 col = temp_indices_to_constraint_B[temp_indices[total_counts]+k];
2267                 if (row != col) {
2268                   val = -temp_quadrature_constraint[temp_indices[total_counts]+k]/temp_quadrature_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter]];
2269                 } else {
2270                   val = temp_quadrature_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter]]/norm;
2271                 }
2272                 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr);
2273               }
2274             }
2275           }
2276           if (pcbddc->dbg_flag) {
2277             ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr);
2278           }
2279         }
2280         /* increment primal counter */
2281         primal_counter += primal_dofs;
2282       } else {
2283         if (pcbddc->dbg_flag) {
2284           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,temp_indices[total_counts+1]-temp_indices[total_counts]);CHKERRQ(ierr);
2285         }
2286       }
2287       /* increment constraint counter total_counts */
2288       total_counts += primal_dofs;
2289     }
2290 
2291     /* free workspace */
2292     if (qr_needed) {
2293       if (pcbddc->dbg_flag) {
2294         ierr = PetscFree(work);CHKERRQ(ierr);
2295       }
2296       ierr = PetscFree(trs_rhs);CHKERRQ(ierr);
2297       ierr = PetscFree(qr_tau);CHKERRQ(ierr);
2298       ierr = PetscFree(qr_work);CHKERRQ(ierr);
2299       ierr = PetscFree(gqr_work);CHKERRQ(ierr);
2300       ierr = PetscFree(qr_basis);CHKERRQ(ierr);
2301     }
2302     ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr);
2303     ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2304     ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2305 
2306     /* assembling of global change of variable */
2307     ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
2308     ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr);
2309     ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr);
2310     ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr);
2311     ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr);
2312     ierr = MatSetLocalToGlobalMapping(pcbddc->ChangeOfBasisMatrix,matis->mapping,matis->mapping);CHKERRQ(ierr);
2313 
2314     /* nonzeros (overestimated) */
2315     ierr = VecDuplicate(pcis->vec1_global,&nnz_vec);CHKERRQ(ierr);
2316     ierr = VecSetLocalToGlobalMapping(nnz_vec,matis->mapping);CHKERRQ(ierr);
2317     ierr = PetscMalloc2(pcis->n,&nnz_array,pcis->n,&idxs_all);CHKERRQ(ierr);
2318     for (i=0;i<pcis->n;i++) {
2319       nnz_array[i] = 1.0;
2320       idxs_all[i] = i;
2321     }
2322     ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&idxs_B);CHKERRQ(ierr);
2323     for (i=0;i<pcis->n_B;i++) {
2324       nnz_array[idxs_B[i]] = nnz[i];
2325     }
2326     if (pcis->n) {
2327       ierr = VecSetValuesLocal(nnz_vec,pcis->n,idxs_all,nnz_array,INSERT_VALUES);CHKERRQ(ierr);
2328     }
2329     ierr = VecAssemblyBegin(nnz_vec);CHKERRQ(ierr);
2330     ierr = VecAssemblyEnd(nnz_vec);CHKERRQ(ierr);
2331     ierr = PetscFree(nnz);CHKERRQ(ierr);
2332     ierr = PetscFree2(nnz_array,idxs_all);CHKERRQ(ierr);
2333     ierr = PetscMalloc2(local_size,&d_nnz,local_size,&o_nnz);CHKERRQ(ierr);
2334     ierr = VecGetArray(nnz_vec,&nnz_array);CHKERRQ(ierr);
2335     for (i=0;i<local_size;i++) {
2336       d_nnz[i] = PetscMin((PetscInt)(PetscRealPart(nnz_array[i])),local_size);
2337       o_nnz[i] = PetscMin((PetscInt)(PetscRealPart(nnz_array[i])),global_size-local_size);
2338     }
2339     ierr = VecRestoreArray(nnz_vec,&nnz_array);CHKERRQ(ierr);
2340     ierr = VecDestroy(&nnz_vec);CHKERRQ(ierr);
2341     ierr = MatMPIAIJSetPreallocation(pcbddc->ChangeOfBasisMatrix,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2342     ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
2343 
2344     /* Set identity on dirichlet dofs */
2345     ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&idxs_I);CHKERRQ(ierr);
2346     for (i=0;i<pcis->n-pcis->n_B;i++) {
2347       PetscScalar one=1.0;
2348       ierr = MatSetValuesLocal(pcbddc->ChangeOfBasisMatrix,1,idxs_I+i,1,idxs_I+i,&one,INSERT_VALUES);CHKERRQ(ierr);
2349     }
2350     ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&idxs_I);CHKERRQ(ierr);
2351 
2352     /* Set values at interface dofs */
2353     done = PETSC_TRUE;
2354     ierr = MatGetRowIJ(localChangeOfBasisMatrix,0,PETSC_FALSE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&done);CHKERRQ(ierr);
2355     if (!done) {
2356       SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Error in MatGetRowIJ called in %s\n",__FUNCT__);
2357     }
2358     ierr = MatSeqAIJGetArray(localChangeOfBasisMatrix,&vals);CHKERRQ(ierr);
2359     ierr = PetscMalloc1(xadj[nvtxs],&idxs_mapped);CHKERRQ(ierr);
2360     ierr = ISLocalToGlobalMappingApply(pcbddc->BtoNmap,xadj[nvtxs],adjncy,idxs_mapped);CHKERRQ(ierr);
2361     for (i=0;i<nvtxs;i++) {
2362       PetscInt    row,*cols,ncols;
2363       PetscScalar *mat_vals;
2364 
2365       row = idxs_B[i];
2366       ncols = xadj[i+1]-xadj[i];
2367       cols = idxs_mapped+xadj[i];
2368       mat_vals = vals+xadj[i];
2369       ierr = MatSetValuesLocal(pcbddc->ChangeOfBasisMatrix,1,&row,ncols,cols,mat_vals,INSERT_VALUES);CHKERRQ(ierr);
2370     }
2371     ierr = MatRestoreRowIJ(localChangeOfBasisMatrix,0,PETSC_FALSE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&done);CHKERRQ(ierr);
2372     if (!done) {
2373       SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Error in MatRestoreRowIJ called in %s\n",__FUNCT__);
2374     }
2375     ierr = MatSeqAIJRestoreArray(localChangeOfBasisMatrix,&vals);CHKERRQ(ierr);
2376     ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&idxs_B);CHKERRQ(ierr);
2377     ierr = PetscFree(idxs_mapped);CHKERRQ(ierr);
2378     ierr = MatAssemblyBegin(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2379     ierr = MatAssemblyEnd(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2380 
2381     /* check */
2382     if (pcbddc->dbg_flag) {
2383       PetscReal error;
2384       Vec       x,x_change;
2385 
2386       ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr);
2387       ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr);
2388       ierr = VecSetRandom(x,NULL);CHKERRQ(ierr);
2389       ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr);
2390       ierr = VecScatterBegin(pcis->global_to_B,x,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2391       ierr = VecScatterEnd(pcis->global_to_B,x,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2392       ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_B,pcis->vec2_B);CHKERRQ(ierr);
2393       ierr = VecScatterBegin(pcis->global_to_B,pcis->vec2_B,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2394       ierr = VecScatterEnd(pcis->global_to_B,pcis->vec2_B,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2395       ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr);
2396       ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr);
2397       ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr);
2398       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2399       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on B: %1.6e\n",error);CHKERRQ(ierr);
2400       ierr = VecDestroy(&x);CHKERRQ(ierr);
2401       ierr = VecDestroy(&x_change);CHKERRQ(ierr);
2402     }
2403     ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr);
2404   } else if (pcbddc->user_ChangeOfBasisMatrix) {
2405     ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr);
2406     pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix;
2407   }
2408 
2409   /* set up change of basis context */
2410   if (pcbddc->ChangeOfBasisMatrix) {
2411     PCBDDCChange_ctx change_ctx;
2412 
2413     if (!pcbddc->new_global_mat) {
2414       PetscInt global_size,local_size;
2415 
2416       ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr);
2417       ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr);
2418       ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->new_global_mat);CHKERRQ(ierr);
2419       ierr = MatSetSizes(pcbddc->new_global_mat,local_size,local_size,global_size,global_size);CHKERRQ(ierr);
2420       ierr = MatSetType(pcbddc->new_global_mat,MATSHELL);CHKERRQ(ierr);
2421       ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT,(void (*)(void))PCBDDCMatMult_Private);CHKERRQ(ierr);
2422       ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCMatMultTranspose_Private);CHKERRQ(ierr);
2423       ierr = PetscNew(&change_ctx);CHKERRQ(ierr);
2424       ierr = MatShellSetContext(pcbddc->new_global_mat,change_ctx);CHKERRQ(ierr);
2425     } else {
2426       ierr = MatShellGetContext(pcbddc->new_global_mat,&change_ctx);CHKERRQ(ierr);
2427       ierr = MatDestroy(&change_ctx->global_change);CHKERRQ(ierr);
2428       ierr = VecDestroyVecs(2,&change_ctx->work);CHKERRQ(ierr);
2429     }
2430     if (!pcbddc->user_ChangeOfBasisMatrix) {
2431       ierr = PetscObjectReference((PetscObject)pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
2432       change_ctx->global_change = pcbddc->ChangeOfBasisMatrix;
2433     } else {
2434       ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr);
2435       change_ctx->global_change = pcbddc->user_ChangeOfBasisMatrix;
2436     }
2437     ierr = VecDuplicateVecs(pcis->vec1_global,2,&change_ctx->work);CHKERRQ(ierr);
2438     ierr = MatSetUp(pcbddc->new_global_mat);CHKERRQ(ierr);
2439     ierr = MatAssemblyBegin(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2440     ierr = MatAssemblyEnd(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2441   }
2442 
2443   /* get indices in local ordering for vertices and constraints */
2444   if (olocal_primal_size == pcbddc->local_primal_size) { /* if this is true, I need to check if a new primal space has been introduced */
2445     ierr = PetscMalloc1(olocal_primal_size,&oprimal_indices_local_idxs);CHKERRQ(ierr);
2446     ierr = PetscMemcpy(oprimal_indices_local_idxs,pcbddc->primal_indices_local_idxs,olocal_primal_size*sizeof(PetscInt));CHKERRQ(ierr);
2447   }
2448   ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr);
2449   ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
2450   ierr = PetscMalloc1(pcbddc->local_primal_size,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
2451   ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&i,&aux_primal_numbering);CHKERRQ(ierr);
2452   ierr = PetscMemcpy(pcbddc->primal_indices_local_idxs,aux_primal_numbering,i*sizeof(PetscInt));CHKERRQ(ierr);
2453   ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr);
2454   ierr = PCBDDCGetPrimalConstraintsLocalIdx(pc,&j,&aux_primal_numbering);CHKERRQ(ierr);
2455   ierr = PetscMemcpy(&pcbddc->primal_indices_local_idxs[i],aux_primal_numbering,j*sizeof(PetscInt));CHKERRQ(ierr);
2456   ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr);
2457   /* set quantities in PCBDDC data struct */
2458   pcbddc->n_actual_vertices = i;
2459   /* check if a new primal space has been introduced */
2460   pcbddc->new_primal_space_local = PETSC_TRUE;
2461   if (olocal_primal_size == pcbddc->local_primal_size) {
2462     ierr = PetscMemcmp(pcbddc->primal_indices_local_idxs,oprimal_indices_local_idxs,olocal_primal_size,&pcbddc->new_primal_space_local);CHKERRQ(ierr);
2463     pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local);
2464     ierr = PetscFree(oprimal_indices_local_idxs);CHKERRQ(ierr);
2465   }
2466   /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */
2467   ierr = MPI_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
2468 
2469   /* flush dbg viewer */
2470   if (pcbddc->dbg_flag) {
2471     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2472   }
2473 
2474   /* free workspace */
2475   ierr = PetscBTDestroy(&touched);CHKERRQ(ierr);
2476   ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr);
2477   ierr = PetscFree(aux_primal_minloc);CHKERRQ(ierr);
2478   ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr);
2479   ierr = PetscFree(temp_indices);CHKERRQ(ierr);
2480   ierr = PetscFree3(temp_quadrature_constraint,temp_indices_to_constraint,temp_indices_to_constraint_B);CHKERRQ(ierr);
2481   PetscFunctionReturn(0);
2482 }
2483 
2484 #undef __FUNCT__
2485 #define __FUNCT__ "PCBDDCAnalyzeInterface"
2486 PetscErrorCode PCBDDCAnalyzeInterface(PC pc)
2487 {
2488   PC_BDDC     *pcbddc = (PC_BDDC*)pc->data;
2489   PC_IS       *pcis = (PC_IS*)pc->data;
2490   Mat_IS      *matis  = (Mat_IS*)pc->pmat->data;
2491   PetscInt    ierr,i,vertex_size;
2492   PetscViewer viewer=pcbddc->dbg_viewer;
2493 
2494   PetscFunctionBegin;
2495   /* Reset previously computed graph */
2496   ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr);
2497   /* Init local Graph struct */
2498   ierr = PCBDDCGraphInit(pcbddc->mat_graph,matis->mapping);CHKERRQ(ierr);
2499 
2500   /* Check validity of the csr graph passed in by the user */
2501   if (pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) {
2502     ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr);
2503   }
2504 
2505   /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */
2506   if (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) {
2507     Mat       mat_adj;
2508     PetscInt  *xadj,*adjncy;
2509     PetscInt  nvtxs;
2510     PetscBool flg_row=PETSC_TRUE;
2511 
2512     ierr = MatConvert(matis->A,MATMPIADJ,MAT_INITIAL_MATRIX,&mat_adj);CHKERRQ(ierr);
2513     ierr = MatGetRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr);
2514     if (!flg_row) {
2515       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatGetRowIJ called in %s\n",__FUNCT__);
2516     }
2517     if (pcbddc->use_local_adj) {
2518       ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr);
2519       pcbddc->deluxe_compute_rowadj = PETSC_FALSE;
2520     } else { /* just compute subdomain's connected components */
2521       IS                     is_dummy;
2522       ISLocalToGlobalMapping l2gmap_dummy;
2523       PetscInt               j,sum;
2524       PetscInt               *cxadj,*cadjncy;
2525       const PetscInt         *idxs;
2526       PCBDDCGraph            graph;
2527       PetscBT                is_on_boundary;
2528 
2529       ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n,0,1,&is_dummy);CHKERRQ(ierr);
2530       ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr);
2531       ierr = ISDestroy(&is_dummy);CHKERRQ(ierr);
2532       ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr);
2533       ierr = PCBDDCGraphInit(graph,l2gmap_dummy);CHKERRQ(ierr);
2534       ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr);
2535       graph->xadj = xadj;
2536       graph->adjncy = adjncy;
2537       ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr);
2538       ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr);
2539 
2540       if (pcbddc->dbg_flag) {
2541         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] Found %d subdomains\n",PetscGlobalRank,graph->ncc);CHKERRQ(ierr);
2542         for (i=0;i<graph->ncc;i++) {
2543           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] %d cc size %d\n",PetscGlobalRank,i,graph->cptr[i+1]-graph->cptr[i]);CHKERRQ(ierr);
2544         }
2545         ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2546       }
2547 
2548       ierr = PetscBTCreate(nvtxs,&is_on_boundary);CHKERRQ(ierr);
2549       ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr);
2550       for (i=0;i<pcis->n_B;i++) {
2551         ierr = PetscBTSet(is_on_boundary,idxs[i]);CHKERRQ(ierr);
2552       }
2553       ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr);
2554 
2555       ierr = PetscCalloc1(nvtxs+1,&cxadj);CHKERRQ(ierr);
2556       sum = 0;
2557       for (i=0;i<graph->ncc;i++) {
2558         PetscInt sizecc = 0;
2559         for (j=graph->cptr[i];j<graph->cptr[i+1];j++) {
2560           if (PetscBTLookup(is_on_boundary,graph->queue[j])) {
2561             sizecc++;
2562           }
2563         }
2564         for (j=graph->cptr[i];j<graph->cptr[i+1];j++) {
2565           if (PetscBTLookup(is_on_boundary,graph->queue[j])) {
2566             cxadj[graph->queue[j]] = sizecc;
2567           }
2568         }
2569         sum += sizecc*sizecc;
2570       }
2571       ierr = PetscMalloc1(sum,&cadjncy);CHKERRQ(ierr);
2572       sum = 0;
2573       for (i=0;i<nvtxs;i++) {
2574         PetscInt temp = cxadj[i];
2575         cxadj[i] = sum;
2576         sum += temp;
2577       }
2578       cxadj[nvtxs] = sum;
2579       for (i=0;i<graph->ncc;i++) {
2580         for (j=graph->cptr[i];j<graph->cptr[i+1];j++) {
2581           if (PetscBTLookup(is_on_boundary,graph->queue[j])) {
2582             PetscInt k,sizecc = 0;
2583             for (k=graph->cptr[i];k<graph->cptr[i+1];k++) {
2584               if (PetscBTLookup(is_on_boundary,graph->queue[k])) {
2585                 cadjncy[cxadj[graph->queue[j]]+sizecc]=graph->queue[k];
2586                 sizecc++;
2587               }
2588             }
2589           }
2590         }
2591       }
2592       if (nvtxs) {
2593         ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,cxadj,cadjncy,PETSC_OWN_POINTER);CHKERRQ(ierr);
2594       } else {
2595         ierr = PetscFree(cxadj);CHKERRQ(ierr);
2596         ierr = PetscFree(cadjncy);CHKERRQ(ierr);
2597       }
2598       graph->xadj = 0;
2599       graph->adjncy = 0;
2600       ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr);
2601       ierr = PetscBTDestroy(&is_on_boundary);CHKERRQ(ierr);
2602     }
2603     ierr = MatRestoreRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr);
2604     if (!flg_row) {
2605       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatRestoreRowIJ called in %s\n",__FUNCT__);
2606     }
2607     ierr = MatDestroy(&mat_adj);CHKERRQ(ierr);
2608   }
2609 
2610   /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting or PCBDDCSetDofsSplittingLocal */
2611   vertex_size = 1;
2612   if (pcbddc->user_provided_isfordofs) {
2613     if (pcbddc->n_ISForDofs) { /* need to convert from global to local and remove references to global dofs splitting */
2614       ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr);
2615       for (i=0;i<pcbddc->n_ISForDofs;i++) {
2616         ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr);
2617         ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr);
2618       }
2619       pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs;
2620       pcbddc->n_ISForDofs = 0;
2621       ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr);
2622     }
2623     /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */
2624     ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr);
2625   } else {
2626     if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering */
2627       ierr = MatGetBlockSize(pc->pmat,&pcbddc->n_ISForDofsLocal);CHKERRQ(ierr);
2628       ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr);
2629       for (i=0;i<pcbddc->n_ISForDofsLocal;i++) {
2630         ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),pcis->n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr);
2631       }
2632     }
2633   }
2634 
2635   /* Setup of Graph */
2636   if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { /* need to convert from global to local */
2637     ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr);
2638   }
2639   if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { /* need to convert from global to local */
2640     ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr);
2641   }
2642   ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices);
2643 
2644   /* Graph's connected components analysis */
2645   ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr);
2646 
2647   /* print some info to stdout */
2648   if (pcbddc->dbg_flag) {
2649     ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer);
2650   }
2651 
2652   /* mark topography has done */
2653   pcbddc->recompute_topography = PETSC_FALSE;
2654   PetscFunctionReturn(0);
2655 }
2656 
2657 #undef __FUNCT__
2658 #define __FUNCT__ "PCBDDCGetPrimalVerticesLocalIdx"
2659 PetscErrorCode  PCBDDCGetPrimalVerticesLocalIdx(PC pc, PetscInt *n_vertices, PetscInt **vertices_idx)
2660 {
2661   PC_BDDC        *pcbddc = (PC_BDDC*)(pc->data);
2662   PetscInt       *vertices,*row_cmat_indices,n,i,size_of_constraint,local_primal_size;
2663   PetscErrorCode ierr;
2664 
2665   PetscFunctionBegin;
2666   n = 0;
2667   vertices = 0;
2668   if (pcbddc->ConstraintMatrix) {
2669     ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&i);CHKERRQ(ierr);
2670     for (i=0;i<local_primal_size;i++) {
2671       ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
2672       if (size_of_constraint == 1) n++;
2673       ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
2674     }
2675     if (vertices_idx) {
2676       ierr = PetscMalloc1(n,&vertices);CHKERRQ(ierr);
2677       n = 0;
2678       for (i=0;i<local_primal_size;i++) {
2679         ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
2680         if (size_of_constraint == 1) {
2681           vertices[n++]=row_cmat_indices[0];
2682         }
2683         ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
2684       }
2685     }
2686   }
2687   *n_vertices = n;
2688   if (vertices_idx) *vertices_idx = vertices;
2689   PetscFunctionReturn(0);
2690 }
2691 
2692 #undef __FUNCT__
2693 #define __FUNCT__ "PCBDDCGetPrimalConstraintsLocalIdx"
2694 PetscErrorCode  PCBDDCGetPrimalConstraintsLocalIdx(PC pc, PetscInt *n_constraints, PetscInt **constraints_idx)
2695 {
2696   PC_BDDC        *pcbddc = (PC_BDDC*)(pc->data);
2697   PetscInt       *constraints_index,*row_cmat_indices,*row_cmat_global_indices;
2698   PetscInt       n,i,j,size_of_constraint,local_primal_size,local_size,max_size_of_constraint,min_index,min_loc;
2699   PetscBT        touched;
2700   PetscErrorCode ierr;
2701 
2702     /* This function assumes that the number of local constraints per connected component
2703        is not greater than the number of nodes defined for the connected component
2704        (otherwise we will surely have linear dependence between constraints and thus a singular coarse problem) */
2705   PetscFunctionBegin;
2706   n = 0;
2707   constraints_index = 0;
2708   if (pcbddc->ConstraintMatrix) {
2709     ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&local_size);CHKERRQ(ierr);
2710     max_size_of_constraint = 0;
2711     for (i=0;i<local_primal_size;i++) {
2712       ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
2713       if (size_of_constraint > 1) {
2714         n++;
2715       }
2716       max_size_of_constraint = PetscMax(size_of_constraint,max_size_of_constraint);
2717       ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
2718     }
2719     if (constraints_idx) {
2720       ierr = PetscMalloc1(n,&constraints_index);CHKERRQ(ierr);
2721       ierr = PetscMalloc1(max_size_of_constraint,&row_cmat_global_indices);CHKERRQ(ierr);
2722       ierr = PetscBTCreate(local_size,&touched);CHKERRQ(ierr);
2723       n = 0;
2724       for (i=0;i<local_primal_size;i++) {
2725         ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
2726         if (size_of_constraint > 1) {
2727           ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,row_cmat_indices,row_cmat_global_indices);CHKERRQ(ierr);
2728           /* find first untouched local node */
2729           j = 0;
2730           while (PetscBTLookup(touched,row_cmat_indices[j])) j++;
2731           min_index = row_cmat_global_indices[j];
2732           min_loc = j;
2733           /* search the minimum among nodes not yet touched on the connected component
2734              since there can be more than one constraint on a single cc */
2735           for (j=1;j<size_of_constraint;j++) {
2736             if (!PetscBTLookup(touched,row_cmat_indices[j]) && min_index > row_cmat_global_indices[j]) {
2737               min_index = row_cmat_global_indices[j];
2738               min_loc = j;
2739             }
2740           }
2741           ierr = PetscBTSet(touched,row_cmat_indices[min_loc]);CHKERRQ(ierr);
2742           constraints_index[n++] = row_cmat_indices[min_loc];
2743         }
2744         ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
2745       }
2746       ierr = PetscBTDestroy(&touched);CHKERRQ(ierr);
2747       ierr = PetscFree(row_cmat_global_indices);CHKERRQ(ierr);
2748     }
2749   }
2750   *n_constraints = n;
2751   if (constraints_idx) *constraints_idx = constraints_index;
2752   PetscFunctionReturn(0);
2753 }
2754 
2755 #undef __FUNCT__
2756 #define __FUNCT__ "PCBDDCSubsetNumbering"
2757 PetscErrorCode PCBDDCSubsetNumbering(MPI_Comm comm,ISLocalToGlobalMapping l2gmap, PetscInt n_local_dofs, PetscInt local_dofs[], PetscInt local_dofs_mult[], PetscInt* n_global_subset, PetscInt* global_numbering_subset[])
2758 {
2759   Vec            local_vec,global_vec;
2760   IS             seqis,paris;
2761   VecScatter     scatter_ctx;
2762   PetscScalar    *array;
2763   PetscInt       *temp_global_dofs;
2764   PetscScalar    globalsum;
2765   PetscInt       i,j,s;
2766   PetscInt       nlocals,first_index,old_index,max_local;
2767   PetscMPIInt    rank_prec_comm,size_prec_comm,max_global;
2768   PetscMPIInt    *dof_sizes,*dof_displs;
2769   PetscBool      first_found;
2770   PetscErrorCode ierr;
2771 
2772   PetscFunctionBegin;
2773   /* mpi buffers */
2774   ierr = MPI_Comm_size(comm,&size_prec_comm);CHKERRQ(ierr);
2775   ierr = MPI_Comm_rank(comm,&rank_prec_comm);CHKERRQ(ierr);
2776   j = ( !rank_prec_comm ? size_prec_comm : 0);
2777   ierr = PetscMalloc1(j,&dof_sizes);CHKERRQ(ierr);
2778   ierr = PetscMalloc1(j,&dof_displs);CHKERRQ(ierr);
2779   /* get maximum size of subset */
2780   ierr = PetscMalloc1(n_local_dofs,&temp_global_dofs);CHKERRQ(ierr);
2781   ierr = ISLocalToGlobalMappingApply(l2gmap,n_local_dofs,local_dofs,temp_global_dofs);CHKERRQ(ierr);
2782   max_local = 0;
2783   for (i=0;i<n_local_dofs;i++) {
2784     if (max_local < temp_global_dofs[i] ) {
2785       max_local = temp_global_dofs[i];
2786     }
2787   }
2788   ierr = MPI_Allreduce(&max_local,&max_global,1,MPIU_INT,MPI_MAX,comm);CHKERRQ(ierr);
2789   max_global++;
2790   max_local = 0;
2791   for (i=0;i<n_local_dofs;i++) {
2792     if (max_local < local_dofs[i] ) {
2793       max_local = local_dofs[i];
2794     }
2795   }
2796   max_local++;
2797   /* allocate workspace */
2798   ierr = VecCreate(PETSC_COMM_SELF,&local_vec);CHKERRQ(ierr);
2799   ierr = VecSetSizes(local_vec,PETSC_DECIDE,max_local);CHKERRQ(ierr);
2800   ierr = VecSetType(local_vec,VECSEQ);CHKERRQ(ierr);
2801   ierr = VecCreate(comm,&global_vec);CHKERRQ(ierr);
2802   ierr = VecSetSizes(global_vec,PETSC_DECIDE,max_global);CHKERRQ(ierr);
2803   ierr = VecSetType(global_vec,VECMPI);CHKERRQ(ierr);
2804   /* create scatter */
2805   ierr = ISCreateGeneral(PETSC_COMM_SELF,n_local_dofs,local_dofs,PETSC_COPY_VALUES,&seqis);CHKERRQ(ierr);
2806   ierr = ISCreateGeneral(comm,n_local_dofs,temp_global_dofs,PETSC_COPY_VALUES,&paris);CHKERRQ(ierr);
2807   ierr = VecScatterCreate(local_vec,seqis,global_vec,paris,&scatter_ctx);CHKERRQ(ierr);
2808   ierr = ISDestroy(&seqis);CHKERRQ(ierr);
2809   ierr = ISDestroy(&paris);CHKERRQ(ierr);
2810   /* init array */
2811   ierr = VecSet(global_vec,0.0);CHKERRQ(ierr);
2812   ierr = VecSet(local_vec,0.0);CHKERRQ(ierr);
2813   ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr);
2814   if (local_dofs_mult) {
2815     for (i=0;i<n_local_dofs;i++) {
2816       array[local_dofs[i]]=(PetscScalar)local_dofs_mult[i];
2817     }
2818   } else {
2819     for (i=0;i<n_local_dofs;i++) {
2820       array[local_dofs[i]]=1.0;
2821     }
2822   }
2823   ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr);
2824   /* scatter into global vec and get total number of global dofs */
2825   ierr = VecScatterBegin(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2826   ierr = VecScatterEnd(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2827   ierr = VecSum(global_vec,&globalsum);CHKERRQ(ierr);
2828   *n_global_subset = (PetscInt)PetscRealPart(globalsum);
2829   /* Fill global_vec with cumulative function for global numbering */
2830   ierr = VecGetArray(global_vec,&array);CHKERRQ(ierr);
2831   ierr = VecGetLocalSize(global_vec,&s);CHKERRQ(ierr);
2832   nlocals = 0;
2833   first_index = -1;
2834   first_found = PETSC_FALSE;
2835   for (i=0;i<s;i++) {
2836     if (!first_found && PetscRealPart(array[i]) > 0.1) {
2837       first_found = PETSC_TRUE;
2838       first_index = i;
2839     }
2840     nlocals += (PetscInt)PetscRealPart(array[i]);
2841   }
2842   ierr = MPI_Gather(&nlocals,1,MPIU_INT,dof_sizes,1,MPIU_INT,0,comm);CHKERRQ(ierr);
2843   if (!rank_prec_comm) {
2844     dof_displs[0]=0;
2845     for (i=1;i<size_prec_comm;i++) {
2846       dof_displs[i] = dof_displs[i-1]+dof_sizes[i-1];
2847     }
2848   }
2849   ierr = MPI_Scatter(dof_displs,1,MPIU_INT,&nlocals,1,MPIU_INT,0,comm);CHKERRQ(ierr);
2850   if (first_found) {
2851     array[first_index] += (PetscScalar)nlocals;
2852     old_index = first_index;
2853     for (i=first_index+1;i<s;i++) {
2854       if (PetscRealPart(array[i]) > 0.1) {
2855         array[i] += array[old_index];
2856         old_index = i;
2857       }
2858     }
2859   }
2860   ierr = VecRestoreArray(global_vec,&array);CHKERRQ(ierr);
2861   ierr = VecSet(local_vec,0.0);CHKERRQ(ierr);
2862   ierr = VecScatterBegin(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2863   ierr = VecScatterEnd(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2864   /* get global ordering of local dofs */
2865   ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr);
2866   if (local_dofs_mult) {
2867     for (i=0;i<n_local_dofs;i++) {
2868       temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-local_dofs_mult[i];
2869     }
2870   } else {
2871     for (i=0;i<n_local_dofs;i++) {
2872       temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-1;
2873     }
2874   }
2875   ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr);
2876   /* free workspace */
2877   ierr = VecScatterDestroy(&scatter_ctx);CHKERRQ(ierr);
2878   ierr = VecDestroy(&local_vec);CHKERRQ(ierr);
2879   ierr = VecDestroy(&global_vec);CHKERRQ(ierr);
2880   ierr = PetscFree(dof_sizes);CHKERRQ(ierr);
2881   ierr = PetscFree(dof_displs);CHKERRQ(ierr);
2882   /* return pointer to global ordering of local dofs */
2883   *global_numbering_subset = temp_global_dofs;
2884   PetscFunctionReturn(0);
2885 }
2886 
2887 #undef __FUNCT__
2888 #define __FUNCT__ "PCBDDCOrthonormalizeVecs"
2889 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[])
2890 {
2891   PetscInt       i,j;
2892   PetscScalar    *alphas;
2893   PetscErrorCode ierr;
2894 
2895   PetscFunctionBegin;
2896   /* this implements stabilized Gram-Schmidt */
2897   ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr);
2898   for (i=0;i<n;i++) {
2899     ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr);
2900     if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); }
2901     for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); }
2902   }
2903   ierr = PetscFree(alphas);CHKERRQ(ierr);
2904   PetscFunctionReturn(0);
2905 }
2906 
2907 #undef __FUNCT__
2908 #define __FUNCT__ "MatISGetSubassemblingPattern"
2909 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt n_subdomains, PetscBool contiguous, IS* is_sends)
2910 {
2911   Mat             subdomain_adj;
2912   IS              new_ranks,ranks_send_to;
2913   MatPartitioning partitioner;
2914   Mat_IS          *matis;
2915   PetscInt        n_neighs,*neighs,*n_shared,**shared;
2916   PetscInt        prank;
2917   PetscMPIInt     size,rank,color;
2918   PetscInt        *xadj,*adjncy,*oldranks;
2919   PetscInt        *adjncy_wgt,*v_wgt,*is_indices,*ranks_send_to_idx;
2920   PetscInt        i,local_size,threshold=0;
2921   PetscErrorCode  ierr;
2922   PetscBool       use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE;
2923   PetscSubcomm    subcomm;
2924 
2925   PetscFunctionBegin;
2926   ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr);
2927   ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr);
2928   ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr);
2929 
2930   /* Get info on mapping */
2931   matis = (Mat_IS*)(mat->data);
2932   ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&local_size);CHKERRQ(ierr);
2933   ierr = ISLocalToGlobalMappingGetInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr);
2934 
2935   /* build local CSR graph of subdomains' connectivity */
2936   ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr);
2937   xadj[0] = 0;
2938   xadj[1] = PetscMax(n_neighs-1,0);
2939   ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr);
2940   ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr);
2941 
2942   if (threshold) {
2943     PetscInt xadj_count = 0;
2944     for (i=1;i<n_neighs;i++) {
2945       if (n_shared[i] > threshold) {
2946         adjncy[xadj_count] = neighs[i];
2947         adjncy_wgt[xadj_count] = n_shared[i];
2948         xadj_count++;
2949       }
2950     }
2951     xadj[1] = xadj_count;
2952   } else {
2953     if (xadj[1]) {
2954       ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr);
2955       ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr);
2956     }
2957   }
2958   ierr = ISLocalToGlobalMappingRestoreInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr);
2959   if (use_square) {
2960     for (i=0;i<xadj[1];i++) {
2961       adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i];
2962     }
2963   }
2964   ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr);
2965 
2966   ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr);
2967 
2968   /*
2969     Restrict work on active processes only.
2970   */
2971   ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr);
2972   ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */
2973   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
2974   ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr);
2975   ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr);
2976   if (color) {
2977     ierr = PetscFree(xadj);CHKERRQ(ierr);
2978     ierr = PetscFree(adjncy);CHKERRQ(ierr);
2979     ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr);
2980   } else {
2981     PetscInt coarsening_ratio;
2982     ierr = MPI_Comm_size(subcomm->comm,&size);CHKERRQ(ierr);
2983     ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr);
2984     prank = rank;
2985     ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,subcomm->comm);CHKERRQ(ierr);
2986     /*
2987     for (i=0;i<size;i++) {
2988       PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]);
2989     }
2990     */
2991     for (i=0;i<xadj[1];i++) {
2992       ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr);
2993     }
2994     ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr);
2995     ierr = MatCreateMPIAdj(subcomm->comm,1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr);
2996     /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */
2997 
2998     /* Partition */
2999     ierr = MatPartitioningCreate(subcomm->comm,&partitioner);CHKERRQ(ierr);
3000     ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr);
3001     if (use_vwgt) {
3002       ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr);
3003       v_wgt[0] = local_size;
3004       ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr);
3005     }
3006     n_subdomains = PetscMin((PetscInt)size,n_subdomains);
3007     coarsening_ratio = size/n_subdomains;
3008     ierr = MatPartitioningSetNParts(partitioner,n_subdomains);CHKERRQ(ierr);
3009     ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr);
3010     ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr);
3011     /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */
3012 
3013     ierr = ISGetIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr);
3014     if (contiguous) {
3015       ranks_send_to_idx[0] = oldranks[is_indices[0]]; /* contiguos set of processes */
3016     } else {
3017       ranks_send_to_idx[0] = coarsening_ratio*oldranks[is_indices[0]]; /* scattered set of processes */
3018     }
3019     ierr = ISRestoreIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr);
3020     /* clean up */
3021     ierr = PetscFree(oldranks);CHKERRQ(ierr);
3022     ierr = ISDestroy(&new_ranks);CHKERRQ(ierr);
3023     ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr);
3024     ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr);
3025   }
3026   ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr);
3027 
3028   /* assemble parallel IS for sends */
3029   i = 1;
3030   if (color) i=0;
3031   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr);
3032 
3033   /* get back IS */
3034   *is_sends = ranks_send_to;
3035   PetscFunctionReturn(0);
3036 }
3037 
3038 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate;
3039 
3040 #undef __FUNCT__
3041 #define __FUNCT__ "MatISSubassemble"
3042 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, MatReuse reuse, Mat *mat_n, PetscInt nis, IS isarray[])
3043 {
3044   Mat                    local_mat;
3045   Mat_IS                 *matis;
3046   IS                     is_sends_internal;
3047   PetscInt               rows,cols,new_local_rows;
3048   PetscInt               i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals;
3049   PetscBool              ismatis,isdense,newisdense,destroy_mat;
3050   ISLocalToGlobalMapping l2gmap;
3051   PetscInt*              l2gmap_indices;
3052   const PetscInt*        is_indices;
3053   MatType                new_local_type;
3054   /* buffers */
3055   PetscInt               *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs;
3056   PetscInt               *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is;
3057   PetscInt               *recv_buffer_idxs_local;
3058   PetscScalar            *ptr_vals,*send_buffer_vals,*recv_buffer_vals;
3059   /* MPI */
3060   MPI_Comm               comm,comm_n;
3061   PetscSubcomm           subcomm;
3062   PetscMPIInt            n_sends,n_recvs,commsize;
3063   PetscMPIInt            *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is;
3064   PetscMPIInt            *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals;
3065   PetscMPIInt            len,tag_idxs,tag_idxs_is,tag_vals,source_dest;
3066   MPI_Request            *send_req_idxs,*send_req_idxs_is,*send_req_vals;
3067   MPI_Request            *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals;
3068   PetscErrorCode         ierr;
3069 
3070   PetscFunctionBegin;
3071   /* TODO: add missing checks */
3072   PetscValidLogicalCollectiveInt(mat,n_subdomains,3);
3073   PetscValidLogicalCollectiveBool(mat,restrict_comm,4);
3074   PetscValidLogicalCollectiveEnum(mat,reuse,5);
3075   PetscValidLogicalCollectiveInt(mat,nis,7);
3076   ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr);
3077   if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__);
3078   ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr);
3079   ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr);
3080   if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE");
3081   ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr);
3082   if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square");
3083   if (reuse == MAT_REUSE_MATRIX && *mat_n) {
3084     PetscInt mrows,mcols,mnrows,mncols;
3085     ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr);
3086     if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS");
3087     ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr);
3088     ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr);
3089     if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows);
3090     if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols);
3091   }
3092   ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr);
3093   PetscValidLogicalCollectiveInt(mat,bs,0);
3094   /* prepare IS for sending if not provided */
3095   if (!is_sends) {
3096     PetscBool pcontig = PETSC_TRUE;
3097     if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains");
3098     ierr = MatISGetSubassemblingPattern(mat,n_subdomains,pcontig,&is_sends_internal);CHKERRQ(ierr);
3099   } else {
3100     ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr);
3101     is_sends_internal = is_sends;
3102   }
3103 
3104   /* get pointer of MATIS data */
3105   matis = (Mat_IS*)mat->data;
3106 
3107   /* get comm */
3108   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3109 
3110   /* compute number of sends */
3111   ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr);
3112   ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr);
3113 
3114   /* compute number of receives */
3115   ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr);
3116   ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr);
3117   ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr);
3118   ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr);
3119   for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1;
3120   ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr);
3121   ierr = PetscFree(iflags);CHKERRQ(ierr);
3122 
3123   /* restrict comm if requested */
3124   subcomm = 0;
3125   destroy_mat = PETSC_FALSE;
3126   if (restrict_comm) {
3127     PetscMPIInt color,rank,subcommsize;
3128     ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3129     color = 0;
3130     if (n_sends && !n_recvs) color = 1; /* sending only processes will not partecipate in new comm */
3131     ierr = MPI_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
3132     subcommsize = commsize - subcommsize;
3133     /* check if reuse has been requested */
3134     if (reuse == MAT_REUSE_MATRIX) {
3135       if (*mat_n) {
3136         PetscMPIInt subcommsize2;
3137         ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr);
3138         if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2);
3139         comm_n = PetscObjectComm((PetscObject)*mat_n);
3140       } else {
3141         comm_n = PETSC_COMM_SELF;
3142       }
3143     } else { /* MAT_INITIAL_MATRIX */
3144       ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr);
3145       ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr);
3146       ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr);
3147       comm_n = subcomm->comm;
3148     }
3149     /* flag to destroy *mat_n if not significative */
3150     if (color) destroy_mat = PETSC_TRUE;
3151   } else {
3152     comm_n = comm;
3153   }
3154 
3155   /* prepare send/receive buffers */
3156   ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr);
3157   ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr);
3158   ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr);
3159   ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr);
3160   if (nis) {
3161     ierr = PetscCalloc1(commsize,&ilengths_idxs_is);CHKERRQ(ierr);
3162   }
3163 
3164   /* Get data from local matrices */
3165   if (!isdense) {
3166     SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented");
3167     /* TODO: See below some guidelines on how to prepare the local buffers */
3168     /*
3169        send_buffer_vals should contain the raw values of the local matrix
3170        send_buffer_idxs should contain:
3171        - MatType_PRIVATE type
3172        - PetscInt        size_of_l2gmap
3173        - PetscInt        global_row_indices[size_of_l2gmap]
3174        - PetscInt        all_other_info_which_is_needed_to_compute_preallocation_and_set_values
3175     */
3176   } else {
3177     ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr);
3178     ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&i);CHKERRQ(ierr);
3179     ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr);
3180     send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE;
3181     send_buffer_idxs[1] = i;
3182     ierr = ISLocalToGlobalMappingGetIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr);
3183     ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr);
3184     ierr = ISLocalToGlobalMappingRestoreIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr);
3185     ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr);
3186     for (i=0;i<n_sends;i++) {
3187       ilengths_vals[is_indices[i]] = len*len;
3188       ilengths_idxs[is_indices[i]] = len+2;
3189     }
3190   }
3191   ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr);
3192   /* additional is (if any) */
3193   if (nis) {
3194     PetscMPIInt psum;
3195     PetscInt j;
3196     for (j=0,psum=0;j<nis;j++) {
3197       PetscInt plen;
3198       ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr);
3199       ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr);
3200       psum += len+1; /* indices + lenght */
3201     }
3202     ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr);
3203     for (j=0,psum=0;j<nis;j++) {
3204       PetscInt plen;
3205       const PetscInt *is_array_idxs;
3206       ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr);
3207       send_buffer_idxs_is[psum] = plen;
3208       ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr);
3209       ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr);
3210       ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr);
3211       psum += plen+1; /* indices + lenght */
3212     }
3213     for (i=0;i<n_sends;i++) {
3214       ilengths_idxs_is[is_indices[i]] = psum;
3215     }
3216     ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr);
3217   }
3218 
3219   buf_size_idxs = 0;
3220   buf_size_vals = 0;
3221   buf_size_idxs_is = 0;
3222   for (i=0;i<n_recvs;i++) {
3223     buf_size_idxs += (PetscInt)olengths_idxs[i];
3224     buf_size_vals += (PetscInt)olengths_vals[i];
3225     if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i];
3226   }
3227   ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr);
3228   ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr);
3229   ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr);
3230 
3231   /* get new tags for clean communications */
3232   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr);
3233   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr);
3234   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr);
3235 
3236   /* allocate for requests */
3237   ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr);
3238   ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr);
3239   ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr);
3240   ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr);
3241   ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr);
3242   ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr);
3243 
3244   /* communications */
3245   ptr_idxs = recv_buffer_idxs;
3246   ptr_vals = recv_buffer_vals;
3247   ptr_idxs_is = recv_buffer_idxs_is;
3248   for (i=0;i<n_recvs;i++) {
3249     source_dest = onodes[i];
3250     ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr);
3251     ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr);
3252     ptr_idxs += olengths_idxs[i];
3253     ptr_vals += olengths_vals[i];
3254     if (nis) {
3255       ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr);
3256       ptr_idxs_is += olengths_idxs_is[i];
3257     }
3258   }
3259   for (i=0;i<n_sends;i++) {
3260     ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr);
3261     ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr);
3262     ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr);
3263     if (nis) {
3264       ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr);
3265     }
3266   }
3267   ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr);
3268   ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr);
3269 
3270   /* assemble new l2g map */
3271   ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3272   ptr_idxs = recv_buffer_idxs;
3273   new_local_rows = 0;
3274   for (i=0;i<n_recvs;i++) {
3275     new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
3276     ptr_idxs += olengths_idxs[i];
3277   }
3278   ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr);
3279   ptr_idxs = recv_buffer_idxs;
3280   new_local_rows = 0;
3281   for (i=0;i<n_recvs;i++) {
3282     ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr);
3283     new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
3284     ptr_idxs += olengths_idxs[i];
3285   }
3286   ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr);
3287   ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr);
3288   ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr);
3289 
3290   /* infer new local matrix type from received local matrices type */
3291   /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */
3292   /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */
3293   if (n_recvs) {
3294     MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0];
3295     ptr_idxs = recv_buffer_idxs;
3296     for (i=0;i<n_recvs;i++) {
3297       if ((PetscInt)new_local_type_private != *ptr_idxs) {
3298         new_local_type_private = MATAIJ_PRIVATE;
3299         break;
3300       }
3301       ptr_idxs += olengths_idxs[i];
3302     }
3303     switch (new_local_type_private) {
3304       case MATDENSE_PRIVATE:
3305         if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */
3306           new_local_type = MATSEQAIJ;
3307           bs = 1;
3308         } else { /* if I receive only 1 dense matrix */
3309           new_local_type = MATSEQDENSE;
3310           bs = 1;
3311         }
3312         break;
3313       case MATAIJ_PRIVATE:
3314         new_local_type = MATSEQAIJ;
3315         bs = 1;
3316         break;
3317       case MATBAIJ_PRIVATE:
3318         new_local_type = MATSEQBAIJ;
3319         break;
3320       case MATSBAIJ_PRIVATE:
3321         new_local_type = MATSEQSBAIJ;
3322         break;
3323       default:
3324         SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,__FUNCT__);
3325         break;
3326     }
3327   } else { /* by default, new_local_type is seqdense */
3328     new_local_type = MATSEQDENSE;
3329     bs = 1;
3330   }
3331 
3332   /* create MATIS object if needed */
3333   if (reuse == MAT_INITIAL_MATRIX) {
3334     ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr);
3335     ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,mat_n);CHKERRQ(ierr);
3336   } else {
3337     /* it also destroys the local matrices */
3338     ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr);
3339   }
3340   ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr);
3341   ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr);
3342 
3343   ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3344 
3345   /* Global to local map of received indices */
3346   ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */
3347   ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr);
3348   ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr);
3349 
3350   /* restore attributes -> type of incoming data and its size */
3351   buf_size_idxs = 0;
3352   for (i=0;i<n_recvs;i++) {
3353     recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs];
3354     recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1];
3355     buf_size_idxs += (PetscInt)olengths_idxs[i];
3356   }
3357   ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr);
3358 
3359   /* set preallocation */
3360   ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr);
3361   if (!newisdense) {
3362     PetscInt *new_local_nnz=0;
3363 
3364     ptr_vals = recv_buffer_vals;
3365     ptr_idxs = recv_buffer_idxs_local;
3366     if (n_recvs) {
3367       ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr);
3368     }
3369     for (i=0;i<n_recvs;i++) {
3370       PetscInt j;
3371       if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */
3372         for (j=0;j<*(ptr_idxs+1);j++) {
3373           new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1);
3374         }
3375       } else {
3376         /* TODO */
3377       }
3378       ptr_idxs += olengths_idxs[i];
3379     }
3380     if (new_local_nnz) {
3381       for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows);
3382       ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr);
3383       for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs;
3384       ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr);
3385       for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0);
3386       ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr);
3387     } else {
3388       ierr = MatSetUp(local_mat);CHKERRQ(ierr);
3389     }
3390     ierr = PetscFree(new_local_nnz);CHKERRQ(ierr);
3391   } else {
3392     ierr = MatSetUp(local_mat);CHKERRQ(ierr);
3393   }
3394 
3395   /* set values */
3396   ptr_vals = recv_buffer_vals;
3397   ptr_idxs = recv_buffer_idxs_local;
3398   for (i=0;i<n_recvs;i++) {
3399     if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */
3400       ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
3401       ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr);
3402       ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
3403       ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
3404       ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr);
3405     } else {
3406       /* TODO */
3407     }
3408     ptr_idxs += olengths_idxs[i];
3409     ptr_vals += olengths_vals[i];
3410   }
3411   ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3412   ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3413   ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3414   ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3415   ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr);
3416   ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr);
3417 
3418 #if 0
3419   if (!restrict_comm) { /* check */
3420     Vec       lvec,rvec;
3421     PetscReal infty_error;
3422 
3423     ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr);
3424     ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr);
3425     ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr);
3426     ierr = VecScale(lvec,-1.0);CHKERRQ(ierr);
3427     ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr);
3428     ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr);
3429     ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error);
3430     ierr = VecDestroy(&rvec);CHKERRQ(ierr);
3431     ierr = VecDestroy(&lvec);CHKERRQ(ierr);
3432   }
3433 #endif
3434 
3435   /* assemble new additional is (if any) */
3436   if (nis) {
3437     PetscInt **temp_idxs,*count_is,j,psum;
3438 
3439     ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3440     ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr);
3441     ptr_idxs = recv_buffer_idxs_is;
3442     psum = 0;
3443     for (i=0;i<n_recvs;i++) {
3444       for (j=0;j<nis;j++) {
3445         PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */
3446         count_is[j] += plen; /* increment counting of buffer for j-th IS */
3447         psum += plen;
3448         ptr_idxs += plen+1; /* shift pointer to received data */
3449       }
3450     }
3451     ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr);
3452     ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr);
3453     for (i=1;i<nis;i++) {
3454       temp_idxs[i] = temp_idxs[i-1]+count_is[i-1];
3455     }
3456     ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr);
3457     ptr_idxs = recv_buffer_idxs_is;
3458     for (i=0;i<n_recvs;i++) {
3459       for (j=0;j<nis;j++) {
3460         PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */
3461         ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr);
3462         count_is[j] += plen; /* increment starting point of buffer for j-th IS */
3463         ptr_idxs += plen+1; /* shift pointer to received data */
3464       }
3465     }
3466     for (i=0;i<nis;i++) {
3467       ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr);
3468       ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr);
3469       ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr);
3470     }
3471     ierr = PetscFree(count_is);CHKERRQ(ierr);
3472     ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr);
3473     ierr = PetscFree(temp_idxs);CHKERRQ(ierr);
3474   }
3475   /* free workspace */
3476   ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr);
3477   ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3478   ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr);
3479   ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3480   if (isdense) {
3481     ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr);
3482     ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr);
3483   } else {
3484     /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */
3485   }
3486   if (nis) {
3487     ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3488     ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr);
3489   }
3490   ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr);
3491   ierr = PetscFree(recv_req_vals);CHKERRQ(ierr);
3492   ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr);
3493   ierr = PetscFree(send_req_idxs);CHKERRQ(ierr);
3494   ierr = PetscFree(send_req_vals);CHKERRQ(ierr);
3495   ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr);
3496   ierr = PetscFree(ilengths_vals);CHKERRQ(ierr);
3497   ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr);
3498   ierr = PetscFree(olengths_vals);CHKERRQ(ierr);
3499   ierr = PetscFree(olengths_idxs);CHKERRQ(ierr);
3500   ierr = PetscFree(onodes);CHKERRQ(ierr);
3501   if (nis) {
3502     ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr);
3503     ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr);
3504     ierr = PetscFree(onodes_is);CHKERRQ(ierr);
3505   }
3506   ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr);
3507   if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */
3508     ierr = MatDestroy(mat_n);CHKERRQ(ierr);
3509     for (i=0;i<nis;i++) {
3510       ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr);
3511     }
3512   }
3513   PetscFunctionReturn(0);
3514 }
3515 
3516 /* temporary hack into ksp private data structure */
3517 #include <petsc-private/kspimpl.h>
3518 
3519 #undef __FUNCT__
3520 #define __FUNCT__ "PCBDDCSetUpCoarseSolver"
3521 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals)
3522 {
3523   PC_BDDC                *pcbddc = (PC_BDDC*)pc->data;
3524   PC_IS                  *pcis = (PC_IS*)pc->data;
3525   Mat                    coarse_mat,coarse_mat_is,coarse_submat_dense;
3526   MatNullSpace           CoarseNullSpace=NULL;
3527   ISLocalToGlobalMapping coarse_islg;
3528   IS                     coarse_is,*isarray;
3529   PetscInt               i,im_active=-1,active_procs=-1;
3530   PetscInt               nis,nisdofs,nisneu;
3531   PC                     pc_temp;
3532   PCType                 coarse_pc_type;
3533   KSPType                coarse_ksp_type;
3534   PetscBool              multilevel_requested,multilevel_allowed;
3535   PetscBool              isredundant,isbddc,isnn,coarse_reuse;
3536   Mat                    t_coarse_mat_is;
3537   PetscInt               void_procs,ncoarse_ml,ncoarse_ds,ncoarse;
3538   PetscMPIInt            all_procs;
3539   PetscBool              csin_ml,csin_ds,csin,csin_type_simple,redist;
3540   PetscBool              compute_vecs = PETSC_FALSE;
3541   PetscScalar            *array;
3542   PetscErrorCode         ierr;
3543 
3544   PetscFunctionBegin;
3545   /* Assign global numbering to coarse dofs */
3546   if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */
3547     compute_vecs = PETSC_TRUE;
3548     PetscInt ocoarse_size;
3549     ocoarse_size = pcbddc->coarse_size;
3550     ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr);
3551     ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr);
3552     /* see if we can avoid some work */
3553     if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */
3554       if (ocoarse_size != pcbddc->coarse_size) { /* ...but with different size, so reset it and set reuse flag to false */
3555         ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr);
3556         coarse_reuse = PETSC_FALSE;
3557       } else { /* we can safely reuse already computed coarse matrix */
3558         coarse_reuse = PETSC_TRUE;
3559       }
3560     } else { /* there's no coarse ksp, so we need to create the coarse matrix too */
3561       coarse_reuse = PETSC_FALSE;
3562     }
3563     /* reset any subassembling information */
3564     ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr);
3565     ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
3566   } else { /* primal space is unchanged, so we can reuse coarse matrix */
3567     coarse_reuse = PETSC_TRUE;
3568   }
3569 
3570   /* count "active" (i.e. with positive local size) and "void" processes */
3571   im_active = !!(pcis->n);
3572   ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
3573   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&all_procs);CHKERRQ(ierr);
3574   void_procs = all_procs-active_procs;
3575   csin_type_simple = PETSC_TRUE;
3576   redist = PETSC_FALSE;
3577   if (pcbddc->current_level && void_procs) {
3578     csin_ml = PETSC_TRUE;
3579     ncoarse_ml = void_procs;
3580     csin_ds = PETSC_TRUE;
3581     ncoarse_ds = void_procs;
3582   } else {
3583     csin_ml = PETSC_FALSE;
3584     ncoarse_ml = all_procs;
3585     if (void_procs) {
3586       csin_ds = PETSC_TRUE;
3587       ncoarse_ds = void_procs;
3588       csin_type_simple = PETSC_FALSE;
3589     } else {
3590       if (pcbddc->redistribute_coarse && pcbddc->redistribute_coarse < all_procs) {
3591         csin_ds = PETSC_TRUE;
3592         ncoarse_ds = pcbddc->redistribute_coarse;
3593         redist = PETSC_TRUE;
3594       } else {
3595         csin_ds = PETSC_FALSE;
3596         ncoarse_ds = all_procs;
3597       }
3598     }
3599   }
3600 
3601   /*
3602     test if we can go multilevel: three conditions must be satisfied:
3603     - we have not exceeded the number of levels requested
3604     - we can actually subassemble the active processes
3605     - we can find a suitable number of MPI processes where we can place the subassembled problem
3606   */
3607   multilevel_allowed = PETSC_FALSE;
3608   multilevel_requested = PETSC_FALSE;
3609   if (pcbddc->current_level < pcbddc->max_levels) {
3610     multilevel_requested = PETSC_TRUE;
3611     if (active_procs/pcbddc->coarsening_ratio < 2 || ncoarse_ml/pcbddc->coarsening_ratio < 2) {
3612       multilevel_allowed = PETSC_FALSE;
3613     } else {
3614       multilevel_allowed = PETSC_TRUE;
3615     }
3616   }
3617   /* determine number of process partecipating to coarse solver */
3618   if (multilevel_allowed) {
3619     ncoarse = ncoarse_ml;
3620     csin = csin_ml;
3621   } else {
3622     ncoarse = ncoarse_ds;
3623     csin = csin_ds;
3624   }
3625 
3626   /* creates temporary l2gmap and IS for coarse indexes */
3627   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr);
3628   ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr);
3629 
3630   /* creates temporary MATIS object for coarse matrix */
3631   ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr);
3632   ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr);
3633   ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr);
3634   ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr);
3635 #if 0
3636   {
3637     PetscViewer viewer;
3638     char filename[256];
3639     sprintf(filename,"local_coarse_mat%d.m",PetscGlobalRank);
3640     ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr);
3641     ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
3642     ierr = MatView(coarse_submat_dense,viewer);CHKERRQ(ierr);
3643     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
3644   }
3645 #endif
3646   ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,&t_coarse_mat_is);CHKERRQ(ierr);
3647   ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr);
3648   ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3649   ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3650   ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr);
3651 
3652   /* compute dofs splitting and neumann boundaries for coarse dofs */
3653   if (multilevel_allowed && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal) ) { /* protects from unneded computations */
3654     PetscInt               *tidxs,*tidxs2,nout,tsize,i;
3655     const PetscInt         *idxs;
3656     ISLocalToGlobalMapping tmap;
3657 
3658     /* create map between primal indices (in local representative ordering) and local primal numbering */
3659     ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr);
3660     /* allocate space for temporary storage */
3661     ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr);
3662     ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr);
3663     /* allocate for IS array */
3664     nisdofs = pcbddc->n_ISForDofsLocal;
3665     nisneu = !!pcbddc->NeumannBoundariesLocal;
3666     nis = nisdofs + nisneu;
3667     ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr);
3668     /* dofs splitting */
3669     for (i=0;i<nisdofs;i++) {
3670       /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */
3671       ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr);
3672       ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr);
3673       ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr);
3674       ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr);
3675       ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr);
3676       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->ISForDofsLocal[i]),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr);
3677       /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */
3678     }
3679     /* neumann boundaries */
3680     if (pcbddc->NeumannBoundariesLocal) {
3681       /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */
3682       ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr);
3683       ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr);
3684       ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr);
3685       ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr);
3686       ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr);
3687       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->NeumannBoundariesLocal),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr);
3688       /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */
3689     }
3690     /* free memory */
3691     ierr = PetscFree(tidxs);CHKERRQ(ierr);
3692     ierr = PetscFree(tidxs2);CHKERRQ(ierr);
3693     ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr);
3694   } else {
3695     nis = 0;
3696     nisdofs = 0;
3697     nisneu = 0;
3698     isarray = NULL;
3699   }
3700   /* destroy no longer needed map */
3701   ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr);
3702 
3703   /* restrict on coarse candidates (if needed) */
3704   coarse_mat_is = NULL;
3705   if (csin) {
3706     if (!pcbddc->coarse_subassembling_init ) { /* creates subassembling init pattern if not present */
3707       if (redist) {
3708         PetscMPIInt rank;
3709         PetscInt spc,n_spc_p1,dest[1];
3710 
3711         ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr);
3712         spc = all_procs/pcbddc->redistribute_coarse;
3713         n_spc_p1 = all_procs%pcbddc->redistribute_coarse;
3714         if (rank > n_spc_p1*(spc+1)-1) {
3715           dest[0] = n_spc_p1+(rank-(n_spc_p1*(spc+1)))/spc;
3716         } else {
3717           dest[0] = rank/(spc+1);
3718         }
3719         ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),1,dest,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
3720       } else {
3721         PetscInt j,tissize,*nisindices;
3722         PetscInt *coarse_candidates;
3723         const PetscInt* tisindices;
3724         /* get coarse candidates' ranks in pc communicator */
3725         ierr = PetscMalloc1(all_procs,&coarse_candidates);CHKERRQ(ierr);
3726         ierr = MPI_Allgather(&im_active,1,MPIU_INT,coarse_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
3727         for (i=0,j=0;i<all_procs;i++) {
3728           if (!coarse_candidates[i]) {
3729             coarse_candidates[j]=i;
3730             j++;
3731           }
3732         }
3733         if (j < ncoarse) SETERRQ2(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen! %d < %d",j,ncoarse);
3734         /* get a suitable subassembling pattern */
3735         if (csin_type_simple) {
3736           PetscMPIInt rank;
3737           PetscInt    issize,isidx;
3738           ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr);
3739           if (im_active) {
3740             issize = 1;
3741             isidx = (PetscInt)rank;
3742           } else {
3743             issize = 0;
3744             isidx = -1;
3745           }
3746           ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),issize,&isidx,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
3747         } else {
3748           ierr = MatISGetSubassemblingPattern(t_coarse_mat_is,ncoarse,PETSC_TRUE,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
3749         }
3750         if (pcbddc->dbg_flag) {
3751           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
3752           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init (before shift)\n");CHKERRQ(ierr);
3753           ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr);
3754           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse candidates\n");CHKERRQ(ierr);
3755           for (i=0;i<j;i++) {
3756             ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"%d ",coarse_candidates[i]);CHKERRQ(ierr);
3757           }
3758           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"\n");CHKERRQ(ierr);
3759           ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3760         }
3761         /* shift the pattern on coarse candidates */
3762         ierr = ISGetLocalSize(pcbddc->coarse_subassembling_init,&tissize);CHKERRQ(ierr);
3763         ierr = ISGetIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr);
3764         ierr = PetscMalloc1(tissize,&nisindices);CHKERRQ(ierr);
3765         for (i=0;i<tissize;i++) nisindices[i] = coarse_candidates[tisindices[i]];
3766         ierr = ISRestoreIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr);
3767         ierr = ISGeneralSetIndices(pcbddc->coarse_subassembling_init,tissize,nisindices,PETSC_OWN_POINTER);CHKERRQ(ierr);
3768         ierr = PetscFree(coarse_candidates);CHKERRQ(ierr);
3769       }
3770     }
3771     if (pcbddc->dbg_flag) {
3772       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
3773       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init\n");CHKERRQ(ierr);
3774       ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr);
3775       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3776     }
3777     /* get temporary coarse mat in IS format restricted on coarse procs (plus additional index sets of isarray) */
3778     ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr);
3779   } else {
3780     if (pcbddc->dbg_flag) {
3781       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
3782       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init not needed\n");CHKERRQ(ierr);
3783       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3784     }
3785     ierr = PetscObjectReference((PetscObject)t_coarse_mat_is);CHKERRQ(ierr);
3786     coarse_mat_is = t_coarse_mat_is;
3787   }
3788 
3789   /* create local to global scatters for coarse problem */
3790   if (compute_vecs) {
3791     PetscInt lrows;
3792     ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr);
3793     if (coarse_mat_is) {
3794       ierr = MatGetLocalSize(coarse_mat_is,&lrows,NULL);CHKERRQ(ierr);
3795     } else {
3796       lrows = 0;
3797     }
3798     ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr);
3799     ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr);
3800     ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr);
3801     ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
3802     ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
3803   }
3804   ierr = ISDestroy(&coarse_is);CHKERRQ(ierr);
3805   ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr);
3806 
3807   /* set defaults for coarse KSP and PC */
3808   if (multilevel_allowed) {
3809     coarse_ksp_type = KSPRICHARDSON;
3810     coarse_pc_type = PCBDDC;
3811   } else {
3812     coarse_ksp_type = KSPPREONLY;
3813     coarse_pc_type = PCREDUNDANT;
3814   }
3815 
3816   /* print some info if requested */
3817   if (pcbddc->dbg_flag) {
3818     if (!multilevel_allowed) {
3819       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
3820       if (multilevel_requested) {
3821         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr);
3822       } else if (pcbddc->max_levels) {
3823         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr);
3824       }
3825       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3826     }
3827   }
3828 
3829   /* create the coarse KSP object only once with defaults */
3830   if (coarse_mat_is) {
3831     MatReuse coarse_mat_reuse;
3832     PetscViewer dbg_viewer = NULL;
3833     if (pcbddc->dbg_flag) {
3834       dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat_is));
3835       ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr);
3836     }
3837     if (!pcbddc->coarse_ksp) {
3838       char prefix[256],str_level[16];
3839       size_t len;
3840       ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat_is),&pcbddc->coarse_ksp);CHKERRQ(ierr);
3841       ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr);
3842       ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr);
3843       ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat_is,coarse_mat_is);CHKERRQ(ierr);
3844       ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr);
3845       ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr);
3846       ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr);
3847       ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr);
3848       /* prefix */
3849       ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr);
3850       ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr);
3851       if (!pcbddc->current_level) {
3852         ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
3853         ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr);
3854       } else {
3855         ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr);
3856         if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */
3857         if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */
3858         ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr);
3859         sprintf(str_level,"l%d_",(int)(pcbddc->current_level));
3860         ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr);
3861       }
3862       ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr);
3863       /* allow user customization */
3864       ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr);
3865       ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
3866     }
3867 
3868     /* get some info after set from options */
3869     ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr);
3870     ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr);
3871     ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr);
3872     ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr);
3873     if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */
3874       ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr);
3875       isbddc = PETSC_FALSE;
3876     }
3877     if (isredundant) {
3878       KSP inner_ksp;
3879       PC inner_pc;
3880       ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr);
3881       ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr);
3882       ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr);
3883     }
3884 
3885     /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */
3886     ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr);
3887     ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr);
3888     ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr);
3889     if (nisdofs) {
3890       ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr);
3891       for (i=0;i<nisdofs;i++) {
3892         ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr);
3893       }
3894     }
3895     if (nisneu) {
3896       ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr);
3897       ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr);
3898     }
3899 
3900     /* assemble coarse matrix */
3901     if (coarse_reuse) {
3902       ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr);
3903       ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr);
3904       coarse_mat_reuse = MAT_REUSE_MATRIX;
3905     } else {
3906       coarse_mat_reuse = MAT_INITIAL_MATRIX;
3907     }
3908     if (isbddc || isnn) {
3909       if (pcbddc->coarsening_ratio > 1) {
3910         if (!pcbddc->coarse_subassembling) { /* subassembling info is not present */
3911           ierr = MatISGetSubassemblingPattern(coarse_mat_is,active_procs/pcbddc->coarsening_ratio,PETSC_TRUE,&pcbddc->coarse_subassembling);CHKERRQ(ierr);
3912           if (pcbddc->dbg_flag) {
3913             ierr = PetscViewerASCIIPrintf(dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
3914             ierr = PetscViewerASCIIPrintf(dbg_viewer,"Subassembling pattern\n");CHKERRQ(ierr);
3915             ierr = ISView(pcbddc->coarse_subassembling,dbg_viewer);CHKERRQ(ierr);
3916             ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr);
3917           }
3918         }
3919         ierr = MatISSubassemble(coarse_mat_is,pcbddc->coarse_subassembling,0,PETSC_FALSE,coarse_mat_reuse,&coarse_mat,0,NULL);CHKERRQ(ierr);
3920       } else {
3921         ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr);
3922         coarse_mat = coarse_mat_is;
3923       }
3924     } else {
3925       ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr);
3926     }
3927     ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr);
3928 
3929     /* propagate symmetry info to coarse matrix */
3930     ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pcbddc->issym);CHKERRQ(ierr);
3931     ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
3932 
3933     /* set operators */
3934     ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr);
3935     if (pcbddc->dbg_flag) {
3936       ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr);
3937     }
3938   } else { /* processes non partecipating to coarse solver (if any) */
3939     coarse_mat = 0;
3940   }
3941   ierr = PetscFree(isarray);CHKERRQ(ierr);
3942 #if 0
3943   {
3944     PetscViewer viewer;
3945     char filename[256];
3946     sprintf(filename,"coarse_mat.m");
3947     ierr = PetscViewerASCIIOpen(PETSC_COMM_WORLD,filename,&viewer);CHKERRQ(ierr);
3948     ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
3949     ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr);
3950     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
3951   }
3952 #endif
3953 
3954   /* Compute coarse null space (special handling by BDDC only) */
3955   if (pcbddc->NullSpace) {
3956     ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr);
3957   }
3958 
3959   if (pcbddc->coarse_ksp) {
3960     Vec crhs,csol;
3961     PetscBool ispreonly;
3962     if (CoarseNullSpace) {
3963       if (isbddc) {
3964         ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr);
3965       } else {
3966         ierr = KSPSetNullSpace(pcbddc->coarse_ksp,CoarseNullSpace);CHKERRQ(ierr);
3967       }
3968     }
3969     /* setup coarse ksp */
3970     ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr);
3971     ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr);
3972     ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr);
3973     /* hack */
3974     if (!csol) {
3975       ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr);
3976     }
3977     if (!crhs) {
3978       ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr);
3979     }
3980     /* Check coarse problem if in debug mode or if solving with an iterative method */
3981     ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr);
3982     if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) {
3983       KSP       check_ksp;
3984       KSPType   check_ksp_type;
3985       PC        check_pc;
3986       Vec       check_vec,coarse_vec;
3987       PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0;
3988       PetscInt  its;
3989       PetscBool compute_eigs;
3990       PetscReal *eigs_r,*eigs_c;
3991       PetscInt  neigs;
3992       const char *prefix;
3993 
3994       /* Create ksp object suitable for estimation of extreme eigenvalues */
3995       ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr);
3996       ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr);
3997       ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr);
3998       if (ispreonly) {
3999         check_ksp_type = KSPPREONLY;
4000         compute_eigs = PETSC_FALSE;
4001       } else {
4002         check_ksp_type = KSPGMRES;
4003         compute_eigs = PETSC_TRUE;
4004       }
4005       ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr);
4006       ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr);
4007       ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr);
4008       ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr);
4009       ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr);
4010       ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr);
4011       ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr);
4012       ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr);
4013       ierr = KSPSetUp(check_ksp);CHKERRQ(ierr);
4014       ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr);
4015       ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr);
4016       /* create random vec */
4017       ierr = KSPGetSolution(pcbddc->coarse_ksp,&coarse_vec);CHKERRQ(ierr);
4018       ierr = VecDuplicate(coarse_vec,&check_vec);CHKERRQ(ierr);
4019       ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr);
4020       if (CoarseNullSpace) {
4021         ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr);
4022       }
4023       ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr);
4024       /* solve coarse problem */
4025       ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr);
4026       if (CoarseNullSpace) {
4027         ierr = MatNullSpaceRemove(CoarseNullSpace,coarse_vec);CHKERRQ(ierr);
4028       }
4029       /* set eigenvalue estimation if preonly has not been requested */
4030       if (compute_eigs) {
4031         ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr);
4032         ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr);
4033         ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr);
4034         lambda_max = eigs_r[neigs-1];
4035         lambda_min = eigs_r[0];
4036         if (pcbddc->use_coarse_estimates) {
4037           if (lambda_max>lambda_min) {
4038             ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max,lambda_min);CHKERRQ(ierr);
4039             ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr);
4040           }
4041         }
4042       }
4043 
4044       /* check coarse problem residual error */
4045       if (pcbddc->dbg_flag) {
4046         PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp));
4047         ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr);
4048         ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr);
4049         ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr);
4050         ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr);
4051         ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr);
4052         ierr = VecDestroy(&check_vec);CHKERRQ(ierr);
4053         ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (%d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr);
4054         ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr);
4055         ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr);
4056         ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error   : %1.6e\n",infty_error);CHKERRQ(ierr);
4057         ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr);
4058         if (compute_eigs) {
4059           PetscReal lambda_max_s,lambda_min_s;
4060           ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr);
4061           ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr);
4062           ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr);
4063           ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr);
4064           for (i=0;i<neigs;i++) {
4065             ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr);
4066           }
4067         }
4068         ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr);
4069         ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr);
4070       }
4071       ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr);
4072       if (compute_eigs) {
4073         ierr = PetscFree(eigs_r);CHKERRQ(ierr);
4074         ierr = PetscFree(eigs_c);CHKERRQ(ierr);
4075       }
4076     }
4077   }
4078   /* print additional info */
4079   if (pcbddc->dbg_flag) {
4080     /* waits until all processes reaches this point */
4081     ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr);
4082     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr);
4083     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4084   }
4085 
4086   /* free memory */
4087   ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr);
4088   ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr);
4089   PetscFunctionReturn(0);
4090 }
4091 
4092 #undef __FUNCT__
4093 #define __FUNCT__ "PCBDDCComputePrimalNumbering"
4094 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n)
4095 {
4096   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
4097   PC_IS*         pcis = (PC_IS*)pc->data;
4098   Mat_IS*        matis = (Mat_IS*)pc->pmat->data;
4099   PetscInt       i,coarse_size;
4100   PetscInt       *local_primal_indices;
4101   PetscErrorCode ierr;
4102 
4103   PetscFunctionBegin;
4104   /* Compute global number of coarse dofs */
4105   if (!pcbddc->primal_indices_local_idxs && pcbddc->local_primal_size) {
4106     SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Local primal indices have not been created");
4107   }
4108   ierr = PCBDDCSubsetNumbering(PetscObjectComm((PetscObject)(pc->pmat)),matis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,NULL,&coarse_size,&local_primal_indices);CHKERRQ(ierr);
4109 
4110   /* check numbering */
4111   if (pcbddc->dbg_flag) {
4112     PetscScalar coarsesum,*array;
4113     PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE;
4114 
4115     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4116     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4117     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr);
4118     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
4119     ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr);
4120     for (i=0;i<pcbddc->local_primal_size;i++) {
4121       ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
4122     }
4123     ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
4124     ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
4125     ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
4126     ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
4127     ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
4128     ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
4129     ierr = VecScatterEnd(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
4130     ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr);
4131     for (i=0;i<pcis->n;i++) {
4132       if (array[i] == 1.0) {
4133         set_error = PETSC_TRUE;
4134         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a single process!\n",PetscGlobalRank,i);CHKERRQ(ierr);
4135       }
4136     }
4137     ierr = MPI_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
4138     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4139     for (i=0;i<pcis->n;i++) {
4140       if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]);
4141     }
4142     ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr);
4143     ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
4144     ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
4145     ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
4146     ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr);
4147     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr);
4148     if (pcbddc->dbg_flag > 1 || set_error_reduced) {
4149       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr);
4150       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4151       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr);
4152       for (i=0;i<pcbddc->local_primal_size;i++) {
4153         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i]);
4154       }
4155       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4156     }
4157     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4158     if (set_error_reduced) {
4159       SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed");
4160     }
4161   }
4162   /* get back data */
4163   *coarse_size_n = coarse_size;
4164   *local_primal_indices_n = local_primal_indices;
4165   PetscFunctionReturn(0);
4166 }
4167 
4168 #undef __FUNCT__
4169 #define __FUNCT__ "PCBDDCGlobalToLocal"
4170 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis)
4171 {
4172   IS             localis_t;
4173   PetscInt       i,lsize,*idxs,n;
4174   PetscScalar    *vals;
4175   PetscErrorCode ierr;
4176 
4177   PetscFunctionBegin;
4178   /* get indices in local ordering exploiting local to global map */
4179   ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr);
4180   ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr);
4181   for (i=0;i<lsize;i++) vals[i] = 1.0;
4182   ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr);
4183   ierr = VecSet(gwork,0.0);CHKERRQ(ierr);
4184   ierr = VecSet(lwork,0.0);CHKERRQ(ierr);
4185   if (idxs) { /* multilevel guard */
4186     ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr);
4187   }
4188   ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr);
4189   ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr);
4190   ierr = PetscFree(vals);CHKERRQ(ierr);
4191   ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr);
4192   /* now compute set in local ordering */
4193   ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
4194   ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
4195   ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr);
4196   ierr = VecGetSize(lwork,&n);CHKERRQ(ierr);
4197   for (i=0,lsize=0;i<n;i++) {
4198     if (PetscRealPart(vals[i]) > 0.5) {
4199       lsize++;
4200     }
4201   }
4202   ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr);
4203   for (i=0,lsize=0;i<n;i++) {
4204     if (PetscRealPart(vals[i]) > 0.5) {
4205       idxs[lsize++] = i;
4206     }
4207   }
4208   ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr);
4209   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr);
4210   *localis = localis_t;
4211   PetscFunctionReturn(0);
4212 }
4213 
4214 /* the next two functions will be called in KSPMatMult if a change of basis has been requested */
4215 #undef __FUNCT__
4216 #define __FUNCT__ "PCBDDCMatMult_Private"
4217 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y)
4218 {
4219   PCBDDCChange_ctx change_ctx;
4220   PetscErrorCode   ierr;
4221 
4222   PetscFunctionBegin;
4223   ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr);
4224   ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr);
4225   ierr = MatMult(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr);
4226   ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr);
4227   PetscFunctionReturn(0);
4228 }
4229 
4230 #undef __FUNCT__
4231 #define __FUNCT__ "PCBDDCMatMultTranspose_Private"
4232 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y)
4233 {
4234   PCBDDCChange_ctx change_ctx;
4235   PetscErrorCode   ierr;
4236 
4237   PetscFunctionBegin;
4238   ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr);
4239   ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr);
4240   ierr = MatMultTranspose(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr);
4241   ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr);
4242   PetscFunctionReturn(0);
4243 }
4244